Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-04-29 02:41:33

0001 //#define BROKENLINE_DEBUG
0002 //#define BL_DUMP_HITS
0003 
0004 #include <cstdint>
0005 
0006 #include <alpaka/alpaka.hpp>
0007 
0008 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/debug.h"
0011 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0012 #include "RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h"
0013 
0014 #include "HelixFit.h"
0015 
0016 template <typename TrackerTraits>
0017 using Tuples = typename reco::TrackSoA<TrackerTraits>::HitContainer;
0018 template <typename TrackerTraits>
0019 using OutputSoAView = reco::TrackSoAView<TrackerTraits>;
0020 template <typename TrackerTraits>
0021 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0022 
0023 // #define BL_DUMP_HITS
0024 
0025 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0026   template <int N, typename TrackerTraits>
0027   class Kernel_BLFastFit {
0028   public:
0029     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0030     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0031                                   Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
0032                                   TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0033                                   TrackingRecHitSoAConstView<TrackerTraits> hh,
0034                                   pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *__restrict__ cpeParams,
0035                                   typename TrackerTraits::tindex_type *__restrict__ ptkids,
0036                                   double *__restrict__ phits,
0037                                   float *__restrict__ phits_ge,
0038                                   double *__restrict__ pfast_fit,
0039                                   uint32_t nHitsL,
0040                                   uint32_t nHitsH,
0041                                   int32_t offset) const {
0042       constexpr uint32_t hitsInFit = N;
0043       constexpr auto invalidTkId = std::numeric_limits<typename TrackerTraits::tindex_type>::max();
0044 
0045       ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL);
0046       ALPAKA_ASSERT_ACC(nHitsL <= nHitsH);
0047       ALPAKA_ASSERT_ACC(phits);
0048       ALPAKA_ASSERT_ACC(pfast_fit);
0049       ALPAKA_ASSERT_ACC(foundNtuplets);
0050       ALPAKA_ASSERT_ACC(tupleMultiplicity);
0051 
0052       // look in bin for this hit multiplicity
0053       int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
0054       ALPAKA_ASSERT_ACC(totTK <= int(tupleMultiplicity->size()));
0055       ALPAKA_ASSERT_ACC(totTK >= 0);
0056 
0057 #ifdef BROKENLINE_DEBUG
0058       const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
0059       if (cms::alpakatools::once_per_grid(acc)) {
0060         printf("%d total Ntuple\n", tupleMultiplicity->size());
0061         printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
0062       }
0063 #endif
0064       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0065       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0066         auto tuple_idx = local_idx + offset;
0067         if ((int)tuple_idx >= totTK) {
0068           ptkids[local_idx] = invalidTkId;
0069           break;
0070         }
0071         // get it from the ntuple container (one to one to helix)
0072         auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
0073         ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0074 
0075         ptkids[local_idx] = tkid;
0076 
0077         auto nHits = foundNtuplets->size(tkid);
0078 
0079         ALPAKA_ASSERT_ACC(nHits >= nHitsL);
0080         ALPAKA_ASSERT_ACC(nHits <= nHitsH);
0081 
0082         riemannFit::Map3xNd<N> hits(phits + local_idx);
0083         riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0084         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0085 
0086 #ifdef BL_DUMP_HITS
0087         auto &&done = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0088         done = 0;
0089         alpaka::syncBlockThreads(acc);
0090         bool dump =
0091             (foundNtuplets->size(tkid) == 5 && 0 == alpaka::atomicAdd(acc, &done, 1, alpaka::hierarchy::Blocks{}));
0092 #endif
0093 
0094         // Prepare data structure
0095         auto const *hitId = foundNtuplets->begin(tkid);
0096 
0097         // #define YERR_FROM_DC
0098 #ifdef YERR_FROM_DC
0099         // try to compute more precise error in y
0100         auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
0101         auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
0102         auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
0103         float ux, uy, uz;
0104 #endif
0105 
0106         float incr = std::max(1.f, float(nHits) / float(hitsInFit));
0107         float n = 0;
0108         for (uint32_t i = 0; i < hitsInFit; ++i) {
0109           int j = int(n + 0.5f);  // round
0110           if (hitsInFit - 1 == i)
0111             j = nHits - 1;  // force last hit to ensure max lever arm.
0112           ALPAKA_ASSERT_ACC(j < int(nHits));
0113           n += incr;
0114           auto hit = hitId[j];
0115           float ge[6];
0116 
0117 #ifdef YERR_FROM_DC
0118           auto const &dp = cpeParams->detParams(hh.detectorIndex(hit));
0119           auto status = hh[hit].chargeAndStatus().status;
0120           int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
0121           ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5);
0122           bool nok = (status.isBigY | status.isOneY);
0123           // compute cotanbeta and use it to recompute error
0124           dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
0125           auto cb = std::abs(uy / uz);
0126           int bin =
0127               int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
0128           int low_value = 0;
0129           int high_value = CPEFastParametrisation::kNumErrorBins - 1;
0130           // return estimated bin value truncated to [0, 15]
0131           bin = std::clamp(bin, low_value, high_value);
0132           float yerr = dp.sigmay[bin] * 1.e-4f;  // toCM
0133           yerr *= dp.yfact[qbin];                // inflate
0134           yerr *= yerr;
0135           yerr += dp.apeYY;
0136           yerr = nok ? hh[hit].yerrLocal() : yerr;
0137           dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
0138 #else
0139           cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0140 #endif
0141 
0142 #ifdef BL_DUMP_HITS
0143           bool dump = foundNtuplets->size(tkid) == 5;
0144           if (dump) {
0145             printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
0146                    local_idx,
0147                    tkid,
0148                    hit,
0149                    hh[hit].detectorIndex(),
0150                    i,
0151                    hh[hit].xGlobal(),
0152                    hh[hit].yGlobal(),
0153                    hh[hit].zGlobal());
0154             printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
0155           }
0156 #endif
0157 
0158           hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0159           hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0160         }
0161         brokenline::fastFit(acc, hits, fast_fit);
0162 
0163 #ifdef BROKENLINE_DEBUG
0164         // any NaN value should cause the track to be rejected at a later stage
0165         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0166         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0167         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0168         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0169 #endif
0170       }
0171     }
0172   };
0173 
0174   template <int N, typename TrackerTraits>
0175   struct Kernel_BLFit {
0176   public:
0177     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0178     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0179                                   TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0180                                   double bField,
0181                                   OutputSoAView<TrackerTraits> results_view,
0182                                   typename TrackerTraits::tindex_type const *__restrict__ ptkids,
0183                                   double *__restrict__ phits,
0184                                   float *__restrict__ phits_ge,
0185                                   double *__restrict__ pfast_fit) const {
0186       // workaround for #47808
0187       debug::do_not_optimise(results_view);
0188 
0189       ALPAKA_ASSERT_ACC(results_view.pt());
0190       ALPAKA_ASSERT_ACC(results_view.eta());
0191       ALPAKA_ASSERT_ACC(results_view.chi2());
0192       ALPAKA_ASSERT_ACC(pfast_fit);
0193       constexpr auto invalidTkId = std::numeric_limits<typename TrackerTraits::tindex_type>::max();
0194 
0195       // same as above...
0196       // look in bin for this hit multiplicity
0197       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0198       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0199         if (invalidTkId == ptkids[local_idx])
0200           break;
0201         auto tkid = ptkids[local_idx];
0202 
0203         ALPAKA_ASSERT_ACC(tkid < TrackerTraits::maxNumberOfTuples);
0204 
0205         riemannFit::Map3xNd<N> hits(phits + local_idx);
0206         riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0207         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0208 
0209         brokenline::PreparedBrokenLineData<N> data;
0210 
0211         brokenline::karimaki_circle_fit circle;
0212         riemannFit::LineFit line;
0213 
0214         brokenline::prepareBrokenLineData(acc, hits, fast_fit, bField, data);
0215         brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line);
0216         brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle);
0217 
0218         TracksUtilities<TrackerTraits>::copyFromCircle(
0219             results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
0220         results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
0221         results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0));
0222         results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
0223 
0224 #ifdef BROKENLINE_DEBUG
0225         if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
0226           printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
0227         printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0228                N,
0229                N,
0230                tkid,
0231                circle.par(0),
0232                circle.par(1),
0233                circle.par(2));
0234         printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
0235         printf("kernelBLHits chi2 cov %f/%f  %e,%e,%e,%e,%e\n",
0236                circle.chi2,
0237                line.chi2,
0238                circle.cov(0, 0),
0239                circle.cov(1, 1),
0240                circle.cov(2, 2),
0241                line.cov(0, 0),
0242                line.cov(1, 1));
0243 #endif
0244       }
0245     }
0246   };
0247 
0248   template <typename TrackerTraits>
0249   void HelixFit<TrackerTraits>::launchBrokenLineKernels(
0250       const TrackingRecHitSoAConstView<TrackerTraits> &hv,
0251       pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *cpeParams,
0252       uint32_t hitsInFit,
0253       uint32_t maxNumberOfTuples,
0254       Queue &queue) {
0255     ALPAKA_ASSERT_ACC(tuples_);
0256 
0257     uint32_t blockSize = 64;
0258     uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize);
0259     const WorkDiv1D workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0260     const WorkDiv1D workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0261 
0262     //  Fit internals
0263     auto tkidDevice =
0264         cms::alpakatools::make_device_buffer<typename TrackerTraits::tindex_type[]>(queue, maxNumberOfConcurrentFits_);
0265     auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0266         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double));
0267     auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0268         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float));
0269     auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0270         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0271 
0272     for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0273       // fit triplets
0274 
0275       alpaka::exec<Acc1D>(queue,
0276                           workDivTriplets,
0277                           Kernel_BLFastFit<3, TrackerTraits>{},
0278                           tuples_,
0279                           tupleMultiplicity_,
0280                           hv,
0281                           cpeParams,
0282                           tkidDevice.data(),
0283                           hitsDevice.data(),
0284                           hits_geDevice.data(),
0285                           fast_fit_resultsDevice.data(),
0286                           3,
0287                           3,
0288                           offset);
0289 
0290       alpaka::exec<Acc1D>(queue,
0291                           workDivTriplets,
0292                           Kernel_BLFit<3, TrackerTraits>{},
0293                           tupleMultiplicity_,
0294                           bField_,
0295                           outputSoa_,
0296                           tkidDevice.data(),
0297                           hitsDevice.data(),
0298                           hits_geDevice.data(),
0299                           fast_fit_resultsDevice.data());
0300 
0301       if (fitNas4_) {
0302         // fit all as 4
0303         riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this,
0304                                                                        &hv,
0305                                                                        &cpeParams,
0306                                                                        &tkidDevice,
0307                                                                        &hitsDevice,
0308                                                                        &hits_geDevice,
0309                                                                        &fast_fit_resultsDevice,
0310                                                                        &offset,
0311                                                                        &queue,
0312                                                                        &workDivQuadsPenta](auto i) {
0313           alpaka::exec<Acc1D>(queue,
0314                               workDivQuadsPenta,
0315                               Kernel_BLFastFit<4, TrackerTraits>{},
0316                               tuples_,
0317                               tupleMultiplicity_,
0318                               hv,
0319                               cpeParams,
0320                               tkidDevice.data(),
0321                               hitsDevice.data(),
0322                               hits_geDevice.data(),
0323                               fast_fit_resultsDevice.data(),
0324                               4,
0325                               4,
0326                               offset);
0327 
0328           alpaka::exec<Acc1D>(queue,
0329                               workDivQuadsPenta,
0330                               Kernel_BLFit<4, TrackerTraits>{},
0331                               tupleMultiplicity_,
0332                               bField_,
0333                               outputSoa_,
0334                               tkidDevice.data(),
0335                               hitsDevice.data(),
0336                               hits_geDevice.data(),
0337                               fast_fit_resultsDevice.data());
0338         });
0339 
0340       } else {
0341         riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this,
0342                                                                                  &hv,
0343                                                                                  &cpeParams,
0344                                                                                  &tkidDevice,
0345                                                                                  &hitsDevice,
0346                                                                                  &hits_geDevice,
0347                                                                                  &fast_fit_resultsDevice,
0348                                                                                  &offset,
0349                                                                                  &queue,
0350                                                                                  &workDivQuadsPenta](auto i) {
0351           alpaka::exec<Acc1D>(queue,
0352                               workDivQuadsPenta,
0353                               Kernel_BLFastFit<i, TrackerTraits>{},
0354                               tuples_,
0355                               tupleMultiplicity_,
0356                               hv,
0357                               cpeParams,
0358                               tkidDevice.data(),
0359                               hitsDevice.data(),
0360                               hits_geDevice.data(),
0361                               fast_fit_resultsDevice.data(),
0362                               i,
0363                               i,
0364                               offset);
0365 
0366           alpaka::exec<Acc1D>(queue,
0367                               workDivQuadsPenta,
0368                               Kernel_BLFit<i, TrackerTraits>{},
0369                               tupleMultiplicity_,
0370                               bField_,
0371                               outputSoa_,
0372                               tkidDevice.data(),
0373                               hitsDevice.data(),
0374                               hits_geDevice.data(),
0375                               fast_fit_resultsDevice.data());
0376         });
0377 
0378         static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack);
0379 
0380         //Fit all the rest using the maximum from previous call
0381         alpaka::exec<Acc1D>(queue,
0382                             workDivQuadsPenta,
0383                             Kernel_BLFastFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>{},
0384                             tuples_,
0385                             tupleMultiplicity_,
0386                             hv,
0387                             cpeParams,
0388                             tkidDevice.data(),
0389                             hitsDevice.data(),
0390                             hits_geDevice.data(),
0391                             fast_fit_resultsDevice.data(),
0392                             TrackerTraits::maxHitsOnTrackForFullFit,
0393                             TrackerTraits::maxHitsOnTrack - 1,
0394                             offset);
0395 
0396         alpaka::exec<Acc1D>(queue,
0397                             workDivQuadsPenta,
0398                             Kernel_BLFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>{},
0399                             tupleMultiplicity_,
0400                             bField_,
0401                             outputSoa_,
0402                             tkidDevice.data(),
0403                             hitsDevice.data(),
0404                             hits_geDevice.data(),
0405                             fast_fit_resultsDevice.data());
0406       }
0407 
0408     }  // loop on concurrent fits
0409   }
0410 
0411   template class HelixFit<pixelTopology::Phase1>;
0412   template class HelixFit<pixelTopology::Phase2>;
0413   template class HelixFit<pixelTopology::HIonPhase1>;
0414 
0415 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE