Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-07-03 00:42:40

0001 // #define BROKENLINE_DEBUG
0002 // #define BL_DUMP_HITS
0003 // #define GPU_DEBUG
0004 #include <cstdint>
0005 
0006 #include <alpaka/alpaka.hpp>
0007 
0008 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/debug.h"
0011 #include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h"
0012 #include "RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h"
0013 
0014 #include "HelixFit.h"
0015 
0016 using OutputSoAView = reco::TrackSoAView;
0017 using TupleMultiplicity = caStructures::GenericContainer;
0018 using Tuples = caStructures::SequentialContainer;
0019 
0020 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0021 
0022   template <int N>
0023   class Kernel_BLFastFit {
0024   public:
0025     ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0026                                   Tuples const *__restrict__ foundNtuplets,
0027                                   TupleMultiplicity const *__restrict__ tupleMultiplicity,
0028                                   ::reco::TrackingRecHitConstView hh,
0029                                   ::reco::CAModulesConstView cm,
0030                                   typename caStructures::tindex_type *__restrict__ ptkids,
0031                                   double *__restrict__ phits,
0032                                   float *__restrict__ phits_ge,
0033                                   double *__restrict__ pfast_fit,
0034                                   uint32_t nHitsL,
0035                                   uint32_t nHitsH,
0036                                   int32_t offset) const {
0037       constexpr uint32_t hitsInFit = N;
0038       constexpr auto invalidTkId = std::numeric_limits<typename caStructures::tindex_type>::max();
0039 
0040       ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL);
0041       ALPAKA_ASSERT_ACC(nHitsL <= nHitsH);
0042       ALPAKA_ASSERT_ACC(phits);
0043       ALPAKA_ASSERT_ACC(pfast_fit);
0044       ALPAKA_ASSERT_ACC(foundNtuplets);
0045       ALPAKA_ASSERT_ACC(tupleMultiplicity);
0046 
0047       // look in bin for this hit multiplicity
0048       int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
0049       ALPAKA_ASSERT_ACC(totTK <= int(tupleMultiplicity->size()));
0050       ALPAKA_ASSERT_ACC(totTK >= 0);
0051 
0052 #ifdef BROKENLINE_DEBUG
0053       if (cms::alpakatools::once_per_grid(acc)) {
0054         printf("%d total Ntuple\n", tupleMultiplicity->size());
0055         printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
0056       }
0057 #endif
0058       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0059       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0060         auto tuple_idx = local_idx + offset;
0061         if ((int)tuple_idx >= totTK) {
0062           ptkids[local_idx] = invalidTkId;
0063           break;
0064         }
0065         // get it from the ntuple container (one to one to helix)
0066         auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
0067         ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0068 
0069         ptkids[local_idx] = tkid;
0070 
0071         auto nHits = foundNtuplets->size(tkid);
0072 
0073         ALPAKA_ASSERT_ACC(nHits >= nHitsL);
0074         ALPAKA_ASSERT_ACC(nHits <= nHitsH);
0075 
0076         riemannFit::Map3xNd<N> hits(phits + local_idx);
0077         riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0078         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0079 
0080 #ifdef BL_DUMP_HITS
0081         auto &&done = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0082         done = 0;
0083         alpaka::syncBlockThreads(acc);
0084         bool dump =
0085             (foundNtuplets->size(tkid) == 5 && 0 == alpaka::atomicAdd(acc, &done, 1, alpaka::hierarchy::Blocks{}));
0086 #endif
0087 
0088         // Prepare data structure
0089         auto const *hitId = foundNtuplets->begin(tkid);
0090 
0091         // #define YERR_FROM_DC
0092 #ifdef YERR_FROM_DC
0093         // try to compute more precise error in y
0094         auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
0095         auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
0096         auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
0097         float ux, uy, uz;
0098 #endif
0099 
0100         float incr = std::max(1.f, float(nHits) / float(hitsInFit));
0101         float n = 0;
0102         for (uint32_t i = 0; i < hitsInFit; ++i) {
0103           int j = int(n + 0.5f);  // round
0104           if (hitsInFit - 1 == i)
0105             j = nHits - 1;  // force last hit to ensure max lever arm.
0106           ALPAKA_ASSERT_ACC(j < int(nHits));
0107           n += incr;
0108           auto hit = hitId[j];
0109           float ge[6];
0110 
0111 #ifdef YERR_FROM_DC
0112           auto const &dp = cm->detParams(hh.detectorIndex(hit));
0113           auto status = hh[hit].chargeAndStatus().status;
0114           int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
0115           ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5);
0116           bool nok = (status.isBigY | status.isOneY);
0117           // compute cotanbeta and use it to recompute error
0118           dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
0119           auto cb = std::abs(uy / uz);
0120           int bin =
0121               int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
0122           int low_value = 0;
0123           int high_value = CPEFastParametrisation::kNumErrorBins - 1;
0124           // return estimated bin value truncated to [0, 15]
0125           bin = std::clamp(bin, low_value, high_value);
0126           float yerr = dp.sigmay[bin] * 1.e-4f;  // toCM
0127           yerr *= dp.yfact[qbin];                // inflate
0128           yerr *= yerr;
0129           yerr += dp.apeYY;
0130           yerr = nok ? hh[hit].yerrLocal() : yerr;
0131           dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
0132 #else
0133           auto const &frame = cm.detFrame(hh.detectorIndex(hit));
0134           frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0135 #endif
0136 
0137 #ifdef BL_DUMP_HITS
0138           bool dump = foundNtuplets->size(tkid) == 5;
0139           if (dump) {
0140             printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
0141                    local_idx,
0142                    tkid,
0143                    hit,
0144                    hh[hit].detectorIndex(),
0145                    i,
0146                    hh[hit].xGlobal(),
0147                    hh[hit].yGlobal(),
0148                    hh[hit].zGlobal());
0149             printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
0150           }
0151 #endif
0152 
0153           hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0154           hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0155         }
0156         brokenline::fastFit(acc, hits, fast_fit);
0157 
0158 #ifdef BROKENLINE_DEBUG
0159         // any NaN value should cause the track to be rejected at a later stage
0160         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0161         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0162         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0163         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0164 #endif
0165       }
0166     }
0167   };
0168 
0169   template <int N, typename TrackerTraits>
0170   struct Kernel_BLFit {
0171   public:
0172     ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0173                                   TupleMultiplicity const *__restrict__ tupleMultiplicity,
0174                                   double bField,
0175                                   OutputSoAView results_view,
0176                                   typename caStructures::tindex_type const *__restrict__ ptkids,
0177                                   double *__restrict__ phits,
0178                                   float *__restrict__ phits_ge,
0179                                   double *__restrict__ pfast_fit) const {
0180       // workaround for #47808
0181       debug::do_not_optimise(results_view);
0182 
0183       ALPAKA_ASSERT_ACC(results_view.pt());
0184       ALPAKA_ASSERT_ACC(results_view.eta());
0185       ALPAKA_ASSERT_ACC(results_view.chi2());
0186       ALPAKA_ASSERT_ACC(pfast_fit);
0187 
0188       constexpr auto invalidTkId = std::numeric_limits<typename caStructures::tindex_type>::max();
0189 
0190       // same as above...
0191       // look in bin for this hit multiplicity
0192       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0193       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0194         if (invalidTkId == ptkids[local_idx])
0195           break;
0196         auto tkid = ptkids[local_idx];
0197 
0198         ALPAKA_ASSERT_ACC(int(tkid) < tupleMultiplicity->capacity());
0199 
0200         riemannFit::Map3xNd<N> hits(phits + local_idx);
0201         riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0202         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0203 
0204         brokenline::PreparedBrokenLineData<N> data;
0205 
0206         brokenline::karimaki_circle_fit circle;
0207         riemannFit::LineFit line;
0208 
0209         brokenline::prepareBrokenLineData(acc, hits, fast_fit, bField, data);
0210         brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line);
0211         brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle);
0212 
0213         reco::copyFromCircle(results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
0214         results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
0215         results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0));
0216         results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
0217 
0218 #ifdef BROKENLINE_DEBUG
0219         if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
0220           printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
0221         printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0222                N,
0223                N,
0224                tkid,
0225                circle.par(0),
0226                circle.par(1),
0227                circle.par(2));
0228         printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
0229         printf("kernelBLHits chi2 cov %f/%f  %e,%e,%e,%e,%e\n",
0230                circle.chi2,
0231                line.chi2,
0232                circle.cov(0, 0),
0233                circle.cov(1, 1),
0234                circle.cov(2, 2),
0235                line.cov(0, 0),
0236                line.cov(1, 1));
0237 #endif
0238       }
0239     }
0240   };
0241 
0242   template <typename TrackerTraits>
0243   void HelixFit<TrackerTraits>::launchBrokenLineKernels(const ::reco::TrackingRecHitConstView &hv,
0244                                                         const ::reco::CAModulesConstView &cm,
0245                                                         uint32_t hitsInFit,
0246                                                         uint32_t maxNumberOfTuples,
0247                                                         Queue &queue) {
0248     ALPAKA_ASSERT_ACC(tuples_);
0249 
0250 #ifdef GPU_DEBUG
0251     alpaka::wait(queue);
0252     std::cout << "Starting HelixFit<TrackerTraits>::launchBrokenLineKernels" << std::endl;
0253 #endif
0254 
0255     uint32_t blockSize = 64;
0256     uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize);
0257     const WorkDiv1D workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0258     const WorkDiv1D workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0259 
0260     //  Fit internals
0261     auto tkidDevice =
0262         cms::alpakatools::make_device_buffer<typename caStructures::tindex_type[]>(queue, maxNumberOfConcurrentFits_);
0263     auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0264         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double));
0265     auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0266         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float));
0267     auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0268         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0269 
0270     for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0271       // fit triplets
0272 
0273       alpaka::exec<Acc1D>(queue,
0274                           workDivTriplets,
0275                           Kernel_BLFastFit<3>{},
0276                           tuples_,
0277                           tupleMultiplicity_,
0278                           hv,
0279                           cm,
0280                           tkidDevice.data(),
0281                           hitsDevice.data(),
0282                           hits_geDevice.data(),
0283                           fast_fit_resultsDevice.data(),
0284                           3,
0285                           3,
0286                           offset);
0287 #ifdef GPU_DEBUG
0288       alpaka::wait(queue);
0289       std::cout << "Kernel_BLFastFit(3) -> done! " << std::endl;
0290 #endif
0291       alpaka::exec<Acc1D>(queue,
0292                           workDivTriplets,
0293                           Kernel_BLFit<3, TrackerTraits>{},
0294                           tupleMultiplicity_,
0295                           bField_,
0296                           outputSoa_,
0297                           tkidDevice.data(),
0298                           hitsDevice.data(),
0299                           hits_geDevice.data(),
0300                           fast_fit_resultsDevice.data());
0301 #ifdef GPU_DEBUG
0302       alpaka::wait(queue);
0303       std::cout << "Kernel_BLFit(3) -> done! " << std::endl;
0304 #endif
0305 
0306       if (fitNas4_) {
0307         // fit all as 4
0308         riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this,
0309                                                                        &hv,
0310                                                                        &cm,
0311                                                                        &tkidDevice,
0312                                                                        &hitsDevice,
0313                                                                        &hits_geDevice,
0314                                                                        &fast_fit_resultsDevice,
0315                                                                        &offset,
0316                                                                        &queue,
0317                                                                        &workDivQuadsPenta](auto i) {
0318           alpaka::exec<Acc1D>(queue,
0319                               workDivQuadsPenta,
0320                               Kernel_BLFastFit<4>{},
0321                               tuples_,
0322                               tupleMultiplicity_,
0323                               hv,
0324                               cm,
0325                               tkidDevice.data(),
0326                               hitsDevice.data(),
0327                               hits_geDevice.data(),
0328                               fast_fit_resultsDevice.data(),
0329                               4,
0330                               4,
0331                               offset);
0332 
0333           alpaka::exec<Acc1D>(queue,
0334                               workDivQuadsPenta,
0335                               Kernel_BLFit<4, TrackerTraits>{},
0336                               tupleMultiplicity_,
0337                               bField_,
0338                               outputSoa_,
0339                               tkidDevice.data(),
0340                               hitsDevice.data(),
0341                               hits_geDevice.data(),
0342                               fast_fit_resultsDevice.data());
0343         });
0344       } else {
0345         riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this,
0346                                                                                  &hv,
0347                                                                                  &cm,
0348                                                                                  &tkidDevice,
0349                                                                                  &hitsDevice,
0350                                                                                  &hits_geDevice,
0351                                                                                  &fast_fit_resultsDevice,
0352                                                                                  &offset,
0353                                                                                  &queue,
0354                                                                                  &workDivQuadsPenta](auto i) {
0355           alpaka::exec<Acc1D>(queue,
0356                               workDivQuadsPenta,
0357                               Kernel_BLFastFit<i>{},
0358                               tuples_,
0359                               tupleMultiplicity_,
0360                               hv,
0361                               cm,
0362                               tkidDevice.data(),
0363                               hitsDevice.data(),
0364                               hits_geDevice.data(),
0365                               fast_fit_resultsDevice.data(),
0366                               i,
0367                               i,
0368                               offset);
0369 
0370           alpaka::exec<Acc1D>(queue,
0371                               workDivQuadsPenta,
0372                               Kernel_BLFit<i, TrackerTraits>{},
0373                               tupleMultiplicity_,
0374                               bField_,
0375                               outputSoa_,
0376                               tkidDevice.data(),
0377                               hitsDevice.data(),
0378                               hits_geDevice.data(),
0379                               fast_fit_resultsDevice.data());
0380 #ifdef GPU_DEBUG
0381           alpaka::wait(queue);
0382           std::cout << "Kernel_BLFastFit(" << i << ") and Kernel_BLFit(" << i << ") -> done! " << std::endl;
0383 #endif
0384         });
0385 
0386         static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack);
0387 
0388         //Fit all the rest using the maximum from previous call
0389         alpaka::exec<Acc1D>(queue,
0390                             workDivQuadsPenta,
0391                             Kernel_BLFastFit<TrackerTraits::maxHitsOnTrackForFullFit>{},
0392                             tuples_,
0393                             tupleMultiplicity_,
0394                             hv,
0395                             cm,
0396                             tkidDevice.data(),
0397                             hitsDevice.data(),
0398                             hits_geDevice.data(),
0399                             fast_fit_resultsDevice.data(),
0400                             TrackerTraits::maxHitsOnTrackForFullFit,
0401                             TrackerTraits::maxHitsOnTrack - 1,
0402                             offset);
0403 
0404         alpaka::exec<Acc1D>(queue,
0405                             workDivQuadsPenta,
0406                             Kernel_BLFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>{},
0407                             tupleMultiplicity_,
0408                             bField_,
0409                             outputSoa_,
0410                             tkidDevice.data(),
0411                             hitsDevice.data(),
0412                             hits_geDevice.data(),
0413                             fast_fit_resultsDevice.data());
0414       }
0415 
0416     }  // loop on concurrent fits
0417   }
0418 
0419   template class HelixFit<pixelTopology::Phase1>;
0420   template class HelixFit<pixelTopology::Phase2>;
0421   template class HelixFit<pixelTopology::HIonPhase1>;
0422 
0423 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE