Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-07-03 00:42:42

0001 #include <cstdint>
0002 
0003 #include <alpaka/alpaka.hpp>
0004 
0005 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0006 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0009 #include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h"
0010 #include "RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h"
0011 
0012 #include "HelixFit.h"
0013 #include "CAStructures.h"
0014 
0015 using OutputSoAView = reco::TrackSoAView;
0016 using TupleMultiplicity = caStructures::GenericContainer;
0017 using Tuples = caStructures::SequentialContainer;
0018 
0019 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0020   using namespace alpaka;
0021   using namespace cms::alpakatools;
0022 
0023   template <int N, typename TrackerTraits>
0024   class Kernel_FastFit {
0025   public:
0026     ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0027                                   Tuples const *__restrict__ foundNtuplets,
0028                                   TupleMultiplicity const *__restrict__ tupleMultiplicity,
0029                                   uint32_t nHits,
0030                                   ::reco::TrackingRecHitConstView hh,
0031                                   ::reco::CAModulesConstView cm,
0032                                   double *__restrict__ phits,
0033                                   float *__restrict__ phits_ge,
0034                                   double *__restrict__ pfast_fit,
0035                                   uint32_t offset) const {
0036       constexpr uint32_t hitsInFit = N;
0037 
0038       ALPAKA_ASSERT_ACC(hitsInFit <= nHits);
0039 
0040       ALPAKA_ASSERT_ACC(pfast_fit);
0041       ALPAKA_ASSERT_ACC(foundNtuplets);
0042       ALPAKA_ASSERT_ACC(tupleMultiplicity);
0043 
0044       // look in bin for this hit multiplicity
0045 
0046 #ifdef RIEMANN_DEBUG
0047       const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
0048       if (cms::alpakatools::once_per_grid(acc))
0049         printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit);
0050 #endif
0051 
0052       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0053       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0054         auto tuple_idx = local_idx + offset;
0055         if (tuple_idx >= tupleMultiplicity->size(nHits))
0056           break;
0057 
0058         // get it from the ntuple container (one to one to helix)
0059         auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0060         ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0061 
0062         ALPAKA_ASSERT_ACC(foundNtuplets->size(tkid) == nHits);
0063 
0064         riemannFit::Map3xNd<N> hits(phits + local_idx);
0065         riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0066         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0067 
0068         // Prepare data structure
0069         auto const *hitId = foundNtuplets->begin(tkid);
0070         for (unsigned int i = 0; i < hitsInFit; ++i) {
0071           auto hit = hitId[i];
0072           float ge[6];
0073           cm.detFrame(hh.detectorIndex(hit)).toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0074           hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0075           hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0076         }
0077         riemannFit::fastFit(acc, hits, fast_fit);
0078 
0079 #ifdef RIEMANN_DEBUG
0080         // any NaN value should cause the track to be rejected at a later stage
0081         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0082         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0083         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0084         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0085 #endif
0086       }
0087     }
0088   };
0089 
0090   template <int N, typename TrackerTraits>
0091   class Kernel_CircleFit {
0092   public:
0093     ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0094                                   TupleMultiplicity const *__restrict__ tupleMultiplicity,
0095                                   uint32_t nHits,
0096                                   double bField,
0097                                   double *__restrict__ phits,
0098                                   float *__restrict__ phits_ge,
0099                                   double *__restrict__ pfast_fit_input,
0100                                   riemannFit::CircleFit *circle_fit,
0101                                   uint32_t offset) const {
0102       ALPAKA_ASSERT_ACC(circle_fit);
0103       ALPAKA_ASSERT_ACC(N <= nHits);
0104 
0105       // same as above...
0106 
0107       // look in bin for this hit multiplicity
0108       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0109       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0110         auto tuple_idx = local_idx + offset;
0111         if (tuple_idx >= tupleMultiplicity->size(nHits))
0112           break;
0113 
0114         riemannFit::Map3xNd<N> hits(phits + local_idx);
0115         riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0116         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0117 
0118         riemannFit::VectorNd<N> rad = (hits.block(0, 0, 2, N).colwise().norm());
0119 
0120         riemannFit::Matrix2Nd<N> hits_cov = riemannFit::Matrix2Nd<N>::Zero();
0121         riemannFit::loadCovariance2D(acc, hits_ge, hits_cov);
0122 
0123         circle_fit[local_idx] =
0124             riemannFit::circleFit(acc, hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true);
0125 
0126 #ifdef RIEMANN_DEBUG
0127 //    auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0128 //  printf("kernelCircleFit circle.par(0,1,2): %d %f,%f,%f\n", tkid,
0129 //         circle_fit[local_idx].par(0), circle_fit[local_idx].par(1), circle_fit[local_idx].par(2));
0130 #endif
0131       }
0132     }
0133   };
0134 
0135   template <int N, typename TrackerTraits>
0136   class Kernel_LineFit {
0137   public:
0138     ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0139                                   TupleMultiplicity const *__restrict__ tupleMultiplicity,
0140                                   uint32_t nHits,
0141                                   double bField,
0142                                   OutputSoAView results_view,
0143                                   double *__restrict__ phits,
0144                                   float *__restrict__ phits_ge,
0145                                   double *__restrict__ pfast_fit_input,
0146                                   riemannFit::CircleFit *__restrict__ circle_fit,
0147                                   uint32_t offset) const {
0148       ALPAKA_ASSERT_ACC(circle_fit);
0149       ALPAKA_ASSERT_ACC(N <= nHits);
0150 
0151       // same as above...
0152 
0153       // look in bin for this hit multiplicity
0154       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0155       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0156         auto tuple_idx = local_idx + offset;
0157         if (tuple_idx >= tupleMultiplicity->size(nHits))
0158           break;
0159 
0160         // get it for the ntuple container (one to one to helix)
0161         int32_t tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0162 
0163         riemannFit::Map3xNd<N> hits(phits + local_idx);
0164         riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0165         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0166 
0167         auto const &line_fit = riemannFit::lineFit(acc, hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true);
0168 
0169         riemannFit::fromCircleToPerigee(acc, circle_fit[local_idx]);
0170 
0171         reco::copyFromCircle(results_view,
0172                              circle_fit[local_idx].par,
0173                              circle_fit[local_idx].cov,
0174                              line_fit.par,
0175                              line_fit.cov,
0176                              1.f / float(bField),
0177                              tkid);
0178         results_view[tkid].pt() = bField / std::abs(circle_fit[local_idx].par(2));
0179         results_view[tkid].eta() = asinhf(line_fit.par(0));
0180         results_view[tkid].chi2() = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5);
0181 
0182 #ifdef RIEMANN_DEBUG
0183         printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0184                N,
0185                nHits,
0186                tkid,
0187                circle_fit[local_idx].par(0),
0188                circle_fit[local_idx].par(1),
0189                circle_fit[local_idx].par(2));
0190         printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1));
0191         printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n",
0192                circle_fit[local_idx].chi2,
0193                line_fit.chi2,
0194                circle_fit[local_idx].cov(0, 0),
0195                circle_fit[local_idx].cov(1, 1),
0196                circle_fit[local_idx].cov(2, 2),
0197                line_fit.cov(0, 0),
0198                line_fit.cov(1, 1));
0199 #endif
0200       }
0201     }
0202   };
0203 
0204   template <typename TrackerTraits>
0205   void HelixFit<TrackerTraits>::launchRiemannKernels(const ::reco::TrackingRecHitConstView &hv,
0206                                                      const ::reco::CAModulesConstView &cm,
0207                                                      uint32_t nhits,
0208                                                      uint32_t maxNumberOfTuples,
0209                                                      Queue &queue) {
0210     assert(tuples_);
0211 
0212     auto blockSize = 64;
0213     auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;
0214     const auto workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0215     const auto workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0216 
0217     //  Fit internals
0218     auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0219         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double));
0220     auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0221         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float));
0222     auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0223         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0224     auto circle_fit_resultsDevice_holder =
0225         cms::alpakatools::make_device_buffer<char[]>(queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit));
0226     riemannFit::CircleFit *circle_fit_resultsDevice_ =
0227         (riemannFit::CircleFit *)(circle_fit_resultsDevice_holder.data());
0228 
0229     for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0230       // triplets
0231       alpaka::exec<Acc1D>(queue,
0232                           workDivTriplets,
0233                           Kernel_FastFit<3, TrackerTraits>{},
0234                           tuples_,
0235                           tupleMultiplicity_,
0236                           3,
0237                           hv,
0238                           cm,
0239                           hitsDevice.data(),
0240                           hits_geDevice.data(),
0241                           fast_fit_resultsDevice.data(),
0242                           offset);
0243 
0244       alpaka::exec<Acc1D>(queue,
0245                           workDivTriplets,
0246                           Kernel_CircleFit<3, TrackerTraits>{},
0247                           tupleMultiplicity_,
0248                           3,
0249                           bField_,
0250                           hitsDevice.data(),
0251                           hits_geDevice.data(),
0252                           fast_fit_resultsDevice.data(),
0253                           circle_fit_resultsDevice_,
0254                           offset);
0255 
0256       alpaka::exec<Acc1D>(queue,
0257                           workDivTriplets,
0258                           Kernel_LineFit<3, TrackerTraits>{},
0259                           tupleMultiplicity_,
0260                           3,
0261                           bField_,
0262                           outputSoa_,
0263                           hitsDevice.data(),
0264                           hits_geDevice.data(),
0265                           fast_fit_resultsDevice.data(),
0266                           circle_fit_resultsDevice_,
0267                           offset);
0268 
0269       // quads
0270       alpaka::exec<Acc1D>(queue,
0271                           workDivQuadsPenta,
0272                           Kernel_FastFit<4, TrackerTraits>{},
0273                           tuples_,
0274                           tupleMultiplicity_,
0275                           4,
0276                           hv,
0277                           cm,
0278                           hitsDevice.data(),
0279                           hits_geDevice.data(),
0280                           fast_fit_resultsDevice.data(),
0281                           offset);
0282 
0283       alpaka::exec<Acc1D>(queue,
0284                           workDivQuadsPenta,
0285                           Kernel_CircleFit<4, TrackerTraits>{},
0286                           tupleMultiplicity_,
0287                           4,
0288                           bField_,
0289                           hitsDevice.data(),
0290                           hits_geDevice.data(),
0291                           fast_fit_resultsDevice.data(),
0292                           circle_fit_resultsDevice_,
0293                           offset);
0294 
0295       alpaka::exec<Acc1D>(queue,
0296                           workDivQuadsPenta,
0297                           Kernel_LineFit<4, TrackerTraits>{},
0298                           tupleMultiplicity_,
0299                           4,
0300                           bField_,
0301                           outputSoa_,
0302                           hitsDevice.data(),
0303                           hits_geDevice.data(),
0304                           fast_fit_resultsDevice.data(),
0305                           circle_fit_resultsDevice_,
0306                           offset);
0307 
0308       if (fitNas4_) {
0309         // penta
0310         alpaka::exec<Acc1D>(queue,
0311                             workDivQuadsPenta,
0312                             Kernel_FastFit<4, TrackerTraits>{},
0313                             tuples_,
0314                             tupleMultiplicity_,
0315                             5,
0316                             hv,
0317                             cm,
0318                             hitsDevice.data(),
0319                             hits_geDevice.data(),
0320                             fast_fit_resultsDevice.data(),
0321                             offset);
0322 
0323         alpaka::exec<Acc1D>(queue,
0324                             workDivQuadsPenta,
0325                             Kernel_CircleFit<4, TrackerTraits>{},
0326                             tupleMultiplicity_,
0327                             5,
0328                             bField_,
0329                             hitsDevice.data(),
0330                             hits_geDevice.data(),
0331                             fast_fit_resultsDevice.data(),
0332                             circle_fit_resultsDevice_,
0333                             offset);
0334 
0335         alpaka::exec<Acc1D>(queue,
0336                             workDivQuadsPenta,
0337                             Kernel_LineFit<4, TrackerTraits>{},
0338                             tupleMultiplicity_,
0339                             5,
0340                             bField_,
0341                             outputSoa_,
0342                             hitsDevice.data(),
0343                             hits_geDevice.data(),
0344                             fast_fit_resultsDevice.data(),
0345                             circle_fit_resultsDevice_,
0346                             offset);
0347       } else {
0348         // penta all 5
0349         alpaka::exec<Acc1D>(queue,
0350                             workDivQuadsPenta,
0351                             Kernel_FastFit<5, TrackerTraits>{},
0352                             tuples_,
0353                             tupleMultiplicity_,
0354                             5,
0355                             hv,
0356                             cm,
0357                             hitsDevice.data(),
0358                             hits_geDevice.data(),
0359                             fast_fit_resultsDevice.data(),
0360                             offset);
0361 
0362         alpaka::exec<Acc1D>(queue,
0363                             workDivQuadsPenta,
0364                             Kernel_CircleFit<5, TrackerTraits>{},
0365                             tupleMultiplicity_,
0366                             5,
0367                             bField_,
0368                             hitsDevice.data(),
0369                             hits_geDevice.data(),
0370                             fast_fit_resultsDevice.data(),
0371                             circle_fit_resultsDevice_,
0372                             offset);
0373 
0374         alpaka::exec<Acc1D>(queue,
0375                             workDivQuadsPenta,
0376                             Kernel_LineFit<5, TrackerTraits>{},
0377                             tupleMultiplicity_,
0378                             5,
0379                             bField_,
0380                             outputSoa_,
0381                             hitsDevice.data(),
0382                             hits_geDevice.data(),
0383                             fast_fit_resultsDevice.data(),
0384                             circle_fit_resultsDevice_,
0385                             offset);
0386       }
0387     }
0388   }
0389 
0390   template class HelixFit<pixelTopology::Phase1>;
0391   template class HelixFit<pixelTopology::Phase2>;
0392   template class HelixFit<pixelTopology::HIonPhase1>;
0393 
0394 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE