Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-23 22:56:30

0001 #include <cstdint>
0002 
0003 #include <alpaka/alpaka.hpp>
0004 
0005 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0006 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0009 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0010 #include "RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h"
0011 
0012 #include "HelixFit.h"
0013 #include "CAStructures.h"
0014 
0015 template <typename TrackerTraits>
0016 using Tuples = typename reco::TrackSoA<TrackerTraits>::HitContainer;
0017 template <typename TrackerTraits>
0018 using OutputSoAView = reco::TrackSoAView<TrackerTraits>;
0019 template <typename TrackerTraits>
0020 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0021 
0022 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0023   using namespace alpaka;
0024   using namespace cms::alpakatools;
0025 
0026   template <int N, typename TrackerTraits>
0027   class Kernel_FastFit {
0028   public:
0029     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0030     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0031                                   Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
0032                                   TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0033                                   uint32_t nHits,
0034                                   TrackingRecHitSoAConstView<TrackerTraits> hh,
0035                                   pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *__restrict__ cpeParams,
0036                                   double *__restrict__ phits,
0037                                   float *__restrict__ phits_ge,
0038                                   double *__restrict__ pfast_fit,
0039                                   uint32_t offset) const {
0040       constexpr uint32_t hitsInFit = N;
0041 
0042       ALPAKA_ASSERT_ACC(hitsInFit <= nHits);
0043 
0044       ALPAKA_ASSERT_ACC(pfast_fit);
0045       ALPAKA_ASSERT_ACC(foundNtuplets);
0046       ALPAKA_ASSERT_ACC(tupleMultiplicity);
0047 
0048       // look in bin for this hit multiplicity
0049 
0050 #ifdef RIEMANN_DEBUG
0051       const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
0052       if (cms::alpakatools::once_per_grid(acc))
0053         printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit);
0054 #endif
0055 
0056       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0057       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0058         auto tuple_idx = local_idx + offset;
0059         if (tuple_idx >= tupleMultiplicity->size(nHits))
0060           break;
0061 
0062         // get it from the ntuple container (one to one to helix)
0063         auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0064         ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0065 
0066         ALPAKA_ASSERT_ACC(foundNtuplets->size(tkid) == nHits);
0067 
0068         riemannFit::Map3xNd<N> hits(phits + local_idx);
0069         riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0070         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0071 
0072         // Prepare data structure
0073         auto const *hitId = foundNtuplets->begin(tkid);
0074         for (unsigned int i = 0; i < hitsInFit; ++i) {
0075           auto hit = hitId[i];
0076           float ge[6];
0077           cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0078 
0079           hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0080           hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0081         }
0082         riemannFit::fastFit(acc, hits, fast_fit);
0083 
0084 #ifdef RIEMANN_DEBUG
0085         // any NaN value should cause the track to be rejected at a later stage
0086         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0087         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0088         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0089         ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0090 #endif
0091       }
0092     }
0093   };
0094 
0095   template <int N, typename TrackerTraits>
0096   class Kernel_CircleFit {
0097   public:
0098     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0099     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0100                                   TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0101                                   uint32_t nHits,
0102                                   double bField,
0103                                   double *__restrict__ phits,
0104                                   float *__restrict__ phits_ge,
0105                                   double *__restrict__ pfast_fit_input,
0106                                   riemannFit::CircleFit *circle_fit,
0107                                   uint32_t offset) const {
0108       ALPAKA_ASSERT_ACC(circle_fit);
0109       ALPAKA_ASSERT_ACC(N <= nHits);
0110 
0111       // same as above...
0112 
0113       // look in bin for this hit multiplicity
0114       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0115       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0116         auto tuple_idx = local_idx + offset;
0117         if (tuple_idx >= tupleMultiplicity->size(nHits))
0118           break;
0119 
0120         riemannFit::Map3xNd<N> hits(phits + local_idx);
0121         riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0122         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0123 
0124         riemannFit::VectorNd<N> rad = (hits.block(0, 0, 2, N).colwise().norm());
0125 
0126         riemannFit::Matrix2Nd<N> hits_cov = riemannFit::Matrix2Nd<N>::Zero();
0127         riemannFit::loadCovariance2D(acc, hits_ge, hits_cov);
0128 
0129         circle_fit[local_idx] =
0130             riemannFit::circleFit(acc, hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true);
0131 
0132 #ifdef RIEMANN_DEBUG
0133 //    auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0134 //  printf("kernelCircleFit circle.par(0,1,2): %d %f,%f,%f\n", tkid,
0135 //         circle_fit[local_idx].par(0), circle_fit[local_idx].par(1), circle_fit[local_idx].par(2));
0136 #endif
0137       }
0138     }
0139   };
0140 
0141   template <int N, typename TrackerTraits>
0142   class Kernel_LineFit {
0143   public:
0144     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0145     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0146                                   TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0147                                   uint32_t nHits,
0148                                   double bField,
0149                                   OutputSoAView<TrackerTraits> results_view,
0150                                   double *__restrict__ phits,
0151                                   float *__restrict__ phits_ge,
0152                                   double *__restrict__ pfast_fit_input,
0153                                   riemannFit::CircleFit *__restrict__ circle_fit,
0154                                   uint32_t offset) const {
0155       ALPAKA_ASSERT_ACC(circle_fit);
0156       ALPAKA_ASSERT_ACC(N <= nHits);
0157 
0158       // same as above...
0159 
0160       // look in bin for this hit multiplicity
0161       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0162       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0163         auto tuple_idx = local_idx + offset;
0164         if (tuple_idx >= tupleMultiplicity->size(nHits))
0165           break;
0166 
0167         // get it for the ntuple container (one to one to helix)
0168         int32_t tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0169 
0170         riemannFit::Map3xNd<N> hits(phits + local_idx);
0171         riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0172         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0173 
0174         auto const &line_fit = riemannFit::lineFit(acc, hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true);
0175 
0176         riemannFit::fromCircleToPerigee(acc, circle_fit[local_idx]);
0177 
0178         TracksUtilities<TrackerTraits>::copyFromCircle(results_view,
0179                                                        circle_fit[local_idx].par,
0180                                                        circle_fit[local_idx].cov,
0181                                                        line_fit.par,
0182                                                        line_fit.cov,
0183                                                        1.f / float(bField),
0184                                                        tkid);
0185         results_view[tkid].pt() = bField / std::abs(circle_fit[local_idx].par(2));
0186         results_view[tkid].eta() = asinhf(line_fit.par(0));
0187         results_view[tkid].chi2() = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5);
0188 
0189 #ifdef RIEMANN_DEBUG
0190         printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0191                N,
0192                nHits,
0193                tkid,
0194                circle_fit[local_idx].par(0),
0195                circle_fit[local_idx].par(1),
0196                circle_fit[local_idx].par(2));
0197         printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1));
0198         printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n",
0199                circle_fit[local_idx].chi2,
0200                line_fit.chi2,
0201                circle_fit[local_idx].cov(0, 0),
0202                circle_fit[local_idx].cov(1, 1),
0203                circle_fit[local_idx].cov(2, 2),
0204                line_fit.cov(0, 0),
0205                line_fit.cov(1, 1));
0206 #endif
0207       }
0208     }
0209   };
0210 
0211   template <typename TrackerTraits>
0212   void HelixFit<TrackerTraits>::launchRiemannKernels(const TrackingRecHitSoAConstView<TrackerTraits> &hv,
0213                                                      pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *cpeParams,
0214                                                      uint32_t nhits,
0215                                                      uint32_t maxNumberOfTuples,
0216                                                      Queue &queue) {
0217     assert(tuples_);
0218 
0219     auto blockSize = 64;
0220     auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;
0221     const auto workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0222     const auto workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0223 
0224     //  Fit internals
0225     auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0226         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double));
0227     auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0228         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float));
0229     auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0230         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0231     auto circle_fit_resultsDevice_holder =
0232         cms::alpakatools::make_device_buffer<char[]>(queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit));
0233     riemannFit::CircleFit *circle_fit_resultsDevice_ =
0234         (riemannFit::CircleFit *)(circle_fit_resultsDevice_holder.data());
0235 
0236     for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0237       // triplets
0238       alpaka::exec<Acc1D>(queue,
0239                           workDivTriplets,
0240                           Kernel_FastFit<3, TrackerTraits>{},
0241                           tuples_,
0242                           tupleMultiplicity_,
0243                           3,
0244                           hv,
0245                           cpeParams,
0246                           hitsDevice.data(),
0247                           hits_geDevice.data(),
0248                           fast_fit_resultsDevice.data(),
0249                           offset);
0250 
0251       alpaka::exec<Acc1D>(queue,
0252                           workDivTriplets,
0253                           Kernel_CircleFit<3, TrackerTraits>{},
0254                           tupleMultiplicity_,
0255                           3,
0256                           bField_,
0257                           hitsDevice.data(),
0258                           hits_geDevice.data(),
0259                           fast_fit_resultsDevice.data(),
0260                           circle_fit_resultsDevice_,
0261                           offset);
0262 
0263       alpaka::exec<Acc1D>(queue,
0264                           workDivTriplets,
0265                           Kernel_LineFit<3, TrackerTraits>{},
0266                           tupleMultiplicity_,
0267                           3,
0268                           bField_,
0269                           outputSoa_,
0270                           hitsDevice.data(),
0271                           hits_geDevice.data(),
0272                           fast_fit_resultsDevice.data(),
0273                           circle_fit_resultsDevice_,
0274                           offset);
0275 
0276       // quads
0277       alpaka::exec<Acc1D>(queue,
0278                           workDivQuadsPenta,
0279                           Kernel_FastFit<4, TrackerTraits>{},
0280                           tuples_,
0281                           tupleMultiplicity_,
0282                           4,
0283                           hv,
0284                           cpeParams,
0285                           hitsDevice.data(),
0286                           hits_geDevice.data(),
0287                           fast_fit_resultsDevice.data(),
0288                           offset);
0289 
0290       alpaka::exec<Acc1D>(queue,
0291                           workDivQuadsPenta,
0292                           Kernel_CircleFit<4, TrackerTraits>{},
0293                           tupleMultiplicity_,
0294                           4,
0295                           bField_,
0296                           hitsDevice.data(),
0297                           hits_geDevice.data(),
0298                           fast_fit_resultsDevice.data(),
0299                           circle_fit_resultsDevice_,
0300                           offset);
0301 
0302       alpaka::exec<Acc1D>(queue,
0303                           workDivQuadsPenta,
0304                           Kernel_LineFit<4, TrackerTraits>{},
0305                           tupleMultiplicity_,
0306                           4,
0307                           bField_,
0308                           outputSoa_,
0309                           hitsDevice.data(),
0310                           hits_geDevice.data(),
0311                           fast_fit_resultsDevice.data(),
0312                           circle_fit_resultsDevice_,
0313                           offset);
0314 
0315       if (fitNas4_) {
0316         // penta
0317         alpaka::exec<Acc1D>(queue,
0318                             workDivQuadsPenta,
0319                             Kernel_FastFit<4, TrackerTraits>{},
0320                             tuples_,
0321                             tupleMultiplicity_,
0322                             5,
0323                             hv,
0324                             cpeParams,
0325                             hitsDevice.data(),
0326                             hits_geDevice.data(),
0327                             fast_fit_resultsDevice.data(),
0328                             offset);
0329 
0330         alpaka::exec<Acc1D>(queue,
0331                             workDivQuadsPenta,
0332                             Kernel_CircleFit<4, TrackerTraits>{},
0333                             tupleMultiplicity_,
0334                             5,
0335                             bField_,
0336                             hitsDevice.data(),
0337                             hits_geDevice.data(),
0338                             fast_fit_resultsDevice.data(),
0339                             circle_fit_resultsDevice_,
0340                             offset);
0341 
0342         alpaka::exec<Acc1D>(queue,
0343                             workDivQuadsPenta,
0344                             Kernel_LineFit<4, TrackerTraits>{},
0345                             tupleMultiplicity_,
0346                             5,
0347                             bField_,
0348                             outputSoa_,
0349                             hitsDevice.data(),
0350                             hits_geDevice.data(),
0351                             fast_fit_resultsDevice.data(),
0352                             circle_fit_resultsDevice_,
0353                             offset);
0354       } else {
0355         // penta all 5
0356         alpaka::exec<Acc1D>(queue,
0357                             workDivQuadsPenta,
0358                             Kernel_FastFit<5, TrackerTraits>{},
0359                             tuples_,
0360                             tupleMultiplicity_,
0361                             5,
0362                             hv,
0363                             cpeParams,
0364                             hitsDevice.data(),
0365                             hits_geDevice.data(),
0366                             fast_fit_resultsDevice.data(),
0367                             offset);
0368 
0369         alpaka::exec<Acc1D>(queue,
0370                             workDivQuadsPenta,
0371                             Kernel_CircleFit<5, TrackerTraits>{},
0372                             tupleMultiplicity_,
0373                             5,
0374                             bField_,
0375                             hitsDevice.data(),
0376                             hits_geDevice.data(),
0377                             fast_fit_resultsDevice.data(),
0378                             circle_fit_resultsDevice_,
0379                             offset);
0380 
0381         alpaka::exec<Acc1D>(queue,
0382                             workDivQuadsPenta,
0383                             Kernel_LineFit<5, TrackerTraits>{},
0384                             tupleMultiplicity_,
0385                             5,
0386                             bField_,
0387                             outputSoa_,
0388                             hitsDevice.data(),
0389                             hits_geDevice.data(),
0390                             fast_fit_resultsDevice.data(),
0391                             circle_fit_resultsDevice_,
0392                             offset);
0393       }
0394     }
0395   }
0396 
0397   template class HelixFit<pixelTopology::Phase1>;
0398   template class HelixFit<pixelTopology::Phase2>;
0399   template class HelixFit<pixelTopology::HIonPhase1>;
0400 
0401 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE