Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:32

0001 #include <cstdint>
0002 
0003 #include <alpaka/alpaka.hpp>
0004 
0005 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0006 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
0010 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0011 #include "RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h"
0012 
0013 #include "HelixFit.h"
0014 #include "CAStructures.h"
0015 
0016 template <typename TrackerTraits>
0017 using Tuples = typename reco::TrackSoA<TrackerTraits>::HitContainer;
0018 template <typename TrackerTraits>
0019 using OutputSoAView = reco::TrackSoAView<TrackerTraits>;
0020 template <typename TrackerTraits>
0021 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0022 
0023 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0024   using namespace alpaka;
0025   using namespace cms::alpakatools;
0026 
0027   template <int N, typename TrackerTraits>
0028   class Kernel_FastFit {
0029   public:
0030     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0031     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0032                                   Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
0033                                   TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0034                                   uint32_t nHits,
0035                                   TrackingRecHitSoAConstView<TrackerTraits> hh,
0036                                   pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *__restrict__ cpeParams,
0037                                   double *__restrict__ phits,
0038                                   float *__restrict__ phits_ge,
0039                                   double *__restrict__ pfast_fit,
0040                                   uint32_t offset) const {
0041       constexpr uint32_t hitsInFit = N;
0042 
0043       ALPAKA_ASSERT_ACC(hitsInFit <= nHits);
0044 
0045       ALPAKA_ASSERT_ACC(pfast_fit);
0046       ALPAKA_ASSERT_ACC(foundNtuplets);
0047       ALPAKA_ASSERT_ACC(tupleMultiplicity);
0048 
0049       // look in bin for this hit multiplicity
0050 
0051 #ifdef RIEMANN_DEBUG
0052       const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
0053       if (cms::alpakatools::once_per_grid(acc))
0054         printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit);
0055 #endif
0056 
0057       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0058       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0059         auto tuple_idx = local_idx + offset;
0060         if (tuple_idx >= tupleMultiplicity->size(nHits))
0061           break;
0062 
0063         // get it from the ntuple container (one to one to helix)
0064         auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0065         ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0066 
0067         ALPAKA_ASSERT_ACC(foundNtuplets->size(tkid) == nHits);
0068 
0069         riemannFit::Map3xNd<N> hits(phits + local_idx);
0070         riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0071         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0072 
0073         // Prepare data structure
0074         auto const *hitId = foundNtuplets->begin(tkid);
0075         for (unsigned int i = 0; i < hitsInFit; ++i) {
0076           auto hit = hitId[i];
0077           float ge[6];
0078           cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0079 
0080           hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0081           hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0082         }
0083         riemannFit::fastFit(acc, hits, fast_fit);
0084 
0085         // no NaN here....
0086         ALPAKA_ASSERT_ACC(fast_fit(0) == fast_fit(0));
0087         ALPAKA_ASSERT_ACC(fast_fit(1) == fast_fit(1));
0088         ALPAKA_ASSERT_ACC(fast_fit(2) == fast_fit(2));
0089         ALPAKA_ASSERT_ACC(fast_fit(3) == fast_fit(3));
0090       }
0091     }
0092   };
0093 
0094   template <int N, typename TrackerTraits>
0095   class Kernel_CircleFit {
0096   public:
0097     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0098     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0099                                   TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0100                                   uint32_t nHits,
0101                                   double bField,
0102                                   double *__restrict__ phits,
0103                                   float *__restrict__ phits_ge,
0104                                   double *__restrict__ pfast_fit_input,
0105                                   riemannFit::CircleFit *circle_fit,
0106                                   uint32_t offset) const {
0107       ALPAKA_ASSERT_ACC(circle_fit);
0108       ALPAKA_ASSERT_ACC(N <= nHits);
0109 
0110       // same as above...
0111 
0112       // look in bin for this hit multiplicity
0113       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0114       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0115         auto tuple_idx = local_idx + offset;
0116         if (tuple_idx >= tupleMultiplicity->size(nHits))
0117           break;
0118 
0119         riemannFit::Map3xNd<N> hits(phits + local_idx);
0120         riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0121         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0122 
0123         riemannFit::VectorNd<N> rad = (hits.block(0, 0, 2, N).colwise().norm());
0124 
0125         riemannFit::Matrix2Nd<N> hits_cov = riemannFit::Matrix2Nd<N>::Zero();
0126         riemannFit::loadCovariance2D(acc, hits_ge, hits_cov);
0127 
0128         circle_fit[local_idx] =
0129             riemannFit::circleFit(acc, hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true);
0130 
0131 #ifdef RIEMANN_DEBUG
0132 //    auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0133 //  printf("kernelCircleFit circle.par(0,1,2): %d %f,%f,%f\n", tkid,
0134 //         circle_fit[local_idx].par(0), circle_fit[local_idx].par(1), circle_fit[local_idx].par(2));
0135 #endif
0136       }
0137     }
0138   };
0139 
0140   template <int N, typename TrackerTraits>
0141   class Kernel_LineFit {
0142   public:
0143     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0144     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0145                                   TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0146                                   uint32_t nHits,
0147                                   double bField,
0148                                   OutputSoAView<TrackerTraits> results_view,
0149                                   double *__restrict__ phits,
0150                                   float *__restrict__ phits_ge,
0151                                   double *__restrict__ pfast_fit_input,
0152                                   riemannFit::CircleFit *__restrict__ circle_fit,
0153                                   uint32_t offset) const {
0154       ALPAKA_ASSERT_ACC(circle_fit);
0155       ALPAKA_ASSERT_ACC(N <= nHits);
0156 
0157       // same as above...
0158 
0159       // look in bin for this hit multiplicity
0160       const auto nt = riemannFit::maxNumberOfConcurrentFits;
0161       for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0162         auto tuple_idx = local_idx + offset;
0163         if (tuple_idx >= tupleMultiplicity->size(nHits))
0164           break;
0165 
0166         // get it for the ntuple container (one to one to helix)
0167         int32_t tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0168 
0169         riemannFit::Map3xNd<N> hits(phits + local_idx);
0170         riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0171         riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0172 
0173         auto const &line_fit = riemannFit::lineFit(acc, hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true);
0174 
0175         riemannFit::fromCircleToPerigee(acc, circle_fit[local_idx]);
0176 
0177         TracksUtilities<TrackerTraits>::copyFromCircle(results_view,
0178                                                        circle_fit[local_idx].par,
0179                                                        circle_fit[local_idx].cov,
0180                                                        line_fit.par,
0181                                                        line_fit.cov,
0182                                                        1.f / float(bField),
0183                                                        tkid);
0184         results_view[tkid].pt() = bField / std::abs(circle_fit[local_idx].par(2));
0185         results_view[tkid].eta() = asinhf(line_fit.par(0));
0186         results_view[tkid].chi2() = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5);
0187 
0188 #ifdef RIEMANN_DEBUG
0189         printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0190                N,
0191                nHits,
0192                tkid,
0193                circle_fit[local_idx].par(0),
0194                circle_fit[local_idx].par(1),
0195                circle_fit[local_idx].par(2));
0196         printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1));
0197         printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n",
0198                circle_fit[local_idx].chi2,
0199                line_fit.chi2,
0200                circle_fit[local_idx].cov(0, 0),
0201                circle_fit[local_idx].cov(1, 1),
0202                circle_fit[local_idx].cov(2, 2),
0203                line_fit.cov(0, 0),
0204                line_fit.cov(1, 1));
0205 #endif
0206       }
0207     }
0208   };
0209 
0210   template <typename TrackerTraits>
0211   void HelixFit<TrackerTraits>::launchRiemannKernels(const TrackingRecHitSoAConstView<TrackerTraits> &hv,
0212                                                      pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *cpeParams,
0213                                                      uint32_t nhits,
0214                                                      uint32_t maxNumberOfTuples,
0215                                                      Queue &queue) {
0216     assert(tuples_);
0217 
0218     auto blockSize = 64;
0219     auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;
0220     const auto workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0221     const auto workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0222 
0223     //  Fit internals
0224     auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0225         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double));
0226     auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0227         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float));
0228     auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0229         queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0230     auto circle_fit_resultsDevice_holder =
0231         cms::alpakatools::make_device_buffer<char[]>(queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit));
0232     riemannFit::CircleFit *circle_fit_resultsDevice_ =
0233         (riemannFit::CircleFit *)(circle_fit_resultsDevice_holder.data());
0234 
0235     for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0236       // triplets
0237       alpaka::exec<Acc1D>(queue,
0238                           workDivTriplets,
0239                           Kernel_FastFit<3, TrackerTraits>{},
0240                           tuples_,
0241                           tupleMultiplicity_,
0242                           3,
0243                           hv,
0244                           cpeParams,
0245                           hitsDevice.data(),
0246                           hits_geDevice.data(),
0247                           fast_fit_resultsDevice.data(),
0248                           offset);
0249 
0250       alpaka::exec<Acc1D>(queue,
0251                           workDivTriplets,
0252                           Kernel_CircleFit<3, TrackerTraits>{},
0253                           tupleMultiplicity_,
0254                           3,
0255                           bField_,
0256                           hitsDevice.data(),
0257                           hits_geDevice.data(),
0258                           fast_fit_resultsDevice.data(),
0259                           circle_fit_resultsDevice_,
0260                           offset);
0261 
0262       alpaka::exec<Acc1D>(queue,
0263                           workDivTriplets,
0264                           Kernel_LineFit<3, TrackerTraits>{},
0265                           tupleMultiplicity_,
0266                           3,
0267                           bField_,
0268                           outputSoa_,
0269                           hitsDevice.data(),
0270                           hits_geDevice.data(),
0271                           fast_fit_resultsDevice.data(),
0272                           circle_fit_resultsDevice_,
0273                           offset);
0274 
0275       // quads
0276       alpaka::exec<Acc1D>(queue,
0277                           workDivQuadsPenta,
0278                           Kernel_FastFit<4, TrackerTraits>{},
0279                           tuples_,
0280                           tupleMultiplicity_,
0281                           4,
0282                           hv,
0283                           cpeParams,
0284                           hitsDevice.data(),
0285                           hits_geDevice.data(),
0286                           fast_fit_resultsDevice.data(),
0287                           offset);
0288 
0289       alpaka::exec<Acc1D>(queue,
0290                           workDivQuadsPenta,
0291                           Kernel_CircleFit<4, TrackerTraits>{},
0292                           tupleMultiplicity_,
0293                           4,
0294                           bField_,
0295                           hitsDevice.data(),
0296                           hits_geDevice.data(),
0297                           fast_fit_resultsDevice.data(),
0298                           circle_fit_resultsDevice_,
0299                           offset);
0300 
0301       alpaka::exec<Acc1D>(queue,
0302                           workDivQuadsPenta,
0303                           Kernel_LineFit<4, TrackerTraits>{},
0304                           tupleMultiplicity_,
0305                           4,
0306                           bField_,
0307                           outputSoa_,
0308                           hitsDevice.data(),
0309                           hits_geDevice.data(),
0310                           fast_fit_resultsDevice.data(),
0311                           circle_fit_resultsDevice_,
0312                           offset);
0313 
0314       if (fitNas4_) {
0315         // penta
0316         alpaka::exec<Acc1D>(queue,
0317                             workDivQuadsPenta,
0318                             Kernel_FastFit<4, TrackerTraits>{},
0319                             tuples_,
0320                             tupleMultiplicity_,
0321                             5,
0322                             hv,
0323                             cpeParams,
0324                             hitsDevice.data(),
0325                             hits_geDevice.data(),
0326                             fast_fit_resultsDevice.data(),
0327                             offset);
0328 
0329         alpaka::exec<Acc1D>(queue,
0330                             workDivQuadsPenta,
0331                             Kernel_CircleFit<4, TrackerTraits>{},
0332                             tupleMultiplicity_,
0333                             5,
0334                             bField_,
0335                             hitsDevice.data(),
0336                             hits_geDevice.data(),
0337                             fast_fit_resultsDevice.data(),
0338                             circle_fit_resultsDevice_,
0339                             offset);
0340 
0341         alpaka::exec<Acc1D>(queue,
0342                             workDivQuadsPenta,
0343                             Kernel_LineFit<4, TrackerTraits>{},
0344                             tupleMultiplicity_,
0345                             5,
0346                             bField_,
0347                             outputSoa_,
0348                             hitsDevice.data(),
0349                             hits_geDevice.data(),
0350                             fast_fit_resultsDevice.data(),
0351                             circle_fit_resultsDevice_,
0352                             offset);
0353       } else {
0354         // penta all 5
0355         alpaka::exec<Acc1D>(queue,
0356                             workDivQuadsPenta,
0357                             Kernel_FastFit<5, TrackerTraits>{},
0358                             tuples_,
0359                             tupleMultiplicity_,
0360                             5,
0361                             hv,
0362                             cpeParams,
0363                             hitsDevice.data(),
0364                             hits_geDevice.data(),
0365                             fast_fit_resultsDevice.data(),
0366                             offset);
0367 
0368         alpaka::exec<Acc1D>(queue,
0369                             workDivQuadsPenta,
0370                             Kernel_CircleFit<5, TrackerTraits>{},
0371                             tupleMultiplicity_,
0372                             5,
0373                             bField_,
0374                             hitsDevice.data(),
0375                             hits_geDevice.data(),
0376                             fast_fit_resultsDevice.data(),
0377                             circle_fit_resultsDevice_,
0378                             offset);
0379 
0380         alpaka::exec<Acc1D>(queue,
0381                             workDivQuadsPenta,
0382                             Kernel_LineFit<5, TrackerTraits>{},
0383                             tupleMultiplicity_,
0384                             5,
0385                             bField_,
0386                             outputSoa_,
0387                             hitsDevice.data(),
0388                             hits_geDevice.data(),
0389                             fast_fit_resultsDevice.data(),
0390                             circle_fit_resultsDevice_,
0391                             offset);
0392       }
0393     }
0394   }
0395 
0396   template class HelixFit<pixelTopology::Phase1>;
0397   template class HelixFit<pixelTopology::Phase2>;
0398   template class HelixFit<pixelTopology::HIonPhase1>;
0399 
0400 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE