File indexing completed on 2025-07-03 00:42:42
0001 #include <cstdint>
0002
0003 #include <alpaka/alpaka.hpp>
0004
0005 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0006 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0009 #include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h"
0010 #include "RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h"
0011
0012 #include "HelixFit.h"
0013 #include "CAStructures.h"
0014
0015 using OutputSoAView = reco::TrackSoAView;
0016 using TupleMultiplicity = caStructures::GenericContainer;
0017 using Tuples = caStructures::SequentialContainer;
0018
0019 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0020 using namespace alpaka;
0021 using namespace cms::alpakatools;
0022
0023 template <int N, typename TrackerTraits>
0024 class Kernel_FastFit {
0025 public:
0026 ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0027 Tuples const *__restrict__ foundNtuplets,
0028 TupleMultiplicity const *__restrict__ tupleMultiplicity,
0029 uint32_t nHits,
0030 ::reco::TrackingRecHitConstView hh,
0031 ::reco::CAModulesConstView cm,
0032 double *__restrict__ phits,
0033 float *__restrict__ phits_ge,
0034 double *__restrict__ pfast_fit,
0035 uint32_t offset) const {
0036 constexpr uint32_t hitsInFit = N;
0037
0038 ALPAKA_ASSERT_ACC(hitsInFit <= nHits);
0039
0040 ALPAKA_ASSERT_ACC(pfast_fit);
0041 ALPAKA_ASSERT_ACC(foundNtuplets);
0042 ALPAKA_ASSERT_ACC(tupleMultiplicity);
0043
0044
0045
0046 #ifdef RIEMANN_DEBUG
0047 const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
0048 if (cms::alpakatools::once_per_grid(acc))
0049 printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit);
0050 #endif
0051
0052 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0053 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0054 auto tuple_idx = local_idx + offset;
0055 if (tuple_idx >= tupleMultiplicity->size(nHits))
0056 break;
0057
0058
0059 auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0060 ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0061
0062 ALPAKA_ASSERT_ACC(foundNtuplets->size(tkid) == nHits);
0063
0064 riemannFit::Map3xNd<N> hits(phits + local_idx);
0065 riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0066 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0067
0068
0069 auto const *hitId = foundNtuplets->begin(tkid);
0070 for (unsigned int i = 0; i < hitsInFit; ++i) {
0071 auto hit = hitId[i];
0072 float ge[6];
0073 cm.detFrame(hh.detectorIndex(hit)).toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0074 hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0075 hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0076 }
0077 riemannFit::fastFit(acc, hits, fast_fit);
0078
0079 #ifdef RIEMANN_DEBUG
0080
0081 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0082 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0083 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0084 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0085 #endif
0086 }
0087 }
0088 };
0089
0090 template <int N, typename TrackerTraits>
0091 class Kernel_CircleFit {
0092 public:
0093 ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0094 TupleMultiplicity const *__restrict__ tupleMultiplicity,
0095 uint32_t nHits,
0096 double bField,
0097 double *__restrict__ phits,
0098 float *__restrict__ phits_ge,
0099 double *__restrict__ pfast_fit_input,
0100 riemannFit::CircleFit *circle_fit,
0101 uint32_t offset) const {
0102 ALPAKA_ASSERT_ACC(circle_fit);
0103 ALPAKA_ASSERT_ACC(N <= nHits);
0104
0105
0106
0107
0108 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0109 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0110 auto tuple_idx = local_idx + offset;
0111 if (tuple_idx >= tupleMultiplicity->size(nHits))
0112 break;
0113
0114 riemannFit::Map3xNd<N> hits(phits + local_idx);
0115 riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0116 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0117
0118 riemannFit::VectorNd<N> rad = (hits.block(0, 0, 2, N).colwise().norm());
0119
0120 riemannFit::Matrix2Nd<N> hits_cov = riemannFit::Matrix2Nd<N>::Zero();
0121 riemannFit::loadCovariance2D(acc, hits_ge, hits_cov);
0122
0123 circle_fit[local_idx] =
0124 riemannFit::circleFit(acc, hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true);
0125
0126 #ifdef RIEMANN_DEBUG
0127
0128
0129
0130 #endif
0131 }
0132 }
0133 };
0134
0135 template <int N, typename TrackerTraits>
0136 class Kernel_LineFit {
0137 public:
0138 ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0139 TupleMultiplicity const *__restrict__ tupleMultiplicity,
0140 uint32_t nHits,
0141 double bField,
0142 OutputSoAView results_view,
0143 double *__restrict__ phits,
0144 float *__restrict__ phits_ge,
0145 double *__restrict__ pfast_fit_input,
0146 riemannFit::CircleFit *__restrict__ circle_fit,
0147 uint32_t offset) const {
0148 ALPAKA_ASSERT_ACC(circle_fit);
0149 ALPAKA_ASSERT_ACC(N <= nHits);
0150
0151
0152
0153
0154 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0155 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0156 auto tuple_idx = local_idx + offset;
0157 if (tuple_idx >= tupleMultiplicity->size(nHits))
0158 break;
0159
0160
0161 int32_t tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0162
0163 riemannFit::Map3xNd<N> hits(phits + local_idx);
0164 riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0165 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0166
0167 auto const &line_fit = riemannFit::lineFit(acc, hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true);
0168
0169 riemannFit::fromCircleToPerigee(acc, circle_fit[local_idx]);
0170
0171 reco::copyFromCircle(results_view,
0172 circle_fit[local_idx].par,
0173 circle_fit[local_idx].cov,
0174 line_fit.par,
0175 line_fit.cov,
0176 1.f / float(bField),
0177 tkid);
0178 results_view[tkid].pt() = bField / std::abs(circle_fit[local_idx].par(2));
0179 results_view[tkid].eta() = asinhf(line_fit.par(0));
0180 results_view[tkid].chi2() = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5);
0181
0182 #ifdef RIEMANN_DEBUG
0183 printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0184 N,
0185 nHits,
0186 tkid,
0187 circle_fit[local_idx].par(0),
0188 circle_fit[local_idx].par(1),
0189 circle_fit[local_idx].par(2));
0190 printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1));
0191 printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n",
0192 circle_fit[local_idx].chi2,
0193 line_fit.chi2,
0194 circle_fit[local_idx].cov(0, 0),
0195 circle_fit[local_idx].cov(1, 1),
0196 circle_fit[local_idx].cov(2, 2),
0197 line_fit.cov(0, 0),
0198 line_fit.cov(1, 1));
0199 #endif
0200 }
0201 }
0202 };
0203
0204 template <typename TrackerTraits>
0205 void HelixFit<TrackerTraits>::launchRiemannKernels(const ::reco::TrackingRecHitConstView &hv,
0206 const ::reco::CAModulesConstView &cm,
0207 uint32_t nhits,
0208 uint32_t maxNumberOfTuples,
0209 Queue &queue) {
0210 assert(tuples_);
0211
0212 auto blockSize = 64;
0213 auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;
0214 const auto workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0215 const auto workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0216
0217
0218 auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0219 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double));
0220 auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0221 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float));
0222 auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0223 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0224 auto circle_fit_resultsDevice_holder =
0225 cms::alpakatools::make_device_buffer<char[]>(queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit));
0226 riemannFit::CircleFit *circle_fit_resultsDevice_ =
0227 (riemannFit::CircleFit *)(circle_fit_resultsDevice_holder.data());
0228
0229 for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0230
0231 alpaka::exec<Acc1D>(queue,
0232 workDivTriplets,
0233 Kernel_FastFit<3, TrackerTraits>{},
0234 tuples_,
0235 tupleMultiplicity_,
0236 3,
0237 hv,
0238 cm,
0239 hitsDevice.data(),
0240 hits_geDevice.data(),
0241 fast_fit_resultsDevice.data(),
0242 offset);
0243
0244 alpaka::exec<Acc1D>(queue,
0245 workDivTriplets,
0246 Kernel_CircleFit<3, TrackerTraits>{},
0247 tupleMultiplicity_,
0248 3,
0249 bField_,
0250 hitsDevice.data(),
0251 hits_geDevice.data(),
0252 fast_fit_resultsDevice.data(),
0253 circle_fit_resultsDevice_,
0254 offset);
0255
0256 alpaka::exec<Acc1D>(queue,
0257 workDivTriplets,
0258 Kernel_LineFit<3, TrackerTraits>{},
0259 tupleMultiplicity_,
0260 3,
0261 bField_,
0262 outputSoa_,
0263 hitsDevice.data(),
0264 hits_geDevice.data(),
0265 fast_fit_resultsDevice.data(),
0266 circle_fit_resultsDevice_,
0267 offset);
0268
0269
0270 alpaka::exec<Acc1D>(queue,
0271 workDivQuadsPenta,
0272 Kernel_FastFit<4, TrackerTraits>{},
0273 tuples_,
0274 tupleMultiplicity_,
0275 4,
0276 hv,
0277 cm,
0278 hitsDevice.data(),
0279 hits_geDevice.data(),
0280 fast_fit_resultsDevice.data(),
0281 offset);
0282
0283 alpaka::exec<Acc1D>(queue,
0284 workDivQuadsPenta,
0285 Kernel_CircleFit<4, TrackerTraits>{},
0286 tupleMultiplicity_,
0287 4,
0288 bField_,
0289 hitsDevice.data(),
0290 hits_geDevice.data(),
0291 fast_fit_resultsDevice.data(),
0292 circle_fit_resultsDevice_,
0293 offset);
0294
0295 alpaka::exec<Acc1D>(queue,
0296 workDivQuadsPenta,
0297 Kernel_LineFit<4, TrackerTraits>{},
0298 tupleMultiplicity_,
0299 4,
0300 bField_,
0301 outputSoa_,
0302 hitsDevice.data(),
0303 hits_geDevice.data(),
0304 fast_fit_resultsDevice.data(),
0305 circle_fit_resultsDevice_,
0306 offset);
0307
0308 if (fitNas4_) {
0309
0310 alpaka::exec<Acc1D>(queue,
0311 workDivQuadsPenta,
0312 Kernel_FastFit<4, TrackerTraits>{},
0313 tuples_,
0314 tupleMultiplicity_,
0315 5,
0316 hv,
0317 cm,
0318 hitsDevice.data(),
0319 hits_geDevice.data(),
0320 fast_fit_resultsDevice.data(),
0321 offset);
0322
0323 alpaka::exec<Acc1D>(queue,
0324 workDivQuadsPenta,
0325 Kernel_CircleFit<4, TrackerTraits>{},
0326 tupleMultiplicity_,
0327 5,
0328 bField_,
0329 hitsDevice.data(),
0330 hits_geDevice.data(),
0331 fast_fit_resultsDevice.data(),
0332 circle_fit_resultsDevice_,
0333 offset);
0334
0335 alpaka::exec<Acc1D>(queue,
0336 workDivQuadsPenta,
0337 Kernel_LineFit<4, TrackerTraits>{},
0338 tupleMultiplicity_,
0339 5,
0340 bField_,
0341 outputSoa_,
0342 hitsDevice.data(),
0343 hits_geDevice.data(),
0344 fast_fit_resultsDevice.data(),
0345 circle_fit_resultsDevice_,
0346 offset);
0347 } else {
0348
0349 alpaka::exec<Acc1D>(queue,
0350 workDivQuadsPenta,
0351 Kernel_FastFit<5, TrackerTraits>{},
0352 tuples_,
0353 tupleMultiplicity_,
0354 5,
0355 hv,
0356 cm,
0357 hitsDevice.data(),
0358 hits_geDevice.data(),
0359 fast_fit_resultsDevice.data(),
0360 offset);
0361
0362 alpaka::exec<Acc1D>(queue,
0363 workDivQuadsPenta,
0364 Kernel_CircleFit<5, TrackerTraits>{},
0365 tupleMultiplicity_,
0366 5,
0367 bField_,
0368 hitsDevice.data(),
0369 hits_geDevice.data(),
0370 fast_fit_resultsDevice.data(),
0371 circle_fit_resultsDevice_,
0372 offset);
0373
0374 alpaka::exec<Acc1D>(queue,
0375 workDivQuadsPenta,
0376 Kernel_LineFit<5, TrackerTraits>{},
0377 tupleMultiplicity_,
0378 5,
0379 bField_,
0380 outputSoa_,
0381 hitsDevice.data(),
0382 hits_geDevice.data(),
0383 fast_fit_resultsDevice.data(),
0384 circle_fit_resultsDevice_,
0385 offset);
0386 }
0387 }
0388 }
0389
0390 template class HelixFit<pixelTopology::Phase1>;
0391 template class HelixFit<pixelTopology::Phase2>;
0392 template class HelixFit<pixelTopology::HIonPhase1>;
0393
0394 }