File indexing completed on 2025-07-03 00:42:40
0001
0002
0003
0004 #include <cstdint>
0005
0006 #include <alpaka/alpaka.hpp>
0007
0008 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/debug.h"
0011 #include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h"
0012 #include "RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h"
0013
0014 #include "HelixFit.h"
0015
0016 using OutputSoAView = reco::TrackSoAView;
0017 using TupleMultiplicity = caStructures::GenericContainer;
0018 using Tuples = caStructures::SequentialContainer;
0019
0020 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0021
0022 template <int N>
0023 class Kernel_BLFastFit {
0024 public:
0025 ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0026 Tuples const *__restrict__ foundNtuplets,
0027 TupleMultiplicity const *__restrict__ tupleMultiplicity,
0028 ::reco::TrackingRecHitConstView hh,
0029 ::reco::CAModulesConstView cm,
0030 typename caStructures::tindex_type *__restrict__ ptkids,
0031 double *__restrict__ phits,
0032 float *__restrict__ phits_ge,
0033 double *__restrict__ pfast_fit,
0034 uint32_t nHitsL,
0035 uint32_t nHitsH,
0036 int32_t offset) const {
0037 constexpr uint32_t hitsInFit = N;
0038 constexpr auto invalidTkId = std::numeric_limits<typename caStructures::tindex_type>::max();
0039
0040 ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL);
0041 ALPAKA_ASSERT_ACC(nHitsL <= nHitsH);
0042 ALPAKA_ASSERT_ACC(phits);
0043 ALPAKA_ASSERT_ACC(pfast_fit);
0044 ALPAKA_ASSERT_ACC(foundNtuplets);
0045 ALPAKA_ASSERT_ACC(tupleMultiplicity);
0046
0047
0048 int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
0049 ALPAKA_ASSERT_ACC(totTK <= int(tupleMultiplicity->size()));
0050 ALPAKA_ASSERT_ACC(totTK >= 0);
0051
0052 #ifdef BROKENLINE_DEBUG
0053 if (cms::alpakatools::once_per_grid(acc)) {
0054 printf("%d total Ntuple\n", tupleMultiplicity->size());
0055 printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
0056 }
0057 #endif
0058 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0059 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0060 auto tuple_idx = local_idx + offset;
0061 if ((int)tuple_idx >= totTK) {
0062 ptkids[local_idx] = invalidTkId;
0063 break;
0064 }
0065
0066 auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
0067 ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0068
0069 ptkids[local_idx] = tkid;
0070
0071 auto nHits = foundNtuplets->size(tkid);
0072
0073 ALPAKA_ASSERT_ACC(nHits >= nHitsL);
0074 ALPAKA_ASSERT_ACC(nHits <= nHitsH);
0075
0076 riemannFit::Map3xNd<N> hits(phits + local_idx);
0077 riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0078 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0079
0080 #ifdef BL_DUMP_HITS
0081 auto &&done = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0082 done = 0;
0083 alpaka::syncBlockThreads(acc);
0084 bool dump =
0085 (foundNtuplets->size(tkid) == 5 && 0 == alpaka::atomicAdd(acc, &done, 1, alpaka::hierarchy::Blocks{}));
0086 #endif
0087
0088
0089 auto const *hitId = foundNtuplets->begin(tkid);
0090
0091
0092 #ifdef YERR_FROM_DC
0093
0094 auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
0095 auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
0096 auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
0097 float ux, uy, uz;
0098 #endif
0099
0100 float incr = std::max(1.f, float(nHits) / float(hitsInFit));
0101 float n = 0;
0102 for (uint32_t i = 0; i < hitsInFit; ++i) {
0103 int j = int(n + 0.5f);
0104 if (hitsInFit - 1 == i)
0105 j = nHits - 1;
0106 ALPAKA_ASSERT_ACC(j < int(nHits));
0107 n += incr;
0108 auto hit = hitId[j];
0109 float ge[6];
0110
0111 #ifdef YERR_FROM_DC
0112 auto const &dp = cm->detParams(hh.detectorIndex(hit));
0113 auto status = hh[hit].chargeAndStatus().status;
0114 int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
0115 ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5);
0116 bool nok = (status.isBigY | status.isOneY);
0117
0118 dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
0119 auto cb = std::abs(uy / uz);
0120 int bin =
0121 int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
0122 int low_value = 0;
0123 int high_value = CPEFastParametrisation::kNumErrorBins - 1;
0124
0125 bin = std::clamp(bin, low_value, high_value);
0126 float yerr = dp.sigmay[bin] * 1.e-4f;
0127 yerr *= dp.yfact[qbin];
0128 yerr *= yerr;
0129 yerr += dp.apeYY;
0130 yerr = nok ? hh[hit].yerrLocal() : yerr;
0131 dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
0132 #else
0133 auto const &frame = cm.detFrame(hh.detectorIndex(hit));
0134 frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0135 #endif
0136
0137 #ifdef BL_DUMP_HITS
0138 bool dump = foundNtuplets->size(tkid) == 5;
0139 if (dump) {
0140 printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
0141 local_idx,
0142 tkid,
0143 hit,
0144 hh[hit].detectorIndex(),
0145 i,
0146 hh[hit].xGlobal(),
0147 hh[hit].yGlobal(),
0148 hh[hit].zGlobal());
0149 printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
0150 }
0151 #endif
0152
0153 hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0154 hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0155 }
0156 brokenline::fastFit(acc, hits, fast_fit);
0157
0158 #ifdef BROKENLINE_DEBUG
0159
0160 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0161 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0162 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0163 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0164 #endif
0165 }
0166 }
0167 };
0168
0169 template <int N, typename TrackerTraits>
0170 struct Kernel_BLFit {
0171 public:
0172 ALPAKA_FN_ACC void operator()(Acc1D const &acc,
0173 TupleMultiplicity const *__restrict__ tupleMultiplicity,
0174 double bField,
0175 OutputSoAView results_view,
0176 typename caStructures::tindex_type const *__restrict__ ptkids,
0177 double *__restrict__ phits,
0178 float *__restrict__ phits_ge,
0179 double *__restrict__ pfast_fit) const {
0180
0181 debug::do_not_optimise(results_view);
0182
0183 ALPAKA_ASSERT_ACC(results_view.pt());
0184 ALPAKA_ASSERT_ACC(results_view.eta());
0185 ALPAKA_ASSERT_ACC(results_view.chi2());
0186 ALPAKA_ASSERT_ACC(pfast_fit);
0187
0188 constexpr auto invalidTkId = std::numeric_limits<typename caStructures::tindex_type>::max();
0189
0190
0191
0192 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0193 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0194 if (invalidTkId == ptkids[local_idx])
0195 break;
0196 auto tkid = ptkids[local_idx];
0197
0198 ALPAKA_ASSERT_ACC(int(tkid) < tupleMultiplicity->capacity());
0199
0200 riemannFit::Map3xNd<N> hits(phits + local_idx);
0201 riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0202 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0203
0204 brokenline::PreparedBrokenLineData<N> data;
0205
0206 brokenline::karimaki_circle_fit circle;
0207 riemannFit::LineFit line;
0208
0209 brokenline::prepareBrokenLineData(acc, hits, fast_fit, bField, data);
0210 brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line);
0211 brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle);
0212
0213 reco::copyFromCircle(results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
0214 results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
0215 results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0));
0216 results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
0217
0218 #ifdef BROKENLINE_DEBUG
0219 if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
0220 printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
0221 printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0222 N,
0223 N,
0224 tkid,
0225 circle.par(0),
0226 circle.par(1),
0227 circle.par(2));
0228 printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
0229 printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n",
0230 circle.chi2,
0231 line.chi2,
0232 circle.cov(0, 0),
0233 circle.cov(1, 1),
0234 circle.cov(2, 2),
0235 line.cov(0, 0),
0236 line.cov(1, 1));
0237 #endif
0238 }
0239 }
0240 };
0241
0242 template <typename TrackerTraits>
0243 void HelixFit<TrackerTraits>::launchBrokenLineKernels(const ::reco::TrackingRecHitConstView &hv,
0244 const ::reco::CAModulesConstView &cm,
0245 uint32_t hitsInFit,
0246 uint32_t maxNumberOfTuples,
0247 Queue &queue) {
0248 ALPAKA_ASSERT_ACC(tuples_);
0249
0250 #ifdef GPU_DEBUG
0251 alpaka::wait(queue);
0252 std::cout << "Starting HelixFit<TrackerTraits>::launchBrokenLineKernels" << std::endl;
0253 #endif
0254
0255 uint32_t blockSize = 64;
0256 uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize);
0257 const WorkDiv1D workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0258 const WorkDiv1D workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0259
0260
0261 auto tkidDevice =
0262 cms::alpakatools::make_device_buffer<typename caStructures::tindex_type[]>(queue, maxNumberOfConcurrentFits_);
0263 auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0264 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double));
0265 auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0266 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float));
0267 auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0268 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0269
0270 for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0271
0272
0273 alpaka::exec<Acc1D>(queue,
0274 workDivTriplets,
0275 Kernel_BLFastFit<3>{},
0276 tuples_,
0277 tupleMultiplicity_,
0278 hv,
0279 cm,
0280 tkidDevice.data(),
0281 hitsDevice.data(),
0282 hits_geDevice.data(),
0283 fast_fit_resultsDevice.data(),
0284 3,
0285 3,
0286 offset);
0287 #ifdef GPU_DEBUG
0288 alpaka::wait(queue);
0289 std::cout << "Kernel_BLFastFit(3) -> done! " << std::endl;
0290 #endif
0291 alpaka::exec<Acc1D>(queue,
0292 workDivTriplets,
0293 Kernel_BLFit<3, TrackerTraits>{},
0294 tupleMultiplicity_,
0295 bField_,
0296 outputSoa_,
0297 tkidDevice.data(),
0298 hitsDevice.data(),
0299 hits_geDevice.data(),
0300 fast_fit_resultsDevice.data());
0301 #ifdef GPU_DEBUG
0302 alpaka::wait(queue);
0303 std::cout << "Kernel_BLFit(3) -> done! " << std::endl;
0304 #endif
0305
0306 if (fitNas4_) {
0307
0308 riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this,
0309 &hv,
0310 &cm,
0311 &tkidDevice,
0312 &hitsDevice,
0313 &hits_geDevice,
0314 &fast_fit_resultsDevice,
0315 &offset,
0316 &queue,
0317 &workDivQuadsPenta](auto i) {
0318 alpaka::exec<Acc1D>(queue,
0319 workDivQuadsPenta,
0320 Kernel_BLFastFit<4>{},
0321 tuples_,
0322 tupleMultiplicity_,
0323 hv,
0324 cm,
0325 tkidDevice.data(),
0326 hitsDevice.data(),
0327 hits_geDevice.data(),
0328 fast_fit_resultsDevice.data(),
0329 4,
0330 4,
0331 offset);
0332
0333 alpaka::exec<Acc1D>(queue,
0334 workDivQuadsPenta,
0335 Kernel_BLFit<4, TrackerTraits>{},
0336 tupleMultiplicity_,
0337 bField_,
0338 outputSoa_,
0339 tkidDevice.data(),
0340 hitsDevice.data(),
0341 hits_geDevice.data(),
0342 fast_fit_resultsDevice.data());
0343 });
0344 } else {
0345 riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this,
0346 &hv,
0347 &cm,
0348 &tkidDevice,
0349 &hitsDevice,
0350 &hits_geDevice,
0351 &fast_fit_resultsDevice,
0352 &offset,
0353 &queue,
0354 &workDivQuadsPenta](auto i) {
0355 alpaka::exec<Acc1D>(queue,
0356 workDivQuadsPenta,
0357 Kernel_BLFastFit<i>{},
0358 tuples_,
0359 tupleMultiplicity_,
0360 hv,
0361 cm,
0362 tkidDevice.data(),
0363 hitsDevice.data(),
0364 hits_geDevice.data(),
0365 fast_fit_resultsDevice.data(),
0366 i,
0367 i,
0368 offset);
0369
0370 alpaka::exec<Acc1D>(queue,
0371 workDivQuadsPenta,
0372 Kernel_BLFit<i, TrackerTraits>{},
0373 tupleMultiplicity_,
0374 bField_,
0375 outputSoa_,
0376 tkidDevice.data(),
0377 hitsDevice.data(),
0378 hits_geDevice.data(),
0379 fast_fit_resultsDevice.data());
0380 #ifdef GPU_DEBUG
0381 alpaka::wait(queue);
0382 std::cout << "Kernel_BLFastFit(" << i << ") and Kernel_BLFit(" << i << ") -> done! " << std::endl;
0383 #endif
0384 });
0385
0386 static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack);
0387
0388
0389 alpaka::exec<Acc1D>(queue,
0390 workDivQuadsPenta,
0391 Kernel_BLFastFit<TrackerTraits::maxHitsOnTrackForFullFit>{},
0392 tuples_,
0393 tupleMultiplicity_,
0394 hv,
0395 cm,
0396 tkidDevice.data(),
0397 hitsDevice.data(),
0398 hits_geDevice.data(),
0399 fast_fit_resultsDevice.data(),
0400 TrackerTraits::maxHitsOnTrackForFullFit,
0401 TrackerTraits::maxHitsOnTrack - 1,
0402 offset);
0403
0404 alpaka::exec<Acc1D>(queue,
0405 workDivQuadsPenta,
0406 Kernel_BLFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>{},
0407 tupleMultiplicity_,
0408 bField_,
0409 outputSoa_,
0410 tkidDevice.data(),
0411 hitsDevice.data(),
0412 hits_geDevice.data(),
0413 fast_fit_resultsDevice.data());
0414 }
0415
0416 }
0417 }
0418
0419 template class HelixFit<pixelTopology::Phase1>;
0420 template class HelixFit<pixelTopology::Phase2>;
0421 template class HelixFit<pixelTopology::HIonPhase1>;
0422
0423 }