File indexing completed on 2025-04-29 02:41:33
0001
0002
0003
0004 #include <cstdint>
0005
0006 #include <alpaka/alpaka.hpp>
0007
0008 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/debug.h"
0011 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0012 #include "RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h"
0013
0014 #include "HelixFit.h"
0015
0016 template <typename TrackerTraits>
0017 using Tuples = typename reco::TrackSoA<TrackerTraits>::HitContainer;
0018 template <typename TrackerTraits>
0019 using OutputSoAView = reco::TrackSoAView<TrackerTraits>;
0020 template <typename TrackerTraits>
0021 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0022
0023
0024
0025 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0026 template <int N, typename TrackerTraits>
0027 class Kernel_BLFastFit {
0028 public:
0029 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0030 ALPAKA_FN_ACC void operator()(TAcc const &acc,
0031 Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
0032 TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0033 TrackingRecHitSoAConstView<TrackerTraits> hh,
0034 pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *__restrict__ cpeParams,
0035 typename TrackerTraits::tindex_type *__restrict__ ptkids,
0036 double *__restrict__ phits,
0037 float *__restrict__ phits_ge,
0038 double *__restrict__ pfast_fit,
0039 uint32_t nHitsL,
0040 uint32_t nHitsH,
0041 int32_t offset) const {
0042 constexpr uint32_t hitsInFit = N;
0043 constexpr auto invalidTkId = std::numeric_limits<typename TrackerTraits::tindex_type>::max();
0044
0045 ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL);
0046 ALPAKA_ASSERT_ACC(nHitsL <= nHitsH);
0047 ALPAKA_ASSERT_ACC(phits);
0048 ALPAKA_ASSERT_ACC(pfast_fit);
0049 ALPAKA_ASSERT_ACC(foundNtuplets);
0050 ALPAKA_ASSERT_ACC(tupleMultiplicity);
0051
0052
0053 int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
0054 ALPAKA_ASSERT_ACC(totTK <= int(tupleMultiplicity->size()));
0055 ALPAKA_ASSERT_ACC(totTK >= 0);
0056
0057 #ifdef BROKENLINE_DEBUG
0058 const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
0059 if (cms::alpakatools::once_per_grid(acc)) {
0060 printf("%d total Ntuple\n", tupleMultiplicity->size());
0061 printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
0062 }
0063 #endif
0064 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0065 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0066 auto tuple_idx = local_idx + offset;
0067 if ((int)tuple_idx >= totTK) {
0068 ptkids[local_idx] = invalidTkId;
0069 break;
0070 }
0071
0072 auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
0073 ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0074
0075 ptkids[local_idx] = tkid;
0076
0077 auto nHits = foundNtuplets->size(tkid);
0078
0079 ALPAKA_ASSERT_ACC(nHits >= nHitsL);
0080 ALPAKA_ASSERT_ACC(nHits <= nHitsH);
0081
0082 riemannFit::Map3xNd<N> hits(phits + local_idx);
0083 riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0084 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0085
0086 #ifdef BL_DUMP_HITS
0087 auto &&done = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0088 done = 0;
0089 alpaka::syncBlockThreads(acc);
0090 bool dump =
0091 (foundNtuplets->size(tkid) == 5 && 0 == alpaka::atomicAdd(acc, &done, 1, alpaka::hierarchy::Blocks{}));
0092 #endif
0093
0094
0095 auto const *hitId = foundNtuplets->begin(tkid);
0096
0097
0098 #ifdef YERR_FROM_DC
0099
0100 auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
0101 auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
0102 auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
0103 float ux, uy, uz;
0104 #endif
0105
0106 float incr = std::max(1.f, float(nHits) / float(hitsInFit));
0107 float n = 0;
0108 for (uint32_t i = 0; i < hitsInFit; ++i) {
0109 int j = int(n + 0.5f);
0110 if (hitsInFit - 1 == i)
0111 j = nHits - 1;
0112 ALPAKA_ASSERT_ACC(j < int(nHits));
0113 n += incr;
0114 auto hit = hitId[j];
0115 float ge[6];
0116
0117 #ifdef YERR_FROM_DC
0118 auto const &dp = cpeParams->detParams(hh.detectorIndex(hit));
0119 auto status = hh[hit].chargeAndStatus().status;
0120 int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
0121 ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5);
0122 bool nok = (status.isBigY | status.isOneY);
0123
0124 dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
0125 auto cb = std::abs(uy / uz);
0126 int bin =
0127 int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
0128 int low_value = 0;
0129 int high_value = CPEFastParametrisation::kNumErrorBins - 1;
0130
0131 bin = std::clamp(bin, low_value, high_value);
0132 float yerr = dp.sigmay[bin] * 1.e-4f;
0133 yerr *= dp.yfact[qbin];
0134 yerr *= yerr;
0135 yerr += dp.apeYY;
0136 yerr = nok ? hh[hit].yerrLocal() : yerr;
0137 dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
0138 #else
0139 cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0140 #endif
0141
0142 #ifdef BL_DUMP_HITS
0143 bool dump = foundNtuplets->size(tkid) == 5;
0144 if (dump) {
0145 printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
0146 local_idx,
0147 tkid,
0148 hit,
0149 hh[hit].detectorIndex(),
0150 i,
0151 hh[hit].xGlobal(),
0152 hh[hit].yGlobal(),
0153 hh[hit].zGlobal());
0154 printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
0155 }
0156 #endif
0157
0158 hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0159 hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0160 }
0161 brokenline::fastFit(acc, hits, fast_fit);
0162
0163 #ifdef BROKENLINE_DEBUG
0164
0165 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0166 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0167 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0168 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0169 #endif
0170 }
0171 }
0172 };
0173
0174 template <int N, typename TrackerTraits>
0175 struct Kernel_BLFit {
0176 public:
0177 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0178 ALPAKA_FN_ACC void operator()(TAcc const &acc,
0179 TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0180 double bField,
0181 OutputSoAView<TrackerTraits> results_view,
0182 typename TrackerTraits::tindex_type const *__restrict__ ptkids,
0183 double *__restrict__ phits,
0184 float *__restrict__ phits_ge,
0185 double *__restrict__ pfast_fit) const {
0186
0187 debug::do_not_optimise(results_view);
0188
0189 ALPAKA_ASSERT_ACC(results_view.pt());
0190 ALPAKA_ASSERT_ACC(results_view.eta());
0191 ALPAKA_ASSERT_ACC(results_view.chi2());
0192 ALPAKA_ASSERT_ACC(pfast_fit);
0193 constexpr auto invalidTkId = std::numeric_limits<typename TrackerTraits::tindex_type>::max();
0194
0195
0196
0197 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0198 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0199 if (invalidTkId == ptkids[local_idx])
0200 break;
0201 auto tkid = ptkids[local_idx];
0202
0203 ALPAKA_ASSERT_ACC(tkid < TrackerTraits::maxNumberOfTuples);
0204
0205 riemannFit::Map3xNd<N> hits(phits + local_idx);
0206 riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0207 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0208
0209 brokenline::PreparedBrokenLineData<N> data;
0210
0211 brokenline::karimaki_circle_fit circle;
0212 riemannFit::LineFit line;
0213
0214 brokenline::prepareBrokenLineData(acc, hits, fast_fit, bField, data);
0215 brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line);
0216 brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle);
0217
0218 TracksUtilities<TrackerTraits>::copyFromCircle(
0219 results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
0220 results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
0221 results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0));
0222 results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
0223
0224 #ifdef BROKENLINE_DEBUG
0225 if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
0226 printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
0227 printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0228 N,
0229 N,
0230 tkid,
0231 circle.par(0),
0232 circle.par(1),
0233 circle.par(2));
0234 printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
0235 printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n",
0236 circle.chi2,
0237 line.chi2,
0238 circle.cov(0, 0),
0239 circle.cov(1, 1),
0240 circle.cov(2, 2),
0241 line.cov(0, 0),
0242 line.cov(1, 1));
0243 #endif
0244 }
0245 }
0246 };
0247
0248 template <typename TrackerTraits>
0249 void HelixFit<TrackerTraits>::launchBrokenLineKernels(
0250 const TrackingRecHitSoAConstView<TrackerTraits> &hv,
0251 pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *cpeParams,
0252 uint32_t hitsInFit,
0253 uint32_t maxNumberOfTuples,
0254 Queue &queue) {
0255 ALPAKA_ASSERT_ACC(tuples_);
0256
0257 uint32_t blockSize = 64;
0258 uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize);
0259 const WorkDiv1D workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0260 const WorkDiv1D workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0261
0262
0263 auto tkidDevice =
0264 cms::alpakatools::make_device_buffer<typename TrackerTraits::tindex_type[]>(queue, maxNumberOfConcurrentFits_);
0265 auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0266 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double));
0267 auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0268 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float));
0269 auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0270 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0271
0272 for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0273
0274
0275 alpaka::exec<Acc1D>(queue,
0276 workDivTriplets,
0277 Kernel_BLFastFit<3, TrackerTraits>{},
0278 tuples_,
0279 tupleMultiplicity_,
0280 hv,
0281 cpeParams,
0282 tkidDevice.data(),
0283 hitsDevice.data(),
0284 hits_geDevice.data(),
0285 fast_fit_resultsDevice.data(),
0286 3,
0287 3,
0288 offset);
0289
0290 alpaka::exec<Acc1D>(queue,
0291 workDivTriplets,
0292 Kernel_BLFit<3, TrackerTraits>{},
0293 tupleMultiplicity_,
0294 bField_,
0295 outputSoa_,
0296 tkidDevice.data(),
0297 hitsDevice.data(),
0298 hits_geDevice.data(),
0299 fast_fit_resultsDevice.data());
0300
0301 if (fitNas4_) {
0302
0303 riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this,
0304 &hv,
0305 &cpeParams,
0306 &tkidDevice,
0307 &hitsDevice,
0308 &hits_geDevice,
0309 &fast_fit_resultsDevice,
0310 &offset,
0311 &queue,
0312 &workDivQuadsPenta](auto i) {
0313 alpaka::exec<Acc1D>(queue,
0314 workDivQuadsPenta,
0315 Kernel_BLFastFit<4, TrackerTraits>{},
0316 tuples_,
0317 tupleMultiplicity_,
0318 hv,
0319 cpeParams,
0320 tkidDevice.data(),
0321 hitsDevice.data(),
0322 hits_geDevice.data(),
0323 fast_fit_resultsDevice.data(),
0324 4,
0325 4,
0326 offset);
0327
0328 alpaka::exec<Acc1D>(queue,
0329 workDivQuadsPenta,
0330 Kernel_BLFit<4, TrackerTraits>{},
0331 tupleMultiplicity_,
0332 bField_,
0333 outputSoa_,
0334 tkidDevice.data(),
0335 hitsDevice.data(),
0336 hits_geDevice.data(),
0337 fast_fit_resultsDevice.data());
0338 });
0339
0340 } else {
0341 riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this,
0342 &hv,
0343 &cpeParams,
0344 &tkidDevice,
0345 &hitsDevice,
0346 &hits_geDevice,
0347 &fast_fit_resultsDevice,
0348 &offset,
0349 &queue,
0350 &workDivQuadsPenta](auto i) {
0351 alpaka::exec<Acc1D>(queue,
0352 workDivQuadsPenta,
0353 Kernel_BLFastFit<i, TrackerTraits>{},
0354 tuples_,
0355 tupleMultiplicity_,
0356 hv,
0357 cpeParams,
0358 tkidDevice.data(),
0359 hitsDevice.data(),
0360 hits_geDevice.data(),
0361 fast_fit_resultsDevice.data(),
0362 i,
0363 i,
0364 offset);
0365
0366 alpaka::exec<Acc1D>(queue,
0367 workDivQuadsPenta,
0368 Kernel_BLFit<i, TrackerTraits>{},
0369 tupleMultiplicity_,
0370 bField_,
0371 outputSoa_,
0372 tkidDevice.data(),
0373 hitsDevice.data(),
0374 hits_geDevice.data(),
0375 fast_fit_resultsDevice.data());
0376 });
0377
0378 static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack);
0379
0380
0381 alpaka::exec<Acc1D>(queue,
0382 workDivQuadsPenta,
0383 Kernel_BLFastFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>{},
0384 tuples_,
0385 tupleMultiplicity_,
0386 hv,
0387 cpeParams,
0388 tkidDevice.data(),
0389 hitsDevice.data(),
0390 hits_geDevice.data(),
0391 fast_fit_resultsDevice.data(),
0392 TrackerTraits::maxHitsOnTrackForFullFit,
0393 TrackerTraits::maxHitsOnTrack - 1,
0394 offset);
0395
0396 alpaka::exec<Acc1D>(queue,
0397 workDivQuadsPenta,
0398 Kernel_BLFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>{},
0399 tupleMultiplicity_,
0400 bField_,
0401 outputSoa_,
0402 tkidDevice.data(),
0403 hitsDevice.data(),
0404 hits_geDevice.data(),
0405 fast_fit_resultsDevice.data());
0406 }
0407
0408 }
0409 }
0410
0411 template class HelixFit<pixelTopology::Phase1>;
0412 template class HelixFit<pixelTopology::Phase2>;
0413 template class HelixFit<pixelTopology::HIonPhase1>;
0414
0415 }