File indexing completed on 2024-04-23 22:56:29
0001
0002
0003
0004 #include <cstdint>
0005
0006 #include <alpaka/alpaka.hpp>
0007
0008 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0011 #include "RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h"
0012
0013 #include "HelixFit.h"
0014
0015 template <typename TrackerTraits>
0016 using Tuples = typename reco::TrackSoA<TrackerTraits>::HitContainer;
0017 template <typename TrackerTraits>
0018 using OutputSoAView = reco::TrackSoAView<TrackerTraits>;
0019 template <typename TrackerTraits>
0020 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0021
0022
0023
0024 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0025 template <int N, typename TrackerTraits>
0026 class Kernel_BLFastFit {
0027 public:
0028 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0029 ALPAKA_FN_ACC void operator()(TAcc const &acc,
0030 Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
0031 TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0032 TrackingRecHitSoAConstView<TrackerTraits> hh,
0033 pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *__restrict__ cpeParams,
0034 typename TrackerTraits::tindex_type *__restrict__ ptkids,
0035 double *__restrict__ phits,
0036 float *__restrict__ phits_ge,
0037 double *__restrict__ pfast_fit,
0038 uint32_t nHitsL,
0039 uint32_t nHitsH,
0040 int32_t offset) const {
0041 constexpr uint32_t hitsInFit = N;
0042 constexpr auto invalidTkId = std::numeric_limits<typename TrackerTraits::tindex_type>::max();
0043
0044 ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL);
0045 ALPAKA_ASSERT_ACC(nHitsL <= nHitsH);
0046 ALPAKA_ASSERT_ACC(phits);
0047 ALPAKA_ASSERT_ACC(pfast_fit);
0048 ALPAKA_ASSERT_ACC(foundNtuplets);
0049 ALPAKA_ASSERT_ACC(tupleMultiplicity);
0050
0051
0052 int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
0053 ALPAKA_ASSERT_ACC(totTK <= int(tupleMultiplicity->size()));
0054 ALPAKA_ASSERT_ACC(totTK >= 0);
0055
0056 #ifdef BROKENLINE_DEBUG
0057 const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
0058 if (cms::alpakatools::once_per_grid(acc)) {
0059 printf("%d total Ntuple\n", tupleMultiplicity->size());
0060 printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
0061 }
0062 #endif
0063 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0064 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0065 auto tuple_idx = local_idx + offset;
0066 if ((int)tuple_idx >= totTK) {
0067 ptkids[local_idx] = invalidTkId;
0068 break;
0069 }
0070
0071 auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
0072 ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0073
0074 ptkids[local_idx] = tkid;
0075
0076 auto nHits = foundNtuplets->size(tkid);
0077
0078 ALPAKA_ASSERT_ACC(nHits >= nHitsL);
0079 ALPAKA_ASSERT_ACC(nHits <= nHitsH);
0080
0081 riemannFit::Map3xNd<N> hits(phits + local_idx);
0082 riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0083 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0084
0085 #ifdef BL_DUMP_HITS
0086 auto &&done = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0087 done = 0;
0088 alpaka::syncBlockThreads(acc);
0089 bool dump =
0090 (foundNtuplets->size(tkid) == 5 && 0 == alpaka::atomicAdd(acc, &done, 1, alpaka::hierarchy::Blocks{}));
0091 #endif
0092
0093
0094 auto const *hitId = foundNtuplets->begin(tkid);
0095
0096
0097 #ifdef YERR_FROM_DC
0098
0099 auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
0100 auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
0101 auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
0102 float ux, uy, uz;
0103 #endif
0104
0105 float incr = std::max(1.f, float(nHits) / float(hitsInFit));
0106 float n = 0;
0107 for (uint32_t i = 0; i < hitsInFit; ++i) {
0108 int j = int(n + 0.5f);
0109 if (hitsInFit - 1 == i)
0110 j = nHits - 1;
0111 ALPAKA_ASSERT_ACC(j < int(nHits));
0112 n += incr;
0113 auto hit = hitId[j];
0114 float ge[6];
0115
0116 #ifdef YERR_FROM_DC
0117 auto const &dp = cpeParams->detParams(hh.detectorIndex(hit));
0118 auto status = hh[hit].chargeAndStatus().status;
0119 int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
0120 ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5);
0121 bool nok = (status.isBigY | status.isOneY);
0122
0123 dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
0124 auto cb = std::abs(uy / uz);
0125 int bin =
0126 int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
0127 int low_value = 0;
0128 int high_value = CPEFastParametrisation::kNumErrorBins - 1;
0129
0130 bin = std::clamp(bin, low_value, high_value);
0131 float yerr = dp.sigmay[bin] * 1.e-4f;
0132 yerr *= dp.yfact[qbin];
0133 yerr *= yerr;
0134 yerr += dp.apeYY;
0135 yerr = nok ? hh[hit].yerrLocal() : yerr;
0136 dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
0137 #else
0138 cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0139 #endif
0140
0141 #ifdef BL_DUMP_HITS
0142 bool dump = foundNtuplets->size(tkid) == 5;
0143 if (dump) {
0144 printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
0145 local_idx,
0146 tkid,
0147 hit,
0148 hh[hit].detectorIndex(),
0149 i,
0150 hh[hit].xGlobal(),
0151 hh[hit].yGlobal(),
0152 hh[hit].zGlobal());
0153 printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
0154 }
0155 #endif
0156
0157 hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0158 hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0159 }
0160 brokenline::fastFit(acc, hits, fast_fit);
0161
0162 #ifdef BROKENLINE_DEBUG
0163
0164 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0165 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0166 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0167 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0168 #endif
0169 }
0170 }
0171 };
0172
0173 template <int N, typename TrackerTraits>
0174 struct Kernel_BLFit {
0175 public:
0176 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0177 ALPAKA_FN_ACC void operator()(TAcc const &acc,
0178 TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0179 double bField,
0180 OutputSoAView<TrackerTraits> results_view,
0181 typename TrackerTraits::tindex_type const *__restrict__ ptkids,
0182 double *__restrict__ phits,
0183 float *__restrict__ phits_ge,
0184 double *__restrict__ pfast_fit) const {
0185 ALPAKA_ASSERT_ACC(results_view.pt());
0186 ALPAKA_ASSERT_ACC(results_view.eta());
0187 ALPAKA_ASSERT_ACC(results_view.chi2());
0188 ALPAKA_ASSERT_ACC(pfast_fit);
0189 constexpr auto invalidTkId = std::numeric_limits<typename TrackerTraits::tindex_type>::max();
0190
0191
0192
0193 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0194 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0195 if (invalidTkId == ptkids[local_idx])
0196 break;
0197 auto tkid = ptkids[local_idx];
0198
0199 ALPAKA_ASSERT_ACC(tkid < TrackerTraits::maxNumberOfTuples);
0200
0201 riemannFit::Map3xNd<N> hits(phits + local_idx);
0202 riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0203 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0204
0205 brokenline::PreparedBrokenLineData<N> data;
0206
0207 brokenline::karimaki_circle_fit circle;
0208 riemannFit::LineFit line;
0209
0210 brokenline::prepareBrokenLineData(acc, hits, fast_fit, bField, data);
0211 brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line);
0212 brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle);
0213
0214 TracksUtilities<TrackerTraits>::copyFromCircle(
0215 results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
0216 results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
0217 results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0));
0218 results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
0219
0220 #ifdef BROKENLINE_DEBUG
0221 if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
0222 printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
0223 printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0224 N,
0225 N,
0226 tkid,
0227 circle.par(0),
0228 circle.par(1),
0229 circle.par(2));
0230 printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
0231 printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n",
0232 circle.chi2,
0233 line.chi2,
0234 circle.cov(0, 0),
0235 circle.cov(1, 1),
0236 circle.cov(2, 2),
0237 line.cov(0, 0),
0238 line.cov(1, 1));
0239 #endif
0240 }
0241 }
0242 };
0243
0244 template <typename TrackerTraits>
0245 void HelixFit<TrackerTraits>::launchBrokenLineKernels(
0246 const TrackingRecHitSoAConstView<TrackerTraits> &hv,
0247 pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *cpeParams,
0248 uint32_t hitsInFit,
0249 uint32_t maxNumberOfTuples,
0250 Queue &queue) {
0251 ALPAKA_ASSERT_ACC(tuples_);
0252
0253 uint32_t blockSize = 64;
0254 uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize);
0255 const WorkDiv1D workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0256 const WorkDiv1D workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0257
0258
0259 auto tkidDevice =
0260 cms::alpakatools::make_device_buffer<typename TrackerTraits::tindex_type[]>(queue, maxNumberOfConcurrentFits_);
0261 auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0262 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double));
0263 auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0264 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float));
0265 auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0266 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0267
0268 for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0269
0270
0271 alpaka::exec<Acc1D>(queue,
0272 workDivTriplets,
0273 Kernel_BLFastFit<3, TrackerTraits>{},
0274 tuples_,
0275 tupleMultiplicity_,
0276 hv,
0277 cpeParams,
0278 tkidDevice.data(),
0279 hitsDevice.data(),
0280 hits_geDevice.data(),
0281 fast_fit_resultsDevice.data(),
0282 3,
0283 3,
0284 offset);
0285
0286 alpaka::exec<Acc1D>(queue,
0287 workDivTriplets,
0288 Kernel_BLFit<3, TrackerTraits>{},
0289 tupleMultiplicity_,
0290 bField_,
0291 outputSoa_,
0292 tkidDevice.data(),
0293 hitsDevice.data(),
0294 hits_geDevice.data(),
0295 fast_fit_resultsDevice.data());
0296
0297 if (fitNas4_) {
0298
0299 riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this,
0300 &hv,
0301 &cpeParams,
0302 &tkidDevice,
0303 &hitsDevice,
0304 &hits_geDevice,
0305 &fast_fit_resultsDevice,
0306 &offset,
0307 &queue,
0308 &workDivQuadsPenta](auto i) {
0309 alpaka::exec<Acc1D>(queue,
0310 workDivQuadsPenta,
0311 Kernel_BLFastFit<4, TrackerTraits>{},
0312 tuples_,
0313 tupleMultiplicity_,
0314 hv,
0315 cpeParams,
0316 tkidDevice.data(),
0317 hitsDevice.data(),
0318 hits_geDevice.data(),
0319 fast_fit_resultsDevice.data(),
0320 4,
0321 4,
0322 offset);
0323
0324 alpaka::exec<Acc1D>(queue,
0325 workDivQuadsPenta,
0326 Kernel_BLFit<4, TrackerTraits>{},
0327 tupleMultiplicity_,
0328 bField_,
0329 outputSoa_,
0330 tkidDevice.data(),
0331 hitsDevice.data(),
0332 hits_geDevice.data(),
0333 fast_fit_resultsDevice.data());
0334 });
0335
0336 } else {
0337 riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this,
0338 &hv,
0339 &cpeParams,
0340 &tkidDevice,
0341 &hitsDevice,
0342 &hits_geDevice,
0343 &fast_fit_resultsDevice,
0344 &offset,
0345 &queue,
0346 &workDivQuadsPenta](auto i) {
0347 alpaka::exec<Acc1D>(queue,
0348 workDivQuadsPenta,
0349 Kernel_BLFastFit<i, TrackerTraits>{},
0350 tuples_,
0351 tupleMultiplicity_,
0352 hv,
0353 cpeParams,
0354 tkidDevice.data(),
0355 hitsDevice.data(),
0356 hits_geDevice.data(),
0357 fast_fit_resultsDevice.data(),
0358 i,
0359 i,
0360 offset);
0361
0362 alpaka::exec<Acc1D>(queue,
0363 workDivQuadsPenta,
0364 Kernel_BLFit<i, TrackerTraits>{},
0365 tupleMultiplicity_,
0366 bField_,
0367 outputSoa_,
0368 tkidDevice.data(),
0369 hitsDevice.data(),
0370 hits_geDevice.data(),
0371 fast_fit_resultsDevice.data());
0372 });
0373
0374 static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack);
0375
0376
0377 alpaka::exec<Acc1D>(queue,
0378 workDivQuadsPenta,
0379 Kernel_BLFastFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>{},
0380 tuples_,
0381 tupleMultiplicity_,
0382 hv,
0383 cpeParams,
0384 tkidDevice.data(),
0385 hitsDevice.data(),
0386 hits_geDevice.data(),
0387 fast_fit_resultsDevice.data(),
0388 TrackerTraits::maxHitsOnTrackForFullFit,
0389 TrackerTraits::maxHitsOnTrack - 1,
0390 offset);
0391
0392 alpaka::exec<Acc1D>(queue,
0393 workDivQuadsPenta,
0394 Kernel_BLFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>{},
0395 tupleMultiplicity_,
0396 bField_,
0397 outputSoa_,
0398 tkidDevice.data(),
0399 hitsDevice.data(),
0400 hits_geDevice.data(),
0401 fast_fit_resultsDevice.data());
0402 }
0403
0404 }
0405 }
0406
0407 template class HelixFit<pixelTopology::Phase1>;
0408 template class HelixFit<pixelTopology::Phase2>;
0409 template class HelixFit<pixelTopology::HIonPhase1>;
0410
0411 }