File indexing completed on 2024-04-23 22:56:30
0001 #include <cstdint>
0002
0003 #include <alpaka/alpaka.hpp>
0004
0005 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0006 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0009 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0010 #include "RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h"
0011
0012 #include "HelixFit.h"
0013 #include "CAStructures.h"
0014
0015 template <typename TrackerTraits>
0016 using Tuples = typename reco::TrackSoA<TrackerTraits>::HitContainer;
0017 template <typename TrackerTraits>
0018 using OutputSoAView = reco::TrackSoAView<TrackerTraits>;
0019 template <typename TrackerTraits>
0020 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0021
0022 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0023 using namespace alpaka;
0024 using namespace cms::alpakatools;
0025
0026 template <int N, typename TrackerTraits>
0027 class Kernel_FastFit {
0028 public:
0029 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0030 ALPAKA_FN_ACC void operator()(TAcc const &acc,
0031 Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
0032 TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0033 uint32_t nHits,
0034 TrackingRecHitSoAConstView<TrackerTraits> hh,
0035 pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *__restrict__ cpeParams,
0036 double *__restrict__ phits,
0037 float *__restrict__ phits_ge,
0038 double *__restrict__ pfast_fit,
0039 uint32_t offset) const {
0040 constexpr uint32_t hitsInFit = N;
0041
0042 ALPAKA_ASSERT_ACC(hitsInFit <= nHits);
0043
0044 ALPAKA_ASSERT_ACC(pfast_fit);
0045 ALPAKA_ASSERT_ACC(foundNtuplets);
0046 ALPAKA_ASSERT_ACC(tupleMultiplicity);
0047
0048
0049
0050 #ifdef RIEMANN_DEBUG
0051 const uint32_t threadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
0052 if (cms::alpakatools::once_per_grid(acc))
0053 printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit);
0054 #endif
0055
0056 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0057 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0058 auto tuple_idx = local_idx + offset;
0059 if (tuple_idx >= tupleMultiplicity->size(nHits))
0060 break;
0061
0062
0063 auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0064 ALPAKA_ASSERT_ACC(static_cast<int>(tkid) < foundNtuplets->nOnes());
0065
0066 ALPAKA_ASSERT_ACC(foundNtuplets->size(tkid) == nHits);
0067
0068 riemannFit::Map3xNd<N> hits(phits + local_idx);
0069 riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0070 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0071
0072
0073 auto const *hitId = foundNtuplets->begin(tkid);
0074 for (unsigned int i = 0; i < hitsInFit; ++i) {
0075 auto hit = hitId[i];
0076 float ge[6];
0077 cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0078
0079 hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0080 hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0081 }
0082 riemannFit::fastFit(acc, hits, fast_fit);
0083
0084 #ifdef RIEMANN_DEBUG
0085
0086 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(0)));
0087 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(1)));
0088 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(2)));
0089 ALPAKA_ASSERT_ACC(not alpaka::math::isnan(acc, fast_fit(3)));
0090 #endif
0091 }
0092 }
0093 };
0094
0095 template <int N, typename TrackerTraits>
0096 class Kernel_CircleFit {
0097 public:
0098 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0099 ALPAKA_FN_ACC void operator()(TAcc const &acc,
0100 TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0101 uint32_t nHits,
0102 double bField,
0103 double *__restrict__ phits,
0104 float *__restrict__ phits_ge,
0105 double *__restrict__ pfast_fit_input,
0106 riemannFit::CircleFit *circle_fit,
0107 uint32_t offset) const {
0108 ALPAKA_ASSERT_ACC(circle_fit);
0109 ALPAKA_ASSERT_ACC(N <= nHits);
0110
0111
0112
0113
0114 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0115 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0116 auto tuple_idx = local_idx + offset;
0117 if (tuple_idx >= tupleMultiplicity->size(nHits))
0118 break;
0119
0120 riemannFit::Map3xNd<N> hits(phits + local_idx);
0121 riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0122 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0123
0124 riemannFit::VectorNd<N> rad = (hits.block(0, 0, 2, N).colwise().norm());
0125
0126 riemannFit::Matrix2Nd<N> hits_cov = riemannFit::Matrix2Nd<N>::Zero();
0127 riemannFit::loadCovariance2D(acc, hits_ge, hits_cov);
0128
0129 circle_fit[local_idx] =
0130 riemannFit::circleFit(acc, hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true);
0131
0132 #ifdef RIEMANN_DEBUG
0133
0134
0135
0136 #endif
0137 }
0138 }
0139 };
0140
0141 template <int N, typename TrackerTraits>
0142 class Kernel_LineFit {
0143 public:
0144 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0145 ALPAKA_FN_ACC void operator()(TAcc const &acc,
0146 TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0147 uint32_t nHits,
0148 double bField,
0149 OutputSoAView<TrackerTraits> results_view,
0150 double *__restrict__ phits,
0151 float *__restrict__ phits_ge,
0152 double *__restrict__ pfast_fit_input,
0153 riemannFit::CircleFit *__restrict__ circle_fit,
0154 uint32_t offset) const {
0155 ALPAKA_ASSERT_ACC(circle_fit);
0156 ALPAKA_ASSERT_ACC(N <= nHits);
0157
0158
0159
0160
0161 const auto nt = riemannFit::maxNumberOfConcurrentFits;
0162 for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) {
0163 auto tuple_idx = local_idx + offset;
0164 if (tuple_idx >= tupleMultiplicity->size(nHits))
0165 break;
0166
0167
0168 int32_t tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx);
0169
0170 riemannFit::Map3xNd<N> hits(phits + local_idx);
0171 riemannFit::Map4d fast_fit(pfast_fit_input + local_idx);
0172 riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0173
0174 auto const &line_fit = riemannFit::lineFit(acc, hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true);
0175
0176 riemannFit::fromCircleToPerigee(acc, circle_fit[local_idx]);
0177
0178 TracksUtilities<TrackerTraits>::copyFromCircle(results_view,
0179 circle_fit[local_idx].par,
0180 circle_fit[local_idx].cov,
0181 line_fit.par,
0182 line_fit.cov,
0183 1.f / float(bField),
0184 tkid);
0185 results_view[tkid].pt() = bField / std::abs(circle_fit[local_idx].par(2));
0186 results_view[tkid].eta() = asinhf(line_fit.par(0));
0187 results_view[tkid].chi2() = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5);
0188
0189 #ifdef RIEMANN_DEBUG
0190 printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0191 N,
0192 nHits,
0193 tkid,
0194 circle_fit[local_idx].par(0),
0195 circle_fit[local_idx].par(1),
0196 circle_fit[local_idx].par(2));
0197 printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1));
0198 printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n",
0199 circle_fit[local_idx].chi2,
0200 line_fit.chi2,
0201 circle_fit[local_idx].cov(0, 0),
0202 circle_fit[local_idx].cov(1, 1),
0203 circle_fit[local_idx].cov(2, 2),
0204 line_fit.cov(0, 0),
0205 line_fit.cov(1, 1));
0206 #endif
0207 }
0208 }
0209 };
0210
0211 template <typename TrackerTraits>
0212 void HelixFit<TrackerTraits>::launchRiemannKernels(const TrackingRecHitSoAConstView<TrackerTraits> &hv,
0213 pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const *cpeParams,
0214 uint32_t nhits,
0215 uint32_t maxNumberOfTuples,
0216 Queue &queue) {
0217 assert(tuples_);
0218
0219 auto blockSize = 64;
0220 auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;
0221 const auto workDivTriplets = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0222 const auto workDivQuadsPenta = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks / 4, blockSize);
0223
0224
0225 auto hitsDevice = cms::alpakatools::make_device_buffer<double[]>(
0226 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double));
0227 auto hits_geDevice = cms::alpakatools::make_device_buffer<float[]>(
0228 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float));
0229 auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer<double[]>(
0230 queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
0231 auto circle_fit_resultsDevice_holder =
0232 cms::alpakatools::make_device_buffer<char[]>(queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit));
0233 riemannFit::CircleFit *circle_fit_resultsDevice_ =
0234 (riemannFit::CircleFit *)(circle_fit_resultsDevice_holder.data());
0235
0236 for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0237
0238 alpaka::exec<Acc1D>(queue,
0239 workDivTriplets,
0240 Kernel_FastFit<3, TrackerTraits>{},
0241 tuples_,
0242 tupleMultiplicity_,
0243 3,
0244 hv,
0245 cpeParams,
0246 hitsDevice.data(),
0247 hits_geDevice.data(),
0248 fast_fit_resultsDevice.data(),
0249 offset);
0250
0251 alpaka::exec<Acc1D>(queue,
0252 workDivTriplets,
0253 Kernel_CircleFit<3, TrackerTraits>{},
0254 tupleMultiplicity_,
0255 3,
0256 bField_,
0257 hitsDevice.data(),
0258 hits_geDevice.data(),
0259 fast_fit_resultsDevice.data(),
0260 circle_fit_resultsDevice_,
0261 offset);
0262
0263 alpaka::exec<Acc1D>(queue,
0264 workDivTriplets,
0265 Kernel_LineFit<3, TrackerTraits>{},
0266 tupleMultiplicity_,
0267 3,
0268 bField_,
0269 outputSoa_,
0270 hitsDevice.data(),
0271 hits_geDevice.data(),
0272 fast_fit_resultsDevice.data(),
0273 circle_fit_resultsDevice_,
0274 offset);
0275
0276
0277 alpaka::exec<Acc1D>(queue,
0278 workDivQuadsPenta,
0279 Kernel_FastFit<4, TrackerTraits>{},
0280 tuples_,
0281 tupleMultiplicity_,
0282 4,
0283 hv,
0284 cpeParams,
0285 hitsDevice.data(),
0286 hits_geDevice.data(),
0287 fast_fit_resultsDevice.data(),
0288 offset);
0289
0290 alpaka::exec<Acc1D>(queue,
0291 workDivQuadsPenta,
0292 Kernel_CircleFit<4, TrackerTraits>{},
0293 tupleMultiplicity_,
0294 4,
0295 bField_,
0296 hitsDevice.data(),
0297 hits_geDevice.data(),
0298 fast_fit_resultsDevice.data(),
0299 circle_fit_resultsDevice_,
0300 offset);
0301
0302 alpaka::exec<Acc1D>(queue,
0303 workDivQuadsPenta,
0304 Kernel_LineFit<4, TrackerTraits>{},
0305 tupleMultiplicity_,
0306 4,
0307 bField_,
0308 outputSoa_,
0309 hitsDevice.data(),
0310 hits_geDevice.data(),
0311 fast_fit_resultsDevice.data(),
0312 circle_fit_resultsDevice_,
0313 offset);
0314
0315 if (fitNas4_) {
0316
0317 alpaka::exec<Acc1D>(queue,
0318 workDivQuadsPenta,
0319 Kernel_FastFit<4, TrackerTraits>{},
0320 tuples_,
0321 tupleMultiplicity_,
0322 5,
0323 hv,
0324 cpeParams,
0325 hitsDevice.data(),
0326 hits_geDevice.data(),
0327 fast_fit_resultsDevice.data(),
0328 offset);
0329
0330 alpaka::exec<Acc1D>(queue,
0331 workDivQuadsPenta,
0332 Kernel_CircleFit<4, TrackerTraits>{},
0333 tupleMultiplicity_,
0334 5,
0335 bField_,
0336 hitsDevice.data(),
0337 hits_geDevice.data(),
0338 fast_fit_resultsDevice.data(),
0339 circle_fit_resultsDevice_,
0340 offset);
0341
0342 alpaka::exec<Acc1D>(queue,
0343 workDivQuadsPenta,
0344 Kernel_LineFit<4, TrackerTraits>{},
0345 tupleMultiplicity_,
0346 5,
0347 bField_,
0348 outputSoa_,
0349 hitsDevice.data(),
0350 hits_geDevice.data(),
0351 fast_fit_resultsDevice.data(),
0352 circle_fit_resultsDevice_,
0353 offset);
0354 } else {
0355
0356 alpaka::exec<Acc1D>(queue,
0357 workDivQuadsPenta,
0358 Kernel_FastFit<5, TrackerTraits>{},
0359 tuples_,
0360 tupleMultiplicity_,
0361 5,
0362 hv,
0363 cpeParams,
0364 hitsDevice.data(),
0365 hits_geDevice.data(),
0366 fast_fit_resultsDevice.data(),
0367 offset);
0368
0369 alpaka::exec<Acc1D>(queue,
0370 workDivQuadsPenta,
0371 Kernel_CircleFit<5, TrackerTraits>{},
0372 tupleMultiplicity_,
0373 5,
0374 bField_,
0375 hitsDevice.data(),
0376 hits_geDevice.data(),
0377 fast_fit_resultsDevice.data(),
0378 circle_fit_resultsDevice_,
0379 offset);
0380
0381 alpaka::exec<Acc1D>(queue,
0382 workDivQuadsPenta,
0383 Kernel_LineFit<5, TrackerTraits>{},
0384 tupleMultiplicity_,
0385 5,
0386 bField_,
0387 outputSoa_,
0388 hitsDevice.data(),
0389 hits_geDevice.data(),
0390 fast_fit_resultsDevice.data(),
0391 circle_fit_resultsDevice_,
0392 offset);
0393 }
0394 }
0395 }
0396
0397 template class HelixFit<pixelTopology::Phase1>;
0398 template class HelixFit<pixelTopology::Phase2>;
0399 template class HelixFit<pixelTopology::HIonPhase1>;
0400
0401 }