Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:32

0001 //
0002 // Author: Felice Pantaleo, CERN
0003 //
0004 
0005 //#define BROKENLINE_DEBUG
0006 //#define BL_DUMP_HITS
0007 #include <cstdint>
0008 
0009 #include <cuda_runtime.h>
0010 
0011 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0012 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0014 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
0015 #include "RecoTracker/PixelTrackFitting/interface/BrokenLine.h"
0016 
0017 #include "HelixFitOnGPU.h"
0018 
0019 template <typename TrackerTraits>
0020 using Tuples = typename TrackSoA<TrackerTraits>::HitContainer;
0021 template <typename TrackerTraits>
0022 using OutputSoAView = TrackSoAView<TrackerTraits>;
0023 template <typename TrackerTraits>
0024 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0025 
0026 // #define BL_DUMP_HITS
0027 
0028 template <int N, typename TrackerTraits>
0029 __global__ void kernel_BLFastFit(Tuples<TrackerTraits> const *__restrict__ foundNtuplets,
0030                                  TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0031                                  TrackingRecHitSoAConstView<TrackerTraits> hh,
0032                                  typename TrackerTraits::tindex_type *__restrict__ ptkids,
0033                                  double *__restrict__ phits,
0034                                  float *__restrict__ phits_ge,
0035                                  double *__restrict__ pfast_fit,
0036                                  uint32_t nHitsL,
0037                                  uint32_t nHitsH,
0038                                  int32_t offset) {
0039   constexpr uint32_t hitsInFit = N;
0040   constexpr auto invalidTkId = std::numeric_limits<typename TrackerTraits::tindex_type>::max();
0041 
0042   assert(hitsInFit <= nHitsL);
0043   assert(nHitsL <= nHitsH);
0044   assert(phits);
0045   assert(pfast_fit);
0046   assert(foundNtuplets);
0047   assert(tupleMultiplicity);
0048 
0049   // look in bin for this hit multiplicity
0050   auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
0051   int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
0052   assert(totTK <= int(tupleMultiplicity->size()));
0053   assert(totTK >= 0);
0054 
0055 #ifdef BROKENLINE_DEBUG
0056   if (0 == local_start) {
0057     printf("%d total Ntuple\n", tupleMultiplicity->size());
0058     printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
0059   }
0060 #endif
0061 
0062   for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
0063        local_idx += gridDim.x * blockDim.x) {
0064     int tuple_idx = local_idx + offset;
0065     if (tuple_idx >= totTK) {
0066       ptkids[local_idx] = invalidTkId;
0067       break;
0068     }
0069     // get it from the ntuple container (one to one to helix)
0070     auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
0071     assert(int(tkid) < foundNtuplets->nOnes());
0072 
0073     ptkids[local_idx] = tkid;
0074 
0075     auto nHits = foundNtuplets->size(tkid);
0076 
0077     assert(nHits >= nHitsL);
0078     assert(nHits <= nHitsH);
0079 
0080     riemannFit::Map3xNd<N> hits(phits + local_idx);
0081     riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0082     riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0083 
0084 #ifdef BL_DUMP_HITS
0085     __shared__ int done;
0086     done = 0;
0087     __syncthreads();
0088     bool dump = (foundNtuplets->size(tkid) == 5 && 0 == atomicAdd(&done, 1));
0089 #endif
0090 
0091     // Prepare data structure
0092     auto const *hitId = foundNtuplets->begin(tkid);
0093 
0094     // #define YERR_FROM_DC
0095 #ifdef YERR_FROM_DC
0096     // try to compute more precise error in y
0097     auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
0098     auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
0099     auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
0100     float ux, uy, uz;
0101 #endif
0102 
0103     float incr = std::max(1.f, float(nHits) / float(hitsInFit));
0104     float n = 0;
0105     for (uint32_t i = 0; i < hitsInFit; ++i) {
0106       int j = int(n + 0.5f);  // round
0107       if (hitsInFit - 1 == i)
0108         j = nHits - 1;  // force last hit to ensure max lever arm.
0109       assert(j < int(nHits));
0110       n += incr;
0111       auto hit = hitId[j];
0112       float ge[6];
0113 
0114 #ifdef YERR_FROM_DC
0115       auto const &dp = hh.cpeParams().detParams(hh.detectorIndex(hit));
0116       auto status = hh[hit].chargeAndStatus().status;
0117       int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
0118       assert(qbin >= 0 && qbin < 5);
0119       bool nok = (status.isBigY | status.isOneY);
0120       // compute cotanbeta and use it to recompute error
0121       dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
0122       auto cb = std::abs(uy / uz);
0123       int bin =
0124           int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
0125       int low_value = 0;
0126       int high_value = CPEFastParametrisation::kNumErrorBins - 1;
0127       // return estimated bin value truncated to [0, 15]
0128       bin = std::clamp(bin, low_value, high_value);
0129       float yerr = dp.sigmay[bin] * 1.e-4f;  // toCM
0130       yerr *= dp.yfact[qbin];                // inflate
0131       yerr *= yerr;
0132       yerr += dp.apeYY;
0133       yerr = nok ? hh[hit].yerrLocal() : yerr;
0134       dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
0135 #else
0136       hh.cpeParams().detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
0137 #endif
0138 
0139 #ifdef BL_DUMP_HITS
0140       bool dump = foundNtuplets->size(tkid) == 5;
0141       if (dump) {
0142         printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
0143                local_idx,
0144                tkid,
0145                hit,
0146                hh[hit].detectorIndex(),
0147                i,
0148                hh[hit].xGlobal(),
0149                hh[hit].yGlobal(),
0150                hh[hit].zGlobal());
0151         printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
0152       }
0153 #endif
0154 
0155       hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
0156       hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0157     }
0158     brokenline::fastFit(hits, fast_fit);
0159 
0160     // no NaN here....
0161     assert(fast_fit(0) == fast_fit(0));
0162     assert(fast_fit(1) == fast_fit(1));
0163     assert(fast_fit(2) == fast_fit(2));
0164     assert(fast_fit(3) == fast_fit(3));
0165   }
0166 }
0167 
0168 template <int N, typename TrackerTraits>
0169 __global__ void kernel_BLFit(TupleMultiplicity<TrackerTraits> const *__restrict__ tupleMultiplicity,
0170                              double bField,
0171                              OutputSoAView<TrackerTraits> results_view,
0172                              typename TrackerTraits::tindex_type const *__restrict__ ptkids,
0173                              double *__restrict__ phits,
0174                              float *__restrict__ phits_ge,
0175                              double *__restrict__ pfast_fit) {
0176   assert(results_view.pt());
0177   assert(results_view.eta());
0178   assert(results_view.chi2());
0179   assert(pfast_fit);
0180   constexpr auto invalidTkId = std::numeric_limits<typename TrackerTraits::tindex_type>::max();
0181 
0182   // same as above...
0183   // look in bin for this hit multiplicity
0184   auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
0185   for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
0186        local_idx += gridDim.x * blockDim.x) {
0187     if (invalidTkId == ptkids[local_idx])
0188       break;
0189     auto tkid = ptkids[local_idx];
0190 
0191     assert(tkid < TrackerTraits::maxNumberOfTuples);
0192 
0193     riemannFit::Map3xNd<N> hits(phits + local_idx);
0194     riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0195     riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0196 
0197     brokenline::PreparedBrokenLineData<N> data;
0198 
0199     brokenline::karimaki_circle_fit circle;
0200     riemannFit::LineFit line;
0201 
0202     brokenline::prepareBrokenLineData(hits, fast_fit, bField, data);
0203     brokenline::lineFit(hits_ge, fast_fit, bField, data, line);
0204     brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle);
0205 
0206     TracksUtilities<TrackerTraits>::copyFromCircle(
0207         results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
0208     results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
0209     results_view[tkid].eta() = asinhf(line.par(0));
0210     results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
0211 
0212 #ifdef BROKENLINE_DEBUG
0213     if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
0214       printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
0215     printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0216            N,
0217            N,
0218            tkid,
0219            circle.par(0),
0220            circle.par(1),
0221            circle.par(2));
0222     printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
0223     printf("kernelBLHits chi2 cov %f/%f  %e,%e,%e,%e,%e\n",
0224            circle.chi2,
0225            line.chi2,
0226            circle.cov(0, 0),
0227            circle.cov(1, 1),
0228            circle.cov(2, 2),
0229            line.cov(0, 0),
0230            line.cov(1, 1));
0231 #endif
0232   }
0233 }