Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-12-02 03:39:31

0001 //
0002 // Author: Felice Pantaleo, CERN
0003 //
0004 
0005 // #define BROKENLINE_DEBUG
0006 
0007 #include <cstdint>
0008 
0009 #include <cuda_runtime.h>
0010 
0011 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
0012 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0014 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
0015 #include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h"
0016 
0017 #include "HelixFitOnGPU.h"
0018 
0019 using HitsOnGPU = TrackingRecHit2DSOAView;
0020 using Tuples = pixelTrack::HitContainer;
0021 using OutputSoA = pixelTrack::TrackSoA;
0022 using tindex_type = caConstants::tindex_type;
0023 constexpr auto invalidTkId = std::numeric_limits<tindex_type>::max();
0024 
0025 // #define BL_DUMP_HITS
0026 
0027 template <int N>
0028 __global__ void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets,
0029                                  caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity,
0030                                  HitsOnGPU const *__restrict__ hhp,
0031                                  tindex_type *__restrict__ ptkids,
0032                                  double *__restrict__ phits,
0033                                  float *__restrict__ phits_ge,
0034                                  double *__restrict__ pfast_fit,
0035                                  uint32_t nHitsL,
0036                                  uint32_t nHitsH,
0037                                  int32_t offset) {
0038   constexpr uint32_t hitsInFit = N;
0039 
0040   assert(hitsInFit <= nHitsL);
0041   assert(nHitsL <= nHitsH);
0042   assert(hhp);
0043   assert(phits);
0044   assert(pfast_fit);
0045   assert(foundNtuplets);
0046   assert(tupleMultiplicity);
0047 
0048   // look in bin for this hit multiplicity
0049   auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
0050   int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
0051   assert(totTK <= int(tupleMultiplicity->size()));
0052   assert(totTK >= 0);
0053 
0054 #ifdef BROKENLINE_DEBUG
0055   if (0 == local_start) {
0056     printf("%d total Ntuple\n", tupleMultiplicity->size());
0057     printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
0058   }
0059 #endif
0060 
0061   for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
0062        local_idx += gridDim.x * blockDim.x) {
0063     int tuple_idx = local_idx + offset;
0064     if (tuple_idx >= totTK) {
0065       ptkids[local_idx] = invalidTkId;
0066       break;
0067     }
0068     // get it from the ntuple container (one to one to helix)
0069     auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
0070     assert(tkid < foundNtuplets->nOnes());
0071 
0072     ptkids[local_idx] = tkid;
0073 
0074     auto nHits = foundNtuplets->size(tkid);
0075 
0076     assert(nHits >= nHitsL);
0077     assert(nHits <= nHitsH);
0078 
0079     riemannFit::Map3xNd<N> hits(phits + local_idx);
0080     riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0081     riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0082 
0083 #ifdef BL_DUMP_HITS
0084     __shared__ int done;
0085     done = 0;
0086     __syncthreads();
0087     bool dump = (foundNtuplets->size(tkid) == 5 && 0 == atomicAdd(&done, 1));
0088 #endif
0089 
0090     // Prepare data structure
0091     auto const *hitId = foundNtuplets->begin(tkid);
0092 
0093     // #define YERR_FROM_DC
0094 #ifdef YERR_FROM_DC
0095     // try to compute more precise error in y
0096     auto dx = hhp->xGlobal(hitId[hitsInFit - 1]) - hhp->xGlobal(hitId[0]);
0097     auto dy = hhp->yGlobal(hitId[hitsInFit - 1]) - hhp->yGlobal(hitId[0]);
0098     auto dz = hhp->zGlobal(hitId[hitsInFit - 1]) - hhp->zGlobal(hitId[0]);
0099     float ux, uy, uz;
0100 #endif
0101 
0102     float incr = std::max(1.f, float(nHits) / float(hitsInFit));
0103     float n = 0;
0104     for (uint32_t i = 0; i < hitsInFit; ++i) {
0105       int j = int(n + 0.5f);  // round
0106       if (hitsInFit - 1 == i)
0107         j = nHits - 1;  // force last hit to ensure max lever arm.
0108       assert(j < int(nHits));
0109       n += incr;
0110       auto hit = hitId[j];
0111       float ge[6];
0112 
0113 #ifdef YERR_FROM_DC
0114       auto const &dp = hhp->cpeParams().detParams(hhp->detectorIndex(hit));
0115       auto status = hhp->status(hit);
0116       int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
0117       assert(qbin >= 0 && qbin < 5);
0118       bool nok = (status.isBigY | status.isOneY);
0119       // compute cotanbeta and use it to recompute error
0120       dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
0121       auto cb = std::abs(uy / uz);
0122       int bin =
0123           int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
0124       int low_value = 0;
0125       int high_value = CPEFastParametrisation::kNumErrorBins - 1;
0126       // return estimated bin value truncated to [0, 15]
0127       bin = std::clamp(bin, low_value, high_value);
0128       float yerr = dp.sigmay[bin] * 1.e-4f;  // toCM
0129       yerr *= dp.yfact[qbin];                // inflate
0130       yerr *= yerr;
0131       yerr += dp.apeYY;
0132       yerr = nok ? hhp->yerrLocal(hit) : yerr;
0133       dp.frame.toGlobal(hhp->xerrLocal(hit), 0, yerr, ge);
0134 #else
0135       hhp->cpeParams()
0136           .detParams(hhp->detectorIndex(hit))
0137           .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge);
0138 #endif
0139 
0140 #ifdef BL_DUMP_HITS
0141       bool dump = foundNtuplets->size(tkid) == 5;
0142       if (dump) {
0143         printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
0144                local_idx,
0145                tkid,
0146                hit,
0147                hhp->detectorIndex(hit),
0148                i,
0149                hhp->xGlobal(hit),
0150                hhp->yGlobal(hit),
0151                hhp->zGlobal(hit));
0152         printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
0153       }
0154 #endif
0155 
0156       hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit);
0157       hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
0158     }
0159     brokenline::fastFit(hits, fast_fit);
0160 
0161     // no NaN here....
0162     assert(fast_fit(0) == fast_fit(0));
0163     assert(fast_fit(1) == fast_fit(1));
0164     assert(fast_fit(2) == fast_fit(2));
0165     assert(fast_fit(3) == fast_fit(3));
0166   }
0167 }
0168 
0169 template <int N>
0170 __global__ void kernel_BLFit(caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity,
0171                              double bField,
0172                              OutputSoA *results,
0173                              tindex_type const *__restrict__ ptkids,
0174                              double *__restrict__ phits,
0175                              float *__restrict__ phits_ge,
0176                              double *__restrict__ pfast_fit) {
0177   assert(results);
0178   assert(pfast_fit);
0179 
0180   // same as above...
0181 
0182   // look in bin for this hit multiplicity
0183   auto local_start = blockIdx.x * blockDim.x + threadIdx.x;
0184   for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt;
0185        local_idx += gridDim.x * blockDim.x) {
0186     if (invalidTkId == ptkids[local_idx])
0187       break;
0188 
0189     auto tkid = ptkids[local_idx];
0190 
0191     assert(tkid < caConstants::maxTuples);
0192 
0193     riemannFit::Map3xNd<N> hits(phits + local_idx);
0194     riemannFit::Map4d fast_fit(pfast_fit + local_idx);
0195     riemannFit::Map6xNf<N> hits_ge(phits_ge + local_idx);
0196 
0197     brokenline::PreparedBrokenLineData<N> data;
0198 
0199     brokenline::karimaki_circle_fit circle;
0200     riemannFit::LineFit line;
0201 
0202     brokenline::prepareBrokenLineData(hits, fast_fit, bField, data);
0203     brokenline::lineFit(hits_ge, fast_fit, bField, data, line);
0204     brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle);
0205 
0206     results->stateAtBS.copyFromCircle(circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
0207     results->pt(tkid) = float(bField) / float(std::abs(circle.par(2)));
0208     results->eta(tkid) = asinhf(line.par(0));
0209     results->chi2(tkid) = (circle.chi2 + line.chi2) / (2 * N - 5);
0210 
0211 #ifdef BROKENLINE_DEBUG
0212     if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
0213       printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
0214     printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
0215            N,
0216            nHits,
0217            tkid,
0218            circle.par(0),
0219            circle.par(1),
0220            circle.par(2));
0221     printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
0222     printf("kernelBLHits chi2 cov %f/%f  %e,%e,%e,%e,%e\n",
0223            circle.chi2,
0224            line.chi2,
0225            circle.cov(0, 0),
0226            circle.cov(1, 1),
0227            circle.cov(2, 2),
0228            line.cov(0, 0),
0229            line.cov(1, 1));
0230 #endif
0231   }
0232 }