Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-31 23:02:15

0001 //
0002 // Original Author: Felice Pantaleo, CERN
0003 //
0004 
0005 // #define NTUPLE_DEBUG
0006 // #define GPU_DEBUG
0007 
0008 #include <cmath>
0009 #include <cstdint>
0010 #include <limits>
0011 
0012 #include <cuda_runtime.h>
0013 
0014 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0015 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0016 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
0017 
0018 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0019 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0020 
0021 #include "CAStructures.h"
0022 #include "CAHitNtupletGeneratorKernels.h"
0023 #include "GPUCACell.h"
0024 #include "gpuFishbone.h"
0025 #include "gpuPixelDoublets.h"
0026 
0027 namespace caHitNtupletGeneratorKernels {
0028 
0029   constexpr uint32_t tkNotFound = std::numeric_limits<uint16_t>::max();
0030   constexpr float maxScore = std::numeric_limits<float>::max();
0031   constexpr float nSigma2 = 25.f;
0032 
0033   //all of these below are mostly to avoid brining around the relative namespace
0034 
0035   template <typename TrackerTraits>
0036   using HitToTuple = caStructures::HitToTupleT<TrackerTraits>;
0037 
0038   template <typename TrackerTraits>
0039   using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0040 
0041   template <typename TrackerTraits>
0042   using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0043 
0044   template <typename TrackerTraits>
0045   using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0046 
0047   template <typename TrackerTraits>
0048   using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0049 
0050   using Quality = pixelTrack::Quality;
0051 
0052   template <typename TrackerTraits>
0053   using TkSoAView = TrackSoAView<TrackerTraits>;
0054 
0055   template <typename TrackerTraits>
0056   using HitContainer = typename TrackSoA<TrackerTraits>::HitContainer;
0057 
0058   template <typename TrackerTraits>
0059   using HitsConstView = typename GPUCACellT<TrackerTraits>::HitsConstView;
0060 
0061   template <typename TrackerTraits>
0062   using QualityCuts = pixelTrack::QualityCutsT<TrackerTraits>;
0063 
0064   template <typename TrackerTraits>
0065   using CAParams = caHitNtupletGenerator::CAParamsT<TrackerTraits>;
0066 
0067   using Counters = caHitNtupletGenerator::Counters;
0068 
0069   template <typename TrackerTraits>
0070   __global__ void kernel_checkOverflows(TkSoAView<TrackerTraits> tracks_view,
0071                                         TupleMultiplicity<TrackerTraits> const *tupleMultiplicity,
0072                                         HitToTuple<TrackerTraits> const *hitToTuple,
0073                                         cms::cuda::AtomicPairCounter *apc,
0074                                         GPUCACellT<TrackerTraits> const *__restrict__ cells,
0075                                         uint32_t const *__restrict__ nCells,
0076                                         CellNeighborsVector<TrackerTraits> const *cellNeighbors,
0077                                         CellTracksVector<TrackerTraits> const *cellTracks,
0078                                         OuterHitOfCell<TrackerTraits> const isOuterHitOfCell,
0079                                         int32_t nHits,
0080                                         uint32_t maxNumberOfDoublets,
0081                                         Counters *counters) {
0082     auto first = threadIdx.x + blockIdx.x * blockDim.x;
0083 
0084     auto &c = *counters;
0085     // counters once per event
0086     if (0 == first) {
0087       atomicAdd(&c.nEvents, 1);
0088       atomicAdd(&c.nHits, nHits);
0089       atomicAdd(&c.nCells, *nCells);
0090       atomicAdd(&c.nTuples, apc->get().m);
0091       atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
0092     }
0093 
0094 #ifdef NTUPLE_DEBUG
0095     if (0 == first) {
0096       printf("number of found cells %d \n found tuples %d with total hits %d out of %d %d\n",
0097              *nCells,
0098              apc->get().m,
0099              apc->get().n,
0100              nHits,
0101              hitToTuple->totOnes());
0102       if (apc->get().m < TrackerTraits::maxNumberOfQuadruplets) {
0103         assert(tracks_view.hitIndices().size(apc->get().m) == 0);
0104         assert(tracks_view.hitIndices().size() == apc->get().n);
0105       }
0106     }
0107 
0108     for (int idx = first, nt = tracks_view.hitIndices().nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
0109       if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack)  // current real limit
0110         printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
0111       assert(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
0112       for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
0113         assert(int(*ih) < nHits);
0114     }
0115 #endif
0116 
0117     if (0 == first) {
0118       if (apc->get().m >= TrackerTraits::maxNumberOfQuadruplets)
0119         printf("Tuples overflow\n");
0120       if (*nCells >= maxNumberOfDoublets)
0121         printf("Cells overflow\n");
0122       if (cellNeighbors && cellNeighbors->full())
0123         printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
0124       if (cellTracks && cellTracks->full())
0125         printf("cellTracks overflow\n");
0126       if (int(hitToTuple->nOnes()) < nHits)
0127         printf("ERROR hitToTuple  overflow %d %d\n", hitToTuple->nOnes(), nHits);
0128 #ifdef GPU_DEBUG
0129       printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
0130              cellNeighbors->size(),
0131              cellTracks->size(),
0132              hitToTuple->size());
0133 // printf("cellTracksSizes;");
0134 // for (int i = 0; i < cellTracks->size(); i++) {
0135 //   printf("%d;",cellTracks[i].size());
0136 // }
0137 //
0138 // printf("\n");
0139 // printf("cellNeighborsSizes;");
0140 // for (int i = 0; i < cellNeighbors->size(); i++) {
0141 //   printf("%d;",cellNeighbors[i].size());
0142 // }
0143 // printf("\n");
0144 #endif
0145     }
0146 
0147     for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0148       auto const &thisCell = cells[idx];
0149       if (thisCell.hasFishbone() && !thisCell.isKilled())
0150         atomicAdd(&c.nFishCells, 1);
0151       if (thisCell.outerNeighbors().full())  //++tooManyNeighbors[thisCell.theLayerPairId];
0152         printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
0153       if (thisCell.tracks().full())  //++tooManyTracks[thisCell.theLayerPairId];
0154         printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
0155       if (thisCell.isKilled())
0156         atomicAdd(&c.nKilledCells, 1);
0157       if (!thisCell.unused())
0158         atomicAdd(&c.nEmptyCells, 1);
0159       if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
0160         atomicAdd(&c.nZeroTrackCells, 1);
0161     }
0162 
0163     for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) {
0164       if (isOuterHitOfCell.container[idx].full())  // ++tooManyOuterHitOfCell;
0165         printf("OuterHitOfCell overflow %d\n", idx);
0166     }
0167   }
0168 
0169   template <typename TrackerTraits>
0170   __global__ void kernel_fishboneCleaner(GPUCACellT<TrackerTraits> const *cells,
0171                                          uint32_t const *__restrict__ nCells,
0172                                          TkSoAView<TrackerTraits> tracks_view) {
0173     constexpr auto reject = pixelTrack::Quality::dup;
0174 
0175     auto first = threadIdx.x + blockIdx.x * blockDim.x;
0176     for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0177       auto const &thisCell = cells[idx];
0178       if (!thisCell.isKilled())
0179         continue;
0180 
0181       for (auto it : thisCell.tracks())
0182         tracks_view[it].quality() = reject;
0183     }
0184   }
0185 
0186   // remove shorter tracks if sharing a cell
0187   // It does not seem to affect efficiency in any way!
0188   template <typename TrackerTraits>
0189   __global__ void kernel_earlyDuplicateRemover(GPUCACellT<TrackerTraits> const *cells,
0190                                                uint32_t const *__restrict__ nCells,
0191                                                TkSoAView<TrackerTraits> tracks_view,
0192                                                bool dupPassThrough) {
0193     // quality to mark rejected
0194     constexpr auto reject = pixelTrack::Quality::edup;  /// cannot be loose
0195 
0196     assert(nCells);
0197     auto first = threadIdx.x + blockIdx.x * blockDim.x;
0198     for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0199       auto const &thisCell = cells[idx];
0200 
0201       if (thisCell.tracks().size() < 2)
0202         continue;
0203 
0204       int8_t maxNl = 0;
0205 
0206       // find maxNl
0207       for (auto it : thisCell.tracks()) {
0208         auto nl = tracks_view[it].nLayers();
0209         maxNl = std::max(nl, maxNl);
0210       }
0211 
0212       // if (maxNl<4) continue;
0213       // quad pass through (leave it her for tests)
0214       //  maxNl = std::min(4, maxNl);
0215 
0216       for (auto it : thisCell.tracks()) {
0217         if (tracks_view[it].nLayers() < maxNl)
0218           tracks_view[it].quality() = reject;  //no race:  simple assignment of the same constant
0219       }
0220     }
0221   }
0222 
0223   // assume the above (so, short tracks already removed)
0224   template <typename TrackerTraits>
0225   __global__ void kernel_fastDuplicateRemover(GPUCACellT<TrackerTraits> const *__restrict__ cells,
0226                                               uint32_t const *__restrict__ nCells,
0227                                               TkSoAView<TrackerTraits> tracks_view,
0228                                               bool dupPassThrough) {
0229     // quality to mark rejected
0230     auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0231     constexpr auto loose = pixelTrack::Quality::loose;
0232 
0233     assert(nCells);
0234 
0235     auto first = threadIdx.x + blockIdx.x * blockDim.x;
0236     for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0237       auto const &thisCell = cells[idx];
0238       if (thisCell.tracks().size() < 2)
0239         continue;
0240 
0241       float mc = maxScore;
0242       uint16_t im = tkNotFound;
0243 
0244       auto score = [&](auto it) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
0245 
0246       // full crazy combinatorics
0247       // full crazy combinatorics
0248       int ntr = thisCell.tracks().size();
0249       for (int i = 0; i < ntr - 1; ++i) {
0250         auto it = thisCell.tracks()[i];
0251         auto qi = tracks_view[it].quality();
0252         if (qi <= reject)
0253           continue;
0254         auto opi = tracks_view[it].state()(2);
0255         auto e2opi = tracks_view[it].covariance()(9);
0256         auto cti = tracks_view[it].state()(3);
0257         auto e2cti = tracks_view[it].covariance()(12);
0258         for (auto j = i + 1; j < ntr; ++j) {
0259           auto jt = thisCell.tracks()[j];
0260           auto qj = tracks_view[jt].quality();
0261           if (qj <= reject)
0262             continue;
0263           auto opj = tracks_view[jt].state()(2);
0264           auto ctj = tracks_view[jt].state()(3);
0265           auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0266           if ((cti - ctj) * (cti - ctj) > dct)
0267             continue;
0268           auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0269           if ((opi - opj) * (opi - opj) > dop)
0270             continue;
0271           if ((qj < qi) || (qj == qi && score(it) < score(jt)))
0272             tracks_view[jt].quality() = reject;
0273           else {
0274             tracks_view[it].quality() = reject;
0275             break;
0276           }
0277         }
0278       }
0279 
0280       // find maxQual
0281       auto maxQual = reject;  // no duplicate!
0282       for (auto it : thisCell.tracks()) {
0283         if (tracks_view[it].quality() > maxQual)
0284           maxQual = tracks_view[it].quality();
0285       }
0286 
0287       if (maxQual <= loose)
0288         continue;
0289 
0290       // find min score
0291       for (auto it : thisCell.tracks()) {
0292         if (tracks_view[it].quality() == maxQual && score(it) < mc) {
0293           mc = score(it);
0294           im = it;
0295         }
0296       }
0297 
0298       if (tkNotFound == im)
0299         continue;
0300 
0301       // mark all other duplicates  (not yet, keep it loose)
0302       for (auto it : thisCell.tracks()) {
0303         if (tracks_view[it].quality() > loose && it != im)
0304           tracks_view[it].quality() = loose;  //no race:  simple assignment of the same constant
0305       }
0306     }
0307   }
0308 
0309   template <typename TrackerTraits>
0310   __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
0311                                  cms::cuda::AtomicPairCounter *apc2,  // just to zero them,
0312                                  HitsConstView<TrackerTraits> hh,
0313                                  GPUCACellT<TrackerTraits> *cells,
0314                                  uint32_t const *__restrict__ nCells,
0315                                  CellNeighborsVector<TrackerTraits> *cellNeighbors,
0316                                  OuterHitOfCell<TrackerTraits> const isOuterHitOfCell,
0317                                  CAParams<TrackerTraits> params) {
0318     using Cell = GPUCACellT<TrackerTraits>;
0319 
0320     auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
0321     auto first = threadIdx.x;
0322     auto stride = blockDim.x;
0323 
0324     if (0 == (firstCellIndex + first)) {
0325       (*apc1) = 0;
0326       (*apc2) = 0;
0327     }  // ready for next kernel
0328 
0329     constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex;
0330     constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex;
0331     for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
0332       auto cellIndex = idx;
0333       auto &thisCell = cells[idx];
0334       auto innerHitId = thisCell.inner_hit_id();
0335       if (int(innerHitId) < isOuterHitOfCell.offset)
0336         continue;
0337       int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
0338       auto vi = isOuterHitOfCell[innerHitId].data();
0339 
0340       auto ri = thisCell.inner_r(hh);
0341       auto zi = thisCell.inner_z(hh);
0342 
0343       auto ro = thisCell.outer_r(hh);
0344       auto zo = thisCell.outer_z(hh);
0345       auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex;
0346 
0347       for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
0348         auto otherCell = __ldg(vi + j);
0349         auto &oc = cells[otherCell];
0350         auto r1 = oc.inner_r(hh);
0351         auto z1 = oc.inner_z(hh);
0352         bool aligned = Cell::areAlignedRZ(
0353             r1,
0354             z1,
0355             ri,
0356             zi,
0357             ro,
0358             zo,
0359             params.ptmin_,
0360             isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_);  // 2.f*thetaCut); // FIXME tune cuts
0361         if (aligned && thisCell.dcaCut(hh,
0362                                        oc,
0363                                        oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_
0364                                                                                    : params.dcaCutOuterTriplet_,
0365                                        params.hardCurvCut_)) {  // FIXME tune cuts
0366           oc.addOuterNeighbor(cellIndex, *cellNeighbors);
0367           thisCell.setStatusBits(Cell::StatusBit::kUsed);
0368           oc.setStatusBits(Cell::StatusBit::kUsed);
0369         }
0370       }  // loop on inner cells
0371     }    // loop on outer cells
0372   }
0373 
0374   template <typename TrackerTraits>
0375   __global__ void kernel_find_ntuplets(HitsConstView<TrackerTraits> hh,
0376                                        TkSoAView<TrackerTraits> tracks_view,
0377                                        GPUCACellT<TrackerTraits> *__restrict__ cells,
0378                                        uint32_t const *nCells,
0379                                        CellTracksVector<TrackerTraits> *cellTracks,
0380                                        cms::cuda::AtomicPairCounter *apc,
0381                                        CAParams<TrackerTraits> params) {
0382     // recursive: not obvious to widen
0383 
0384     using Cell = GPUCACellT<TrackerTraits>;
0385 
0386     auto first = threadIdx.x + blockIdx.x * blockDim.x;
0387 
0388 #ifdef GPU_DEBUG
0389     if (first == 0)
0390       printf("starting producing ntuplets from %d cells \n", *nCells);
0391 #endif
0392     for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0393       auto const &thisCell = cells[idx];
0394 
0395       if (thisCell.isKilled())
0396         continue;  // cut by earlyFishbone
0397 
0398       // we require at least three hits...
0399       if (thisCell.outerNeighbors().empty())
0400         continue;
0401 
0402       auto pid = thisCell.layerPairId();
0403       bool doit = params.startingLayerPair(pid);
0404 
0405       constexpr uint32_t maxDepth = TrackerTraits::maxDepth;
0406       if (doit) {
0407         typename Cell::TmpTuple stack;
0408         stack.reset();
0409 
0410         bool bpix1Start = params.startAt0(pid);
0411 
0412         thisCell.template find_ntuplets<maxDepth>(hh,
0413                                                   cells,
0414                                                   *cellTracks,
0415                                                   tracks_view.hitIndices(),
0416                                                   *apc,
0417                                                   tracks_view.quality(),
0418                                                   stack,
0419                                                   params.minHitsPerNtuplet_,
0420                                                   bpix1Start);
0421 
0422         assert(stack.empty());
0423       }
0424     }
0425   }
0426   template <typename TrackerTraits>
0427   __global__ void kernel_mark_used(GPUCACellT<TrackerTraits> *__restrict__ cells, uint32_t const *nCells) {
0428     auto first = threadIdx.x + blockIdx.x * blockDim.x;
0429     using Cell = GPUCACellT<TrackerTraits>;
0430     for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0431       auto &thisCell = cells[idx];
0432       if (!thisCell.tracks().empty())
0433         thisCell.setStatusBits(Cell::StatusBit::kInTrack);
0434     }
0435   }
0436 
0437   template <typename TrackerTraits>
0438   __global__ void kernel_countMultiplicity(TkSoAView<TrackerTraits> tracks_view,
0439                                            TupleMultiplicity<TrackerTraits> *tupleMultiplicity) {
0440     auto first = blockIdx.x * blockDim.x + threadIdx.x;
0441     for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0442       auto nhits = tracks_view.hitIndices().size(it);
0443       if (nhits < 3)
0444         continue;
0445       if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0446         continue;
0447       assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0448       if (nhits > TrackerTraits::maxHitsOnTrack)  // current limit
0449         printf("wrong mult %d %d\n", it, nhits);
0450       assert(nhits <= TrackerTraits::maxHitsOnTrack);
0451       tupleMultiplicity->count(nhits);
0452     }
0453   }
0454 
0455   template <typename TrackerTraits>
0456   __global__ void kernel_fillMultiplicity(TkSoAView<TrackerTraits> tracks_view,
0457                                           TupleMultiplicity<TrackerTraits> *tupleMultiplicity) {
0458     auto first = blockIdx.x * blockDim.x + threadIdx.x;
0459     for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0460       auto nhits = tracks_view.hitIndices().size(it);
0461       if (nhits < 3)
0462         continue;
0463       if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0464         continue;
0465       assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0466       if (nhits > TrackerTraits::maxHitsOnTrack)
0467         printf("wrong mult %d %d\n", it, nhits);
0468       assert(nhits <= TrackerTraits::maxHitsOnTrack);
0469       tupleMultiplicity->fill(nhits, it);
0470     }
0471   }
0472 
0473   ///TODO : why there was quality here?
0474   template <typename TrackerTraits>
0475   __global__ void kernel_classifyTracks(TkSoAView<TrackerTraits> tracks_view, QualityCuts<TrackerTraits> cuts) {
0476     // Quality *__restrict__ quality) {
0477     int first = blockDim.x * blockIdx.x + threadIdx.x;
0478     for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0479       auto nhits = tracks_view.hitIndices().size(it);
0480       if (nhits == 0)
0481         break;  // guard
0482 
0483       // if duplicate: not even fit
0484       if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0485         continue;
0486 
0487       assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0488 
0489       // mark doublets as bad
0490       if (nhits < 3)
0491         continue;
0492 
0493       // if the fit has any invalid parameters, mark it as bad
0494       bool isNaN = false;
0495       for (int i = 0; i < 5; ++i) {
0496         isNaN |= std::isnan(tracks_view[it].state()(i));
0497       }
0498       if (isNaN) {
0499 #ifdef NTUPLE_DEBUG
0500         printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
0501 #endif
0502         continue;
0503       }
0504 
0505       tracks_view[it].quality() = pixelTrack::Quality::strict;
0506 
0507       if (cuts.strictCut(tracks_view, it))
0508         continue;
0509 
0510       tracks_view[it].quality() = pixelTrack::Quality::tight;
0511 
0512       if (cuts.isHP(tracks_view, nhits, it))
0513         tracks_view[it].quality() = pixelTrack::Quality::highPurity;
0514     }
0515   }
0516 
0517   template <typename TrackerTraits>
0518   __global__ void kernel_doStatsForTracks(TkSoAView<TrackerTraits> tracks_view, Counters *counters) {
0519     int first = blockDim.x * blockIdx.x + threadIdx.x;
0520     for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0521       if (tracks_view.hitIndices().size(idx) == 0)
0522         break;  //guard
0523       if (tracks_view[idx].quality() < pixelTrack::Quality::loose)
0524         continue;
0525       atomicAdd(&(counters->nLooseTracks), 1);
0526       if (tracks_view[idx].quality() < pixelTrack::Quality::strict)
0527         continue;
0528       atomicAdd(&(counters->nGoodTracks), 1);
0529     }
0530   }
0531 
0532   template <typename TrackerTraits>
0533   __global__ void kernel_countHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
0534     int first = blockDim.x * blockIdx.x + threadIdx.x;
0535     for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0536       if (tracks_view.hitIndices().size(idx) == 0)
0537         break;  // guard
0538       for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0539         hitToTuple->count(*h);
0540     }
0541   }
0542 
0543   template <typename TrackerTraits>
0544   __global__ void kernel_fillHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
0545     int first = blockDim.x * blockIdx.x + threadIdx.x;
0546     for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0547       if (tracks_view.hitIndices().size(idx) == 0)
0548         break;  // guard
0549       for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0550         hitToTuple->fill(*h, idx);
0551     }
0552   }
0553 
0554   template <typename TrackerTraits>
0555   __global__ void kernel_fillHitDetIndices(TkSoAView<TrackerTraits> tracks_view, HitsConstView<TrackerTraits> hh) {
0556     int first = blockDim.x * blockIdx.x + threadIdx.x;
0557     // copy offsets
0558     for (int idx = first, ntot = tracks_view.hitIndices().totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0559       tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
0560     }
0561     // fill hit indices
0562     auto nhits = hh.nHits();
0563 
0564     for (int idx = first, ntot = tracks_view.hitIndices().size(); idx < ntot; idx += gridDim.x * blockDim.x) {
0565       assert(tracks_view.hitIndices().content[idx] < nhits);
0566       tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
0567     }
0568   }
0569 
0570   template <typename TrackerTraits>
0571   __global__ void kernel_fillNLayers(TkSoAView<TrackerTraits> tracks_view, cms::cuda::AtomicPairCounter *apc) {
0572     auto first = blockIdx.x * blockDim.x + threadIdx.x;
0573     // clamp the number of tracks to the capacity of the SoA
0574     auto ntracks = std::min<int>(apc->get().m, tracks_view.metadata().size() - 1);
0575     if (0 == first)
0576       tracks_view.nTracks() = ntracks;
0577     for (int idx = first, nt = ntracks; idx < nt; idx += gridDim.x * blockDim.x) {
0578       auto nHits = TracksUtilities<TrackerTraits>::nHits(tracks_view, idx);
0579       assert(nHits >= 3);
0580       tracks_view[idx].nLayers() = TracksUtilities<TrackerTraits>::computeNumberOfLayers(tracks_view, idx);
0581     }
0582   }
0583 
0584   template <typename TrackerTraits>
0585   __global__ void kernel_doStatsForHitInTracks(HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
0586                                                Counters *counters) {
0587     auto &c = *counters;
0588     int first = blockDim.x * blockIdx.x + threadIdx.x;
0589     for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0590       if (hitToTuple->size(idx) == 0)
0591         continue;  // SHALL NOT BE break
0592       atomicAdd(&c.nUsedHits, 1);
0593       if (hitToTuple->size(idx) > 1)
0594         atomicAdd(&c.nDupHits, 1);
0595     }
0596   }
0597 
0598   template <typename TrackerTraits>
0599   __global__ void kernel_countSharedHit(int *__restrict__ nshared,
0600                                         HitContainer<TrackerTraits> const *__restrict__ ptuples,
0601                                         Quality const *__restrict__ quality,
0602                                         HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0603     constexpr auto loose = pixelTrack::Quality::loose;
0604 
0605     auto &hitToTuple = *phitToTuple;
0606     auto const &foundNtuplets = *ptuples;
0607 
0608     int first = blockDim.x * blockIdx.x + threadIdx.x;
0609     for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0610       if (hitToTuple.size(idx) < 2)
0611         continue;
0612 
0613       int nt = 0;
0614 
0615       // count "good" tracks
0616       for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0617         if (quality[*it] < loose)
0618           continue;
0619         ++nt;
0620       }
0621 
0622       if (nt < 2)
0623         continue;
0624 
0625       // now mark  each track triplet as sharing a hit
0626       for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0627         if (foundNtuplets.size(*it) > 3)
0628           continue;
0629         atomicAdd(&nshared[*it], 1);
0630       }
0631 
0632     }  //  hit loop
0633   }
0634 
0635   template <typename TrackerTraits>
0636   __global__ void kernel_markSharedHit(int const *__restrict__ nshared,
0637                                        HitContainer<TrackerTraits> const *__restrict__ tuples,
0638                                        Quality *__restrict__ quality,
0639                                        bool dupPassThrough) {
0640     // constexpr auto bad = pixelTrack::Quality::bad;
0641     constexpr auto dup = pixelTrack::Quality::dup;
0642     constexpr auto loose = pixelTrack::Quality::loose;
0643     // constexpr auto strict = pixelTrack::Quality::strict;
0644 
0645     // quality to mark rejected
0646     auto const reject = dupPassThrough ? loose : dup;
0647 
0648     int first = blockDim.x * blockIdx.x + threadIdx.x;
0649     for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0650       if (tuples->size(idx) == 0)
0651         break;  //guard
0652       if (quality[idx] <= reject)
0653         continue;
0654       if (nshared[idx] > 2)
0655         quality[idx] = reject;
0656     }
0657   }
0658 
0659   // mostly for very forward triplets.....
0660   template <typename TrackerTraits>
0661   __global__ void kernel_rejectDuplicate(TkSoAView<TrackerTraits> tracks_view,
0662                                          uint16_t nmin,
0663                                          bool dupPassThrough,
0664                                          HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0665     // quality to mark rejected
0666     auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0667 
0668     auto &hitToTuple = *phitToTuple;
0669 
0670     int first = blockDim.x * blockIdx.x + threadIdx.x;
0671     for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0672       if (hitToTuple.size(idx) < 2)
0673         continue;
0674 
0675       auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
0676 
0677       // full combinatorics
0678       for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
0679         auto const it = *ip;
0680         auto qi = tracks_view[it].quality();
0681         if (qi <= reject)
0682           continue;
0683         auto opi = tracks_view[it].state()(2);
0684         auto e2opi = tracks_view[it].covariance()(9);
0685         auto cti = tracks_view[it].state()(3);
0686         auto e2cti = tracks_view[it].covariance()(12);
0687         auto nli = tracks_view[it].nLayers();
0688         for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
0689           auto const jt = *jp;
0690           auto qj = tracks_view[jt].quality();
0691           if (qj <= reject)
0692             continue;
0693           auto opj = tracks_view[jt].state()(2);
0694           auto ctj = tracks_view[jt].state()(3);
0695           auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0696           if ((cti - ctj) * (cti - ctj) > dct)
0697             continue;
0698           auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0699           if ((opi - opj) * (opi - opj) > dop)
0700             continue;
0701           auto nlj = tracks_view[jt].nLayers();
0702           if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
0703             tracks_view[jt].quality() = reject;
0704           else {
0705             tracks_view[it].quality() = reject;
0706             break;
0707           }
0708         }
0709       }
0710     }
0711   }
0712 
0713   template <typename TrackerTraits>
0714   __global__ void kernel_sharedHitCleaner(HitsConstView<TrackerTraits> hh,
0715                                           TkSoAView<TrackerTraits> tracks_view,
0716                                           int nmin,
0717                                           bool dupPassThrough,
0718                                           HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0719     // quality to mark rejected
0720     auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0721     // quality of longest track
0722     auto const longTqual = pixelTrack::Quality::highPurity;
0723 
0724     auto &hitToTuple = *phitToTuple;
0725 
0726     int l1end = hh.hitsLayerStart()[1];
0727 
0728     int first = blockDim.x * blockIdx.x + threadIdx.x;
0729     for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0730       if (hitToTuple.size(idx) < 2)
0731         continue;
0732 
0733       int8_t maxNl = 0;
0734 
0735       // find maxNl
0736       for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0737         if (tracks_view[*it].quality() < longTqual)
0738           continue;
0739         // if (tracks_view[*it].nHits()==3) continue;
0740         auto nl = tracks_view[*it].nLayers();
0741         maxNl = std::max(nl, maxNl);
0742       }
0743 
0744       if (maxNl < 4)
0745         continue;
0746 
0747       // quad pass through (leave for tests)
0748       // maxNl = std::min(4, maxNl);
0749 
0750       // kill all tracks shorter than maxHl (only triplets???
0751       for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0752         auto nl = tracks_view[*it].nLayers();
0753 
0754         //checking if shared hit is on bpix1 and if the tuple is short enough
0755         if (idx < l1end and nl > nmin)
0756           continue;
0757 
0758         if (nl < maxNl && tracks_view[*it].quality() > reject)
0759           tracks_view[*it].quality() = reject;
0760       }
0761     }
0762   }
0763 
0764   template <typename TrackerTraits>
0765   __global__ void kernel_tripletCleaner(TkSoAView<TrackerTraits> tracks_view,
0766                                         uint16_t nmin,
0767                                         bool dupPassThrough,
0768                                         HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0769     // quality to mark rejected
0770     auto const reject = pixelTrack::Quality::loose;
0771     /// min quality of good
0772     auto const good = pixelTrack::Quality::strict;
0773 
0774     auto &hitToTuple = *phitToTuple;
0775 
0776     int first = blockDim.x * blockIdx.x + threadIdx.x;
0777     for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0778       if (hitToTuple.size(idx) < 2)
0779         continue;
0780 
0781       float mc = maxScore;
0782       uint16_t im = tkNotFound;
0783       bool onlyTriplets = true;
0784 
0785       // check if only triplets
0786       for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0787         if (tracks_view[*it].quality() <= good)
0788           continue;
0789         onlyTriplets &= TracksUtilities<TrackerTraits>::isTriplet(tracks_view, *it);
0790         if (!onlyTriplets)
0791           break;
0792       }
0793 
0794       // only triplets
0795       if (!onlyTriplets)
0796         continue;
0797 
0798       // for triplets choose best tip!  (should we first find best quality???)
0799       for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0800         auto const it = *ip;
0801         if (tracks_view[it].quality() >= good && std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
0802           mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
0803           im = it;
0804         }
0805       }
0806 
0807       if (tkNotFound == im)
0808         continue;
0809 
0810       // mark worse ambiguities
0811       for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0812         auto const it = *ip;
0813         if (tracks_view[it].quality() > reject && it != im)
0814           tracks_view[it].quality() = reject;  //no race:  simple assignment of the same constant
0815       }
0816 
0817     }  // loop over hits
0818   }
0819 
0820   template <typename TrackerTraits>
0821   __global__ void kernel_simpleTripletCleaner(TkSoAView<TrackerTraits> tracks_view,
0822                                               uint16_t nmin,
0823                                               bool dupPassThrough,
0824                                               HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0825     // quality to mark rejected
0826     auto const reject = pixelTrack::Quality::loose;
0827     /// min quality of good
0828     auto const good = pixelTrack::Quality::loose;
0829 
0830     auto &hitToTuple = *phitToTuple;
0831 
0832     int first = blockDim.x * blockIdx.x + threadIdx.x;
0833     for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0834       if (hitToTuple.size(idx) < 2)
0835         continue;
0836 
0837       float mc = maxScore;
0838       uint16_t im = tkNotFound;
0839 
0840       // choose best tip!  (should we first find best quality???)
0841       for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0842         auto const it = *ip;
0843         if (tracks_view[it].quality() >= good && std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
0844           mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
0845           im = it;
0846         }
0847       }
0848 
0849       if (tkNotFound == im)
0850         continue;
0851 
0852       // mark worse ambiguities
0853       for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0854         auto const it = *ip;
0855         if (tracks_view[it].quality() > reject && TracksUtilities<TrackerTraits>::isTriplet(tracks_view, it) &&
0856             it != im)
0857           tracks_view[it].quality() = reject;  //no race:  simple assignment of the same constant
0858       }
0859 
0860     }  // loop over hits
0861   }
0862 
0863   template <typename TrackerTraits>
0864   __global__ void kernel_print_found_ntuplets(HitsConstView<TrackerTraits> hh,
0865                                               TkSoAView<TrackerTraits> tracks_view,
0866                                               HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
0867                                               int32_t firstPrint,
0868                                               int32_t lastPrint,
0869                                               int iev) {
0870     constexpr auto loose = pixelTrack::Quality::loose;
0871 
0872     int first = firstPrint + blockDim.x * blockIdx.x + threadIdx.x;
0873     for (int i = first, np = std::min(lastPrint, tracks_view.hitIndices().nOnes()); i < np;
0874          i += blockDim.x * gridDim.x) {
0875       auto nh = tracks_view.hitIndices().size(i);
0876       if (nh < 3)
0877         continue;
0878       if (tracks_view[i].quality() < loose)
0879         continue;
0880       printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
0881              10000 * iev + i,
0882              int(tracks_view[i].quality()),
0883              nh,
0884              tracks_view[i].nLayers(),
0885              TracksUtilities<TrackerTraits>::charge(tracks_view, i),
0886              tracks_view[i].pt(),
0887              tracks_view[i].eta(),
0888              TracksUtilities<TrackerTraits>::phi(tracks_view, i),
0889              TracksUtilities<TrackerTraits>::tip(tracks_view, i),
0890              TracksUtilities<TrackerTraits>::zip(tracks_view, i),
0891              tracks_view[i].chi2(),
0892              hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
0893              hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
0894              hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
0895              nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
0896              nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
0897              nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
0898              nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
0899     }
0900   }
0901 
0902   __global__ void kernel_printCounters(Counters const *counters) {
0903     auto const &c = *counters;
0904     printf(
0905         "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks  |  nLooseTracks  |  nGoodTracks | nUsedHits | "
0906         "nDupHits | "
0907         "nFishCells | "
0908         "nKilledCells | "
0909         "nUsedCells | nZeroTrackCells ||\n");
0910     printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
0911            c.nEvents,
0912            c.nHits,
0913            c.nCells,
0914            c.nTuples,
0915            c.nFitTracks,
0916            c.nLooseTracks,
0917            c.nGoodTracks,
0918            c.nUsedHits,
0919            c.nDupHits,
0920            c.nFishCells,
0921            c.nKilledCells,
0922            c.nEmptyCells,
0923            c.nZeroTrackCells);
0924     printf(
0925         "Counters Norm %lld ||  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.3f|  %.3f|  %.3f|  %.3f||\n",
0926         c.nEvents,
0927         c.nHits / double(c.nEvents),
0928         c.nCells / double(c.nEvents),
0929         c.nTuples / double(c.nEvents),
0930         c.nFitTracks / double(c.nEvents),
0931         c.nLooseTracks / double(c.nEvents),
0932         c.nGoodTracks / double(c.nEvents),
0933         c.nUsedHits / double(c.nEvents),
0934         c.nDupHits / double(c.nEvents),
0935         c.nFishCells / double(c.nCells),
0936         c.nKilledCells / double(c.nCells),
0937         c.nEmptyCells / double(c.nCells),
0938         c.nZeroTrackCells / double(c.nCells));
0939   }
0940 
0941 }  // namespace caHitNtupletGeneratorKernels