Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-07 04:38:01

0001 #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h
0002 #define RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h
0003 
0004 //#define GPU_DEBUG
0005 //#define NTUPLE_DEBUG
0006 
0007 // C++ includes
0008 #include <cmath>
0009 #include <cstdint>
0010 #include <cstdio>
0011 #include <limits>
0012 #include <type_traits>
0013 
0014 // Alpaka includes
0015 #include <alpaka/alpaka.hpp>
0016 
0017 // CMSSW includes
0018 #include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
0019 #include "DataFormats/TrackSoA/interface/TracksSoA.h"
0020 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0021 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0022 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0023 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0024 #include "FWCore/Utilities/interface/isFinite.h"
0025 
0026 // local includes
0027 #include "CACell.h"
0028 #include "CAHitNtupletGeneratorKernels.h"
0029 #include "CAStructures.h"
0030 
0031 namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels {
0032 
0033   constexpr uint32_t tkNotFound = std::numeric_limits<uint16_t>::max();
0034   constexpr float maxScore = std::numeric_limits<float>::max();
0035   constexpr float nSigma2 = 25.f;
0036 
0037   // all of these below are mostly to avoid brining around the relative namespace
0038 
0039   template <typename TrackerTraits>
0040   using HitToTuple = caStructures::HitToTupleT<TrackerTraits>;
0041 
0042   template <typename TrackerTraits>
0043   using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0044 
0045   template <typename TrackerTraits>
0046   using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0047 
0048   template <typename TrackerTraits>
0049   using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0050 
0051   template <typename TrackerTraits>
0052   using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0053 
0054   using Quality = ::pixelTrack::Quality;
0055 
0056   template <typename TrackerTraits>
0057   using TkSoAView = reco::TrackSoAView<TrackerTraits>;
0058 
0059   template <typename TrackerTraits>
0060   using HitContainer = typename reco::TrackSoA<TrackerTraits>::HitContainer;
0061 
0062   template <typename TrackerTraits>
0063   using HitsConstView = typename CACellT<TrackerTraits>::HitsConstView;
0064 
0065   template <typename TrackerTraits>
0066   using QualityCuts = ::pixelTrack::QualityCutsT<TrackerTraits>;
0067 
0068   template <typename TrackerTraits>
0069   using CAParams = caHitNtupletGenerator::CAParamsT<TrackerTraits>;
0070 
0071   using Counters = caHitNtupletGenerator::Counters;
0072 
0073   template <typename TrackerTraits>
0074   class Kernel_checkOverflows {
0075   public:
0076     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0077     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0078                                   TkSoAView<TrackerTraits> tracks_view,
0079                                   TupleMultiplicity<TrackerTraits> const *tupleMultiplicity,
0080                                   HitToTuple<TrackerTraits> const *hitToTuple,
0081                                   cms::alpakatools::AtomicPairCounter *apc,
0082                                   CACellT<TrackerTraits> const *__restrict__ cells,
0083                                   uint32_t const *__restrict__ nCells,
0084                                   CellNeighborsVector<TrackerTraits> const *cellNeighbors,
0085                                   CellTracksVector<TrackerTraits> const *cellTracks,
0086                                   OuterHitOfCell<TrackerTraits> const *isOuterHitOfCell,
0087                                   int32_t nHits,
0088                                   uint32_t maxNumberOfDoublets,
0089                                   Counters *counters) const {
0090       auto &c = *counters;
0091       // counters once per event
0092       if (cms::alpakatools::once_per_grid(acc)) {
0093         alpaka::atomicAdd(acc, &c.nEvents, 1ull, alpaka::hierarchy::Blocks{});
0094         alpaka::atomicAdd(acc, &c.nHits, static_cast<unsigned long long>(nHits), alpaka::hierarchy::Blocks{});
0095         alpaka::atomicAdd(acc, &c.nCells, static_cast<unsigned long long>(*nCells), alpaka::hierarchy::Blocks{});
0096         alpaka::atomicAdd(
0097             acc, &c.nTuples, static_cast<unsigned long long>(apc->get().first), alpaka::hierarchy::Blocks{});
0098         alpaka::atomicAdd(acc,
0099                           &c.nFitTracks,
0100                           static_cast<unsigned long long>(tupleMultiplicity->size()),
0101                           alpaka::hierarchy::Blocks{});
0102       }
0103 
0104 #ifdef NTUPLE_DEBUGS
0105       if (cms::alpakatools::once_per_grid(acc)) {
0106         printf("number of found cells %d \n found tuples %d with total hits %d out of %d\n",
0107                *nCells,
0108                apc->get().first,
0109                apc->get().second,
0110                nHits);
0111         if (apc->get().first < TrackerTraits::maxNumberOfQuadruplets) {
0112           ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size(apc->get().first) == 0);
0113           ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size() == apc->get().second);
0114         }
0115       }
0116 
0117       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0118         if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack)  // current real limit
0119           printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
0120         ALPAKA_ASSERT_ACC(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
0121         for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
0122           ALPAKA_ASSERT_ACC(int(*ih) < nHits);
0123       }
0124 #endif
0125 
0126       if (cms::alpakatools::once_per_grid(acc)) {
0127         if (apc->get().first >= TrackerTraits::maxNumberOfQuadruplets)
0128           printf("Tuples overflow\n");
0129         if (*nCells >= maxNumberOfDoublets)
0130           printf("Cells overflow\n");
0131         if (cellNeighbors && cellNeighbors->full())
0132           printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
0133         if (cellTracks && cellTracks->full())
0134           printf("cellTracks overflow\n");
0135         if (int(hitToTuple->nOnes()) < nHits)
0136           printf("ERROR hitToTuple  overflow %d %d\n", hitToTuple->nOnes(), nHits);
0137 #ifdef GPU_DEBUG
0138         printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
0139                cellNeighbors->size(),
0140                cellTracks->size(),
0141                hitToTuple->size());
0142 #endif
0143       }
0144 
0145       for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) {
0146         auto const &thisCell = cells[idx];
0147         if (thisCell.hasFishbone() && !thisCell.isKilled())
0148           alpaka::atomicAdd(acc, &c.nFishCells, 1ull, alpaka::hierarchy::Blocks{});
0149         if (thisCell.outerNeighbors().full())  //++tooManyNeighbors[thisCell.theLayerPairId];
0150           printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
0151         if (thisCell.tracks().full())  //++tooManyTracks[thisCell.theLayerPairId];
0152           printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
0153         if (thisCell.isKilled())
0154           alpaka::atomicAdd(acc, &c.nKilledCells, 1ull, alpaka::hierarchy::Blocks{});
0155         if (!thisCell.unused())
0156           alpaka::atomicAdd(acc, &c.nEmptyCells, 1ull, alpaka::hierarchy::Blocks{});
0157         if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
0158           alpaka::atomicAdd(acc, &c.nZeroTrackCells, 1ull, alpaka::hierarchy::Blocks{});
0159       }
0160 
0161       // FIXME this loop was up to nHits - isOuterHitOfCell.offset in the CUDA version
0162       for (auto idx : cms::alpakatools::uniform_elements(acc, nHits))
0163         if ((*isOuterHitOfCell).container[idx].full())  // ++tooManyOuterHitOfCell;
0164           printf("OuterHitOfCell overflow %d\n", idx);
0165     }
0166   };
0167 
0168   template <typename TrackerTraits>
0169   class Kernel_fishboneCleaner {
0170   public:
0171     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0172     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0173                                   CACellT<TrackerTraits> const *cells,
0174                                   uint32_t const *__restrict__ nCells,
0175                                   TkSoAView<TrackerTraits> tracks_view) const {
0176       constexpr auto reject = Quality::dup;
0177 
0178       for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) {
0179         auto const &thisCell = cells[idx];
0180         if (!thisCell.isKilled())
0181           continue;
0182 
0183         for (auto it : thisCell.tracks())
0184           tracks_view[it].quality() = reject;
0185       }
0186     }
0187   };
0188 
0189   // remove shorter tracks if sharing a cell
0190   // It does not seem to affect efficiency in any way!
0191   template <typename TrackerTraits>
0192   class Kernel_earlyDuplicateRemover {
0193   public:
0194     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0195     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0196                                   CACellT<TrackerTraits> const *cells,
0197                                   uint32_t const *__restrict__ nCells,
0198                                   TkSoAView<TrackerTraits> tracks_view,
0199                                   bool dupPassThrough) const {
0200       // quality to mark rejected
0201       constexpr auto reject = Quality::edup;  /// cannot be loose
0202       ALPAKA_ASSERT_ACC(nCells);
0203       for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) {
0204         auto const &thisCell = cells[idx];
0205 
0206         if (thisCell.tracks().size() < 2)
0207           continue;
0208 
0209         int8_t maxNl = 0;
0210 
0211         // find maxNl
0212         for (auto it : thisCell.tracks()) {
0213           auto nl = tracks_view[it].nLayers();
0214           maxNl = std::max(nl, maxNl);
0215         }
0216 
0217         // if (maxNl<4) continue;
0218         // quad pass through (leave it here for tests)
0219         //  maxNl = std::min(4, maxNl);
0220 
0221         for (auto it : thisCell.tracks()) {
0222           if (tracks_view[it].nLayers() < maxNl)
0223             tracks_view[it].quality() = reject;  // no race: simple assignment of the same constant
0224         }
0225       }
0226     }
0227   };
0228 
0229   // assume the above (so, short tracks already removed)
0230   template <typename TrackerTraits>
0231   class Kernel_fastDuplicateRemover {
0232   public:
0233     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0234     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0235                                   CACellT<TrackerTraits> const *__restrict__ cells,
0236                                   uint32_t const *__restrict__ nCells,
0237                                   TkSoAView<TrackerTraits> tracks_view,
0238                                   bool dupPassThrough) const {
0239       // quality to mark rejected
0240       auto const reject = dupPassThrough ? Quality::loose : Quality::dup;
0241       constexpr auto loose = Quality::loose;
0242 
0243       ALPAKA_ASSERT_ACC(nCells);
0244       const auto ntNCells = (*nCells);
0245 
0246       for (auto idx : cms::alpakatools::uniform_elements(acc, ntNCells)) {
0247         auto const &thisCell = cells[idx];
0248         if (thisCell.tracks().size() < 2)
0249           continue;
0250 
0251         float mc = maxScore;
0252         uint16_t im = tkNotFound;
0253 
0254         auto score = [&](auto it) { return std::abs(reco::tip(tracks_view, it)); };
0255 
0256         // full crazy combinatorics
0257         int ntr = thisCell.tracks().size();
0258         for (int i = 0; i < ntr - 1; ++i) {
0259           auto it = thisCell.tracks()[i];
0260           auto qi = tracks_view[it].quality();
0261           if (qi <= reject)
0262             continue;
0263           auto opi = tracks_view[it].state()(2);
0264           auto e2opi = tracks_view[it].covariance()(9);
0265           auto cti = tracks_view[it].state()(3);
0266           auto e2cti = tracks_view[it].covariance()(12);
0267           for (auto j = i + 1; j < ntr; ++j) {
0268             auto jt = thisCell.tracks()[j];
0269             auto qj = tracks_view[jt].quality();
0270             if (qj <= reject)
0271               continue;
0272             auto opj = tracks_view[jt].state()(2);
0273             auto ctj = tracks_view[jt].state()(3);
0274             auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0275             if ((cti - ctj) * (cti - ctj) > dct)
0276               continue;
0277             auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0278             if ((opi - opj) * (opi - opj) > dop)
0279               continue;
0280             if ((qj < qi) || (qj == qi && score(it) < score(jt)))
0281               tracks_view[jt].quality() = reject;
0282             else {
0283               tracks_view[it].quality() = reject;
0284               break;
0285             }
0286           }
0287         }
0288 
0289         // find maxQual
0290         auto maxQual = reject;  // no duplicate!
0291         for (auto it : thisCell.tracks()) {
0292           if (tracks_view[it].quality() > maxQual)
0293             maxQual = tracks_view[it].quality();
0294         }
0295 
0296         if (maxQual <= loose)
0297           continue;
0298 
0299         // find min score
0300         for (auto it : thisCell.tracks()) {
0301           if (tracks_view[it].quality() == maxQual && score(it) < mc) {
0302             mc = score(it);
0303             im = it;
0304           }
0305         }
0306 
0307         if (tkNotFound == im)
0308           continue;
0309 
0310         // mark all other duplicates  (not yet, keep it loose)
0311         for (auto it : thisCell.tracks()) {
0312           if (tracks_view[it].quality() > loose && it != im)
0313             tracks_view[it].quality() = loose;  //no race:  simple assignment of the same constant
0314         }
0315       }
0316     }
0317   };
0318 
0319   template <typename TrackerTraits>
0320   class Kernel_connect {
0321   public:
0322     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0323     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0324                                   cms::alpakatools::AtomicPairCounter *apc1,
0325                                   cms::alpakatools::AtomicPairCounter *apc2,  // just to zero them
0326                                   HitsConstView<TrackerTraits> hh,
0327                                   CACellT<TrackerTraits> *cells,
0328                                   uint32_t *nCells,
0329                                   CellNeighborsVector<TrackerTraits> *cellNeighbors,
0330                                   OuterHitOfCell<TrackerTraits> const *isOuterHitOfCell,
0331                                   CAParams<TrackerTraits> params) const {
0332       using Cell = CACellT<TrackerTraits>;
0333 
0334       if (cms::alpakatools::once_per_grid(acc)) {
0335         *apc1 = 0;
0336         *apc2 = 0;
0337       }  // ready for next kernel
0338 
0339       // loop on outer cells
0340       for (uint32_t idx : cms::alpakatools::uniform_elements_y(acc, *nCells)) {
0341         auto cellIndex = idx;
0342         auto &thisCell = cells[idx];
0343         auto innerHitId = thisCell.inner_hit_id();
0344         if (int(innerHitId) < isOuterHitOfCell->offset)
0345           continue;
0346 
0347         uint32_t numberOfPossibleNeighbors = (*isOuterHitOfCell)[innerHitId].size();
0348         auto vi = (*isOuterHitOfCell)[innerHitId].data();
0349         auto ri = thisCell.inner_r(hh);
0350         auto zi = thisCell.inner_z(hh);
0351         auto ro = thisCell.outer_r(hh);
0352         auto zo = thisCell.outer_z(hh);
0353         auto isBarrel = thisCell.inner_detIndex(hh) < TrackerTraits::last_barrel_detIndex;
0354 
0355         // loop on inner cells
0356         for (uint32_t j : cms::alpakatools::independent_group_elements_x(acc, numberOfPossibleNeighbors)) {
0357           auto otherCell = (vi[j]);
0358           auto &oc = cells[otherCell];
0359           auto r1 = oc.inner_r(hh);
0360           auto z1 = oc.inner_z(hh);
0361           bool aligned = Cell::areAlignedRZ(
0362               r1,
0363               z1,
0364               ri,
0365               zi,
0366               ro,
0367               zo,
0368               params.ptmin_,
0369               isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_);  // 2.f*thetaCut); // FIXME tune cuts
0370           if (aligned &&
0371               thisCell.dcaCut(hh,
0372                               oc,
0373                               oc.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? params.dcaCutInnerTriplet_
0374                                                                                          : params.dcaCutOuterTriplet_,
0375                               params.hardCurvCut_)) {  // FIXME tune cuts
0376             oc.addOuterNeighbor(acc, cellIndex, *cellNeighbors);
0377             thisCell.setStatusBits(Cell::StatusBit::kUsed);
0378             oc.setStatusBits(Cell::StatusBit::kUsed);
0379           }
0380         }  // loop on inner cells
0381       }  // loop on outer cells
0382     }
0383   };
0384   template <typename TrackerTraits>
0385   class Kernel_find_ntuplets {
0386   public:
0387     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0388     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0389                                   HitsConstView<TrackerTraits> hh,
0390                                   TkSoAView<TrackerTraits> tracks_view,
0391                                   CACellT<TrackerTraits> *__restrict__ cells,
0392                                   uint32_t const *nCells,
0393                                   CellTracksVector<TrackerTraits> *cellTracks,
0394                                   cms::alpakatools::AtomicPairCounter *apc,
0395                                   CAParams<TrackerTraits> params) const {
0396       // recursive: not obvious to widen
0397 
0398       using Cell = CACellT<TrackerTraits>;
0399 
0400 #ifdef GPU_DEBUG
0401       if (cms::alpakatools::once_per_grid(acc))
0402         printf("starting producing ntuplets from %d cells \n", *nCells);
0403 #endif
0404 
0405       for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) {
0406         auto const &thisCell = cells[idx];
0407 
0408         // cut by earlyFishbone
0409         if (thisCell.isKilled())
0410           continue;
0411 
0412         // we require at least three hits
0413         if (thisCell.outerNeighbors().empty())
0414           continue;
0415 
0416         auto pid = thisCell.layerPairId();
0417         bool doit = params.startingLayerPair(pid);
0418 
0419         constexpr uint32_t maxDepth = TrackerTraits::maxDepth;
0420 
0421         if (doit) {
0422           typename Cell::TmpTuple stack;
0423           stack.reset();
0424           bool bpix1Start = params.startAt0(pid);
0425           thisCell.template find_ntuplets<maxDepth, TAcc>(acc,
0426                                                           hh,
0427                                                           cells,
0428                                                           *cellTracks,
0429                                                           tracks_view.hitIndices(),
0430                                                           *apc,
0431                                                           tracks_view.quality(),
0432                                                           stack,
0433                                                           params.minHitsPerNtuplet_,
0434                                                           bpix1Start);
0435           ALPAKA_ASSERT_ACC(stack.empty());
0436         }
0437       }
0438     }
0439   };
0440 
0441   template <typename TrackerTraits>
0442   class Kernel_mark_used {
0443   public:
0444     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0445     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0446                                   CACellT<TrackerTraits> *__restrict__ cells,
0447                                   uint32_t const *nCells) const {
0448       using Cell = CACellT<TrackerTraits>;
0449       for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) {
0450         auto &thisCell = cells[idx];
0451         if (!thisCell.tracks().empty())
0452           thisCell.setStatusBits(Cell::StatusBit::kInTrack);
0453       }
0454     }
0455   };
0456 
0457   template <typename TrackerTraits>
0458   class Kernel_countMultiplicity {
0459   public:
0460     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0461     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0462                                   TkSoAView<TrackerTraits> tracks_view,
0463                                   TupleMultiplicity<TrackerTraits> *tupleMultiplicity) const {
0464       for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0465         auto nhits = tracks_view.hitIndices().size(it);
0466         if (nhits < 3)
0467           continue;
0468         if (tracks_view[it].quality() == Quality::edup)
0469           continue;
0470         ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad);
0471         if (nhits > TrackerTraits::maxHitsOnTrack)  // current limit
0472           printf("wrong mult %d %d\n", it, nhits);
0473         ALPAKA_ASSERT_ACC(nhits <= TrackerTraits::maxHitsOnTrack);
0474         tupleMultiplicity->count(acc, nhits);
0475       }
0476     }
0477   };
0478 
0479   template <typename TrackerTraits>
0480   class Kernel_fillMultiplicity {
0481   public:
0482     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0483     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0484                                   TkSoAView<TrackerTraits> tracks_view,
0485                                   TupleMultiplicity<TrackerTraits> *tupleMultiplicity) const {
0486       for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0487         auto nhits = tracks_view.hitIndices().size(it);
0488         if (nhits < 3)
0489           continue;
0490         if (tracks_view[it].quality() == Quality::edup)
0491           continue;
0492         ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad);
0493         if (nhits > TrackerTraits::maxHitsOnTrack)
0494           printf("wrong mult %d %d\n", it, nhits);
0495         ALPAKA_ASSERT_ACC(nhits <= TrackerTraits::maxHitsOnTrack);
0496         tupleMultiplicity->fill(acc, nhits, it);
0497       }
0498     }
0499   };
0500 
0501   template <typename TrackerTraits>
0502   class Kernel_classifyTracks {
0503   public:
0504     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0505     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0506                                   TkSoAView<TrackerTraits> tracks_view,
0507                                   QualityCuts<TrackerTraits> cuts) const {
0508       for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0509         auto nhits = tracks_view.hitIndices().size(it);
0510         if (nhits == 0)
0511           break;  // guard
0512 
0513         // if duplicate: not even fit
0514         if (tracks_view[it].quality() == Quality::edup)
0515           continue;
0516 
0517         ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad);
0518 
0519         // mark doublets as bad
0520         if (nhits < 3)
0521           continue;
0522 
0523         // if the fit has any invalid parameters, mark it as bad
0524         bool isNaN = false;
0525         for (int i = 0; i < 5; ++i) {
0526           isNaN |= edm::isNotFinite(tracks_view[it].state()(i));
0527         }
0528         if (isNaN) {
0529 #ifdef NTUPLE_DEBUG
0530           printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
0531 #endif
0532           continue;
0533         }
0534 
0535         tracks_view[it].quality() = Quality::strict;
0536 
0537         if (cuts.strictCut(tracks_view, it))
0538           continue;
0539 
0540         tracks_view[it].quality() = Quality::tight;
0541 
0542         if (cuts.isHP(tracks_view, nhits, it))
0543           tracks_view[it].quality() = Quality::highPurity;
0544       }
0545     }
0546   };
0547 
0548   template <typename TrackerTraits>
0549   class Kernel_doStatsForTracks {
0550   public:
0551     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0552     ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView<TrackerTraits> tracks_view, Counters *counters) const {
0553       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0554         if (tracks_view.hitIndices().size(idx) == 0)
0555           break;  //guard
0556         if (tracks_view[idx].quality() < Quality::loose)
0557           continue;
0558         alpaka::atomicAdd(acc, &(counters->nLooseTracks), 1ull, alpaka::hierarchy::Blocks{});
0559         if (tracks_view[idx].quality() < Quality::strict)
0560           continue;
0561         alpaka::atomicAdd(acc, &(counters->nGoodTracks), 1ull, alpaka::hierarchy::Blocks{});
0562       }
0563     }
0564   };
0565 
0566   template <typename TrackerTraits>
0567   class Kernel_countHitInTracks {
0568   public:
0569     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0570     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0571                                   TkSoAView<TrackerTraits> tracks_view,
0572                                   HitToTuple<TrackerTraits> *hitToTuple) const {
0573       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0574         if (tracks_view.hitIndices().size(idx) == 0)
0575           break;  // guard
0576         for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0577           hitToTuple->count(acc, *h);
0578       }
0579     }
0580   };
0581 
0582   template <typename TrackerTraits>
0583   class Kernel_fillHitInTracks {
0584   public:
0585     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0586     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0587                                   TkSoAView<TrackerTraits> tracks_view,
0588                                   HitToTuple<TrackerTraits> *hitToTuple) const {
0589       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0590         if (tracks_view.hitIndices().size(idx) == 0)
0591           break;  // guard
0592         for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0593           hitToTuple->fill(acc, *h, idx);
0594       }
0595     }
0596   };
0597 
0598   template <typename TrackerTraits>
0599   class Kernel_fillHitDetIndices {
0600   public:
0601     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0602     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0603                                   TkSoAView<TrackerTraits> tracks_view,
0604                                   HitsConstView<TrackerTraits> hh) const {
0605       // copy offsets
0606       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0607         tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
0608       }
0609       // fill hit indices
0610       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().size())) {
0611         ALPAKA_ASSERT_ACC(tracks_view.hitIndices().content[idx] < (uint32_t)hh.metadata().size());
0612         tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
0613       }
0614     }
0615   };
0616 
0617   template <typename TrackerTraits>
0618   class Kernel_fillNLayers {
0619   public:
0620     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0621     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0622                                   TkSoAView<TrackerTraits> tracks_view,
0623                                   cms::alpakatools::AtomicPairCounter *apc) const {
0624       // clamp the number of tracks to the capacity of the SoA
0625       auto ntracks = std::min<int>(apc->get().first, tracks_view.metadata().size() - 1);
0626 
0627       if (cms::alpakatools::once_per_grid(acc))
0628         tracks_view.nTracks() = ntracks;
0629       for (auto idx : cms::alpakatools::uniform_elements(acc, ntracks)) {
0630         ALPAKA_ASSERT_ACC(TracksUtilities<TrackerTraits>::nHits(tracks_view, idx) >= 3);
0631         tracks_view[idx].nLayers() = TracksUtilities<TrackerTraits>::computeNumberOfLayers(tracks_view, idx);
0632       }
0633     }
0634   };
0635 
0636   template <typename TrackerTraits>
0637   class Kernel_doStatsForHitInTracks {
0638   public:
0639     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0640     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0641                                   HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
0642                                   Counters *counters) const {
0643       auto &c = *counters;
0644       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple->nOnes())) {
0645         if (hitToTuple->size(idx) == 0)
0646           continue;  // SHALL NOT BE break
0647         alpaka::atomicAdd(acc, &c.nUsedHits, 1ull, alpaka::hierarchy::Blocks{});
0648         if (hitToTuple->size(idx) > 1)
0649           alpaka::atomicAdd(acc, &c.nDupHits, 1ull, alpaka::hierarchy::Blocks{});
0650       }
0651     }
0652   };
0653 
0654   template <typename TrackerTraits>
0655   class Kernel_countSharedHit {
0656   public:
0657     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0658     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0659                                   int *__restrict__ nshared,
0660                                   HitContainer<TrackerTraits> const *__restrict__ ptuples,
0661                                   Quality const *__restrict__ quality,
0662                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0663       constexpr auto loose = Quality::loose;
0664 
0665       auto &hitToTuple = *phitToTuple;
0666       auto const &foundNtuplets = *ptuples;
0667       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple->nbins())) {
0668         if (hitToTuple.size(idx) < 2)
0669           continue;
0670 
0671         int nt = 0;
0672 
0673         // count "good" tracks
0674         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0675           if (quality[*it] < loose)
0676             continue;
0677           ++nt;
0678         }
0679 
0680         if (nt < 2)
0681           continue;
0682 
0683         // now mark  each track triplet as sharing a hit
0684         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0685           if (foundNtuplets.size(*it) > 3)
0686             continue;
0687           alpaka::atomicAdd(acc, &nshared[*it], 1ull, alpaka::hierarchy::Blocks{});
0688         }
0689 
0690       }  //  hit loop
0691     }
0692   };
0693 
0694   template <typename TrackerTraits>
0695   class Kernel_markSharedHit {
0696     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0697     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0698                                   int const *__restrict__ nshared,
0699                                   HitContainer<TrackerTraits> const *__restrict__ tuples,
0700                                   Quality *__restrict__ quality,
0701                                   bool dupPassThrough) const {
0702       // constexpr auto bad = Quality::bad;
0703       constexpr auto dup = Quality::dup;
0704       constexpr auto loose = Quality::loose;
0705       // constexpr auto strict = Quality::strict;
0706 
0707       // quality to mark rejected
0708       auto const reject = dupPassThrough ? loose : dup;
0709       for (auto idx : cms::alpakatools::uniform_elements(acc, tuples->nbins())) {
0710         if (tuples->size(idx) == 0)
0711           break;  //guard
0712         if (quality[idx] <= reject)
0713           continue;
0714         if (nshared[idx] > 2)
0715           quality[idx] = reject;
0716       }
0717     }
0718   };
0719 
0720   // mostly for very forward triplets.....
0721   template <typename TrackerTraits>
0722   class Kernel_rejectDuplicate {
0723   public:
0724     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0725     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0726                                   TkSoAView<TrackerTraits> tracks_view,
0727                                   uint16_t nmin,
0728                                   bool dupPassThrough,
0729                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0730       // quality to mark rejected
0731       auto const reject = dupPassThrough ? Quality::loose : Quality::dup;
0732 
0733       auto &hitToTuple = *phitToTuple;
0734 
0735       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) {
0736         if (hitToTuple.size(idx) < 2)
0737           continue;
0738 
0739         auto score = [&](auto it, auto nl) { return std::abs(reco::tip(tracks_view, it)); };
0740 
0741         // full combinatorics
0742         for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
0743           auto const it = *ip;
0744           auto qi = tracks_view[it].quality();
0745           if (qi <= reject)
0746             continue;
0747           auto opi = tracks_view[it].state()(2);
0748           auto e2opi = tracks_view[it].covariance()(9);
0749           auto cti = tracks_view[it].state()(3);
0750           auto e2cti = tracks_view[it].covariance()(12);
0751           auto nli = tracks_view[it].nLayers();
0752           for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
0753             auto const jt = *jp;
0754             auto qj = tracks_view[jt].quality();
0755             if (qj <= reject)
0756               continue;
0757             auto opj = tracks_view[jt].state()(2);
0758             auto ctj = tracks_view[jt].state()(3);
0759             auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0760             if ((cti - ctj) * (cti - ctj) > dct)
0761               continue;
0762             auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0763             if ((opi - opj) * (opi - opj) > dop)
0764               continue;
0765             auto nlj = tracks_view[jt].nLayers();
0766             if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
0767               tracks_view[jt].quality() = reject;
0768             else {
0769               tracks_view[it].quality() = reject;
0770               break;
0771             }
0772           }
0773         }
0774       }
0775     }
0776   };
0777 
0778   template <typename TrackerTraits>
0779   class Kernel_sharedHitCleaner {
0780   public:
0781     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0782     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0783                                   HitsConstView<TrackerTraits> hh,
0784                                   TkSoAView<TrackerTraits> tracks_view,
0785                                   int nmin,
0786                                   bool dupPassThrough,
0787                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0788       // quality to mark rejected
0789       auto const reject = dupPassThrough ? Quality::loose : Quality::dup;
0790       // quality of longest track
0791       auto const longTqual = Quality::highPurity;
0792 
0793       auto &hitToTuple = *phitToTuple;
0794 
0795       uint32_t l1end = hh.hitsLayerStart()[1];
0796 
0797       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) {
0798         if (hitToTuple.size(idx) < 2)
0799           continue;
0800 
0801         int8_t maxNl = 0;
0802 
0803         // find maxNl
0804         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0805           if (tracks_view[*it].quality() < longTqual)
0806             continue;
0807           // if (tracks_view[*it].nHits()==3) continue;
0808           auto nl = tracks_view[*it].nLayers();
0809           maxNl = std::max(nl, maxNl);
0810         }
0811 
0812         if (maxNl < 4)
0813           continue;
0814 
0815         // quad pass through (leave for tests)
0816         // maxNl = std::min(4, maxNl);
0817 
0818         // kill all tracks shorter than maxHl (only triplets???
0819         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0820           auto nl = tracks_view[*it].nLayers();
0821 
0822           //checking if shared hit is on bpix1 and if the tuple is short enough
0823           if (idx < l1end and nl > nmin)
0824             continue;
0825 
0826           if (nl < maxNl && tracks_view[*it].quality() > reject)
0827             tracks_view[*it].quality() = reject;
0828         }
0829       }
0830     }
0831   };
0832   template <typename TrackerTraits>
0833   class Kernel_tripletCleaner {
0834   public:
0835     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0836     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0837                                   TkSoAView<TrackerTraits> tracks_view,
0838                                   uint16_t nmin,
0839                                   bool dupPassThrough,
0840                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0841       // quality to mark rejected
0842       auto const reject = Quality::loose;
0843       /// min quality of good
0844       auto const good = Quality::strict;
0845 
0846       auto &hitToTuple = *phitToTuple;
0847 
0848       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) {
0849         if (hitToTuple.size(idx) < 2)
0850           continue;
0851 
0852         float mc = maxScore;
0853         uint16_t im = tkNotFound;
0854         bool onlyTriplets = true;
0855 
0856         // check if only triplets
0857         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0858           if (tracks_view[*it].quality() <= good)
0859             continue;
0860           onlyTriplets &= reco::isTriplet(tracks_view, *it);
0861           if (!onlyTriplets)
0862             break;
0863         }
0864 
0865         // only triplets
0866         if (!onlyTriplets)
0867           continue;
0868 
0869         // for triplets choose best tip!  (should we first find best quality???)
0870         for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0871           auto const it = *ip;
0872           if (tracks_view[it].quality() >= good && std::abs(reco::tip(tracks_view, it)) < mc) {
0873             mc = std::abs(reco::tip(tracks_view, it));
0874             im = it;
0875           }
0876         }
0877 
0878         if (tkNotFound == im)
0879           continue;
0880 
0881         // mark worse ambiguities
0882         for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0883           auto const it = *ip;
0884           if (tracks_view[it].quality() > reject && it != im)
0885             tracks_view[it].quality() = reject;  //no race:  simple assignment of the same constant
0886         }
0887 
0888       }  // loop over hits
0889     }
0890   };
0891 
0892   template <typename TrackerTraits>
0893   class Kernel_simpleTripletCleaner {
0894   public:
0895     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0896     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0897                                   TkSoAView<TrackerTraits> tracks_view,
0898                                   uint16_t nmin,
0899                                   bool dupPassThrough,
0900                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0901       // quality to mark rejected
0902       auto const reject = Quality::loose;
0903       /// min quality of good
0904       auto const good = Quality::loose;
0905 
0906       auto &hitToTuple = *phitToTuple;
0907 
0908       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) {
0909         if (hitToTuple.size(idx) < 2)
0910           continue;
0911 
0912         float mc = maxScore;
0913         uint16_t im = tkNotFound;
0914 
0915         // choose best tip!  (should we first find best quality???)
0916         for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0917           auto const it = *ip;
0918           if (tracks_view[it].quality() >= good && std::abs(reco::tip(tracks_view, it)) < mc) {
0919             mc = std::abs(reco::tip(tracks_view, it));
0920             im = it;
0921           }
0922         }
0923 
0924         if (tkNotFound == im)
0925           continue;
0926 
0927         // mark worse ambiguities
0928         for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0929           auto const it = *ip;
0930           if (tracks_view[it].quality() > reject && reco::isTriplet(tracks_view, it) && it != im)
0931             tracks_view[it].quality() = reject;  //no race:  simple assignment of the same constant
0932         }
0933 
0934       }  // loop over hits
0935     }
0936   };
0937 
0938   template <typename TrackerTraits>
0939   class Kernel_print_found_ntuplets {
0940   public:
0941     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0942     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0943                                   HitsConstView<TrackerTraits> hh,
0944                                   TkSoAView<TrackerTraits> tracks_view,
0945                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
0946                                   int32_t firstPrint,
0947                                   int32_t lastPrint,
0948                                   int iev) const {
0949       constexpr auto loose = Quality::loose;
0950 
0951       for (auto i :
0952            cms::alpakatools::uniform_elements(acc, firstPrint, std::min(lastPrint, tracks_view.hitIndices().nbins()))) {
0953         auto nh = tracks_view.hitIndices().size(i);
0954         if (nh < 3)
0955           continue;
0956         if (tracks_view[i].quality() < loose)
0957           continue;
0958         printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
0959                10000 * iev + i,
0960                int(tracks_view[i].quality()),
0961                nh,
0962                tracks_view[i].nLayers(),
0963                reco::charge(tracks_view, i),
0964                tracks_view[i].pt(),
0965                tracks_view[i].eta(),
0966                reco::phi(tracks_view, i),
0967                reco::tip(tracks_view, i),
0968                reco::zip(tracks_view, i),
0969                tracks_view[i].chi2(),
0970                hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
0971                hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
0972                hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
0973                nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
0974                nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
0975                nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
0976                nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
0977       }
0978     }
0979   };
0980 
0981   class Kernel_printCounters {
0982   public:
0983     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0984     ALPAKA_FN_ACC void operator()(TAcc const &acc, Counters const *counters) const {
0985       auto const &c = *counters;
0986       printf(
0987           "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks  |  nLooseTracks  |  nGoodTracks | nUsedHits | "
0988           "nDupHits | nFishCells | nKilledCells | nUsedCells | nZeroTrackCells ||\n");
0989       printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
0990              c.nEvents,
0991              c.nHits,
0992              c.nCells,
0993              c.nTuples,
0994              c.nFitTracks,
0995              c.nLooseTracks,
0996              c.nGoodTracks,
0997              c.nUsedHits,
0998              c.nDupHits,
0999              c.nFishCells,
1000              c.nKilledCells,
1001              c.nEmptyCells,
1002              c.nZeroTrackCells);
1003       printf(
1004           "Counters Norm %lld ||  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.3f|  %.3f|  %.3f|  "
1005           "%.3f||\n",
1006           c.nEvents,
1007           c.nHits / double(c.nEvents),
1008           c.nCells / double(c.nEvents),
1009           c.nTuples / double(c.nEvents),
1010           c.nFitTracks / double(c.nEvents),
1011           c.nLooseTracks / double(c.nEvents),
1012           c.nGoodTracks / double(c.nEvents),
1013           c.nUsedHits / double(c.nEvents),
1014           c.nDupHits / double(c.nEvents),
1015           c.nFishCells / double(c.nCells),
1016           c.nKilledCells / double(c.nCells),
1017           c.nEmptyCells / double(c.nCells),
1018           c.nZeroTrackCells / double(c.nCells));
1019     }
1020   };
1021 
1022 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels
1023 
1024 #endif  // RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h