Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:32

0001 #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h
0002 #define RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h
0003 
0004 //#define GPU_DEBUG
0005 //#define NTUPLE_DEBUG
0006 
0007 // C++ includes
0008 #include <cmath>
0009 #include <cstdint>
0010 #include <cstdio>
0011 #include <limits>
0012 #include <type_traits>
0013 
0014 // Alpaka includes
0015 #include <alpaka/alpaka.hpp>
0016 
0017 // CMSSW includes
0018 #include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
0019 #include "DataFormats/TrackSoA/interface/TracksSoA.h"
0020 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0021 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0022 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0023 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0024 
0025 // local includes
0026 #include "CACell.h"
0027 #include "CAHitNtupletGeneratorKernels.h"
0028 #include "CAStructures.h"
0029 
0030 namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels {
0031 
0032   constexpr uint32_t tkNotFound = std::numeric_limits<uint16_t>::max();
0033   constexpr float maxScore = std::numeric_limits<float>::max();
0034   constexpr float nSigma2 = 25.f;
0035 
0036   // all of these below are mostly to avoid brining around the relative namespace
0037 
0038   template <typename TrackerTraits>
0039   using HitToTuple = caStructures::HitToTupleT<TrackerTraits>;
0040 
0041   template <typename TrackerTraits>
0042   using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0043 
0044   template <typename TrackerTraits>
0045   using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0046 
0047   template <typename TrackerTraits>
0048   using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0049 
0050   template <typename TrackerTraits>
0051   using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0052 
0053   using Quality = ::pixelTrack::Quality;
0054 
0055   template <typename TrackerTraits>
0056   using TkSoAView = reco::TrackSoAView<TrackerTraits>;
0057 
0058   template <typename TrackerTraits>
0059   using HitContainer = typename reco::TrackSoA<TrackerTraits>::HitContainer;
0060 
0061   template <typename TrackerTraits>
0062   using HitsConstView = typename CACellT<TrackerTraits>::HitsConstView;
0063 
0064   template <typename TrackerTraits>
0065   using QualityCuts = ::pixelTrack::QualityCutsT<TrackerTraits>;
0066 
0067   template <typename TrackerTraits>
0068   using CAParams = caHitNtupletGenerator::CAParamsT<TrackerTraits>;
0069 
0070   using Counters = caHitNtupletGenerator::Counters;
0071 
0072   template <typename TrackerTraits>
0073   class Kernel_checkOverflows {
0074   public:
0075     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0076     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0077                                   TkSoAView<TrackerTraits> tracks_view,
0078                                   TupleMultiplicity<TrackerTraits> const *tupleMultiplicity,
0079                                   HitToTuple<TrackerTraits> const *hitToTuple,
0080                                   cms::alpakatools::AtomicPairCounter *apc,
0081                                   CACellT<TrackerTraits> const *__restrict__ cells,
0082                                   uint32_t const *__restrict__ nCells,
0083                                   CellNeighborsVector<TrackerTraits> const *cellNeighbors,
0084                                   CellTracksVector<TrackerTraits> const *cellTracks,
0085                                   OuterHitOfCell<TrackerTraits> const *isOuterHitOfCell,
0086                                   int32_t nHits,
0087                                   uint32_t maxNumberOfDoublets,
0088                                   Counters *counters) const {
0089       auto &c = *counters;
0090       // counters once per event
0091       if (cms::alpakatools::once_per_grid(acc)) {
0092         alpaka::atomicAdd(acc, &c.nEvents, 1ull, alpaka::hierarchy::Blocks{});
0093         alpaka::atomicAdd(acc, &c.nHits, static_cast<unsigned long long>(nHits), alpaka::hierarchy::Blocks{});
0094         alpaka::atomicAdd(acc, &c.nCells, static_cast<unsigned long long>(*nCells), alpaka::hierarchy::Blocks{});
0095         alpaka::atomicAdd(
0096             acc, &c.nTuples, static_cast<unsigned long long>(apc->get().first), alpaka::hierarchy::Blocks{});
0097         alpaka::atomicAdd(acc,
0098                           &c.nFitTracks,
0099                           static_cast<unsigned long long>(tupleMultiplicity->size()),
0100                           alpaka::hierarchy::Blocks{});
0101       }
0102 
0103 #ifdef NTUPLE_DEBUGS
0104       if (cms::alpakatools::once_per_grid(acc)) {
0105         printf("number of found cells %d \n found tuples %d with total hits %d out of %d\n",
0106                *nCells,
0107                apc->get().first,
0108                apc->get().second,
0109                nHits);
0110         if (apc->get().first < TrackerTraits::maxNumberOfQuadruplets) {
0111           ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size(apc->get().first) == 0);
0112           ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size() == apc->get().second);
0113         }
0114       }
0115 
0116       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0117         if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack)  // current real limit
0118           printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
0119         ALPAKA_ASSERT_ACC(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
0120         for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
0121           ALPAKA_ASSERT_ACC(int(*ih) < nHits);
0122       }
0123 #endif
0124 
0125       if (cms::alpakatools::once_per_grid(acc)) {
0126         if (apc->get().first >= TrackerTraits::maxNumberOfQuadruplets)
0127           printf("Tuples overflow\n");
0128         if (*nCells >= maxNumberOfDoublets)
0129           printf("Cells overflow\n");
0130         if (cellNeighbors && cellNeighbors->full())
0131           printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
0132         if (cellTracks && cellTracks->full())
0133           printf("cellTracks overflow\n");
0134         if (int(hitToTuple->nOnes()) < nHits)
0135           printf("ERROR hitToTuple  overflow %d %d\n", hitToTuple->nOnes(), nHits);
0136 #ifdef GPU_DEBUG
0137         printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
0138                cellNeighbors->size(),
0139                cellTracks->size(),
0140                hitToTuple->size());
0141 #endif
0142       }
0143 
0144       for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) {
0145         auto const &thisCell = cells[idx];
0146         if (thisCell.hasFishbone() && !thisCell.isKilled())
0147           alpaka::atomicAdd(acc, &c.nFishCells, 1ull, alpaka::hierarchy::Blocks{});
0148         if (thisCell.outerNeighbors().full())  //++tooManyNeighbors[thisCell.theLayerPairId];
0149           printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
0150         if (thisCell.tracks().full())  //++tooManyTracks[thisCell.theLayerPairId];
0151           printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
0152         if (thisCell.isKilled())
0153           alpaka::atomicAdd(acc, &c.nKilledCells, 1ull, alpaka::hierarchy::Blocks{});
0154         if (!thisCell.unused())
0155           alpaka::atomicAdd(acc, &c.nEmptyCells, 1ull, alpaka::hierarchy::Blocks{});
0156         if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
0157           alpaka::atomicAdd(acc, &c.nZeroTrackCells, 1ull, alpaka::hierarchy::Blocks{});
0158       }
0159 
0160       // FIXME this loop was up to nHits - isOuterHitOfCell.offset in the CUDA version
0161       for (auto idx : cms::alpakatools::uniform_elements(acc, nHits))
0162         if ((*isOuterHitOfCell).container[idx].full())  // ++tooManyOuterHitOfCell;
0163           printf("OuterHitOfCell overflow %d\n", idx);
0164     }
0165   };
0166 
0167   template <typename TrackerTraits>
0168   class Kernel_fishboneCleaner {
0169   public:
0170     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0171     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0172                                   CACellT<TrackerTraits> const *cells,
0173                                   uint32_t const *__restrict__ nCells,
0174                                   TkSoAView<TrackerTraits> tracks_view) const {
0175       constexpr auto reject = Quality::dup;
0176 
0177       for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) {
0178         auto const &thisCell = cells[idx];
0179         if (!thisCell.isKilled())
0180           continue;
0181 
0182         for (auto it : thisCell.tracks())
0183           tracks_view[it].quality() = reject;
0184       }
0185     }
0186   };
0187 
0188   // remove shorter tracks if sharing a cell
0189   // It does not seem to affect efficiency in any way!
0190   template <typename TrackerTraits>
0191   class Kernel_earlyDuplicateRemover {
0192   public:
0193     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0194     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0195                                   CACellT<TrackerTraits> const *cells,
0196                                   uint32_t const *__restrict__ nCells,
0197                                   TkSoAView<TrackerTraits> tracks_view,
0198                                   bool dupPassThrough) const {
0199       // quality to mark rejected
0200       constexpr auto reject = Quality::edup;  /// cannot be loose
0201       ALPAKA_ASSERT_ACC(nCells);
0202       for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) {
0203         auto const &thisCell = cells[idx];
0204 
0205         if (thisCell.tracks().size() < 2)
0206           continue;
0207 
0208         int8_t maxNl = 0;
0209 
0210         // find maxNl
0211         for (auto it : thisCell.tracks()) {
0212           auto nl = tracks_view[it].nLayers();
0213           maxNl = std::max(nl, maxNl);
0214         }
0215 
0216         // if (maxNl<4) continue;
0217         // quad pass through (leave it here for tests)
0218         //  maxNl = std::min(4, maxNl);
0219 
0220         for (auto it : thisCell.tracks()) {
0221           if (tracks_view[it].nLayers() < maxNl)
0222             tracks_view[it].quality() = reject;  // no race: simple assignment of the same constant
0223         }
0224       }
0225     }
0226   };
0227 
0228   // assume the above (so, short tracks already removed)
0229   template <typename TrackerTraits>
0230   class Kernel_fastDuplicateRemover {
0231   public:
0232     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0233     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0234                                   CACellT<TrackerTraits> const *__restrict__ cells,
0235                                   uint32_t const *__restrict__ nCells,
0236                                   TkSoAView<TrackerTraits> tracks_view,
0237                                   bool dupPassThrough) const {
0238       // quality to mark rejected
0239       auto const reject = dupPassThrough ? Quality::loose : Quality::dup;
0240       constexpr auto loose = Quality::loose;
0241 
0242       ALPAKA_ASSERT_ACC(nCells);
0243       const auto ntNCells = (*nCells);
0244 
0245       for (auto idx : cms::alpakatools::uniform_elements(acc, ntNCells)) {
0246         auto const &thisCell = cells[idx];
0247         if (thisCell.tracks().size() < 2)
0248           continue;
0249 
0250         float mc = maxScore;
0251         uint16_t im = tkNotFound;
0252 
0253         auto score = [&](auto it) { return std::abs(reco::tip(tracks_view, it)); };
0254 
0255         // full crazy combinatorics
0256         int ntr = thisCell.tracks().size();
0257         for (int i = 0; i < ntr - 1; ++i) {
0258           auto it = thisCell.tracks()[i];
0259           auto qi = tracks_view[it].quality();
0260           if (qi <= reject)
0261             continue;
0262           auto opi = tracks_view[it].state()(2);
0263           auto e2opi = tracks_view[it].covariance()(9);
0264           auto cti = tracks_view[it].state()(3);
0265           auto e2cti = tracks_view[it].covariance()(12);
0266           for (auto j = i + 1; j < ntr; ++j) {
0267             auto jt = thisCell.tracks()[j];
0268             auto qj = tracks_view[jt].quality();
0269             if (qj <= reject)
0270               continue;
0271             auto opj = tracks_view[jt].state()(2);
0272             auto ctj = tracks_view[jt].state()(3);
0273             auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0274             if ((cti - ctj) * (cti - ctj) > dct)
0275               continue;
0276             auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0277             if ((opi - opj) * (opi - opj) > dop)
0278               continue;
0279             if ((qj < qi) || (qj == qi && score(it) < score(jt)))
0280               tracks_view[jt].quality() = reject;
0281             else {
0282               tracks_view[it].quality() = reject;
0283               break;
0284             }
0285           }
0286         }
0287 
0288         // find maxQual
0289         auto maxQual = reject;  // no duplicate!
0290         for (auto it : thisCell.tracks()) {
0291           if (tracks_view[it].quality() > maxQual)
0292             maxQual = tracks_view[it].quality();
0293         }
0294 
0295         if (maxQual <= loose)
0296           continue;
0297 
0298         // find min score
0299         for (auto it : thisCell.tracks()) {
0300           if (tracks_view[it].quality() == maxQual && score(it) < mc) {
0301             mc = score(it);
0302             im = it;
0303           }
0304         }
0305 
0306         if (tkNotFound == im)
0307           continue;
0308 
0309         // mark all other duplicates  (not yet, keep it loose)
0310         for (auto it : thisCell.tracks()) {
0311           if (tracks_view[it].quality() > loose && it != im)
0312             tracks_view[it].quality() = loose;  //no race:  simple assignment of the same constant
0313         }
0314       }
0315     }
0316   };
0317 
0318   template <typename TrackerTraits>
0319   class Kernel_connect {
0320   public:
0321     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0322     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0323                                   cms::alpakatools::AtomicPairCounter *apc1,
0324                                   cms::alpakatools::AtomicPairCounter *apc2,  // just to zero them
0325                                   HitsConstView<TrackerTraits> hh,
0326                                   CACellT<TrackerTraits> *cells,
0327                                   uint32_t *nCells,
0328                                   CellNeighborsVector<TrackerTraits> *cellNeighbors,
0329                                   OuterHitOfCell<TrackerTraits> const *isOuterHitOfCell,
0330                                   CAParams<TrackerTraits> params) const {
0331       using Cell = CACellT<TrackerTraits>;
0332 
0333       if (cms::alpakatools::once_per_grid(acc)) {
0334         *apc1 = 0;
0335         *apc2 = 0;
0336       }  // ready for next kernel
0337 
0338       // loop on outer cells
0339       for (uint32_t idx : cms::alpakatools::uniform_elements_y(acc, *nCells)) {
0340         auto cellIndex = idx;
0341         auto &thisCell = cells[idx];
0342         auto innerHitId = thisCell.inner_hit_id();
0343         if (int(innerHitId) < isOuterHitOfCell->offset)
0344           continue;
0345 
0346         uint32_t numberOfPossibleNeighbors = (*isOuterHitOfCell)[innerHitId].size();
0347         auto vi = (*isOuterHitOfCell)[innerHitId].data();
0348         auto ri = thisCell.inner_r(hh);
0349         auto zi = thisCell.inner_z(hh);
0350         auto ro = thisCell.outer_r(hh);
0351         auto zo = thisCell.outer_z(hh);
0352         auto isBarrel = thisCell.inner_detIndex(hh) < TrackerTraits::last_barrel_detIndex;
0353 
0354         // loop on inner cells
0355         for (uint32_t j : cms::alpakatools::independent_group_elements_x(acc, numberOfPossibleNeighbors)) {
0356           auto otherCell = (vi[j]);
0357           auto &oc = cells[otherCell];
0358           auto r1 = oc.inner_r(hh);
0359           auto z1 = oc.inner_z(hh);
0360           bool aligned = Cell::areAlignedRZ(
0361               r1,
0362               z1,
0363               ri,
0364               zi,
0365               ro,
0366               zo,
0367               params.ptmin_,
0368               isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_);  // 2.f*thetaCut); // FIXME tune cuts
0369           if (aligned &&
0370               thisCell.dcaCut(hh,
0371                               oc,
0372                               oc.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? params.dcaCutInnerTriplet_
0373                                                                                          : params.dcaCutOuterTriplet_,
0374                               params.hardCurvCut_)) {  // FIXME tune cuts
0375             oc.addOuterNeighbor(acc, cellIndex, *cellNeighbors);
0376             thisCell.setStatusBits(Cell::StatusBit::kUsed);
0377             oc.setStatusBits(Cell::StatusBit::kUsed);
0378           }
0379         }  // loop on inner cells
0380       }    // loop on outer cells
0381     }
0382   };
0383   template <typename TrackerTraits>
0384   class Kernel_find_ntuplets {
0385   public:
0386     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0387     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0388                                   HitsConstView<TrackerTraits> hh,
0389                                   TkSoAView<TrackerTraits> tracks_view,
0390                                   CACellT<TrackerTraits> *__restrict__ cells,
0391                                   uint32_t const *nCells,
0392                                   CellTracksVector<TrackerTraits> *cellTracks,
0393                                   cms::alpakatools::AtomicPairCounter *apc,
0394                                   CAParams<TrackerTraits> params) const {
0395       // recursive: not obvious to widen
0396 
0397       using Cell = CACellT<TrackerTraits>;
0398 
0399 #ifdef GPU_DEBUG
0400       if (cms::alpakatools::once_per_grid(acc))
0401         printf("starting producing ntuplets from %d cells \n", *nCells);
0402 #endif
0403 
0404       for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) {
0405         auto const &thisCell = cells[idx];
0406 
0407         // cut by earlyFishbone
0408         if (thisCell.isKilled())
0409           continue;
0410 
0411         // we require at least three hits
0412         if (thisCell.outerNeighbors().empty())
0413           continue;
0414 
0415         auto pid = thisCell.layerPairId();
0416         bool doit = params.startingLayerPair(pid);
0417 
0418         constexpr uint32_t maxDepth = TrackerTraits::maxDepth;
0419 
0420         if (doit) {
0421           typename Cell::TmpTuple stack;
0422           stack.reset();
0423           bool bpix1Start = params.startAt0(pid);
0424           thisCell.template find_ntuplets<maxDepth, TAcc>(acc,
0425                                                           hh,
0426                                                           cells,
0427                                                           *cellTracks,
0428                                                           tracks_view.hitIndices(),
0429                                                           *apc,
0430                                                           tracks_view.quality(),
0431                                                           stack,
0432                                                           params.minHitsPerNtuplet_,
0433                                                           bpix1Start);
0434           ALPAKA_ASSERT_ACC(stack.empty());
0435         }
0436       }
0437     }
0438   };
0439 
0440   template <typename TrackerTraits>
0441   class Kernel_mark_used {
0442   public:
0443     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0444     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0445                                   CACellT<TrackerTraits> *__restrict__ cells,
0446                                   uint32_t const *nCells) const {
0447       using Cell = CACellT<TrackerTraits>;
0448       for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) {
0449         auto &thisCell = cells[idx];
0450         if (!thisCell.tracks().empty())
0451           thisCell.setStatusBits(Cell::StatusBit::kInTrack);
0452       }
0453     }
0454   };
0455 
0456   template <typename TrackerTraits>
0457   class Kernel_countMultiplicity {
0458   public:
0459     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0460     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0461                                   TkSoAView<TrackerTraits> tracks_view,
0462                                   TupleMultiplicity<TrackerTraits> *tupleMultiplicity) const {
0463       for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0464         auto nhits = tracks_view.hitIndices().size(it);
0465         if (nhits < 3)
0466           continue;
0467         if (tracks_view[it].quality() == Quality::edup)
0468           continue;
0469         ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad);
0470         if (nhits > TrackerTraits::maxHitsOnTrack)  // current limit
0471           printf("wrong mult %d %d\n", it, nhits);
0472         ALPAKA_ASSERT_ACC(nhits <= TrackerTraits::maxHitsOnTrack);
0473         tupleMultiplicity->count(acc, nhits);
0474       }
0475     }
0476   };
0477 
0478   template <typename TrackerTraits>
0479   class Kernel_fillMultiplicity {
0480   public:
0481     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0482     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0483                                   TkSoAView<TrackerTraits> tracks_view,
0484                                   TupleMultiplicity<TrackerTraits> *tupleMultiplicity) const {
0485       for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0486         auto nhits = tracks_view.hitIndices().size(it);
0487         if (nhits < 3)
0488           continue;
0489         if (tracks_view[it].quality() == Quality::edup)
0490           continue;
0491         ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad);
0492         if (nhits > TrackerTraits::maxHitsOnTrack)
0493           printf("wrong mult %d %d\n", it, nhits);
0494         ALPAKA_ASSERT_ACC(nhits <= TrackerTraits::maxHitsOnTrack);
0495         tupleMultiplicity->fill(acc, nhits, it);
0496       }
0497     }
0498   };
0499 
0500   template <typename TrackerTraits>
0501   class Kernel_classifyTracks {
0502   public:
0503     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0504     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0505                                   TkSoAView<TrackerTraits> tracks_view,
0506                                   QualityCuts<TrackerTraits> cuts) const {
0507       for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0508         auto nhits = tracks_view.hitIndices().size(it);
0509         if (nhits == 0)
0510           break;  // guard
0511 
0512         // if duplicate: not even fit
0513         if (tracks_view[it].quality() == Quality::edup)
0514           continue;
0515 
0516         ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad);
0517 
0518         // mark doublets as bad
0519         if (nhits < 3)
0520           continue;
0521 
0522         // if the fit has any invalid parameters, mark it as bad
0523         bool isNaN = false;
0524         for (int i = 0; i < 5; ++i) {
0525           isNaN |= std::isnan(tracks_view[it].state()(i));
0526         }
0527         if (isNaN) {
0528 #ifdef NTUPLE_DEBUG
0529           printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
0530 #endif
0531           continue;
0532         }
0533 
0534         tracks_view[it].quality() = Quality::strict;
0535 
0536         if (cuts.strictCut(tracks_view, it))
0537           continue;
0538 
0539         tracks_view[it].quality() = Quality::tight;
0540 
0541         if (cuts.isHP(tracks_view, nhits, it))
0542           tracks_view[it].quality() = Quality::highPurity;
0543       }
0544     }
0545   };
0546 
0547   template <typename TrackerTraits>
0548   class Kernel_doStatsForTracks {
0549   public:
0550     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0551     ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView<TrackerTraits> tracks_view, Counters *counters) const {
0552       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0553         if (tracks_view.hitIndices().size(idx) == 0)
0554           break;  //guard
0555         if (tracks_view[idx].quality() < Quality::loose)
0556           continue;
0557         alpaka::atomicAdd(acc, &(counters->nLooseTracks), 1ull, alpaka::hierarchy::Blocks{});
0558         if (tracks_view[idx].quality() < Quality::strict)
0559           continue;
0560         alpaka::atomicAdd(acc, &(counters->nGoodTracks), 1ull, alpaka::hierarchy::Blocks{});
0561       }
0562     }
0563   };
0564 
0565   template <typename TrackerTraits>
0566   class Kernel_countHitInTracks {
0567   public:
0568     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0569     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0570                                   TkSoAView<TrackerTraits> tracks_view,
0571                                   HitToTuple<TrackerTraits> *hitToTuple) const {
0572       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0573         if (tracks_view.hitIndices().size(idx) == 0)
0574           break;  // guard
0575         for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0576           hitToTuple->count(acc, *h);
0577       }
0578     }
0579   };
0580 
0581   template <typename TrackerTraits>
0582   class Kernel_fillHitInTracks {
0583   public:
0584     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0585     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0586                                   TkSoAView<TrackerTraits> tracks_view,
0587                                   HitToTuple<TrackerTraits> *hitToTuple) const {
0588       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0589         if (tracks_view.hitIndices().size(idx) == 0)
0590           break;  // guard
0591         for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0592           hitToTuple->fill(acc, *h, idx);
0593       }
0594     }
0595   };
0596 
0597   template <typename TrackerTraits>
0598   class Kernel_fillHitDetIndices {
0599   public:
0600     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0601     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0602                                   TkSoAView<TrackerTraits> tracks_view,
0603                                   HitsConstView<TrackerTraits> hh) const {
0604       // copy offsets
0605       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) {
0606         tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
0607       }
0608       // fill hit indices
0609       for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().size())) {
0610         ALPAKA_ASSERT_ACC(tracks_view.hitIndices().content[idx] < (uint32_t)hh.metadata().size());
0611         tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
0612       }
0613     }
0614   };
0615 
0616   template <typename TrackerTraits>
0617   class Kernel_fillNLayers {
0618   public:
0619     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0620     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0621                                   TkSoAView<TrackerTraits> tracks_view,
0622                                   cms::alpakatools::AtomicPairCounter *apc) const {
0623       // clamp the number of tracks to the capacity of the SoA
0624       auto ntracks = std::min<int>(apc->get().first, tracks_view.metadata().size() - 1);
0625 
0626       if (cms::alpakatools::once_per_grid(acc))
0627         tracks_view.nTracks() = ntracks;
0628       for (auto idx : cms::alpakatools::uniform_elements(acc, ntracks)) {
0629         ALPAKA_ASSERT_ACC(TracksUtilities<TrackerTraits>::nHits(tracks_view, idx) >= 3);
0630         tracks_view[idx].nLayers() = TracksUtilities<TrackerTraits>::computeNumberOfLayers(tracks_view, idx);
0631       }
0632     }
0633   };
0634 
0635   template <typename TrackerTraits>
0636   class Kernel_doStatsForHitInTracks {
0637   public:
0638     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0639     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0640                                   HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
0641                                   Counters *counters) const {
0642       auto &c = *counters;
0643       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple->nOnes())) {
0644         if (hitToTuple->size(idx) == 0)
0645           continue;  // SHALL NOT BE break
0646         alpaka::atomicAdd(acc, &c.nUsedHits, 1ull, alpaka::hierarchy::Blocks{});
0647         if (hitToTuple->size(idx) > 1)
0648           alpaka::atomicAdd(acc, &c.nDupHits, 1ull, alpaka::hierarchy::Blocks{});
0649       }
0650     }
0651   };
0652 
0653   template <typename TrackerTraits>
0654   class Kernel_countSharedHit {
0655   public:
0656     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0657     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0658                                   int *__restrict__ nshared,
0659                                   HitContainer<TrackerTraits> const *__restrict__ ptuples,
0660                                   Quality const *__restrict__ quality,
0661                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0662       constexpr auto loose = Quality::loose;
0663 
0664       auto &hitToTuple = *phitToTuple;
0665       auto const &foundNtuplets = *ptuples;
0666       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple->nbins())) {
0667         if (hitToTuple.size(idx) < 2)
0668           continue;
0669 
0670         int nt = 0;
0671 
0672         // count "good" tracks
0673         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0674           if (quality[*it] < loose)
0675             continue;
0676           ++nt;
0677         }
0678 
0679         if (nt < 2)
0680           continue;
0681 
0682         // now mark  each track triplet as sharing a hit
0683         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0684           if (foundNtuplets.size(*it) > 3)
0685             continue;
0686           alpaka::atomicAdd(acc, &nshared[*it], 1ull, alpaka::hierarchy::Blocks{});
0687         }
0688 
0689       }  //  hit loop
0690     }
0691   };
0692 
0693   template <typename TrackerTraits>
0694   class Kernel_markSharedHit {
0695     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0696     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0697                                   int const *__restrict__ nshared,
0698                                   HitContainer<TrackerTraits> const *__restrict__ tuples,
0699                                   Quality *__restrict__ quality,
0700                                   bool dupPassThrough) const {
0701       // constexpr auto bad = Quality::bad;
0702       constexpr auto dup = Quality::dup;
0703       constexpr auto loose = Quality::loose;
0704       // constexpr auto strict = Quality::strict;
0705 
0706       // quality to mark rejected
0707       auto const reject = dupPassThrough ? loose : dup;
0708       for (auto idx : cms::alpakatools::uniform_elements(acc, tuples->nbins())) {
0709         if (tuples->size(idx) == 0)
0710           break;  //guard
0711         if (quality[idx] <= reject)
0712           continue;
0713         if (nshared[idx] > 2)
0714           quality[idx] = reject;
0715       }
0716     }
0717   };
0718 
0719   // mostly for very forward triplets.....
0720   template <typename TrackerTraits>
0721   class Kernel_rejectDuplicate {
0722   public:
0723     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0724     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0725                                   TkSoAView<TrackerTraits> tracks_view,
0726                                   uint16_t nmin,
0727                                   bool dupPassThrough,
0728                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0729       // quality to mark rejected
0730       auto const reject = dupPassThrough ? Quality::loose : Quality::dup;
0731 
0732       auto &hitToTuple = *phitToTuple;
0733 
0734       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) {
0735         if (hitToTuple.size(idx) < 2)
0736           continue;
0737 
0738         auto score = [&](auto it, auto nl) { return std::abs(reco::tip(tracks_view, it)); };
0739 
0740         // full combinatorics
0741         for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
0742           auto const it = *ip;
0743           auto qi = tracks_view[it].quality();
0744           if (qi <= reject)
0745             continue;
0746           auto opi = tracks_view[it].state()(2);
0747           auto e2opi = tracks_view[it].covariance()(9);
0748           auto cti = tracks_view[it].state()(3);
0749           auto e2cti = tracks_view[it].covariance()(12);
0750           auto nli = tracks_view[it].nLayers();
0751           for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
0752             auto const jt = *jp;
0753             auto qj = tracks_view[jt].quality();
0754             if (qj <= reject)
0755               continue;
0756             auto opj = tracks_view[jt].state()(2);
0757             auto ctj = tracks_view[jt].state()(3);
0758             auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0759             if ((cti - ctj) * (cti - ctj) > dct)
0760               continue;
0761             auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0762             if ((opi - opj) * (opi - opj) > dop)
0763               continue;
0764             auto nlj = tracks_view[jt].nLayers();
0765             if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
0766               tracks_view[jt].quality() = reject;
0767             else {
0768               tracks_view[it].quality() = reject;
0769               break;
0770             }
0771           }
0772         }
0773       }
0774     }
0775   };
0776 
0777   template <typename TrackerTraits>
0778   class Kernel_sharedHitCleaner {
0779   public:
0780     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0781     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0782                                   HitsConstView<TrackerTraits> hh,
0783                                   TkSoAView<TrackerTraits> tracks_view,
0784                                   int nmin,
0785                                   bool dupPassThrough,
0786                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0787       // quality to mark rejected
0788       auto const reject = dupPassThrough ? Quality::loose : Quality::dup;
0789       // quality of longest track
0790       auto const longTqual = Quality::highPurity;
0791 
0792       auto &hitToTuple = *phitToTuple;
0793 
0794       uint32_t l1end = hh.hitsLayerStart()[1];
0795 
0796       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) {
0797         if (hitToTuple.size(idx) < 2)
0798           continue;
0799 
0800         int8_t maxNl = 0;
0801 
0802         // find maxNl
0803         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0804           if (tracks_view[*it].quality() < longTqual)
0805             continue;
0806           // if (tracks_view[*it].nHits()==3) continue;
0807           auto nl = tracks_view[*it].nLayers();
0808           maxNl = std::max(nl, maxNl);
0809         }
0810 
0811         if (maxNl < 4)
0812           continue;
0813 
0814         // quad pass through (leave for tests)
0815         // maxNl = std::min(4, maxNl);
0816 
0817         // kill all tracks shorter than maxHl (only triplets???
0818         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0819           auto nl = tracks_view[*it].nLayers();
0820 
0821           //checking if shared hit is on bpix1 and if the tuple is short enough
0822           if (idx < l1end and nl > nmin)
0823             continue;
0824 
0825           if (nl < maxNl && tracks_view[*it].quality() > reject)
0826             tracks_view[*it].quality() = reject;
0827         }
0828       }
0829     }
0830   };
0831   template <typename TrackerTraits>
0832   class Kernel_tripletCleaner {
0833   public:
0834     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0835     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0836                                   TkSoAView<TrackerTraits> tracks_view,
0837                                   uint16_t nmin,
0838                                   bool dupPassThrough,
0839                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0840       // quality to mark rejected
0841       auto const reject = Quality::loose;
0842       /// min quality of good
0843       auto const good = Quality::strict;
0844 
0845       auto &hitToTuple = *phitToTuple;
0846 
0847       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) {
0848         if (hitToTuple.size(idx) < 2)
0849           continue;
0850 
0851         float mc = maxScore;
0852         uint16_t im = tkNotFound;
0853         bool onlyTriplets = true;
0854 
0855         // check if only triplets
0856         for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0857           if (tracks_view[*it].quality() <= good)
0858             continue;
0859           onlyTriplets &= reco::isTriplet(tracks_view, *it);
0860           if (!onlyTriplets)
0861             break;
0862         }
0863 
0864         // only triplets
0865         if (!onlyTriplets)
0866           continue;
0867 
0868         // for triplets choose best tip!  (should we first find best quality???)
0869         for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0870           auto const it = *ip;
0871           if (tracks_view[it].quality() >= good && std::abs(reco::tip(tracks_view, it)) < mc) {
0872             mc = std::abs(reco::tip(tracks_view, it));
0873             im = it;
0874           }
0875         }
0876 
0877         if (tkNotFound == im)
0878           continue;
0879 
0880         // mark worse ambiguities
0881         for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0882           auto const it = *ip;
0883           if (tracks_view[it].quality() > reject && it != im)
0884             tracks_view[it].quality() = reject;  //no race:  simple assignment of the same constant
0885         }
0886 
0887       }  // loop over hits
0888     }
0889   };
0890 
0891   template <typename TrackerTraits>
0892   class Kernel_simpleTripletCleaner {
0893   public:
0894     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0895     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0896                                   TkSoAView<TrackerTraits> tracks_view,
0897                                   uint16_t nmin,
0898                                   bool dupPassThrough,
0899                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) const {
0900       // quality to mark rejected
0901       auto const reject = Quality::loose;
0902       /// min quality of good
0903       auto const good = Quality::loose;
0904 
0905       auto &hitToTuple = *phitToTuple;
0906 
0907       for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) {
0908         if (hitToTuple.size(idx) < 2)
0909           continue;
0910 
0911         float mc = maxScore;
0912         uint16_t im = tkNotFound;
0913 
0914         // choose best tip!  (should we first find best quality???)
0915         for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0916           auto const it = *ip;
0917           if (tracks_view[it].quality() >= good && std::abs(reco::tip(tracks_view, it)) < mc) {
0918             mc = std::abs(reco::tip(tracks_view, it));
0919             im = it;
0920           }
0921         }
0922 
0923         if (tkNotFound == im)
0924           continue;
0925 
0926         // mark worse ambiguities
0927         for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0928           auto const it = *ip;
0929           if (tracks_view[it].quality() > reject && reco::isTriplet(tracks_view, it) && it != im)
0930             tracks_view[it].quality() = reject;  //no race:  simple assignment of the same constant
0931         }
0932 
0933       }  // loop over hits
0934     }
0935   };
0936 
0937   template <typename TrackerTraits>
0938   class Kernel_print_found_ntuplets {
0939   public:
0940     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0941     ALPAKA_FN_ACC void operator()(TAcc const &acc,
0942                                   HitsConstView<TrackerTraits> hh,
0943                                   TkSoAView<TrackerTraits> tracks_view,
0944                                   HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
0945                                   int32_t firstPrint,
0946                                   int32_t lastPrint,
0947                                   int iev) const {
0948       constexpr auto loose = Quality::loose;
0949 
0950       for (auto i :
0951            cms::alpakatools::uniform_elements(acc, firstPrint, std::min(lastPrint, tracks_view.hitIndices().nbins()))) {
0952         auto nh = tracks_view.hitIndices().size(i);
0953         if (nh < 3)
0954           continue;
0955         if (tracks_view[i].quality() < loose)
0956           continue;
0957         printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
0958                10000 * iev + i,
0959                int(tracks_view[i].quality()),
0960                nh,
0961                tracks_view[i].nLayers(),
0962                reco::charge(tracks_view, i),
0963                tracks_view[i].pt(),
0964                tracks_view[i].eta(),
0965                reco::phi(tracks_view, i),
0966                reco::tip(tracks_view, i),
0967                reco::zip(tracks_view, i),
0968                tracks_view[i].chi2(),
0969                hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
0970                hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
0971                hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
0972                nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
0973                nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
0974                nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
0975                nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
0976       }
0977     }
0978   };
0979 
0980   class Kernel_printCounters {
0981   public:
0982     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0983     ALPAKA_FN_ACC void operator()(TAcc const &acc, Counters const *counters) const {
0984       auto const &c = *counters;
0985       printf(
0986           "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks  |  nLooseTracks  |  nGoodTracks | nUsedHits | "
0987           "nDupHits | nFishCells | nKilledCells | nUsedCells | nZeroTrackCells ||\n");
0988       printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
0989              c.nEvents,
0990              c.nHits,
0991              c.nCells,
0992              c.nTuples,
0993              c.nFitTracks,
0994              c.nLooseTracks,
0995              c.nGoodTracks,
0996              c.nUsedHits,
0997              c.nDupHits,
0998              c.nFishCells,
0999              c.nKilledCells,
1000              c.nEmptyCells,
1001              c.nZeroTrackCells);
1002       printf(
1003           "Counters Norm %lld ||  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.1f|  %.3f|  %.3f|  %.3f|  "
1004           "%.3f||\n",
1005           c.nEvents,
1006           c.nHits / double(c.nEvents),
1007           c.nCells / double(c.nEvents),
1008           c.nTuples / double(c.nEvents),
1009           c.nFitTracks / double(c.nEvents),
1010           c.nLooseTracks / double(c.nEvents),
1011           c.nGoodTracks / double(c.nEvents),
1012           c.nUsedHits / double(c.nEvents),
1013           c.nDupHits / double(c.nEvents),
1014           c.nFishCells / double(c.nCells),
1015           c.nKilledCells / double(c.nCells),
1016           c.nEmptyCells / double(c.nCells),
1017           c.nZeroTrackCells / double(c.nCells));
1018     }
1019   };
1020 
1021 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels
1022 
1023 #endif  // RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h