Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-07 04:38:01

0001 #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
0002 #define RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 #include <cstdio>
0008 #include <limits>
0009 
0010 #include <alpaka/alpaka.hpp>
0011 
0012 #include "DataFormats/Math/interface/approx_atan2.h"
0013 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0014 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0015 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0016 #include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h"
0017 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0018 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0019 
0020 #include "CACell.h"
0021 #include "CAStructures.h"
0022 
0023 //#define GPU_DEBUG
0024 //#define NTUPLE_DEBUG
0025 
0026 namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets {
0027   using namespace cms::alpakatools;
0028 
0029   template <typename TrackerTraits>
0030   using CellNeighbors = caStructures::CellNeighborsT<TrackerTraits>;
0031   template <typename TrackerTraits>
0032   using CellTracks = caStructures::CellTracksT<TrackerTraits>;
0033   template <typename TrackerTraits>
0034   using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0035   template <typename TrackerTraits>
0036   using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0037   template <typename TrackerTraits>
0038   using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0039   template <typename TrackerTraits>
0040   using HitsConstView = typename CACellT<TrackerTraits>::HitsConstView;
0041 
0042   template <typename TrackerTraits>
0043   struct CellCutsT {
0044     using H = HitsConstView<TrackerTraits>;
0045     using T = TrackerTraits;
0046 
0047     CellCutsT() = default;
0048 
0049     CellCutsT(const bool doClusterCut,
0050               const bool doZ0Cut,
0051               const bool doPtCut,
0052               const bool idealConditions,
0053               const float z0Cut,
0054               const float ptCut,
0055               const std::vector<int>& phiCutsV)
0056         : doClusterCut_(doClusterCut),
0057           doZ0Cut_(doZ0Cut),
0058           doPtCut_(doPtCut),
0059           idealConditions_(idealConditions),
0060           z0Cut_(z0Cut),
0061           ptCut_(ptCut) {
0062       assert(phiCutsV.size() == TrackerTraits::nPairs);
0063       std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
0064     }
0065 
0066     bool doClusterCut_;
0067     bool doZ0Cut_;
0068     bool doPtCut_;
0069     bool idealConditions_;  //this is actually not used by phase2
0070 
0071     float z0Cut_;  //FIXME: check if could be const now
0072     float ptCut_;
0073 
0074     int phiCuts[T::nPairs];
0075 
0076     template <typename TAcc>
0077     ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline)) zSizeCut(const TAcc& acc,
0078                                                                                 H hh,
0079                                                                                 int i,
0080                                                                                 int o) const {
0081       const uint32_t mi = hh[i].detectorIndex();
0082 
0083       bool innerB1 = mi < T::last_bpix1_detIndex;
0084       bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
0085       auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
0086 
0087       if (mes < 0)
0088         return false;
0089 
0090       const uint32_t mo = hh[o].detectorIndex();
0091       auto so = hh[o].clusterSizeY();
0092 
0093       auto dz = hh[i].zGlobal() - hh[o].zGlobal();
0094       auto dr = hh[i].rGlobal() - hh[o].rGlobal();
0095 
0096       auto innerBarrel = mi < T::last_barrel_detIndex;
0097       auto onlyBarrel = mo < T::last_barrel_detIndex;
0098 
0099       if (not innerBarrel and not onlyBarrel)
0100         return false;
0101       auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
0102 
0103       return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
0104                         : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
0105     }
0106 
0107     template <typename TAcc>
0108     ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline)) clusterCut(const TAcc& acc,
0109                                                                                   H hh,
0110                                                                                   uint32_t i) const {
0111       const uint32_t mi = hh[i].detectorIndex();
0112       bool innerB1orB2 = mi < T::last_bpix2_detIndex;
0113 
0114       if (!innerB1orB2)
0115         return false;
0116 
0117       bool innerB1 = mi < T::last_bpix1_detIndex;
0118       bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
0119       auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
0120 
0121       if (innerB1)  // B1
0122         if (mes > 0 && mes < T::minYsizeB1)
0123           return true;                                                                 // only long cluster  (5*8)
0124       bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex);  //FIXME number
0125       if (innerB2)                                                                     // B2 and F1
0126         if (mes > 0 && mes < T::minYsizeB2)
0127           return true;
0128 
0129       return false;
0130     }
0131   };
0132 
0133   template <typename TrackerTraits, typename TAcc>
0134   ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto(
0135       const TAcc& acc,
0136       uint32_t nPairs,
0137       const uint32_t maxNumOfDoublets,
0138       CACellT<TrackerTraits>* cells,
0139       uint32_t* nCells,
0140       CellNeighborsVector<TrackerTraits>* cellNeighbors,
0141       CellTracksVector<TrackerTraits>* cellTracks,
0142       HitsConstView<TrackerTraits> hh,
0143       OuterHitOfCell<TrackerTraits> isOuterHitOfCell,
0144       CellCutsT<TrackerTraits> const& cuts) {  // ysize cuts (z in the barrel)  times 8
0145                                                // these are used if doClusterCut is true
0146 
0147     const bool doClusterCut = cuts.doClusterCut_;
0148     const bool doZ0Cut = cuts.doZ0Cut_;
0149     const bool doPtCut = cuts.doPtCut_;
0150 
0151     const float z0cut = cuts.z0Cut_;      // cm
0152     const float hardPtCut = cuts.ptCut_;  // GeV
0153     // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
0154     const float minRadius = hardPtCut * 87.78f;
0155     const float minRadius2T4 = 4.f * minRadius * minRadius;
0156 
0157     using PhiBinner = typename TrackingRecHitSoA<TrackerTraits>::PhiBinner;
0158 
0159     auto const& __restrict__ phiBinner = hh.phiBinner();
0160     uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
0161     ALPAKA_ASSERT_ACC(offsets);
0162 
0163     auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
0164 
0165     // nPairsMax to be optimized later (originally was 64).
0166     // If it should much be bigger, consider using a block-wide parallel prefix scan,
0167     // e.g. see  https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
0168     auto& innerLayerCumulativeSize = alpaka::declareSharedVar<uint32_t[TrackerTraits::nPairs], __COUNTER__>(acc);
0169     auto& ntot = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
0170 
0171     constexpr uint32_t dimIndexY = 0u;
0172     constexpr uint32_t dimIndexX = 1u;
0173     const uint32_t threadIdxLocalY(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexY]);
0174     const uint32_t threadIdxLocalX(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndexX]);
0175 
0176     if (threadIdxLocalY == 0 && threadIdxLocalX == 0) {
0177       innerLayerCumulativeSize[0] = layerSize(TrackerTraits::layerPairs[0]);
0178       for (uint32_t i = 1; i < nPairs; ++i) {
0179         innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i - 1] + layerSize(TrackerTraits::layerPairs[2 * i]);
0180       }
0181       ntot = innerLayerCumulativeSize[nPairs - 1];
0182     }
0183     alpaka::syncBlockThreads(acc);
0184 
0185     // declared outside the loop, as it cannot go backward
0186     uint32_t pairLayerId = 0;
0187 
0188     // outermost parallel loop, using all grid elements along the slower dimension (Y or 0 in a 2D grid)
0189     for (uint32_t j : cms::alpakatools::uniform_elements_y(acc, ntot)) {
0190       // move to lower_bound ?
0191       while (j >= innerLayerCumulativeSize[pairLayerId++])
0192         ;
0193       --pairLayerId;
0194 
0195       ALPAKA_ASSERT_ACC(pairLayerId < nPairs);
0196       ALPAKA_ASSERT_ACC(j < innerLayerCumulativeSize[pairLayerId]);
0197       ALPAKA_ASSERT_ACC(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]);
0198 
0199       uint8_t inner = TrackerTraits::layerPairs[2 * pairLayerId];
0200       uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
0201       ALPAKA_ASSERT_ACC(outer > inner);
0202 
0203       auto hoff = PhiBinner::histOff(outer);
0204       auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
0205       i += offsets[inner];
0206 
0207       ALPAKA_ASSERT_ACC(i >= offsets[inner]);
0208       ALPAKA_ASSERT_ACC(i < offsets[inner + 1]);
0209 
0210       // found hit corresponding to our worker thread, now do the job
0211       if (hh[i].detectorIndex() > pixelClustering::maxNumModules)
0212         continue;  // invalid
0213 
0214       /* maybe clever, not effective when zoCut is on
0215       auto bpos = (mi%8)/4;  // if barrel is 1 for z>0
0216       auto fpos = (outer>3) & (outer<7);
0217       if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
0218       */
0219 
0220       auto mez = hh[i].zGlobal();
0221 
0222       if (mez < TrackerTraits::minz[pairLayerId] || mez > TrackerTraits::maxz[pairLayerId])
0223         continue;
0224 
0225       if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(acc, hh, i))
0226         continue;
0227 
0228       auto mep = hh[i].iphi();
0229       auto mer = hh[i].rGlobal();
0230 
0231       // all cuts: true if fails
0232       auto ptcut = [&](int j, int16_t idphi) {
0233         auto r2t4 = minRadius2T4;
0234         auto ri = mer;
0235         auto ro = hh[j].rGlobal();
0236         auto dphi = short2phi(idphi);
0237         return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
0238       };
0239       auto z0cutoff = [&](int j) {
0240         auto zo = hh[j].zGlobal();
0241         auto ro = hh[j].rGlobal();
0242         auto dr = ro - mer;
0243         return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
0244       };
0245 
0246       auto iphicut = cuts.phiCuts[pairLayerId];
0247 
0248       auto kl = PhiBinner::bin(int16_t(mep - iphicut));
0249       auto kh = PhiBinner::bin(int16_t(mep + iphicut));
0250       auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
0251 
0252 #ifdef GPU_DEBUG
0253       int tot = 0;
0254       int nmin = 0;
0255       int tooMany = 0;
0256 #endif
0257 
0258       auto khh = kh;
0259       incr(khh);
0260       for (auto kk = kl; kk != khh; incr(kk)) {
0261 #ifdef GPU_DEBUG
0262         if (kk != kl && kk != kh)
0263           nmin += phiBinner.size(kk + hoff);
0264 #endif
0265         auto const* __restrict__ p = phiBinner.begin(kk + hoff);
0266         auto const* __restrict__ e = phiBinner.end(kk + hoff);
0267         auto const maxpIndex = e - p;
0268 
0269         // innermost parallel loop, using the block elements along the faster dimension (X or 1 in a 2D grid)
0270         for (uint32_t pIndex : cms::alpakatools::independent_group_elements_x(acc, maxpIndex)) {
0271           // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough?
0272           auto oi = p[pIndex];
0273           ALPAKA_ASSERT_ACC(oi >= offsets[outer]);
0274           ALPAKA_ASSERT_ACC(oi < offsets[outer + 1]);
0275           auto mo = hh[oi].detectorIndex();
0276 
0277           // invalid
0278           if (mo > pixelClustering::maxNumModules)
0279             continue;
0280 
0281           if (doZ0Cut && z0cutoff(oi))
0282             continue;
0283 
0284           auto mop = hh[oi].iphi();
0285           uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
0286 
0287           if (idphi > iphicut)
0288             continue;
0289 
0290           if (doClusterCut && cuts.zSizeCut(acc, hh, i, oi))
0291             continue;
0292 
0293           if (doPtCut && ptcut(oi, idphi))
0294             continue;
0295 
0296           auto ind = alpaka::atomicAdd(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
0297           if (ind >= maxNumOfDoublets) {
0298             alpaka::atomicSub(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{});
0299             break;
0300           }  // move to SimpleVector??
0301           cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
0302           isOuterHitOfCell[oi].push_back(acc, ind);
0303 #ifdef GPU_DEBUG
0304           if (isOuterHitOfCell[oi].full())
0305             ++tooMany;
0306           ++tot;
0307 #endif
0308         }
0309       }
0310 //      #endif
0311 #ifdef GPU_DEBUG
0312       if (tooMany > 0 or tot > 0)
0313         printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
0314                i,
0315                inner,
0316                outer,
0317                nmin,
0318                tot,
0319                tooMany,
0320                iphicut,
0321                TrackerTraits::minz[pairLayerId],
0322                TrackerTraits::maxz[pairLayerId],
0323                tooMany > 0 ? "FULL!!" : "not full.");
0324 #endif
0325     }  // loop in block...
0326   }
0327 
0328 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets
0329 
0330 #endif  // RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoubletsAlgos_h