Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-02-09 23:42:07

0001 #ifndef RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h
0002 #define RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 #include <cstdio>
0008 #include <limits>
0009 
0010 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0011 #include "DataFormats/Math/interface/approx_atan2.h"
0012 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
0014 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0015 
0016 #include "CAStructures.h"
0017 #include "GPUCACell.h"
0018 
0019 //#define GPU_DEBUG
0020 //#define NTUPLE_DEBUG
0021 
0022 namespace gpuPixelDoublets {
0023 
0024   template <typename TrackerTraits>
0025   using CellNeighbors = caStructures::CellNeighborsT<TrackerTraits>;
0026   template <typename TrackerTraits>
0027   using CellTracks = caStructures::CellTracksT<TrackerTraits>;
0028   template <typename TrackerTraits>
0029   using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0030   template <typename TrackerTraits>
0031   using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0032   template <typename TrackerTraits>
0033   using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0034   template <typename TrackerTraits>
0035   using HitsConstView = typename GPUCACellT<TrackerTraits>::HitsConstView;
0036 
0037   template <typename TrackerTraits>
0038   struct CellCutsT {
0039     using H = HitsConstView<TrackerTraits>;
0040     using T = TrackerTraits;
0041 
0042     CellCutsT() = default;
0043 
0044     CellCutsT(const bool doClusterCut,
0045               const bool doZ0Cut,
0046               const bool doPtCut,
0047               const bool idealConditions,
0048               const float z0Cut,
0049               const float ptCut,
0050               const int minYsizeB1,
0051               const int minYsizeB2,
0052               const std::vector<int>& phiCutsV)
0053         : doClusterCut_(doClusterCut),
0054           doZ0Cut_(doZ0Cut),
0055           doPtCut_(doPtCut),
0056           idealConditions_(idealConditions),
0057           z0Cut_(z0Cut),
0058           ptCut_(ptCut),
0059           minYsizeB1_(minYsizeB1),
0060           minYsizeB2_(minYsizeB2) {
0061       assert(phiCutsV.size() == TrackerTraits::nPairs);
0062       std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
0063     }
0064 
0065     bool doClusterCut_;
0066     bool doZ0Cut_;
0067     bool doPtCut_;
0068     bool idealConditions_;  //this is actually not used by phase2
0069 
0070     float z0Cut_;
0071     float ptCut_;
0072 
0073     int minYsizeB1_;
0074     int minYsizeB2_;
0075 
0076     int phiCuts[T::nPairs];
0077 
0078     __device__ __forceinline__ bool zSizeCut(H hh, int i, int o) const {
0079       const uint32_t mi = hh[i].detectorIndex();
0080 
0081       bool innerB1 = mi < T::last_bpix1_detIndex;
0082       bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
0083       auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
0084 
0085       if (mes < 0)
0086         return false;
0087 
0088       const uint32_t mo = hh[o].detectorIndex();
0089       auto so = hh[o].clusterSizeY();
0090 
0091       auto dz = hh[i].zGlobal() - hh[o].zGlobal();
0092       auto dr = hh[i].rGlobal() - hh[o].rGlobal();
0093 
0094       auto innerBarrel = mi < T::last_barrel_detIndex;
0095       auto onlyBarrel = mo < T::last_barrel_detIndex;
0096 
0097       if (not innerBarrel and not onlyBarrel)
0098         return false;
0099       auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
0100 
0101       return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
0102                         : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
0103     }
0104 
0105     __device__ __forceinline__ bool clusterCut(H hh, int i) const {
0106       const uint32_t mi = hh[i].detectorIndex();
0107       bool innerB1orB2 = mi < T::last_bpix2_detIndex;
0108 
0109       if (!innerB1orB2)
0110         return false;
0111 
0112       bool innerB1 = mi < T::last_bpix1_detIndex;
0113       bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
0114       auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
0115 
0116       if (innerB1)  // B1
0117         if (mes > 0 && mes < minYsizeB1_)
0118           return true;                                                                 // only long cluster  (5*8)
0119       bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex);  //FIXME number
0120       if (innerB2)                                                                     // B2 and F1
0121         if (mes > 0 && mes < minYsizeB2_)
0122           return true;
0123 
0124       return false;
0125     }
0126   };
0127 
0128   template <typename TrackerTraits>
0129   __device__ __forceinline__ void doubletsFromHisto(uint32_t nPairs,
0130                                                     uint32_t maxNumOfDoublets,
0131                                                     GPUCACellT<TrackerTraits>* cells,
0132                                                     uint32_t* nCells,
0133                                                     CellNeighborsVector<TrackerTraits>* cellNeighbors,
0134                                                     CellTracksVector<TrackerTraits>* cellTracks,
0135                                                     HitsConstView<TrackerTraits> hh,
0136                                                     OuterHitOfCell<TrackerTraits> isOuterHitOfCell,
0137                                                     CellCutsT<TrackerTraits> const& cuts) {
0138     // ysize cuts (z in the barrel)  times 8
0139     // these are used if doClusterCut is true
0140 
0141     const bool doClusterCut = cuts.doClusterCut_;
0142     const bool doZ0Cut = cuts.doZ0Cut_;
0143     const bool doPtCut = cuts.doPtCut_;
0144 
0145     const float z0cut = cuts.z0Cut_;      // cm
0146     const float hardPtCut = cuts.ptCut_;  // GeV
0147     // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
0148     const float minRadius = hardPtCut * 87.78f;
0149     const float minRadius2T4 = 4.f * minRadius * minRadius;
0150 
0151     using PhiBinner = typename TrackingRecHitSoA<TrackerTraits>::PhiBinner;
0152 
0153     auto const& __restrict__ phiBinner = hh.phiBinner();
0154     uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
0155     assert(offsets);
0156 
0157     auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
0158 
0159     // nPairsMax to be optimized later (originally was 64).
0160     // If it should be much bigger, consider using a block-wide parallel prefix scan,
0161     // e.g. see  https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
0162 
0163     __shared__ uint32_t innerLayerCumulativeSize[TrackerTraits::nPairs];
0164     __shared__ uint32_t ntot;
0165     if (threadIdx.y == 0 && threadIdx.x == 0) {
0166       innerLayerCumulativeSize[0] = layerSize(TrackerTraits::layerPairs[0]);
0167       for (uint32_t i = 1; i < nPairs; ++i) {
0168         innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i - 1] + layerSize(TrackerTraits::layerPairs[2 * i]);
0169       }
0170       ntot = innerLayerCumulativeSize[nPairs - 1];
0171     }
0172     __syncthreads();
0173 
0174     // x runs faster
0175     auto idy = blockIdx.y * blockDim.y + threadIdx.y;
0176     auto first = threadIdx.x;
0177     auto stride = blockDim.x;
0178 
0179     uint32_t pairLayerId = 0;  // cannot go backward
0180 
0181     for (auto j = idy; j < ntot; j += blockDim.y * gridDim.y) {
0182       while (j >= innerLayerCumulativeSize[pairLayerId++])
0183         ;
0184       --pairLayerId;  // move to lower_bound ??
0185 
0186       assert(pairLayerId < nPairs);
0187       assert(j < innerLayerCumulativeSize[pairLayerId]);
0188       assert(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]);
0189 
0190       uint8_t inner = TrackerTraits::layerPairs[2 * pairLayerId];
0191       uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
0192       assert(outer > inner);
0193 
0194       auto hoff = PhiBinner::histOff(outer);
0195       auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
0196       i += offsets[inner];
0197 
0198       assert(i >= offsets[inner]);
0199       assert(i < offsets[inner + 1]);
0200 
0201       // found hit corresponding to our cuda thread, now do the job
0202 
0203       if (hh[i].detectorIndex() > gpuClustering::maxNumModules)
0204         continue;  // invalid
0205 
0206       /* maybe clever, not effective when zoCut is on
0207       auto bpos = (mi%8)/4;  // if barrel is 1 for z>0
0208       auto fpos = (outer>3) & (outer<7);
0209       if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
0210       */
0211 
0212       auto mez = hh[i].zGlobal();
0213 
0214       if (mez < TrackerTraits::minz[pairLayerId] || mez > TrackerTraits::maxz[pairLayerId])
0215         continue;
0216 
0217       if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(hh, i))
0218         continue;
0219 
0220       auto mep = hh[i].iphi();
0221       auto mer = hh[i].rGlobal();
0222 
0223       auto ptcut = [&](int j, int16_t idphi) {
0224         auto r2t4 = minRadius2T4;
0225         auto ri = mer;
0226         auto ro = hh[j].rGlobal();
0227         auto dphi = short2phi(idphi);
0228         return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
0229       };
0230       auto z0cutoff = [&](int j) {
0231         auto zo = hh[j].zGlobal();
0232         auto ro = hh[j].rGlobal();
0233         auto dr = ro - mer;
0234         return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
0235       };
0236 
0237       auto iphicut = cuts.phiCuts[pairLayerId];
0238 
0239       auto kl = PhiBinner::bin(int16_t(mep - iphicut));
0240       auto kh = PhiBinner::bin(int16_t(mep + iphicut));
0241       auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
0242 
0243 #ifdef GPU_DEBUG
0244       int tot = 0;
0245       int nmin = 0;
0246       int tooMany = 0;
0247 #endif
0248 
0249       auto khh = kh;
0250       incr(khh);
0251       for (auto kk = kl; kk != khh; incr(kk)) {
0252 #ifdef GPU_DEBUG
0253         if (kk != kl && kk != kh)
0254           nmin += phiBinner.size(kk + hoff);
0255 #endif
0256         auto const* __restrict__ p = phiBinner.begin(kk + hoff);
0257         auto const* __restrict__ e = phiBinner.end(kk + hoff);
0258         p += first;
0259         for (; p < e; p += stride) {
0260           auto oi = __ldg(p);
0261           assert(oi >= offsets[outer]);
0262           assert(oi < offsets[outer + 1]);
0263           auto mo = hh[oi].detectorIndex();
0264 
0265           if (mo > gpuClustering::maxNumModules)
0266             continue;  //    invalid
0267 
0268           if (doZ0Cut && z0cutoff(oi))
0269             continue;
0270           auto mop = hh[oi].iphi();
0271           uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
0272           if (idphi > iphicut)
0273             continue;
0274 
0275           if (doClusterCut && cuts.zSizeCut(hh, i, oi))
0276             continue;
0277           if (doPtCut && ptcut(oi, idphi))
0278             continue;
0279 
0280           auto ind = atomicAdd(nCells, 1);
0281           if (ind >= maxNumOfDoublets) {
0282             atomicSub(nCells, 1);
0283             break;
0284           }  // move to SimpleVector??
0285           // int layerPairId, int doubletId, int innerHitId, int outerHitId)
0286           cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
0287           isOuterHitOfCell[oi].push_back(ind);
0288 #ifdef GPU_DEBUG
0289           if (isOuterHitOfCell[oi].full())
0290             ++tooMany;
0291           ++tot;
0292 #endif
0293         }
0294       }
0295 //      #endif
0296 #ifdef GPU_DEBUG
0297       if (tooMany > 0 || tot > 0)
0298         printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
0299                i,
0300                inner,
0301                outer,
0302                nmin,
0303                tot,
0304                tooMany,
0305                iphicut,
0306                TrackerTraits::minz[pairLayerId],
0307                TrackerTraits::maxz[pairLayerId],
0308                tooMany > 0 ? "FULL!!" : "not full.");
0309 #endif
0310     }  // loop in block...
0311   }
0312 
0313 }  // namespace gpuPixelDoublets
0314 
0315 #endif  // RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h