Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:34

0001 #ifndef RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h
0002 #define RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 #include <cstdio>
0008 #include <limits>
0009 
0010 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0011 #include "DataFormats/Math/interface/approx_atan2.h"
0012 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
0014 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0015 
0016 #include "CAStructures.h"
0017 #include "GPUCACell.h"
0018 
0019 //#define GPU_DEBUG
0020 //#define NTUPLE_DEBUG
0021 
0022 namespace gpuPixelDoublets {
0023 
0024   template <typename TrackerTraits>
0025   using CellNeighbors = caStructures::CellNeighborsT<TrackerTraits>;
0026   template <typename TrackerTraits>
0027   using CellTracks = caStructures::CellTracksT<TrackerTraits>;
0028   template <typename TrackerTraits>
0029   using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0030   template <typename TrackerTraits>
0031   using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0032   template <typename TrackerTraits>
0033   using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0034   template <typename TrackerTraits>
0035   using HitsConstView = typename GPUCACellT<TrackerTraits>::HitsConstView;
0036 
0037   template <typename TrackerTraits>
0038   struct CellCutsT {
0039     using H = HitsConstView<TrackerTraits>;
0040     using T = TrackerTraits;
0041 
0042     CellCutsT() = default;
0043 
0044     CellCutsT(const bool doClusterCut,
0045               const bool doZ0Cut,
0046               const bool doPtCut,
0047               const bool idealConditions,
0048               const float z0Cut,
0049               const float ptCut,
0050               const std::vector<int>& phiCutsV)
0051         : doClusterCut_(doClusterCut),
0052           doZ0Cut_(doZ0Cut),
0053           doPtCut_(doPtCut),
0054           idealConditions_(idealConditions),
0055           z0Cut_(z0Cut),
0056           ptCut_(ptCut) {
0057       assert(phiCutsV.size() == TrackerTraits::nPairs);
0058       std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]);
0059     }
0060 
0061     bool doClusterCut_;
0062     bool doZ0Cut_;
0063     bool doPtCut_;
0064     bool idealConditions_;  //this is actually not used by phase2
0065 
0066     float z0Cut_;
0067     float ptCut_;
0068 
0069     int phiCuts[T::nPairs];
0070 
0071     __device__ __forceinline__ bool zSizeCut(H hh, int i, int o) const {
0072       const uint32_t mi = hh[i].detectorIndex();
0073 
0074       bool innerB1 = mi < T::last_bpix1_detIndex;
0075       bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
0076       auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
0077 
0078       if (mes < 0)
0079         return false;
0080 
0081       const uint32_t mo = hh[o].detectorIndex();
0082       auto so = hh[o].clusterSizeY();
0083 
0084       auto dz = hh[i].zGlobal() - hh[o].zGlobal();
0085       auto dr = hh[i].rGlobal() - hh[o].rGlobal();
0086 
0087       auto innerBarrel = mi < T::last_barrel_detIndex;
0088       auto onlyBarrel = mo < T::last_barrel_detIndex;
0089 
0090       if (not innerBarrel and not onlyBarrel)
0091         return false;
0092       auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize;
0093 
0094       return onlyBarrel ? so > 0 && std::abs(so - mes) > dy
0095                         : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred;
0096     }
0097 
0098     __device__ __forceinline__ bool clusterCut(H hh, int i) const {
0099       const uint32_t mi = hh[i].detectorIndex();
0100       bool innerB1orB2 = mi < T::last_bpix2_detIndex;
0101 
0102       if (!innerB1orB2)
0103         return false;
0104 
0105       bool innerB1 = mi < T::last_bpix1_detIndex;
0106       bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2;
0107       auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1;
0108 
0109       if (innerB1)  // B1
0110         if (mes > 0 && mes < T::minYsizeB1)
0111           return true;                                                                 // only long cluster  (5*8)
0112       bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex);  //FIXME number
0113       if (innerB2)                                                                     // B2 and F1
0114         if (mes > 0 && mes < T::minYsizeB2)
0115           return true;
0116 
0117       return false;
0118     }
0119   };
0120 
0121   template <typename TrackerTraits>
0122   __device__ __forceinline__ void doubletsFromHisto(uint32_t nPairs,
0123                                                     uint32_t maxNumOfDoublets,
0124                                                     GPUCACellT<TrackerTraits>* cells,
0125                                                     uint32_t* nCells,
0126                                                     CellNeighborsVector<TrackerTraits>* cellNeighbors,
0127                                                     CellTracksVector<TrackerTraits>* cellTracks,
0128                                                     HitsConstView<TrackerTraits> hh,
0129                                                     OuterHitOfCell<TrackerTraits> isOuterHitOfCell,
0130                                                     CellCutsT<TrackerTraits> const& cuts) {
0131     // ysize cuts (z in the barrel)  times 8
0132     // these are used if doClusterCut is true
0133 
0134     const bool doClusterCut = cuts.doClusterCut_;
0135     const bool doZ0Cut = cuts.doZ0Cut_;
0136     const bool doPtCut = cuts.doPtCut_;
0137 
0138     const float z0cut = cuts.z0Cut_;      // cm
0139     const float hardPtCut = cuts.ptCut_;  // GeV
0140     // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field)
0141     const float minRadius = hardPtCut * 87.78f;
0142     const float minRadius2T4 = 4.f * minRadius * minRadius;
0143 
0144     using PhiBinner = typename TrackingRecHitSoA<TrackerTraits>::PhiBinner;
0145 
0146     auto const& __restrict__ phiBinner = hh.phiBinner();
0147     uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data();
0148     assert(offsets);
0149 
0150     auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; };
0151 
0152     // nPairsMax to be optimized later (originally was 64).
0153     // If it should be much bigger, consider using a block-wide parallel prefix scan,
0154     // e.g. see  https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html
0155 
0156     __shared__ uint32_t innerLayerCumulativeSize[TrackerTraits::nPairs];
0157     __shared__ uint32_t ntot;
0158     if (threadIdx.y == 0 && threadIdx.x == 0) {
0159       innerLayerCumulativeSize[0] = layerSize(TrackerTraits::layerPairs[0]);
0160       for (uint32_t i = 1; i < nPairs; ++i) {
0161         innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i - 1] + layerSize(TrackerTraits::layerPairs[2 * i]);
0162       }
0163       ntot = innerLayerCumulativeSize[nPairs - 1];
0164     }
0165     __syncthreads();
0166 
0167     // x runs faster
0168     auto idy = blockIdx.y * blockDim.y + threadIdx.y;
0169     auto first = threadIdx.x;
0170     auto stride = blockDim.x;
0171 
0172     uint32_t pairLayerId = 0;  // cannot go backward
0173 
0174     for (auto j = idy; j < ntot; j += blockDim.y * gridDim.y) {
0175       while (j >= innerLayerCumulativeSize[pairLayerId++])
0176         ;
0177       --pairLayerId;  // move to lower_bound ??
0178 
0179       assert(pairLayerId < nPairs);
0180       assert(j < innerLayerCumulativeSize[pairLayerId]);
0181       assert(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]);
0182 
0183       uint8_t inner = TrackerTraits::layerPairs[2 * pairLayerId];
0184       uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1];
0185       assert(outer > inner);
0186 
0187       auto hoff = PhiBinner::histOff(outer);
0188       auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1];
0189       i += offsets[inner];
0190 
0191       assert(i >= offsets[inner]);
0192       assert(i < offsets[inner + 1]);
0193 
0194       // found hit corresponding to our cuda thread, now do the job
0195 
0196       if (hh[i].detectorIndex() > gpuClustering::maxNumModules)
0197         continue;  // invalid
0198 
0199       /* maybe clever, not effective when zoCut is on
0200       auto bpos = (mi%8)/4;  // if barrel is 1 for z>0
0201       auto fpos = (outer>3) & (outer<7);
0202       if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue;
0203       */
0204 
0205       auto mez = hh[i].zGlobal();
0206 
0207       if (mez < TrackerTraits::minz[pairLayerId] || mez > TrackerTraits::maxz[pairLayerId])
0208         continue;
0209 
0210       if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(hh, i))
0211         continue;
0212 
0213       auto mep = hh[i].iphi();
0214       auto mer = hh[i].rGlobal();
0215 
0216       auto ptcut = [&](int j, int16_t idphi) {
0217         auto r2t4 = minRadius2T4;
0218         auto ri = mer;
0219         auto ro = hh[j].rGlobal();
0220         auto dphi = short2phi(idphi);
0221         return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri);
0222       };
0223       auto z0cutoff = [&](int j) {
0224         auto zo = hh[j].zGlobal();
0225         auto ro = hh[j].rGlobal();
0226         auto dr = ro - mer;
0227         return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr;
0228       };
0229 
0230       auto iphicut = cuts.phiCuts[pairLayerId];
0231 
0232       auto kl = PhiBinner::bin(int16_t(mep - iphicut));
0233       auto kh = PhiBinner::bin(int16_t(mep + iphicut));
0234       auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); };
0235 
0236 #ifdef GPU_DEBUG
0237       int tot = 0;
0238       int nmin = 0;
0239       int tooMany = 0;
0240 #endif
0241 
0242       auto khh = kh;
0243       incr(khh);
0244       for (auto kk = kl; kk != khh; incr(kk)) {
0245 #ifdef GPU_DEBUG
0246         if (kk != kl && kk != kh)
0247           nmin += phiBinner.size(kk + hoff);
0248 #endif
0249         auto const* __restrict__ p = phiBinner.begin(kk + hoff);
0250         auto const* __restrict__ e = phiBinner.end(kk + hoff);
0251         p += first;
0252         for (; p < e; p += stride) {
0253           auto oi = __ldg(p);
0254           assert(oi >= offsets[outer]);
0255           assert(oi < offsets[outer + 1]);
0256           auto mo = hh[oi].detectorIndex();
0257 
0258           if (mo > gpuClustering::maxNumModules)
0259             continue;  //    invalid
0260 
0261           if (doZ0Cut && z0cutoff(oi))
0262             continue;
0263           auto mop = hh[oi].iphi();
0264           uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop)));
0265           if (idphi > iphicut)
0266             continue;
0267 
0268           if (doClusterCut && cuts.zSizeCut(hh, i, oi))
0269             continue;
0270           if (doPtCut && ptcut(oi, idphi))
0271             continue;
0272 
0273           auto ind = atomicAdd(nCells, 1);
0274           if (ind >= maxNumOfDoublets) {
0275             atomicSub(nCells, 1);
0276             break;
0277           }  // move to SimpleVector??
0278           // int layerPairId, int doubletId, int innerHitId, int outerHitId)
0279           cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi);
0280           isOuterHitOfCell[oi].push_back(ind);
0281 #ifdef GPU_DEBUG
0282           if (isOuterHitOfCell[oi].full())
0283             ++tooMany;
0284           ++tot;
0285 #endif
0286         }
0287       }
0288 //      #endif
0289 #ifdef GPU_DEBUG
0290       if (tooMany > 0 || tot > 0)
0291         printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n",
0292                i,
0293                inner,
0294                outer,
0295                nmin,
0296                tot,
0297                tooMany,
0298                iphicut,
0299                TrackerTraits::minz[pairLayerId],
0300                TrackerTraits::maxz[pairLayerId],
0301                tooMany > 0 ? "FULL!!" : "not full.");
0302 #endif
0303     }  // loop in block...
0304   }
0305 
0306 }  // namespace gpuPixelDoublets
0307 
0308 #endif  // RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h