Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-01-19 02:53:22

0001 #ifndef RecoPixelVertexing_PixelTriplets_plugins_GPUCACellT_h
0002 #define RecoPixelVertexing_PixelTriplets_plugins_GPUCACellT_h
0003 
0004 //
0005 // Author: Felice Pantaleo, CERN
0006 //
0007 
0008 // #define ONLY_TRIPLETS_IN_HOLE
0009 
0010 #include <cuda_runtime.h>
0011 
0012 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
0014 #include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
0015 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0016 #include "RecoPixelVertexing/PixelTriplets/interface/CircleEq.h"
0017 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0018 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0019 #include "CAStructures.h"
0020 
0021 template <typename TrackerTraits>
0022 class GPUCACellT {
0023 public:
0024   using PtrAsInt = unsigned long long;
0025 
0026   static constexpr auto maxCellsPerHit = TrackerTraits::maxCellsPerHit;
0027   using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT<TrackerTraits>;
0028   using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0029   using CellNeighbors = caStructures::CellNeighborsT<TrackerTraits>;
0030   using CellTracks = caStructures::CellTracksT<TrackerTraits>;
0031   using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0032   using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0033 
0034   using HitsConstView = TrackingRecHitSoAConstView<TrackerTraits>;
0035   using hindex_type = typename TrackerTraits::hindex_type;
0036   using tindex_type = typename TrackerTraits::tindex_type;
0037   static constexpr auto invalidHitId = std::numeric_limits<hindex_type>::max();
0038 
0039   using TmpTuple = cms::cuda::VecArray<uint32_t, TrackerTraits::maxDepth>;
0040 
0041   using HitContainer = typename TrackSoA<TrackerTraits>::HitContainer;
0042   using Quality = pixelTrack::Quality;
0043   static constexpr auto bad = pixelTrack::Quality::bad;
0044 
0045   enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 };
0046 
0047   GPUCACellT() = default;
0048 
0049   __device__ __forceinline__ void init(CellNeighborsVector& cellNeighbors,
0050                                        CellTracksVector& cellTracks,
0051                                        const HitsConstView& hh,
0052                                        int layerPairId,
0053                                        hindex_type innerHitId,
0054                                        hindex_type outerHitId) {
0055     theInnerHitId = innerHitId;
0056     theOuterHitId = outerHitId;
0057     theLayerPairId_ = layerPairId;
0058     theStatus_ = 0;
0059     theFishboneId = invalidHitId;
0060 
0061     // optimization that depends on access pattern
0062     theInnerZ = hh[innerHitId].zGlobal();
0063     theInnerR = hh[innerHitId].rGlobal();
0064 
0065     // link to default empty
0066     theOuterNeighbors = &cellNeighbors[0];
0067     theTracks = &cellTracks[0];
0068     assert(outerNeighbors().empty());
0069     assert(tracks().empty());
0070   }
0071 
0072   __device__ __forceinline__ int addOuterNeighbor(typename TrackerTraits::cindex_type t,
0073                                                   CellNeighborsVector& cellNeighbors) {
0074     // use smart cache
0075     if (outerNeighbors().empty()) {
0076       auto i = cellNeighbors.extend();  // maybe wasted....
0077       if (i > 0) {
0078         cellNeighbors[i].reset();
0079         __threadfence();
0080 #ifdef __CUDACC__
0081         auto zero = (PtrAsInt)(&cellNeighbors[0]);
0082         atomicCAS((PtrAsInt*)(&theOuterNeighbors),
0083                   zero,
0084                   (PtrAsInt)(&cellNeighbors[i]));  // if fails we cannot give "i" back...
0085 #else
0086         theOuterNeighbors = &cellNeighbors[i];
0087 #endif
0088       } else
0089         return -1;
0090     }
0091     __threadfence();
0092     return outerNeighbors().push_back(t);
0093   }
0094 
0095   __device__ __forceinline__ int addTrack(tindex_type t, CellTracksVector& cellTracks) {
0096     if (tracks().empty()) {
0097       auto i = cellTracks.extend();  // maybe wasted....
0098       if (i > 0) {
0099         cellTracks[i].reset();
0100         __threadfence();
0101 #ifdef __CUDACC__
0102         auto zero = (PtrAsInt)(&cellTracks[0]);
0103         atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i]));  // if fails we cannot give "i" back...
0104 #else
0105         theTracks = &cellTracks[i];
0106 #endif
0107       } else
0108         return -1;
0109     }
0110     __threadfence();
0111     return tracks().push_back(t);
0112   }
0113 
0114   __device__ __forceinline__ CellTracks& tracks() { return *theTracks; }
0115   __device__ __forceinline__ CellTracks const& tracks() const { return *theTracks; }
0116   __device__ __forceinline__ CellNeighbors& outerNeighbors() { return *theOuterNeighbors; }
0117   __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
0118   __device__ __forceinline__ float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId].xGlobal(); }
0119   __device__ __forceinline__ float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId].xGlobal(); }
0120   __device__ __forceinline__ float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId].yGlobal(); }
0121   __device__ __forceinline__ float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId].yGlobal(); }
0122   __device__ __forceinline__ float inner_z(const HitsConstView& hh) const { return theInnerZ; }
0123   // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; }
0124   __device__ __forceinline__ float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId].zGlobal(); }
0125   __device__ __forceinline__ float inner_r(const HitsConstView& hh) const { return theInnerR; }
0126   // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; }
0127   __device__ __forceinline__ float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId].rGlobal(); }
0128 
0129   __device__ __forceinline__ auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId].iphi(); }
0130   __device__ __forceinline__ auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId].iphi(); }
0131 
0132   __device__ __forceinline__ float inner_detIndex(const HitsConstView& hh) const {
0133     return hh[theInnerHitId].detectorIndex();
0134   }
0135   __device__ __forceinline__ float outer_detIndex(const HitsConstView& hh) const {
0136     return hh[theOuterHitId].detectorIndex();
0137   }
0138 
0139   constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
0140   constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
0141 
0142   __device__ void print_cell() const {
0143     printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
0144            theLayerPairId_,
0145            theInnerHitId,
0146            theOuterHitId);
0147   }
0148 
0149   __device__ bool check_alignment(const HitsConstView& hh,
0150                                   GPUCACellT const& otherCell,
0151                                   const float ptmin,
0152                                   const float hardCurvCut,
0153                                   const float caThetaCutBarrel,
0154                                   const float caThetaCutForward,
0155                                   const float dcaCutInnerTriplet,
0156                                   const float dcaCutOuterTriplet) const {
0157     // detIndex of the layerStart for the Phase1 Pixel Detector:
0158     // [BPX1, BPX2, BPX3, BPX4,  FP1,  FP2,  FP3,  FN1,  FN2,  FN3, LAST_VALID]
0159     // [   0,   96,  320,  672, 1184, 1296, 1408, 1520, 1632, 1744,       1856]
0160     auto ri = inner_r(hh);
0161     auto zi = inner_z(hh);
0162 
0163     auto ro = outer_r(hh);
0164     auto zo = outer_z(hh);
0165 
0166     auto r1 = otherCell.inner_r(hh);
0167     auto z1 = otherCell.inner_z(hh);
0168     auto isBarrel = otherCell.outer_detIndex(hh) < TrackerTraits::last_barrel_detIndex;
0169     bool aligned = areAlignedRZ(r1,
0170                                 z1,
0171                                 ri,
0172                                 zi,
0173                                 ro,
0174                                 zo,
0175                                 ptmin,
0176                                 isBarrel ? caThetaCutBarrel : caThetaCutForward);  // 2.f*thetaCut); // FIXME tune cuts
0177     return (aligned && dcaCut(hh,
0178                               otherCell,
0179                               otherCell.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? dcaCutInnerTriplet
0180                                                                                                 : dcaCutOuterTriplet,
0181                               hardCurvCut));  // FIXME tune cuts
0182   }
0183 
0184   __device__ __forceinline__ static bool areAlignedRZ(
0185       float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
0186     float radius_diff = std::abs(r1 - ro);
0187     float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
0188 
0189     float pMin = ptmin * std::sqrt(distance_13_squared);  // this needs to be divided by
0190                                                           // radius_diff later
0191 
0192     float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
0193     return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff;
0194   }
0195 
0196   __device__ inline bool dcaCut(const HitsConstView& hh,
0197                                 GPUCACellT const& otherCell,
0198                                 const float region_origin_radius_plus_tolerance,
0199                                 const float maxCurv) const {
0200     auto x1 = otherCell.inner_x(hh);
0201     auto y1 = otherCell.inner_y(hh);
0202 
0203     auto x2 = inner_x(hh);
0204     auto y2 = inner_y(hh);
0205 
0206     auto x3 = outer_x(hh);
0207     auto y3 = outer_y(hh);
0208 
0209     CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
0210 
0211     if (eq.curvature() > maxCurv)
0212       return false;
0213 
0214     return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
0215   }
0216 
0217   __device__ __forceinline__ static bool dcaCutH(float x1,
0218                                                  float y1,
0219                                                  float x2,
0220                                                  float y2,
0221                                                  float x3,
0222                                                  float y3,
0223                                                  const float region_origin_radius_plus_tolerance,
0224                                                  const float maxCurv) {
0225     CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
0226 
0227     if (eq.curvature() > maxCurv)
0228       return false;
0229 
0230     return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
0231   }
0232 
0233   __device__ inline bool hole0(const HitsConstView& hh, GPUCACellT const& innerCell) const {
0234     using namespace phase1PixelTopology;
0235 
0236     int p = innerCell.inner_iphi(hh);
0237     if (p < 0)
0238       p += std::numeric_limits<unsigned short>::max();
0239     p = (max_ladder_bpx0 * p) / std::numeric_limits<unsigned short>::max();
0240     p %= max_ladder_bpx0;
0241     auto il = first_ladder_bpx0 + p;
0242     auto r0 = hh.averageGeometry().ladderR[il];
0243     auto ri = innerCell.inner_r(hh);
0244     auto zi = innerCell.inner_z(hh);
0245     auto ro = outer_r(hh);
0246     auto zo = outer_z(hh);
0247     auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
0248     auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
0249     auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
0250     auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
0251     return gap;
0252   }
0253 
0254   __device__ inline bool hole4(const HitsConstView& hh, GPUCACellT const& innerCell) const {
0255     using namespace phase1PixelTopology;
0256 
0257     int p = outer_iphi(hh);
0258     if (p < 0)
0259       p += std::numeric_limits<unsigned short>::max();
0260     p = (max_ladder_bpx4 * p) / std::numeric_limits<unsigned short>::max();
0261     p %= max_ladder_bpx4;
0262     auto il = first_ladder_bpx4 + p;
0263     auto r4 = hh.averageGeometry().ladderR[il];
0264     auto ri = innerCell.inner_r(hh);
0265     auto zi = innerCell.inner_z(hh);
0266     auto ro = outer_r(hh);
0267     auto zo = outer_z(hh);
0268     auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
0269     auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
0270     auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
0271     auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
0272     auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
0273     auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
0274     return gap || holeP || holeN;
0275   }
0276 
0277   // trying to free the track building process from hardcoded layers, leaving
0278   // the visit of the graph based on the neighborhood connections between cells.
0279 
0280   template <int DEPTH>
0281   __device__ inline void find_ntuplets(const HitsConstView& hh,
0282                                        GPUCACellT* __restrict__ cells,
0283                                        CellTracksVector& cellTracks,
0284                                        HitContainer& foundNtuplets,
0285                                        cms::cuda::AtomicPairCounter& apc,
0286                                        Quality* __restrict__ quality,
0287                                        TmpTuple& tmpNtuplet,
0288                                        const unsigned int minHitsPerNtuplet,
0289                                        bool startAt0) const {
0290     // the building process for a track ends if:
0291     // it has no right neighbor
0292     // it has no compatible neighbor
0293     // the ntuplets is then saved if the number of hits it contains is greater
0294     // than a threshold
0295 
0296     if constexpr (DEPTH <= 0) {
0297       printf("ERROR: GPUCACellT::find_ntuplets reached full depth!\n");
0298 #ifdef __CUDA_ARCH__
0299       __trap();
0300 #else
0301       abort();
0302 #endif
0303     } else {
0304       auto doubletId = this - cells;
0305       tmpNtuplet.push_back_unsafe(doubletId);
0306       assert(tmpNtuplet.size() <=
0307              int(TrackerTraits::maxHitsOnTrack -
0308                  3));  //1 for the container, 1 because these are doublets, 1 because we may push another
0309 
0310       bool last = true;
0311       for (unsigned int otherCell : outerNeighbors()) {
0312         if (cells[otherCell].isKilled())
0313           continue;  // killed by earlyFishbone
0314         last = false;
0315         cells[otherCell].template find_ntuplets<DEPTH - 1>(
0316             hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
0317       }
0318 
0319       if (last) {  // if long enough save...
0320         if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
0321 #ifdef ONLY_TRIPLETS_IN_HOLE
0322           // triplets accepted only pointing to the hole
0323           if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
0324               ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
0325 #endif
0326           {
0327             hindex_type hits[TrackerTraits::maxDepth + 2];
0328             auto nh = 0U;
0329             constexpr int maxFB = 2;  // for the time being let's limit this
0330             int nfb = 0;
0331             for (auto c : tmpNtuplet) {
0332               hits[nh++] = cells[c].theInnerHitId;
0333               if (nfb < maxFB && cells[c].hasFishbone()) {
0334                 ++nfb;
0335                 hits[nh++] = cells[c].theFishboneId;  // fishbone hit is always outer than inner hit
0336               }
0337             }
0338             assert(nh < TrackerTraits::maxHitsOnTrack);
0339             hits[nh] = theOuterHitId;
0340             auto it = foundNtuplets.bulkFill(apc, hits, nh + 1);
0341             if (it >= 0) {  // if negative is overflow....
0342               for (auto c : tmpNtuplet)
0343                 cells[c].addTrack(it, cellTracks);
0344               quality[it] = bad;  // initialize to bad
0345             }
0346           }
0347         }
0348       }
0349       tmpNtuplet.pop_back();
0350       assert(tmpNtuplet.size() < int(TrackerTraits::maxHitsOnTrack - 1));
0351     }
0352   }
0353 
0354   // Cell status management
0355   __device__ __forceinline__ void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); }
0356   __device__ __forceinline__ bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); }
0357 
0358   __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; }
0359 
0360   __device__ __forceinline__ bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); }
0361   __device__ __forceinline__ void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); }
0362 
0363   __device__ __forceinline__ void setFishbone(hindex_type id, float z, const HitsConstView& hh) {
0364     // make it deterministic: use the farther apart (in z)
0365     auto old = theFishboneId;
0366     while (old !=
0367            atomicCAS(
0368                &theFishboneId,
0369                old,
0370                (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh[old].zGlobal() - theInnerZ)) ? id : old))
0371       old = theFishboneId;
0372   }
0373   __device__ __forceinline__ auto fishboneId() const { return theFishboneId; }
0374   __device__ __forceinline__ bool hasFishbone() const { return theFishboneId != invalidHitId; }
0375 
0376 private:
0377   CellNeighbors* theOuterNeighbors;
0378   CellTracks* theTracks;
0379 
0380   int16_t theLayerPairId_;
0381   uint16_t theStatus_;  // tbd
0382 
0383   float theInnerZ;
0384   float theInnerR;
0385   hindex_type theInnerHitId;
0386   hindex_type theOuterHitId;
0387   hindex_type theFishboneId;
0388 };
0389 
0390 #endif  // RecoPixelVertexing_PixelTriplets_plugins_GPUCACellT_h