Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-04-04 00:15:52

0001 #ifndef RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
0002 #define RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h
0003 
0004 //
0005 // Author: Felice Pantaleo, CERN
0006 //
0007 
0008 // #define ONLY_TRIPLETS_IN_HOLE
0009 
0010 #include <cuda_runtime.h>
0011 
0012 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
0014 #include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
0015 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0016 #include "RecoPixelVertexing/PixelTriplets/interface/CircleEq.h"
0017 #include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
0018 #include "CAConstants.h"
0019 
0020 class GPUCACell {
0021 public:
0022   using PtrAsInt = unsigned long long;
0023 
0024   static constexpr auto maxCellsPerHit = caConstants::maxCellsPerHit;
0025   static constexpr auto invalidHitId = std::numeric_limits<caConstants::hindex_type>::max();
0026   using OuterHitOfCellContainer = caConstants::OuterHitOfCellContainer;
0027   using OuterHitOfCell = caConstants::OuterHitOfCell;
0028   using CellNeighbors = caConstants::CellNeighbors;
0029   using CellTracks = caConstants::CellTracks;
0030   using CellNeighborsVector = caConstants::CellNeighborsVector;
0031   using CellTracksVector = caConstants::CellTracksVector;
0032 
0033   using Hits = TrackingRecHit2DSOAView;
0034   using hindex_type = Hits::hindex_type;
0035 
0036   using TmpTuple = cms::cuda::VecArray<uint32_t, 6>;
0037 
0038   using HitContainer = pixelTrack::HitContainer;
0039   using Quality = pixelTrack::Quality;
0040   static constexpr auto bad = pixelTrack::Quality::bad;
0041 
0042   enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 };
0043 
0044   GPUCACell() = default;
0045 
0046   __device__ __forceinline__ void init(CellNeighborsVector& cellNeighbors,
0047                                        CellTracksVector& cellTracks,
0048                                        Hits const& hh,
0049                                        int layerPairId,
0050                                        hindex_type innerHitId,
0051                                        hindex_type outerHitId) {
0052     theInnerHitId = innerHitId;
0053     theOuterHitId = outerHitId;
0054     theLayerPairId_ = layerPairId;
0055     theStatus_ = 0;
0056     theFishboneId = invalidHitId;
0057 
0058     // optimization that depends on access pattern
0059     theInnerZ = hh.zGlobal(innerHitId);
0060     theInnerR = hh.rGlobal(innerHitId);
0061 
0062     // link to default empty
0063     theOuterNeighbors = &cellNeighbors[0];
0064     theTracks = &cellTracks[0];
0065     assert(outerNeighbors().empty());
0066     assert(tracks().empty());
0067   }
0068 
0069   __device__ __forceinline__ int addOuterNeighbor(CellNeighbors::value_t t, CellNeighborsVector& cellNeighbors) {
0070     // use smart cache
0071     if (outerNeighbors().empty()) {
0072       auto i = cellNeighbors.extend();  // maybe wasted....
0073       if (i > 0) {
0074         cellNeighbors[i].reset();
0075         __threadfence();
0076 #ifdef __CUDACC__
0077         auto zero = (PtrAsInt)(&cellNeighbors[0]);
0078         atomicCAS((PtrAsInt*)(&theOuterNeighbors),
0079                   zero,
0080                   (PtrAsInt)(&cellNeighbors[i]));  // if fails we cannot give "i" back...
0081 #else
0082         theOuterNeighbors = &cellNeighbors[i];
0083 #endif
0084       } else
0085         return -1;
0086     }
0087     __threadfence();
0088     return outerNeighbors().push_back(t);
0089   }
0090 
0091   __device__ __forceinline__ int addTrack(CellTracks::value_t t, CellTracksVector& cellTracks) {
0092     if (tracks().empty()) {
0093       auto i = cellTracks.extend();  // maybe wasted....
0094       if (i > 0) {
0095         cellTracks[i].reset();
0096         __threadfence();
0097 #ifdef __CUDACC__
0098         auto zero = (PtrAsInt)(&cellTracks[0]);
0099         atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i]));  // if fails we cannot give "i" back...
0100 #else
0101         theTracks = &cellTracks[i];
0102 #endif
0103       } else
0104         return -1;
0105     }
0106     __threadfence();
0107     return tracks().push_back(t);
0108   }
0109 
0110   __device__ __forceinline__ CellTracks& tracks() { return *theTracks; }
0111   __device__ __forceinline__ CellTracks const& tracks() const { return *theTracks; }
0112   __device__ __forceinline__ CellNeighbors& outerNeighbors() { return *theOuterNeighbors; }
0113   __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
0114   __device__ __forceinline__ float inner_x(Hits const& hh) const { return hh.xGlobal(theInnerHitId); }
0115   __device__ __forceinline__ float outer_x(Hits const& hh) const { return hh.xGlobal(theOuterHitId); }
0116   __device__ __forceinline__ float inner_y(Hits const& hh) const { return hh.yGlobal(theInnerHitId); }
0117   __device__ __forceinline__ float outer_y(Hits const& hh) const { return hh.yGlobal(theOuterHitId); }
0118   __device__ __forceinline__ float inner_z(Hits const& hh) const { return theInnerZ; }
0119   // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; }
0120   __device__ __forceinline__ float outer_z(Hits const& hh) const { return hh.zGlobal(theOuterHitId); }
0121   __device__ __forceinline__ float inner_r(Hits const& hh) const { return theInnerR; }
0122   // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; }
0123   __device__ __forceinline__ float outer_r(Hits const& hh) const { return hh.rGlobal(theOuterHitId); }
0124 
0125   __device__ __forceinline__ auto inner_iphi(Hits const& hh) const { return hh.iphi(theInnerHitId); }
0126   __device__ __forceinline__ auto outer_iphi(Hits const& hh) const { return hh.iphi(theOuterHitId); }
0127 
0128   __device__ __forceinline__ float inner_detIndex(Hits const& hh) const { return hh.detectorIndex(theInnerHitId); }
0129   __device__ __forceinline__ float outer_detIndex(Hits const& hh) const { return hh.detectorIndex(theOuterHitId); }
0130 
0131   constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
0132   constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
0133 
0134   __device__ void print_cell() const {
0135     printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
0136            theLayerPairId_,
0137            theInnerHitId,
0138            theOuterHitId);
0139   }
0140 
0141   __device__ bool check_alignment(Hits const& hh,
0142                                   GPUCACell const& otherCell,
0143                                   const float ptmin,
0144                                   const float hardCurvCut,
0145                                   const float caThetaCutBarrel,
0146                                   const float caThetaCutForward,
0147                                   const float dcaCutInnerTriplet,
0148                                   const float dcaCutOuterTriplet) const {
0149     // detIndex of the layerStart for the Phase1 Pixel Detector:
0150     // [BPX1, BPX2, BPX3, BPX4,  FP1,  FP2,  FP3,  FN1,  FN2,  FN3, LAST_VALID]
0151     // [   0,   96,  320,  672, 1184, 1296, 1408, 1520, 1632, 1744,       1856]
0152     auto ri = inner_r(hh);
0153     auto zi = inner_z(hh);
0154 
0155     auto ro = outer_r(hh);
0156     auto zo = outer_z(hh);
0157 
0158     auto r1 = otherCell.inner_r(hh);
0159     auto z1 = otherCell.inner_z(hh);
0160     auto isBarrel = otherCell.outer_detIndex(hh) < caConstants::last_barrel_detIndex;
0161     bool aligned = areAlignedRZ(r1,
0162                                 z1,
0163                                 ri,
0164                                 zi,
0165                                 ro,
0166                                 zo,
0167                                 ptmin,
0168                                 isBarrel ? caThetaCutBarrel : caThetaCutForward);  // 2.f*thetaCut); // FIXME tune cuts
0169     return (aligned && dcaCut(hh,
0170                               otherCell,
0171                               otherCell.inner_detIndex(hh) < caConstants::last_bpix1_detIndex ? dcaCutInnerTriplet
0172                                                                                               : dcaCutOuterTriplet,
0173                               hardCurvCut));  // FIXME tune cuts
0174   }
0175 
0176   __device__ __forceinline__ static bool areAlignedRZ(
0177       float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
0178     float radius_diff = std::abs(r1 - ro);
0179     float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
0180 
0181     float pMin = ptmin * std::sqrt(distance_13_squared);  // this needs to be divided by
0182                                                           // radius_diff later
0183 
0184     float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
0185     return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff;
0186   }
0187 
0188   __device__ inline bool dcaCut(Hits const& hh,
0189                                 GPUCACell const& otherCell,
0190                                 const float region_origin_radius_plus_tolerance,
0191                                 const float maxCurv) const {
0192     auto x1 = otherCell.inner_x(hh);
0193     auto y1 = otherCell.inner_y(hh);
0194 
0195     auto x2 = inner_x(hh);
0196     auto y2 = inner_y(hh);
0197 
0198     auto x3 = outer_x(hh);
0199     auto y3 = outer_y(hh);
0200 
0201     CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
0202 
0203     if (eq.curvature() > maxCurv)
0204       return false;
0205 
0206     return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
0207   }
0208 
0209   __device__ __forceinline__ static bool dcaCutH(float x1,
0210                                                  float y1,
0211                                                  float x2,
0212                                                  float y2,
0213                                                  float x3,
0214                                                  float y3,
0215                                                  const float region_origin_radius_plus_tolerance,
0216                                                  const float maxCurv) {
0217     CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
0218 
0219     if (eq.curvature() > maxCurv)
0220       return false;
0221 
0222     return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
0223   }
0224 
0225   __device__ inline bool hole0(Hits const& hh, GPUCACell const& innerCell) const {
0226     using caConstants::first_ladder_bpx0;
0227     using caConstants::max_ladder_bpx0;
0228     using caConstants::module_length_bpx0;
0229     using caConstants::module_tolerance_bpx0;
0230     int p = innerCell.inner_iphi(hh);
0231     if (p < 0)
0232       p += std::numeric_limits<unsigned short>::max();
0233     p = (max_ladder_bpx0 * p) / std::numeric_limits<unsigned short>::max();
0234     p %= max_ladder_bpx0;
0235     auto il = first_ladder_bpx0 + p;
0236     auto r0 = hh.averageGeometry().ladderR[il];
0237     auto ri = innerCell.inner_r(hh);
0238     auto zi = innerCell.inner_z(hh);
0239     auto ro = outer_r(hh);
0240     auto zo = outer_z(hh);
0241     auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
0242     auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
0243     auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
0244     auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
0245     return gap;
0246   }
0247 
0248   __device__ inline bool hole4(Hits const& hh, GPUCACell const& innerCell) const {
0249     using caConstants::first_ladder_bpx4;
0250     using caConstants::max_ladder_bpx4;
0251     using caConstants::module_length_bpx4;
0252     using caConstants::module_tolerance_bpx4;
0253     int p = outer_iphi(hh);
0254     if (p < 0)
0255       p += std::numeric_limits<unsigned short>::max();
0256     p = (max_ladder_bpx4 * p) / std::numeric_limits<unsigned short>::max();
0257     p %= max_ladder_bpx4;
0258     auto il = first_ladder_bpx4 + p;
0259     auto r4 = hh.averageGeometry().ladderR[il];
0260     auto ri = innerCell.inner_r(hh);
0261     auto zi = innerCell.inner_z(hh);
0262     auto ro = outer_r(hh);
0263     auto zo = outer_z(hh);
0264     auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
0265     auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
0266     auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
0267     auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
0268     auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
0269     auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
0270     return gap || holeP || holeN;
0271   }
0272 
0273   // trying to free the track building process from hardcoded layers, leaving
0274   // the visit of the graph based on the neighborhood connections between cells.
0275   template <int DEPTH>
0276   __device__ inline void find_ntuplets(Hits const& hh,
0277                                        GPUCACell* __restrict__ cells,
0278                                        CellTracksVector& cellTracks,
0279                                        HitContainer& foundNtuplets,
0280                                        cms::cuda::AtomicPairCounter& apc,
0281                                        Quality* __restrict__ quality,
0282                                        TmpTuple& tmpNtuplet,
0283                                        const unsigned int minHitsPerNtuplet,
0284                                        bool startAt0) const {
0285     // the building process for a track ends if:
0286     // it has no right neighbor
0287     // it has no compatible neighbor
0288     // the ntuplets is then saved if the number of hits it contains is greater
0289     // than a threshold
0290 
0291     auto doubletId = this - cells;
0292     tmpNtuplet.push_back_unsafe(doubletId);
0293     assert(tmpNtuplet.size() <= 4);
0294 
0295     bool last = true;
0296     for (unsigned int otherCell : outerNeighbors()) {
0297       if (cells[otherCell].isKilled())
0298         continue;  // killed by earlyFishbone
0299       last = false;
0300       cells[otherCell].find_ntuplets<DEPTH - 1>(
0301           hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
0302     }
0303     if (last) {  // if long enough save...
0304       if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
0305 #ifdef ONLY_TRIPLETS_IN_HOLE
0306         // triplets accepted only pointing to the hole
0307         if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
0308             ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
0309 #endif
0310         {
0311           hindex_type hits[8];
0312           auto nh = 0U;
0313           constexpr int maxFB = 2;  // for the time being let's limit this
0314           int nfb = 0;
0315           for (auto c : tmpNtuplet) {
0316             hits[nh++] = cells[c].theInnerHitId;
0317             if (nfb < maxFB && cells[c].hasFishbone()) {
0318               ++nfb;
0319               hits[nh++] = cells[c].theFishboneId;  // fishbone hit is always outer than inner hit
0320             }
0321           }
0322           assert(nh < caConstants::maxHitsOnTrack);
0323           hits[nh] = theOuterHitId;
0324           auto it = foundNtuplets.bulkFill(apc, hits, nh + 1);
0325           if (it >= 0) {  // if negative is overflow....
0326             for (auto c : tmpNtuplet)
0327               cells[c].addTrack(it, cellTracks);
0328             quality[it] = bad;  // initialize to bad
0329           }
0330         }
0331       }
0332     }
0333     tmpNtuplet.pop_back();
0334     assert(tmpNtuplet.size() < 4);
0335   }
0336 
0337   // Cell status management
0338   __device__ __forceinline__ void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); }
0339   __device__ __forceinline__ bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); }
0340 
0341   __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; }
0342 
0343   __device__ __forceinline__ bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); }
0344   __device__ __forceinline__ void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); }
0345 
0346   __device__ __forceinline__ void setFishbone(hindex_type id, float z, Hits const& hh) {
0347     // make it deterministic: use the farther apart (in z)
0348     auto old = theFishboneId;
0349     while (
0350         old !=
0351         atomicCAS(&theFishboneId,
0352                   old,
0353                   (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh.zGlobal(old) - theInnerZ)) ? id : old))
0354       old = theFishboneId;
0355   }
0356   __device__ __forceinline__ auto fishboneId() const { return theFishboneId; }
0357   __device__ __forceinline__ bool hasFishbone() const { return theFishboneId != invalidHitId; }
0358 
0359 private:
0360   CellNeighbors* theOuterNeighbors;
0361   CellTracks* theTracks;
0362 
0363   int16_t theLayerPairId_;
0364   uint16_t theStatus_;  // tbd
0365 
0366   float theInnerZ;
0367   float theInnerR;
0368   hindex_type theInnerHitId;
0369   hindex_type theOuterHitId;
0370   hindex_type theFishboneId;
0371 };
0372 
0373 template <>
0374 __device__ inline void GPUCACell::find_ntuplets<0>(Hits const& hh,
0375                                                    GPUCACell* __restrict__ cells,
0376                                                    CellTracksVector& cellTracks,
0377                                                    HitContainer& foundNtuplets,
0378                                                    cms::cuda::AtomicPairCounter& apc,
0379                                                    Quality* __restrict__ quality,
0380                                                    TmpTuple& tmpNtuplet,
0381                                                    const unsigned int minHitsPerNtuplet,
0382                                                    bool startAt0) const {
0383   printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n");
0384 #ifdef __CUDA_ARCH__
0385   __trap();
0386 #else
0387   abort();
0388 #endif
0389 }
0390 
0391 #endif  // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h