File indexing completed on 2024-04-06 12:28:34
0001 #ifndef RecoTracker_PixelSeeding_plugins_GPUCACell_h
0002 #define RecoTracker_PixelSeeding_plugins_GPUCACell_h
0003
0004
0005
0006
0007
0008
0009
0010 #include <cuda_runtime.h>
0011
0012 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
0014 #include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
0015 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0016 #include "RecoTracker/PixelSeeding/interface/CircleEq.h"
0017 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0018 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0019 #include "CAStructures.h"
0020
0021 template <typename TrackerTraits>
0022 class GPUCACellT {
0023 public:
0024 using PtrAsInt = unsigned long long;
0025
0026 static constexpr auto maxCellsPerHit = TrackerTraits::maxCellsPerHit;
0027 using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT<TrackerTraits>;
0028 using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0029 using CellNeighbors = caStructures::CellNeighborsT<TrackerTraits>;
0030 using CellTracks = caStructures::CellTracksT<TrackerTraits>;
0031 using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0032 using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0033
0034 using HitsConstView = TrackingRecHitSoAConstView<TrackerTraits>;
0035 using hindex_type = typename TrackerTraits::hindex_type;
0036 using tindex_type = typename TrackerTraits::tindex_type;
0037 static constexpr auto invalidHitId = std::numeric_limits<hindex_type>::max();
0038
0039 using TmpTuple = cms::cuda::VecArray<uint32_t, TrackerTraits::maxDepth>;
0040
0041 using HitContainer = typename TrackSoA<TrackerTraits>::HitContainer;
0042 using Quality = pixelTrack::Quality;
0043 static constexpr auto bad = pixelTrack::Quality::bad;
0044
0045 enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 };
0046
0047 GPUCACellT() = default;
0048
0049 __device__ __forceinline__ void init(CellNeighborsVector& cellNeighbors,
0050 CellTracksVector& cellTracks,
0051 const HitsConstView& hh,
0052 int layerPairId,
0053 hindex_type innerHitId,
0054 hindex_type outerHitId) {
0055 theInnerHitId = innerHitId;
0056 theOuterHitId = outerHitId;
0057 theLayerPairId_ = layerPairId;
0058 theStatus_ = 0;
0059 theFishboneId = invalidHitId;
0060
0061
0062 theInnerZ = hh[innerHitId].zGlobal();
0063 theInnerR = hh[innerHitId].rGlobal();
0064
0065
0066 theOuterNeighbors = &cellNeighbors[0];
0067 theTracks = &cellTracks[0];
0068 assert(outerNeighbors().empty());
0069 assert(tracks().empty());
0070 }
0071
0072 __device__ __forceinline__ int addOuterNeighbor(typename TrackerTraits::cindex_type t,
0073 CellNeighborsVector& cellNeighbors) {
0074
0075 if (outerNeighbors().empty()) {
0076 auto i = cellNeighbors.extend();
0077 if (i > 0) {
0078 cellNeighbors[i].reset();
0079 __threadfence();
0080 #ifdef __CUDACC__
0081 auto zero = (PtrAsInt)(&cellNeighbors[0]);
0082 atomicCAS((PtrAsInt*)(&theOuterNeighbors),
0083 zero,
0084 (PtrAsInt)(&cellNeighbors[i]));
0085 #else
0086 theOuterNeighbors = &cellNeighbors[i];
0087 #endif
0088 } else
0089 return -1;
0090 }
0091 __threadfence();
0092 return outerNeighbors().push_back(t);
0093 }
0094
0095 __device__ __forceinline__ int addTrack(tindex_type t, CellTracksVector& cellTracks) {
0096 if (tracks().empty()) {
0097 auto i = cellTracks.extend();
0098 if (i > 0) {
0099 cellTracks[i].reset();
0100 __threadfence();
0101 #ifdef __CUDACC__
0102 auto zero = (PtrAsInt)(&cellTracks[0]);
0103 atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i]));
0104 #else
0105 theTracks = &cellTracks[i];
0106 #endif
0107 } else
0108 return -1;
0109 }
0110 __threadfence();
0111 return tracks().push_back(t);
0112 }
0113
0114 __device__ __forceinline__ CellTracks& tracks() { return *theTracks; }
0115 __device__ __forceinline__ CellTracks const& tracks() const { return *theTracks; }
0116 __device__ __forceinline__ CellNeighbors& outerNeighbors() { return *theOuterNeighbors; }
0117 __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
0118 __device__ __forceinline__ float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId].xGlobal(); }
0119 __device__ __forceinline__ float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId].xGlobal(); }
0120 __device__ __forceinline__ float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId].yGlobal(); }
0121 __device__ __forceinline__ float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId].yGlobal(); }
0122 __device__ __forceinline__ float inner_z(const HitsConstView& hh) const { return theInnerZ; }
0123
0124 __device__ __forceinline__ float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId].zGlobal(); }
0125 __device__ __forceinline__ float inner_r(const HitsConstView& hh) const { return theInnerR; }
0126
0127 __device__ __forceinline__ float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId].rGlobal(); }
0128
0129 __device__ __forceinline__ auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId].iphi(); }
0130 __device__ __forceinline__ auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId].iphi(); }
0131
0132 __device__ __forceinline__ float inner_detIndex(const HitsConstView& hh) const {
0133 return hh[theInnerHitId].detectorIndex();
0134 }
0135 __device__ __forceinline__ float outer_detIndex(const HitsConstView& hh) const {
0136 return hh[theOuterHitId].detectorIndex();
0137 }
0138
0139 constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
0140 constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
0141
0142 __device__ void print_cell() const {
0143 printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
0144 theLayerPairId_,
0145 theInnerHitId,
0146 theOuterHitId);
0147 }
0148
0149 __device__ bool check_alignment(const HitsConstView& hh,
0150 GPUCACellT const& otherCell,
0151 const float ptmin,
0152 const float hardCurvCut,
0153 const float caThetaCutBarrel,
0154 const float caThetaCutForward,
0155 const float dcaCutInnerTriplet,
0156 const float dcaCutOuterTriplet) const {
0157
0158
0159
0160 auto ri = inner_r(hh);
0161 auto zi = inner_z(hh);
0162
0163 auto ro = outer_r(hh);
0164 auto zo = outer_z(hh);
0165
0166 auto r1 = otherCell.inner_r(hh);
0167 auto z1 = otherCell.inner_z(hh);
0168 auto isBarrel = otherCell.outer_detIndex(hh) < TrackerTraits::last_barrel_detIndex;
0169 bool aligned = areAlignedRZ(r1,
0170 z1,
0171 ri,
0172 zi,
0173 ro,
0174 zo,
0175 ptmin,
0176 isBarrel ? caThetaCutBarrel : caThetaCutForward);
0177 return (aligned && dcaCut(hh,
0178 otherCell,
0179 otherCell.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? dcaCutInnerTriplet
0180 : dcaCutOuterTriplet,
0181 hardCurvCut));
0182 }
0183
0184 __device__ __forceinline__ static bool areAlignedRZ(
0185 float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
0186 float radius_diff = std::abs(r1 - ro);
0187 float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
0188
0189 float pMin = ptmin * std::sqrt(distance_13_squared);
0190
0191
0192 float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
0193 return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff;
0194 }
0195
0196 __device__ inline bool dcaCut(const HitsConstView& hh,
0197 GPUCACellT const& otherCell,
0198 const float region_origin_radius_plus_tolerance,
0199 const float maxCurv) const {
0200 auto x1 = otherCell.inner_x(hh);
0201 auto y1 = otherCell.inner_y(hh);
0202
0203 auto x2 = inner_x(hh);
0204 auto y2 = inner_y(hh);
0205
0206 auto x3 = outer_x(hh);
0207 auto y3 = outer_y(hh);
0208
0209 CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
0210
0211 if (eq.curvature() > maxCurv)
0212 return false;
0213
0214 return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
0215 }
0216
0217 __device__ __forceinline__ static bool dcaCutH(float x1,
0218 float y1,
0219 float x2,
0220 float y2,
0221 float x3,
0222 float y3,
0223 const float region_origin_radius_plus_tolerance,
0224 const float maxCurv) {
0225 CircleEq<float> eq(x1, y1, x2, y2, x3, y3);
0226
0227 if (eq.curvature() > maxCurv)
0228 return false;
0229
0230 return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
0231 }
0232
0233 __device__ inline bool hole0(const HitsConstView& hh, GPUCACellT const& innerCell) const {
0234 using namespace phase1PixelTopology;
0235
0236 int p = innerCell.inner_iphi(hh);
0237 if (p < 0)
0238 p += std::numeric_limits<unsigned short>::max();
0239 p = (max_ladder_bpx0 * p) / std::numeric_limits<unsigned short>::max();
0240 p %= max_ladder_bpx0;
0241 auto il = first_ladder_bpx0 + p;
0242 auto r0 = hh.averageGeometry().ladderR[il];
0243 auto ri = innerCell.inner_r(hh);
0244 auto zi = innerCell.inner_z(hh);
0245 auto ro = outer_r(hh);
0246 auto zo = outer_z(hh);
0247 auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
0248 auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
0249 auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
0250 auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
0251 return gap;
0252 }
0253
0254 __device__ inline bool hole4(const HitsConstView& hh, GPUCACellT const& innerCell) const {
0255 using namespace phase1PixelTopology;
0256
0257 int p = outer_iphi(hh);
0258 if (p < 0)
0259 p += std::numeric_limits<unsigned short>::max();
0260 p = (max_ladder_bpx4 * p) / std::numeric_limits<unsigned short>::max();
0261 p %= max_ladder_bpx4;
0262 auto il = first_ladder_bpx4 + p;
0263 auto r4 = hh.averageGeometry().ladderR[il];
0264 auto ri = innerCell.inner_r(hh);
0265 auto zi = innerCell.inner_z(hh);
0266 auto ro = outer_r(hh);
0267 auto zo = outer_z(hh);
0268 auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
0269 auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
0270 auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
0271 auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
0272 auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
0273 auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
0274 return gap || holeP || holeN;
0275 }
0276
0277
0278
0279
0280 template <int DEPTH>
0281 __device__ inline void find_ntuplets(const HitsConstView& hh,
0282 GPUCACellT* __restrict__ cells,
0283 CellTracksVector& cellTracks,
0284 HitContainer& foundNtuplets,
0285 cms::cuda::AtomicPairCounter& apc,
0286 Quality* __restrict__ quality,
0287 TmpTuple& tmpNtuplet,
0288 const unsigned int minHitsPerNtuplet,
0289 bool startAt0) const {
0290
0291
0292
0293
0294
0295
0296 if constexpr (DEPTH <= 0) {
0297 printf("ERROR: GPUCACellT::find_ntuplets reached full depth!\n");
0298 #ifdef __CUDA_ARCH__
0299 __trap();
0300 #else
0301 abort();
0302 #endif
0303 } else {
0304 auto doubletId = this - cells;
0305 tmpNtuplet.push_back_unsafe(doubletId);
0306 assert(tmpNtuplet.size() <=
0307 int(TrackerTraits::maxHitsOnTrack -
0308 3));
0309
0310 bool last = true;
0311 for (unsigned int otherCell : outerNeighbors()) {
0312 if (cells[otherCell].isKilled())
0313 continue;
0314 last = false;
0315 cells[otherCell].template find_ntuplets<DEPTH - 1>(
0316 hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
0317 }
0318
0319 if (last) {
0320 if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
0321 #ifdef ONLY_TRIPLETS_IN_HOLE
0322
0323 if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
0324 ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
0325 #endif
0326 {
0327 hindex_type hits[TrackerTraits::maxDepth + 2];
0328 auto nh = 0U;
0329 constexpr int maxFB = 2;
0330 int nfb = 0;
0331 for (auto c : tmpNtuplet) {
0332 hits[nh++] = cells[c].theInnerHitId;
0333 if (nfb < maxFB && cells[c].hasFishbone()) {
0334 ++nfb;
0335 hits[nh++] = cells[c].theFishboneId;
0336 }
0337 }
0338 assert(nh < TrackerTraits::maxHitsOnTrack);
0339 hits[nh] = theOuterHitId;
0340 auto it = foundNtuplets.bulkFill(apc, hits, nh + 1);
0341 if (it >= 0) {
0342 for (auto c : tmpNtuplet)
0343 cells[c].addTrack(it, cellTracks);
0344 quality[it] = bad;
0345 }
0346 }
0347 }
0348 }
0349 tmpNtuplet.pop_back();
0350 assert(tmpNtuplet.size() < int(TrackerTraits::maxHitsOnTrack - 1));
0351 }
0352 }
0353
0354
0355 __device__ __forceinline__ void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); }
0356 __device__ __forceinline__ bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); }
0357
0358 __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; }
0359
0360 __device__ __forceinline__ bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); }
0361 __device__ __forceinline__ void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); }
0362
0363 __device__ __forceinline__ void setFishbone(hindex_type id, float z, const HitsConstView& hh) {
0364
0365 auto old = theFishboneId;
0366 while (old !=
0367 atomicCAS(
0368 &theFishboneId,
0369 old,
0370 (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh[old].zGlobal() - theInnerZ)) ? id : old))
0371 old = theFishboneId;
0372 }
0373 __device__ __forceinline__ auto fishboneId() const { return theFishboneId; }
0374 __device__ __forceinline__ bool hasFishbone() const { return theFishboneId != invalidHitId; }
0375
0376 private:
0377 CellNeighbors* theOuterNeighbors;
0378 CellTracks* theTracks;
0379
0380 int16_t theLayerPairId_;
0381 uint16_t theStatus_;
0382
0383 float theInnerZ;
0384 float theInnerR;
0385 hindex_type theInnerHitId;
0386 hindex_type theOuterHitId;
0387 hindex_type theFishboneId;
0388 };
0389
0390 #endif