File indexing completed on 2023-03-31 23:02:15
0001
0002
0003
0004
0005
0006
0007
0008 #include <cmath>
0009 #include <cstdint>
0010 #include <limits>
0011
0012 #include <cuda_runtime.h>
0013
0014 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0015 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0016 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
0017
0018 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0019 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0020
0021 #include "CAStructures.h"
0022 #include "CAHitNtupletGeneratorKernels.h"
0023 #include "GPUCACell.h"
0024 #include "gpuFishbone.h"
0025 #include "gpuPixelDoublets.h"
0026
0027 namespace caHitNtupletGeneratorKernels {
0028
0029 constexpr uint32_t tkNotFound = std::numeric_limits<uint16_t>::max();
0030 constexpr float maxScore = std::numeric_limits<float>::max();
0031 constexpr float nSigma2 = 25.f;
0032
0033
0034
0035 template <typename TrackerTraits>
0036 using HitToTuple = caStructures::HitToTupleT<TrackerTraits>;
0037
0038 template <typename TrackerTraits>
0039 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0040
0041 template <typename TrackerTraits>
0042 using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0043
0044 template <typename TrackerTraits>
0045 using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0046
0047 template <typename TrackerTraits>
0048 using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0049
0050 using Quality = pixelTrack::Quality;
0051
0052 template <typename TrackerTraits>
0053 using TkSoAView = TrackSoAView<TrackerTraits>;
0054
0055 template <typename TrackerTraits>
0056 using HitContainer = typename TrackSoA<TrackerTraits>::HitContainer;
0057
0058 template <typename TrackerTraits>
0059 using HitsConstView = typename GPUCACellT<TrackerTraits>::HitsConstView;
0060
0061 template <typename TrackerTraits>
0062 using QualityCuts = pixelTrack::QualityCutsT<TrackerTraits>;
0063
0064 template <typename TrackerTraits>
0065 using CAParams = caHitNtupletGenerator::CAParamsT<TrackerTraits>;
0066
0067 using Counters = caHitNtupletGenerator::Counters;
0068
0069 template <typename TrackerTraits>
0070 __global__ void kernel_checkOverflows(TkSoAView<TrackerTraits> tracks_view,
0071 TupleMultiplicity<TrackerTraits> const *tupleMultiplicity,
0072 HitToTuple<TrackerTraits> const *hitToTuple,
0073 cms::cuda::AtomicPairCounter *apc,
0074 GPUCACellT<TrackerTraits> const *__restrict__ cells,
0075 uint32_t const *__restrict__ nCells,
0076 CellNeighborsVector<TrackerTraits> const *cellNeighbors,
0077 CellTracksVector<TrackerTraits> const *cellTracks,
0078 OuterHitOfCell<TrackerTraits> const isOuterHitOfCell,
0079 int32_t nHits,
0080 uint32_t maxNumberOfDoublets,
0081 Counters *counters) {
0082 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0083
0084 auto &c = *counters;
0085
0086 if (0 == first) {
0087 atomicAdd(&c.nEvents, 1);
0088 atomicAdd(&c.nHits, nHits);
0089 atomicAdd(&c.nCells, *nCells);
0090 atomicAdd(&c.nTuples, apc->get().m);
0091 atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
0092 }
0093
0094 #ifdef NTUPLE_DEBUG
0095 if (0 == first) {
0096 printf("number of found cells %d \n found tuples %d with total hits %d out of %d %d\n",
0097 *nCells,
0098 apc->get().m,
0099 apc->get().n,
0100 nHits,
0101 hitToTuple->totOnes());
0102 if (apc->get().m < TrackerTraits::maxNumberOfQuadruplets) {
0103 assert(tracks_view.hitIndices().size(apc->get().m) == 0);
0104 assert(tracks_view.hitIndices().size() == apc->get().n);
0105 }
0106 }
0107
0108 for (int idx = first, nt = tracks_view.hitIndices().nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
0109 if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack)
0110 printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
0111 assert(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
0112 for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
0113 assert(int(*ih) < nHits);
0114 }
0115 #endif
0116
0117 if (0 == first) {
0118 if (apc->get().m >= TrackerTraits::maxNumberOfQuadruplets)
0119 printf("Tuples overflow\n");
0120 if (*nCells >= maxNumberOfDoublets)
0121 printf("Cells overflow\n");
0122 if (cellNeighbors && cellNeighbors->full())
0123 printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
0124 if (cellTracks && cellTracks->full())
0125 printf("cellTracks overflow\n");
0126 if (int(hitToTuple->nOnes()) < nHits)
0127 printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nOnes(), nHits);
0128 #ifdef GPU_DEBUG
0129 printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
0130 cellNeighbors->size(),
0131 cellTracks->size(),
0132 hitToTuple->size());
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144 #endif
0145 }
0146
0147 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0148 auto const &thisCell = cells[idx];
0149 if (thisCell.hasFishbone() && !thisCell.isKilled())
0150 atomicAdd(&c.nFishCells, 1);
0151 if (thisCell.outerNeighbors().full())
0152 printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
0153 if (thisCell.tracks().full())
0154 printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
0155 if (thisCell.isKilled())
0156 atomicAdd(&c.nKilledCells, 1);
0157 if (!thisCell.unused())
0158 atomicAdd(&c.nEmptyCells, 1);
0159 if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
0160 atomicAdd(&c.nZeroTrackCells, 1);
0161 }
0162
0163 for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) {
0164 if (isOuterHitOfCell.container[idx].full())
0165 printf("OuterHitOfCell overflow %d\n", idx);
0166 }
0167 }
0168
0169 template <typename TrackerTraits>
0170 __global__ void kernel_fishboneCleaner(GPUCACellT<TrackerTraits> const *cells,
0171 uint32_t const *__restrict__ nCells,
0172 TkSoAView<TrackerTraits> tracks_view) {
0173 constexpr auto reject = pixelTrack::Quality::dup;
0174
0175 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0176 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0177 auto const &thisCell = cells[idx];
0178 if (!thisCell.isKilled())
0179 continue;
0180
0181 for (auto it : thisCell.tracks())
0182 tracks_view[it].quality() = reject;
0183 }
0184 }
0185
0186
0187
0188 template <typename TrackerTraits>
0189 __global__ void kernel_earlyDuplicateRemover(GPUCACellT<TrackerTraits> const *cells,
0190 uint32_t const *__restrict__ nCells,
0191 TkSoAView<TrackerTraits> tracks_view,
0192 bool dupPassThrough) {
0193
0194 constexpr auto reject = pixelTrack::Quality::edup;
0195
0196 assert(nCells);
0197 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0198 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0199 auto const &thisCell = cells[idx];
0200
0201 if (thisCell.tracks().size() < 2)
0202 continue;
0203
0204 int8_t maxNl = 0;
0205
0206
0207 for (auto it : thisCell.tracks()) {
0208 auto nl = tracks_view[it].nLayers();
0209 maxNl = std::max(nl, maxNl);
0210 }
0211
0212
0213
0214
0215
0216 for (auto it : thisCell.tracks()) {
0217 if (tracks_view[it].nLayers() < maxNl)
0218 tracks_view[it].quality() = reject;
0219 }
0220 }
0221 }
0222
0223
0224 template <typename TrackerTraits>
0225 __global__ void kernel_fastDuplicateRemover(GPUCACellT<TrackerTraits> const *__restrict__ cells,
0226 uint32_t const *__restrict__ nCells,
0227 TkSoAView<TrackerTraits> tracks_view,
0228 bool dupPassThrough) {
0229
0230 auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0231 constexpr auto loose = pixelTrack::Quality::loose;
0232
0233 assert(nCells);
0234
0235 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0236 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0237 auto const &thisCell = cells[idx];
0238 if (thisCell.tracks().size() < 2)
0239 continue;
0240
0241 float mc = maxScore;
0242 uint16_t im = tkNotFound;
0243
0244 auto score = [&](auto it) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
0245
0246
0247
0248 int ntr = thisCell.tracks().size();
0249 for (int i = 0; i < ntr - 1; ++i) {
0250 auto it = thisCell.tracks()[i];
0251 auto qi = tracks_view[it].quality();
0252 if (qi <= reject)
0253 continue;
0254 auto opi = tracks_view[it].state()(2);
0255 auto e2opi = tracks_view[it].covariance()(9);
0256 auto cti = tracks_view[it].state()(3);
0257 auto e2cti = tracks_view[it].covariance()(12);
0258 for (auto j = i + 1; j < ntr; ++j) {
0259 auto jt = thisCell.tracks()[j];
0260 auto qj = tracks_view[jt].quality();
0261 if (qj <= reject)
0262 continue;
0263 auto opj = tracks_view[jt].state()(2);
0264 auto ctj = tracks_view[jt].state()(3);
0265 auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0266 if ((cti - ctj) * (cti - ctj) > dct)
0267 continue;
0268 auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0269 if ((opi - opj) * (opi - opj) > dop)
0270 continue;
0271 if ((qj < qi) || (qj == qi && score(it) < score(jt)))
0272 tracks_view[jt].quality() = reject;
0273 else {
0274 tracks_view[it].quality() = reject;
0275 break;
0276 }
0277 }
0278 }
0279
0280
0281 auto maxQual = reject;
0282 for (auto it : thisCell.tracks()) {
0283 if (tracks_view[it].quality() > maxQual)
0284 maxQual = tracks_view[it].quality();
0285 }
0286
0287 if (maxQual <= loose)
0288 continue;
0289
0290
0291 for (auto it : thisCell.tracks()) {
0292 if (tracks_view[it].quality() == maxQual && score(it) < mc) {
0293 mc = score(it);
0294 im = it;
0295 }
0296 }
0297
0298 if (tkNotFound == im)
0299 continue;
0300
0301
0302 for (auto it : thisCell.tracks()) {
0303 if (tracks_view[it].quality() > loose && it != im)
0304 tracks_view[it].quality() = loose;
0305 }
0306 }
0307 }
0308
0309 template <typename TrackerTraits>
0310 __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
0311 cms::cuda::AtomicPairCounter *apc2,
0312 HitsConstView<TrackerTraits> hh,
0313 GPUCACellT<TrackerTraits> *cells,
0314 uint32_t const *__restrict__ nCells,
0315 CellNeighborsVector<TrackerTraits> *cellNeighbors,
0316 OuterHitOfCell<TrackerTraits> const isOuterHitOfCell,
0317 CAParams<TrackerTraits> params) {
0318 using Cell = GPUCACellT<TrackerTraits>;
0319
0320 auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
0321 auto first = threadIdx.x;
0322 auto stride = blockDim.x;
0323
0324 if (0 == (firstCellIndex + first)) {
0325 (*apc1) = 0;
0326 (*apc2) = 0;
0327 }
0328
0329 constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex;
0330 constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex;
0331 for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
0332 auto cellIndex = idx;
0333 auto &thisCell = cells[idx];
0334 auto innerHitId = thisCell.inner_hit_id();
0335 if (int(innerHitId) < isOuterHitOfCell.offset)
0336 continue;
0337 int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
0338 auto vi = isOuterHitOfCell[innerHitId].data();
0339
0340 auto ri = thisCell.inner_r(hh);
0341 auto zi = thisCell.inner_z(hh);
0342
0343 auto ro = thisCell.outer_r(hh);
0344 auto zo = thisCell.outer_z(hh);
0345 auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex;
0346
0347 for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
0348 auto otherCell = __ldg(vi + j);
0349 auto &oc = cells[otherCell];
0350 auto r1 = oc.inner_r(hh);
0351 auto z1 = oc.inner_z(hh);
0352 bool aligned = Cell::areAlignedRZ(
0353 r1,
0354 z1,
0355 ri,
0356 zi,
0357 ro,
0358 zo,
0359 params.ptmin_,
0360 isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_);
0361 if (aligned && thisCell.dcaCut(hh,
0362 oc,
0363 oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_
0364 : params.dcaCutOuterTriplet_,
0365 params.hardCurvCut_)) {
0366 oc.addOuterNeighbor(cellIndex, *cellNeighbors);
0367 thisCell.setStatusBits(Cell::StatusBit::kUsed);
0368 oc.setStatusBits(Cell::StatusBit::kUsed);
0369 }
0370 }
0371 }
0372 }
0373
0374 template <typename TrackerTraits>
0375 __global__ void kernel_find_ntuplets(HitsConstView<TrackerTraits> hh,
0376 TkSoAView<TrackerTraits> tracks_view,
0377 GPUCACellT<TrackerTraits> *__restrict__ cells,
0378 uint32_t const *nCells,
0379 CellTracksVector<TrackerTraits> *cellTracks,
0380 cms::cuda::AtomicPairCounter *apc,
0381 CAParams<TrackerTraits> params) {
0382
0383
0384 using Cell = GPUCACellT<TrackerTraits>;
0385
0386 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0387
0388 #ifdef GPU_DEBUG
0389 if (first == 0)
0390 printf("starting producing ntuplets from %d cells \n", *nCells);
0391 #endif
0392 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0393 auto const &thisCell = cells[idx];
0394
0395 if (thisCell.isKilled())
0396 continue;
0397
0398
0399 if (thisCell.outerNeighbors().empty())
0400 continue;
0401
0402 auto pid = thisCell.layerPairId();
0403 bool doit = params.startingLayerPair(pid);
0404
0405 constexpr uint32_t maxDepth = TrackerTraits::maxDepth;
0406 if (doit) {
0407 typename Cell::TmpTuple stack;
0408 stack.reset();
0409
0410 bool bpix1Start = params.startAt0(pid);
0411
0412 thisCell.template find_ntuplets<maxDepth>(hh,
0413 cells,
0414 *cellTracks,
0415 tracks_view.hitIndices(),
0416 *apc,
0417 tracks_view.quality(),
0418 stack,
0419 params.minHitsPerNtuplet_,
0420 bpix1Start);
0421
0422 assert(stack.empty());
0423 }
0424 }
0425 }
0426 template <typename TrackerTraits>
0427 __global__ void kernel_mark_used(GPUCACellT<TrackerTraits> *__restrict__ cells, uint32_t const *nCells) {
0428 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0429 using Cell = GPUCACellT<TrackerTraits>;
0430 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0431 auto &thisCell = cells[idx];
0432 if (!thisCell.tracks().empty())
0433 thisCell.setStatusBits(Cell::StatusBit::kInTrack);
0434 }
0435 }
0436
0437 template <typename TrackerTraits>
0438 __global__ void kernel_countMultiplicity(TkSoAView<TrackerTraits> tracks_view,
0439 TupleMultiplicity<TrackerTraits> *tupleMultiplicity) {
0440 auto first = blockIdx.x * blockDim.x + threadIdx.x;
0441 for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0442 auto nhits = tracks_view.hitIndices().size(it);
0443 if (nhits < 3)
0444 continue;
0445 if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0446 continue;
0447 assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0448 if (nhits > TrackerTraits::maxHitsOnTrack)
0449 printf("wrong mult %d %d\n", it, nhits);
0450 assert(nhits <= TrackerTraits::maxHitsOnTrack);
0451 tupleMultiplicity->count(nhits);
0452 }
0453 }
0454
0455 template <typename TrackerTraits>
0456 __global__ void kernel_fillMultiplicity(TkSoAView<TrackerTraits> tracks_view,
0457 TupleMultiplicity<TrackerTraits> *tupleMultiplicity) {
0458 auto first = blockIdx.x * blockDim.x + threadIdx.x;
0459 for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0460 auto nhits = tracks_view.hitIndices().size(it);
0461 if (nhits < 3)
0462 continue;
0463 if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0464 continue;
0465 assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0466 if (nhits > TrackerTraits::maxHitsOnTrack)
0467 printf("wrong mult %d %d\n", it, nhits);
0468 assert(nhits <= TrackerTraits::maxHitsOnTrack);
0469 tupleMultiplicity->fill(nhits, it);
0470 }
0471 }
0472
0473
0474 template <typename TrackerTraits>
0475 __global__ void kernel_classifyTracks(TkSoAView<TrackerTraits> tracks_view, QualityCuts<TrackerTraits> cuts) {
0476
0477 int first = blockDim.x * blockIdx.x + threadIdx.x;
0478 for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0479 auto nhits = tracks_view.hitIndices().size(it);
0480 if (nhits == 0)
0481 break;
0482
0483
0484 if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0485 continue;
0486
0487 assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0488
0489
0490 if (nhits < 3)
0491 continue;
0492
0493
0494 bool isNaN = false;
0495 for (int i = 0; i < 5; ++i) {
0496 isNaN |= std::isnan(tracks_view[it].state()(i));
0497 }
0498 if (isNaN) {
0499 #ifdef NTUPLE_DEBUG
0500 printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
0501 #endif
0502 continue;
0503 }
0504
0505 tracks_view[it].quality() = pixelTrack::Quality::strict;
0506
0507 if (cuts.strictCut(tracks_view, it))
0508 continue;
0509
0510 tracks_view[it].quality() = pixelTrack::Quality::tight;
0511
0512 if (cuts.isHP(tracks_view, nhits, it))
0513 tracks_view[it].quality() = pixelTrack::Quality::highPurity;
0514 }
0515 }
0516
0517 template <typename TrackerTraits>
0518 __global__ void kernel_doStatsForTracks(TkSoAView<TrackerTraits> tracks_view, Counters *counters) {
0519 int first = blockDim.x * blockIdx.x + threadIdx.x;
0520 for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0521 if (tracks_view.hitIndices().size(idx) == 0)
0522 break;
0523 if (tracks_view[idx].quality() < pixelTrack::Quality::loose)
0524 continue;
0525 atomicAdd(&(counters->nLooseTracks), 1);
0526 if (tracks_view[idx].quality() < pixelTrack::Quality::strict)
0527 continue;
0528 atomicAdd(&(counters->nGoodTracks), 1);
0529 }
0530 }
0531
0532 template <typename TrackerTraits>
0533 __global__ void kernel_countHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
0534 int first = blockDim.x * blockIdx.x + threadIdx.x;
0535 for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0536 if (tracks_view.hitIndices().size(idx) == 0)
0537 break;
0538 for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0539 hitToTuple->count(*h);
0540 }
0541 }
0542
0543 template <typename TrackerTraits>
0544 __global__ void kernel_fillHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
0545 int first = blockDim.x * blockIdx.x + threadIdx.x;
0546 for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0547 if (tracks_view.hitIndices().size(idx) == 0)
0548 break;
0549 for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0550 hitToTuple->fill(*h, idx);
0551 }
0552 }
0553
0554 template <typename TrackerTraits>
0555 __global__ void kernel_fillHitDetIndices(TkSoAView<TrackerTraits> tracks_view, HitsConstView<TrackerTraits> hh) {
0556 int first = blockDim.x * blockIdx.x + threadIdx.x;
0557
0558 for (int idx = first, ntot = tracks_view.hitIndices().totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0559 tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
0560 }
0561
0562 auto nhits = hh.nHits();
0563
0564 for (int idx = first, ntot = tracks_view.hitIndices().size(); idx < ntot; idx += gridDim.x * blockDim.x) {
0565 assert(tracks_view.hitIndices().content[idx] < nhits);
0566 tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
0567 }
0568 }
0569
0570 template <typename TrackerTraits>
0571 __global__ void kernel_fillNLayers(TkSoAView<TrackerTraits> tracks_view, cms::cuda::AtomicPairCounter *apc) {
0572 auto first = blockIdx.x * blockDim.x + threadIdx.x;
0573
0574 auto ntracks = std::min<int>(apc->get().m, tracks_view.metadata().size() - 1);
0575 if (0 == first)
0576 tracks_view.nTracks() = ntracks;
0577 for (int idx = first, nt = ntracks; idx < nt; idx += gridDim.x * blockDim.x) {
0578 auto nHits = TracksUtilities<TrackerTraits>::nHits(tracks_view, idx);
0579 assert(nHits >= 3);
0580 tracks_view[idx].nLayers() = TracksUtilities<TrackerTraits>::computeNumberOfLayers(tracks_view, idx);
0581 }
0582 }
0583
0584 template <typename TrackerTraits>
0585 __global__ void kernel_doStatsForHitInTracks(HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
0586 Counters *counters) {
0587 auto &c = *counters;
0588 int first = blockDim.x * blockIdx.x + threadIdx.x;
0589 for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0590 if (hitToTuple->size(idx) == 0)
0591 continue;
0592 atomicAdd(&c.nUsedHits, 1);
0593 if (hitToTuple->size(idx) > 1)
0594 atomicAdd(&c.nDupHits, 1);
0595 }
0596 }
0597
0598 template <typename TrackerTraits>
0599 __global__ void kernel_countSharedHit(int *__restrict__ nshared,
0600 HitContainer<TrackerTraits> const *__restrict__ ptuples,
0601 Quality const *__restrict__ quality,
0602 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0603 constexpr auto loose = pixelTrack::Quality::loose;
0604
0605 auto &hitToTuple = *phitToTuple;
0606 auto const &foundNtuplets = *ptuples;
0607
0608 int first = blockDim.x * blockIdx.x + threadIdx.x;
0609 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0610 if (hitToTuple.size(idx) < 2)
0611 continue;
0612
0613 int nt = 0;
0614
0615
0616 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0617 if (quality[*it] < loose)
0618 continue;
0619 ++nt;
0620 }
0621
0622 if (nt < 2)
0623 continue;
0624
0625
0626 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0627 if (foundNtuplets.size(*it) > 3)
0628 continue;
0629 atomicAdd(&nshared[*it], 1);
0630 }
0631
0632 }
0633 }
0634
0635 template <typename TrackerTraits>
0636 __global__ void kernel_markSharedHit(int const *__restrict__ nshared,
0637 HitContainer<TrackerTraits> const *__restrict__ tuples,
0638 Quality *__restrict__ quality,
0639 bool dupPassThrough) {
0640
0641 constexpr auto dup = pixelTrack::Quality::dup;
0642 constexpr auto loose = pixelTrack::Quality::loose;
0643
0644
0645
0646 auto const reject = dupPassThrough ? loose : dup;
0647
0648 int first = blockDim.x * blockIdx.x + threadIdx.x;
0649 for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0650 if (tuples->size(idx) == 0)
0651 break;
0652 if (quality[idx] <= reject)
0653 continue;
0654 if (nshared[idx] > 2)
0655 quality[idx] = reject;
0656 }
0657 }
0658
0659
0660 template <typename TrackerTraits>
0661 __global__ void kernel_rejectDuplicate(TkSoAView<TrackerTraits> tracks_view,
0662 uint16_t nmin,
0663 bool dupPassThrough,
0664 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0665
0666 auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0667
0668 auto &hitToTuple = *phitToTuple;
0669
0670 int first = blockDim.x * blockIdx.x + threadIdx.x;
0671 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0672 if (hitToTuple.size(idx) < 2)
0673 continue;
0674
0675 auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
0676
0677
0678 for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
0679 auto const it = *ip;
0680 auto qi = tracks_view[it].quality();
0681 if (qi <= reject)
0682 continue;
0683 auto opi = tracks_view[it].state()(2);
0684 auto e2opi = tracks_view[it].covariance()(9);
0685 auto cti = tracks_view[it].state()(3);
0686 auto e2cti = tracks_view[it].covariance()(12);
0687 auto nli = tracks_view[it].nLayers();
0688 for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
0689 auto const jt = *jp;
0690 auto qj = tracks_view[jt].quality();
0691 if (qj <= reject)
0692 continue;
0693 auto opj = tracks_view[jt].state()(2);
0694 auto ctj = tracks_view[jt].state()(3);
0695 auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0696 if ((cti - ctj) * (cti - ctj) > dct)
0697 continue;
0698 auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0699 if ((opi - opj) * (opi - opj) > dop)
0700 continue;
0701 auto nlj = tracks_view[jt].nLayers();
0702 if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
0703 tracks_view[jt].quality() = reject;
0704 else {
0705 tracks_view[it].quality() = reject;
0706 break;
0707 }
0708 }
0709 }
0710 }
0711 }
0712
0713 template <typename TrackerTraits>
0714 __global__ void kernel_sharedHitCleaner(HitsConstView<TrackerTraits> hh,
0715 TkSoAView<TrackerTraits> tracks_view,
0716 int nmin,
0717 bool dupPassThrough,
0718 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0719
0720 auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0721
0722 auto const longTqual = pixelTrack::Quality::highPurity;
0723
0724 auto &hitToTuple = *phitToTuple;
0725
0726 int l1end = hh.hitsLayerStart()[1];
0727
0728 int first = blockDim.x * blockIdx.x + threadIdx.x;
0729 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0730 if (hitToTuple.size(idx) < 2)
0731 continue;
0732
0733 int8_t maxNl = 0;
0734
0735
0736 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0737 if (tracks_view[*it].quality() < longTqual)
0738 continue;
0739
0740 auto nl = tracks_view[*it].nLayers();
0741 maxNl = std::max(nl, maxNl);
0742 }
0743
0744 if (maxNl < 4)
0745 continue;
0746
0747
0748
0749
0750
0751 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0752 auto nl = tracks_view[*it].nLayers();
0753
0754
0755 if (idx < l1end and nl > nmin)
0756 continue;
0757
0758 if (nl < maxNl && tracks_view[*it].quality() > reject)
0759 tracks_view[*it].quality() = reject;
0760 }
0761 }
0762 }
0763
0764 template <typename TrackerTraits>
0765 __global__ void kernel_tripletCleaner(TkSoAView<TrackerTraits> tracks_view,
0766 uint16_t nmin,
0767 bool dupPassThrough,
0768 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0769
0770 auto const reject = pixelTrack::Quality::loose;
0771
0772 auto const good = pixelTrack::Quality::strict;
0773
0774 auto &hitToTuple = *phitToTuple;
0775
0776 int first = blockDim.x * blockIdx.x + threadIdx.x;
0777 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0778 if (hitToTuple.size(idx) < 2)
0779 continue;
0780
0781 float mc = maxScore;
0782 uint16_t im = tkNotFound;
0783 bool onlyTriplets = true;
0784
0785
0786 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0787 if (tracks_view[*it].quality() <= good)
0788 continue;
0789 onlyTriplets &= TracksUtilities<TrackerTraits>::isTriplet(tracks_view, *it);
0790 if (!onlyTriplets)
0791 break;
0792 }
0793
0794
0795 if (!onlyTriplets)
0796 continue;
0797
0798
0799 for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0800 auto const it = *ip;
0801 if (tracks_view[it].quality() >= good && std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
0802 mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
0803 im = it;
0804 }
0805 }
0806
0807 if (tkNotFound == im)
0808 continue;
0809
0810
0811 for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0812 auto const it = *ip;
0813 if (tracks_view[it].quality() > reject && it != im)
0814 tracks_view[it].quality() = reject;
0815 }
0816
0817 }
0818 }
0819
0820 template <typename TrackerTraits>
0821 __global__ void kernel_simpleTripletCleaner(TkSoAView<TrackerTraits> tracks_view,
0822 uint16_t nmin,
0823 bool dupPassThrough,
0824 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0825
0826 auto const reject = pixelTrack::Quality::loose;
0827
0828 auto const good = pixelTrack::Quality::loose;
0829
0830 auto &hitToTuple = *phitToTuple;
0831
0832 int first = blockDim.x * blockIdx.x + threadIdx.x;
0833 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0834 if (hitToTuple.size(idx) < 2)
0835 continue;
0836
0837 float mc = maxScore;
0838 uint16_t im = tkNotFound;
0839
0840
0841 for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0842 auto const it = *ip;
0843 if (tracks_view[it].quality() >= good && std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
0844 mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
0845 im = it;
0846 }
0847 }
0848
0849 if (tkNotFound == im)
0850 continue;
0851
0852
0853 for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0854 auto const it = *ip;
0855 if (tracks_view[it].quality() > reject && TracksUtilities<TrackerTraits>::isTriplet(tracks_view, it) &&
0856 it != im)
0857 tracks_view[it].quality() = reject;
0858 }
0859
0860 }
0861 }
0862
0863 template <typename TrackerTraits>
0864 __global__ void kernel_print_found_ntuplets(HitsConstView<TrackerTraits> hh,
0865 TkSoAView<TrackerTraits> tracks_view,
0866 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
0867 int32_t firstPrint,
0868 int32_t lastPrint,
0869 int iev) {
0870 constexpr auto loose = pixelTrack::Quality::loose;
0871
0872 int first = firstPrint + blockDim.x * blockIdx.x + threadIdx.x;
0873 for (int i = first, np = std::min(lastPrint, tracks_view.hitIndices().nOnes()); i < np;
0874 i += blockDim.x * gridDim.x) {
0875 auto nh = tracks_view.hitIndices().size(i);
0876 if (nh < 3)
0877 continue;
0878 if (tracks_view[i].quality() < loose)
0879 continue;
0880 printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
0881 10000 * iev + i,
0882 int(tracks_view[i].quality()),
0883 nh,
0884 tracks_view[i].nLayers(),
0885 TracksUtilities<TrackerTraits>::charge(tracks_view, i),
0886 tracks_view[i].pt(),
0887 tracks_view[i].eta(),
0888 TracksUtilities<TrackerTraits>::phi(tracks_view, i),
0889 TracksUtilities<TrackerTraits>::tip(tracks_view, i),
0890 TracksUtilities<TrackerTraits>::zip(tracks_view, i),
0891 tracks_view[i].chi2(),
0892 hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
0893 hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
0894 hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
0895 nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
0896 nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
0897 nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
0898 nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
0899 }
0900 }
0901
0902 __global__ void kernel_printCounters(Counters const *counters) {
0903 auto const &c = *counters;
0904 printf(
0905 "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | "
0906 "nDupHits | "
0907 "nFishCells | "
0908 "nKilledCells | "
0909 "nUsedCells | nZeroTrackCells ||\n");
0910 printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
0911 c.nEvents,
0912 c.nHits,
0913 c.nCells,
0914 c.nTuples,
0915 c.nFitTracks,
0916 c.nLooseTracks,
0917 c.nGoodTracks,
0918 c.nUsedHits,
0919 c.nDupHits,
0920 c.nFishCells,
0921 c.nKilledCells,
0922 c.nEmptyCells,
0923 c.nZeroTrackCells);
0924 printf(
0925 "Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f| %.3f||\n",
0926 c.nEvents,
0927 c.nHits / double(c.nEvents),
0928 c.nCells / double(c.nEvents),
0929 c.nTuples / double(c.nEvents),
0930 c.nFitTracks / double(c.nEvents),
0931 c.nLooseTracks / double(c.nEvents),
0932 c.nGoodTracks / double(c.nEvents),
0933 c.nUsedHits / double(c.nEvents),
0934 c.nDupHits / double(c.nEvents),
0935 c.nFishCells / double(c.nCells),
0936 c.nKilledCells / double(c.nCells),
0937 c.nEmptyCells / double(c.nCells),
0938 c.nZeroTrackCells / double(c.nCells));
0939 }
0940
0941 }