File indexing completed on 2024-07-26 17:54:25
0001
0002
0003
0004
0005
0006
0007
0008 #include <cmath>
0009 #include <cstdint>
0010 #include <limits>
0011
0012 #include <cuda_runtime.h>
0013
0014 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0015 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0016 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0017 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0018 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
0019
0020 #include "FWCore/Utilities/interface/isFinite.h"
0021
0022 #include "CAHitNtupletGeneratorKernels.h"
0023 #include "CAStructures.h"
0024 #include "GPUCACell.h"
0025 #include "gpuFishbone.h"
0026 #include "gpuPixelDoublets.h"
0027
0028 namespace caHitNtupletGeneratorKernels {
0029
0030 constexpr uint32_t tkNotFound = std::numeric_limits<uint16_t>::max();
0031 constexpr float maxScore = std::numeric_limits<float>::max();
0032 constexpr float nSigma2 = 25.f;
0033
0034
0035
0036 template <typename TrackerTraits>
0037 using HitToTuple = caStructures::HitToTupleT<TrackerTraits>;
0038
0039 template <typename TrackerTraits>
0040 using TupleMultiplicity = caStructures::TupleMultiplicityT<TrackerTraits>;
0041
0042 template <typename TrackerTraits>
0043 using CellNeighborsVector = caStructures::CellNeighborsVectorT<TrackerTraits>;
0044
0045 template <typename TrackerTraits>
0046 using CellTracksVector = caStructures::CellTracksVectorT<TrackerTraits>;
0047
0048 template <typename TrackerTraits>
0049 using OuterHitOfCell = caStructures::OuterHitOfCellT<TrackerTraits>;
0050
0051 using Quality = pixelTrack::Quality;
0052
0053 template <typename TrackerTraits>
0054 using TkSoAView = TrackSoAView<TrackerTraits>;
0055
0056 template <typename TrackerTraits>
0057 using HitContainer = typename TrackSoA<TrackerTraits>::HitContainer;
0058
0059 template <typename TrackerTraits>
0060 using HitsConstView = typename GPUCACellT<TrackerTraits>::HitsConstView;
0061
0062 template <typename TrackerTraits>
0063 using QualityCuts = pixelTrack::QualityCutsT<TrackerTraits>;
0064
0065 template <typename TrackerTraits>
0066 using CAParams = caHitNtupletGenerator::CAParamsT<TrackerTraits>;
0067
0068 using Counters = caHitNtupletGenerator::Counters;
0069
0070 template <typename TrackerTraits>
0071 __global__ void kernel_checkOverflows(TkSoAView<TrackerTraits> tracks_view,
0072 TupleMultiplicity<TrackerTraits> const *tupleMultiplicity,
0073 HitToTuple<TrackerTraits> const *hitToTuple,
0074 cms::cuda::AtomicPairCounter *apc,
0075 GPUCACellT<TrackerTraits> const *__restrict__ cells,
0076 uint32_t const *__restrict__ nCells,
0077 CellNeighborsVector<TrackerTraits> const *cellNeighbors,
0078 CellTracksVector<TrackerTraits> const *cellTracks,
0079 OuterHitOfCell<TrackerTraits> const isOuterHitOfCell,
0080 int32_t nHits,
0081 uint32_t maxNumberOfDoublets,
0082 Counters *counters) {
0083 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0084
0085 auto &c = *counters;
0086
0087 if (0 == first) {
0088 atomicAdd(&c.nEvents, 1);
0089 atomicAdd(&c.nHits, nHits);
0090 atomicAdd(&c.nCells, *nCells);
0091 atomicAdd(&c.nTuples, apc->get().m);
0092 atomicAdd(&c.nFitTracks, tupleMultiplicity->size());
0093 }
0094
0095 #ifdef NTUPLE_DEBUG
0096 if (0 == first) {
0097 printf("number of found cells %d \n found tuples %d with total hits %d out of %d %d\n",
0098 *nCells,
0099 apc->get().m,
0100 apc->get().n,
0101 nHits,
0102 hitToTuple->totOnes());
0103 if (apc->get().m < TrackerTraits::maxNumberOfQuadruplets) {
0104 assert(tracks_view.hitIndices().size(apc->get().m) == 0);
0105 assert(tracks_view.hitIndices().size() == apc->get().n);
0106 }
0107 }
0108
0109 for (int idx = first, nt = tracks_view.hitIndices().nOnes(); idx < nt; idx += gridDim.x * blockDim.x) {
0110 if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack)
0111 printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx));
0112 assert(tracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack);
0113 for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih)
0114 assert(int(*ih) < nHits);
0115 }
0116 #endif
0117
0118 if (0 == first) {
0119 if (apc->get().m >= TrackerTraits::maxNumberOfQuadruplets)
0120 printf("Tuples overflow\n");
0121 if (*nCells >= maxNumberOfDoublets)
0122 printf("Cells overflow\n");
0123 if (cellNeighbors && cellNeighbors->full())
0124 printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size());
0125 if (cellTracks && cellTracks->full())
0126 printf("cellTracks overflow\n");
0127 if (int(hitToTuple->nOnes()) < nHits)
0128 printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nOnes(), nHits);
0129 #ifdef GPU_DEBUG
0130 printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n",
0131 cellNeighbors->size(),
0132 cellTracks->size(),
0133 hitToTuple->size());
0134
0135 #endif
0136 }
0137
0138 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0139 auto const &thisCell = cells[idx];
0140 if (thisCell.hasFishbone() && !thisCell.isKilled())
0141 atomicAdd(&c.nFishCells, 1);
0142 if (thisCell.outerNeighbors().full())
0143 printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId());
0144 if (thisCell.tracks().full())
0145 printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId());
0146 if (thisCell.isKilled())
0147 atomicAdd(&c.nKilledCells, 1);
0148 if (!thisCell.unused())
0149 atomicAdd(&c.nEmptyCells, 1);
0150 if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id())))
0151 atomicAdd(&c.nZeroTrackCells, 1);
0152 }
0153
0154 for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) {
0155 if (isOuterHitOfCell.container[idx].full())
0156 printf("OuterHitOfCell overflow %d\n", idx);
0157 }
0158 }
0159
0160 template <typename TrackerTraits>
0161 __global__ void kernel_fishboneCleaner(GPUCACellT<TrackerTraits> const *cells,
0162 uint32_t const *__restrict__ nCells,
0163 TkSoAView<TrackerTraits> tracks_view) {
0164 constexpr auto reject = pixelTrack::Quality::dup;
0165
0166 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0167 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0168 auto const &thisCell = cells[idx];
0169 if (!thisCell.isKilled())
0170 continue;
0171
0172 for (auto it : thisCell.tracks())
0173 tracks_view[it].quality() = reject;
0174 }
0175 }
0176
0177
0178
0179 template <typename TrackerTraits>
0180 __global__ void kernel_earlyDuplicateRemover(GPUCACellT<TrackerTraits> const *cells,
0181 uint32_t const *__restrict__ nCells,
0182 TkSoAView<TrackerTraits> tracks_view,
0183 bool dupPassThrough) {
0184
0185 constexpr auto reject = pixelTrack::Quality::edup;
0186
0187 assert(nCells);
0188 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0189 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0190 auto const &thisCell = cells[idx];
0191
0192 if (thisCell.tracks().size() < 2)
0193 continue;
0194
0195 int8_t maxNl = 0;
0196
0197
0198 for (auto it : thisCell.tracks()) {
0199 auto nl = tracks_view[it].nLayers();
0200 maxNl = std::max(nl, maxNl);
0201 }
0202
0203
0204
0205
0206
0207 for (auto it : thisCell.tracks()) {
0208 if (tracks_view[it].nLayers() < maxNl)
0209 tracks_view[it].quality() = reject;
0210 }
0211 }
0212 }
0213
0214
0215 template <typename TrackerTraits>
0216 __global__ void kernel_fastDuplicateRemover(GPUCACellT<TrackerTraits> const *__restrict__ cells,
0217 uint32_t const *__restrict__ nCells,
0218 TkSoAView<TrackerTraits> tracks_view,
0219 bool dupPassThrough) {
0220
0221 auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0222 constexpr auto loose = pixelTrack::Quality::loose;
0223
0224 assert(nCells);
0225
0226 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0227 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0228 auto const &thisCell = cells[idx];
0229 if (thisCell.tracks().size() < 2)
0230 continue;
0231
0232 float mc = maxScore;
0233 uint16_t im = tkNotFound;
0234
0235 auto score = [&](auto it) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
0236
0237
0238
0239 int ntr = thisCell.tracks().size();
0240 for (int i = 0; i < ntr - 1; ++i) {
0241 auto it = thisCell.tracks()[i];
0242 auto qi = tracks_view[it].quality();
0243 if (qi <= reject)
0244 continue;
0245 auto opi = tracks_view[it].state()(2);
0246 auto e2opi = tracks_view[it].covariance()(9);
0247 auto cti = tracks_view[it].state()(3);
0248 auto e2cti = tracks_view[it].covariance()(12);
0249 for (auto j = i + 1; j < ntr; ++j) {
0250 auto jt = thisCell.tracks()[j];
0251 auto qj = tracks_view[jt].quality();
0252 if (qj <= reject)
0253 continue;
0254 auto opj = tracks_view[jt].state()(2);
0255 auto ctj = tracks_view[jt].state()(3);
0256 auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0257 if ((cti - ctj) * (cti - ctj) > dct)
0258 continue;
0259 auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0260 if ((opi - opj) * (opi - opj) > dop)
0261 continue;
0262 if ((qj < qi) || (qj == qi && score(it) < score(jt)))
0263 tracks_view[jt].quality() = reject;
0264 else {
0265 tracks_view[it].quality() = reject;
0266 break;
0267 }
0268 }
0269 }
0270
0271
0272 auto maxQual = reject;
0273 for (auto it : thisCell.tracks()) {
0274 if (tracks_view[it].quality() > maxQual)
0275 maxQual = tracks_view[it].quality();
0276 }
0277
0278 if (maxQual <= loose)
0279 continue;
0280
0281
0282 for (auto it : thisCell.tracks()) {
0283 if (tracks_view[it].quality() == maxQual && score(it) < mc) {
0284 mc = score(it);
0285 im = it;
0286 }
0287 }
0288
0289 if (tkNotFound == im)
0290 continue;
0291
0292
0293 for (auto it : thisCell.tracks()) {
0294 if (tracks_view[it].quality() > loose && it != im)
0295 tracks_view[it].quality() = loose;
0296 }
0297 }
0298 }
0299
0300 template <typename TrackerTraits>
0301 __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
0302 cms::cuda::AtomicPairCounter *apc2,
0303 HitsConstView<TrackerTraits> hh,
0304 GPUCACellT<TrackerTraits> *cells,
0305 uint32_t const *__restrict__ nCells,
0306 CellNeighborsVector<TrackerTraits> *cellNeighbors,
0307 OuterHitOfCell<TrackerTraits> const isOuterHitOfCell,
0308 CAParams<TrackerTraits> params) {
0309 using Cell = GPUCACellT<TrackerTraits>;
0310
0311 auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y;
0312 auto first = threadIdx.x;
0313 auto stride = blockDim.x;
0314
0315 if (0 == (firstCellIndex + first)) {
0316 (*apc1) = 0;
0317 (*apc2) = 0;
0318 }
0319
0320 constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex;
0321 constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex;
0322 for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) {
0323 auto cellIndex = idx;
0324 auto &thisCell = cells[idx];
0325 auto innerHitId = thisCell.inner_hit_id();
0326 if (int(innerHitId) < isOuterHitOfCell.offset)
0327 continue;
0328 int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
0329 auto vi = isOuterHitOfCell[innerHitId].data();
0330
0331 auto ri = thisCell.inner_r(hh);
0332 auto zi = thisCell.inner_z(hh);
0333
0334 auto ro = thisCell.outer_r(hh);
0335 auto zo = thisCell.outer_z(hh);
0336 auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex;
0337
0338 for (int j = first; j < numberOfPossibleNeighbors; j += stride) {
0339 auto otherCell = __ldg(vi + j);
0340 auto &oc = cells[otherCell];
0341 auto r1 = oc.inner_r(hh);
0342 auto z1 = oc.inner_z(hh);
0343 bool aligned = Cell::areAlignedRZ(
0344 r1,
0345 z1,
0346 ri,
0347 zi,
0348 ro,
0349 zo,
0350 params.ptmin_,
0351 isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_);
0352 if (aligned && thisCell.dcaCut(hh,
0353 oc,
0354 oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_
0355 : params.dcaCutOuterTriplet_,
0356 params.hardCurvCut_)) {
0357 oc.addOuterNeighbor(cellIndex, *cellNeighbors);
0358 thisCell.setStatusBits(Cell::StatusBit::kUsed);
0359 oc.setStatusBits(Cell::StatusBit::kUsed);
0360 }
0361 }
0362 }
0363 }
0364
0365 template <typename TrackerTraits>
0366 __global__ void kernel_find_ntuplets(HitsConstView<TrackerTraits> hh,
0367 TkSoAView<TrackerTraits> tracks_view,
0368 GPUCACellT<TrackerTraits> *__restrict__ cells,
0369 uint32_t const *nCells,
0370 CellTracksVector<TrackerTraits> *cellTracks,
0371 cms::cuda::AtomicPairCounter *apc,
0372 CAParams<TrackerTraits> params) {
0373
0374
0375 using Cell = GPUCACellT<TrackerTraits>;
0376
0377 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0378
0379 #ifdef GPU_DEBUG
0380 if (first == 0)
0381 printf("starting producing ntuplets from %d cells \n", *nCells);
0382 #endif
0383 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0384 auto const &thisCell = cells[idx];
0385
0386 if (thisCell.isKilled())
0387 continue;
0388
0389
0390 if (thisCell.outerNeighbors().empty())
0391 continue;
0392
0393 auto pid = thisCell.layerPairId();
0394 bool doit = params.startingLayerPair(pid);
0395
0396 constexpr uint32_t maxDepth = TrackerTraits::maxDepth;
0397 if (doit) {
0398 typename Cell::TmpTuple stack;
0399 stack.reset();
0400
0401 bool bpix1Start = params.startAt0(pid);
0402
0403 thisCell.template find_ntuplets<maxDepth>(hh,
0404 cells,
0405 *cellTracks,
0406 tracks_view.hitIndices(),
0407 *apc,
0408 tracks_view.quality(),
0409 stack,
0410 params.minHitsPerNtuplet_,
0411 bpix1Start);
0412
0413 assert(stack.empty());
0414 }
0415 }
0416 }
0417 template <typename TrackerTraits>
0418 __global__ void kernel_mark_used(GPUCACellT<TrackerTraits> *__restrict__ cells, uint32_t const *nCells) {
0419 auto first = threadIdx.x + blockIdx.x * blockDim.x;
0420 using Cell = GPUCACellT<TrackerTraits>;
0421 for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) {
0422 auto &thisCell = cells[idx];
0423 if (!thisCell.tracks().empty())
0424 thisCell.setStatusBits(Cell::StatusBit::kInTrack);
0425 }
0426 }
0427
0428 template <typename TrackerTraits>
0429 __global__ void kernel_countMultiplicity(TkSoAView<TrackerTraits> tracks_view,
0430 TupleMultiplicity<TrackerTraits> *tupleMultiplicity) {
0431 auto first = blockIdx.x * blockDim.x + threadIdx.x;
0432 for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0433 auto nhits = tracks_view.hitIndices().size(it);
0434 if (nhits < 3)
0435 continue;
0436 if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0437 continue;
0438 assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0439 if (nhits > TrackerTraits::maxHitsOnTrack)
0440 printf("wrong mult %d %d\n", it, nhits);
0441 assert(nhits <= TrackerTraits::maxHitsOnTrack);
0442 tupleMultiplicity->count(nhits);
0443 }
0444 }
0445
0446 template <typename TrackerTraits>
0447 __global__ void kernel_fillMultiplicity(TkSoAView<TrackerTraits> tracks_view,
0448 TupleMultiplicity<TrackerTraits> *tupleMultiplicity) {
0449 auto first = blockIdx.x * blockDim.x + threadIdx.x;
0450 for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0451 auto nhits = tracks_view.hitIndices().size(it);
0452 if (nhits < 3)
0453 continue;
0454 if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0455 continue;
0456 assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0457 if (nhits > TrackerTraits::maxHitsOnTrack)
0458 printf("wrong mult %d %d\n", it, nhits);
0459 assert(nhits <= TrackerTraits::maxHitsOnTrack);
0460 tupleMultiplicity->fill(nhits, it);
0461 }
0462 }
0463
0464
0465 template <typename TrackerTraits>
0466 __global__ void kernel_classifyTracks(TkSoAView<TrackerTraits> tracks_view, QualityCuts<TrackerTraits> cuts) {
0467
0468 int first = blockDim.x * blockIdx.x + threadIdx.x;
0469 for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) {
0470 auto nhits = tracks_view.hitIndices().size(it);
0471 if (nhits == 0)
0472 break;
0473
0474
0475 if (tracks_view[it].quality() == pixelTrack::Quality::edup)
0476 continue;
0477
0478 assert(tracks_view[it].quality() == pixelTrack::Quality::bad);
0479
0480
0481 if (nhits < 3)
0482 continue;
0483
0484
0485 bool isNaN = false;
0486 for (int i = 0; i < 5; ++i) {
0487 isNaN |= edm::isNotFinite(tracks_view[it].state()(i));
0488 }
0489 if (isNaN) {
0490 #ifdef NTUPLE_DEBUG
0491 printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2());
0492 #endif
0493 continue;
0494 }
0495
0496 tracks_view[it].quality() = pixelTrack::Quality::strict;
0497
0498 if (cuts.strictCut(tracks_view, it))
0499 continue;
0500
0501 tracks_view[it].quality() = pixelTrack::Quality::tight;
0502
0503 if (cuts.isHP(tracks_view, nhits, it))
0504 tracks_view[it].quality() = pixelTrack::Quality::highPurity;
0505 }
0506 }
0507
0508 template <typename TrackerTraits>
0509 __global__ void kernel_doStatsForTracks(TkSoAView<TrackerTraits> tracks_view, Counters *counters) {
0510 int first = blockDim.x * blockIdx.x + threadIdx.x;
0511 for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0512 if (tracks_view.hitIndices().size(idx) == 0)
0513 break;
0514 if (tracks_view[idx].quality() < pixelTrack::Quality::loose)
0515 continue;
0516 atomicAdd(&(counters->nLooseTracks), 1);
0517 if (tracks_view[idx].quality() < pixelTrack::Quality::strict)
0518 continue;
0519 atomicAdd(&(counters->nGoodTracks), 1);
0520 }
0521 }
0522
0523 template <typename TrackerTraits>
0524 __global__ void kernel_countHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
0525 int first = blockDim.x * blockIdx.x + threadIdx.x;
0526 for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0527 if (tracks_view.hitIndices().size(idx) == 0)
0528 break;
0529 for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0530 hitToTuple->count(*h);
0531 }
0532 }
0533
0534 template <typename TrackerTraits>
0535 __global__ void kernel_fillHitInTracks(TkSoAView<TrackerTraits> tracks_view, HitToTuple<TrackerTraits> *hitToTuple) {
0536 int first = blockDim.x * blockIdx.x + threadIdx.x;
0537 for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0538 if (tracks_view.hitIndices().size(idx) == 0)
0539 break;
0540 for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h)
0541 hitToTuple->fill(*h, idx);
0542 }
0543 }
0544
0545 template <typename TrackerTraits>
0546 __global__ void kernel_fillHitDetIndices(TkSoAView<TrackerTraits> tracks_view, HitsConstView<TrackerTraits> hh) {
0547 int first = blockDim.x * blockIdx.x + threadIdx.x;
0548
0549 for (int idx = first, ntot = tracks_view.hitIndices().totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0550 tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx];
0551 }
0552
0553 auto nhits = hh.nHits();
0554
0555 for (int idx = first, ntot = tracks_view.hitIndices().size(); idx < ntot; idx += gridDim.x * blockDim.x) {
0556 assert(tracks_view.hitIndices().content[idx] < nhits);
0557 tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex();
0558 }
0559 }
0560
0561 template <typename TrackerTraits>
0562 __global__ void kernel_fillNLayers(TkSoAView<TrackerTraits> tracks_view, cms::cuda::AtomicPairCounter *apc) {
0563 auto first = blockIdx.x * blockDim.x + threadIdx.x;
0564
0565 auto ntracks = std::min<int>(apc->get().m, tracks_view.metadata().size() - 1);
0566 if (0 == first)
0567 tracks_view.nTracks() = ntracks;
0568 for (int idx = first, nt = ntracks; idx < nt; idx += gridDim.x * blockDim.x) {
0569 auto nHits = TracksUtilities<TrackerTraits>::nHits(tracks_view, idx);
0570 assert(nHits >= 3);
0571 tracks_view[idx].nLayers() = TracksUtilities<TrackerTraits>::computeNumberOfLayers(tracks_view, idx);
0572 }
0573 }
0574
0575 template <typename TrackerTraits>
0576 __global__ void kernel_doStatsForHitInTracks(HitToTuple<TrackerTraits> const *__restrict__ hitToTuple,
0577 Counters *counters) {
0578 auto &c = *counters;
0579 int first = blockDim.x * blockIdx.x + threadIdx.x;
0580 for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0581 if (hitToTuple->size(idx) == 0)
0582 continue;
0583 atomicAdd(&c.nUsedHits, 1);
0584 if (hitToTuple->size(idx) > 1)
0585 atomicAdd(&c.nDupHits, 1);
0586 }
0587 }
0588
0589 template <typename TrackerTraits>
0590 __global__ void kernel_countSharedHit(int *__restrict__ nshared,
0591 HitContainer<TrackerTraits> const *__restrict__ ptuples,
0592 Quality const *__restrict__ quality,
0593 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0594 constexpr auto loose = pixelTrack::Quality::loose;
0595
0596 auto &hitToTuple = *phitToTuple;
0597 auto const &foundNtuplets = *ptuples;
0598
0599 int first = blockDim.x * blockIdx.x + threadIdx.x;
0600 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0601 if (hitToTuple.size(idx) < 2)
0602 continue;
0603
0604 int nt = 0;
0605
0606
0607 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0608 if (quality[*it] < loose)
0609 continue;
0610 ++nt;
0611 }
0612
0613 if (nt < 2)
0614 continue;
0615
0616
0617 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0618 if (foundNtuplets.size(*it) > 3)
0619 continue;
0620 atomicAdd(&nshared[*it], 1);
0621 }
0622
0623 }
0624 }
0625
0626 template <typename TrackerTraits>
0627 __global__ void kernel_markSharedHit(int const *__restrict__ nshared,
0628 HitContainer<TrackerTraits> const *__restrict__ tuples,
0629 Quality *__restrict__ quality,
0630 bool dupPassThrough) {
0631
0632 constexpr auto dup = pixelTrack::Quality::dup;
0633 constexpr auto loose = pixelTrack::Quality::loose;
0634
0635
0636
0637 auto const reject = dupPassThrough ? loose : dup;
0638
0639 int first = blockDim.x * blockIdx.x + threadIdx.x;
0640 for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0641 if (tuples->size(idx) == 0)
0642 break;
0643 if (quality[idx] <= reject)
0644 continue;
0645 if (nshared[idx] > 2)
0646 quality[idx] = reject;
0647 }
0648 }
0649
0650
0651 template <typename TrackerTraits>
0652 __global__ void kernel_rejectDuplicate(TkSoAView<TrackerTraits> tracks_view,
0653 uint16_t nmin,
0654 bool dupPassThrough,
0655 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0656
0657 auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0658
0659 auto &hitToTuple = *phitToTuple;
0660
0661 int first = blockDim.x * blockIdx.x + threadIdx.x;
0662 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0663 if (hitToTuple.size(idx) < 2)
0664 continue;
0665
0666 auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
0667
0668
0669 for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
0670 auto const it = *ip;
0671 auto qi = tracks_view[it].quality();
0672 if (qi <= reject)
0673 continue;
0674 auto opi = tracks_view[it].state()(2);
0675 auto e2opi = tracks_view[it].covariance()(9);
0676 auto cti = tracks_view[it].state()(3);
0677 auto e2cti = tracks_view[it].covariance()(12);
0678 auto nli = tracks_view[it].nLayers();
0679 for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) {
0680 auto const jt = *jp;
0681 auto qj = tracks_view[jt].quality();
0682 if (qj <= reject)
0683 continue;
0684 auto opj = tracks_view[jt].state()(2);
0685 auto ctj = tracks_view[jt].state()(3);
0686 auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti);
0687 if ((cti - ctj) * (cti - ctj) > dct)
0688 continue;
0689 auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi);
0690 if ((opi - opj) * (opi - opj) > dop)
0691 continue;
0692 auto nlj = tracks_view[jt].nLayers();
0693 if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj)))))
0694 tracks_view[jt].quality() = reject;
0695 else {
0696 tracks_view[it].quality() = reject;
0697 break;
0698 }
0699 }
0700 }
0701 }
0702 }
0703
0704 template <typename TrackerTraits>
0705 __global__ void kernel_sharedHitCleaner(HitsConstView<TrackerTraits> hh,
0706 TkSoAView<TrackerTraits> tracks_view,
0707 int nmin,
0708 bool dupPassThrough,
0709 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0710
0711 auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup;
0712
0713 auto const longTqual = pixelTrack::Quality::highPurity;
0714
0715 auto &hitToTuple = *phitToTuple;
0716
0717 int l1end = hh.hitsLayerStart()[1];
0718
0719 int first = blockDim.x * blockIdx.x + threadIdx.x;
0720 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0721 if (hitToTuple.size(idx) < 2)
0722 continue;
0723
0724 int8_t maxNl = 0;
0725
0726
0727 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0728 if (tracks_view[*it].quality() < longTqual)
0729 continue;
0730
0731 auto nl = tracks_view[*it].nLayers();
0732 maxNl = std::max(nl, maxNl);
0733 }
0734
0735 if (maxNl < 4)
0736 continue;
0737
0738
0739
0740
0741
0742 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0743 auto nl = tracks_view[*it].nLayers();
0744
0745
0746 if (idx < l1end and nl > nmin)
0747 continue;
0748
0749 if (nl < maxNl && tracks_view[*it].quality() > reject)
0750 tracks_view[*it].quality() = reject;
0751 }
0752 }
0753 }
0754
0755 template <typename TrackerTraits>
0756 __global__ void kernel_tripletCleaner(TkSoAView<TrackerTraits> tracks_view,
0757 uint16_t nmin,
0758 bool dupPassThrough,
0759 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0760
0761 auto const reject = pixelTrack::Quality::loose;
0762
0763 auto const good = pixelTrack::Quality::strict;
0764
0765 auto &hitToTuple = *phitToTuple;
0766
0767 int first = blockDim.x * blockIdx.x + threadIdx.x;
0768 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0769 if (hitToTuple.size(idx) < 2)
0770 continue;
0771
0772 float mc = maxScore;
0773 uint16_t im = tkNotFound;
0774 bool onlyTriplets = true;
0775
0776
0777 for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
0778 if (tracks_view[*it].quality() <= good)
0779 continue;
0780 onlyTriplets &= TracksUtilities<TrackerTraits>::isTriplet(tracks_view, *it);
0781 if (!onlyTriplets)
0782 break;
0783 }
0784
0785
0786 if (!onlyTriplets)
0787 continue;
0788
0789
0790 for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0791 auto const it = *ip;
0792 if (tracks_view[it].quality() >= good && std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
0793 mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
0794 im = it;
0795 }
0796 }
0797
0798 if (tkNotFound == im)
0799 continue;
0800
0801
0802 for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0803 auto const it = *ip;
0804 if (tracks_view[it].quality() > reject && it != im)
0805 tracks_view[it].quality() = reject;
0806 }
0807
0808 }
0809 }
0810
0811 template <typename TrackerTraits>
0812 __global__ void kernel_simpleTripletCleaner(TkSoAView<TrackerTraits> tracks_view,
0813 uint16_t nmin,
0814 bool dupPassThrough,
0815 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple) {
0816
0817 auto const reject = pixelTrack::Quality::loose;
0818
0819 auto const good = pixelTrack::Quality::loose;
0820
0821 auto &hitToTuple = *phitToTuple;
0822
0823 int first = blockDim.x * blockIdx.x + threadIdx.x;
0824 for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) {
0825 if (hitToTuple.size(idx) < 2)
0826 continue;
0827
0828 float mc = maxScore;
0829 uint16_t im = tkNotFound;
0830
0831
0832 for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0833 auto const it = *ip;
0834 if (tracks_view[it].quality() >= good && std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
0835 mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
0836 im = it;
0837 }
0838 }
0839
0840 if (tkNotFound == im)
0841 continue;
0842
0843
0844 for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
0845 auto const it = *ip;
0846 if (tracks_view[it].quality() > reject && TracksUtilities<TrackerTraits>::isTriplet(tracks_view, it) &&
0847 it != im)
0848 tracks_view[it].quality() = reject;
0849 }
0850
0851 }
0852 }
0853
0854 template <typename TrackerTraits>
0855 __global__ void kernel_print_found_ntuplets(HitsConstView<TrackerTraits> hh,
0856 TkSoAView<TrackerTraits> tracks_view,
0857 HitToTuple<TrackerTraits> const *__restrict__ phitToTuple,
0858 int32_t firstPrint,
0859 int32_t lastPrint,
0860 int iev) {
0861 constexpr auto loose = pixelTrack::Quality::loose;
0862
0863 int first = firstPrint + blockDim.x * blockIdx.x + threadIdx.x;
0864 for (int i = first, np = std::min(lastPrint, tracks_view.hitIndices().nOnes()); i < np;
0865 i += blockDim.x * gridDim.x) {
0866 auto nh = tracks_view.hitIndices().size(i);
0867 if (nh < 3)
0868 continue;
0869 if (tracks_view[i].quality() < loose)
0870 continue;
0871 printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n",
0872 10000 * iev + i,
0873 int(tracks_view[i].quality()),
0874 nh,
0875 tracks_view[i].nLayers(),
0876 TracksUtilities<TrackerTraits>::charge(tracks_view, i),
0877 tracks_view[i].pt(),
0878 tracks_view[i].eta(),
0879 TracksUtilities<TrackerTraits>::phi(tracks_view, i),
0880 TracksUtilities<TrackerTraits>::tip(tracks_view, i),
0881 TracksUtilities<TrackerTraits>::zip(tracks_view, i),
0882 tracks_view[i].chi2(),
0883 hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
0884 hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
0885 hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(),
0886 nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0,
0887 nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0,
0888 nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0,
0889 nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0);
0890 }
0891 }
0892
0893 __global__ void kernel_printCounters(Counters const *counters) {
0894 auto const &c = *counters;
0895 printf(
0896 "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | "
0897 "nDupHits | "
0898 "nFishCells | "
0899 "nKilledCells | "
0900 "nUsedCells | nZeroTrackCells ||\n");
0901 printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n",
0902 c.nEvents,
0903 c.nHits,
0904 c.nCells,
0905 c.nTuples,
0906 c.nFitTracks,
0907 c.nLooseTracks,
0908 c.nGoodTracks,
0909 c.nUsedHits,
0910 c.nDupHits,
0911 c.nFishCells,
0912 c.nKilledCells,
0913 c.nEmptyCells,
0914 c.nZeroTrackCells);
0915 printf(
0916 "Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f| %.3f||\n",
0917 c.nEvents,
0918 c.nHits / double(c.nEvents),
0919 c.nCells / double(c.nEvents),
0920 c.nTuples / double(c.nEvents),
0921 c.nFitTracks / double(c.nEvents),
0922 c.nLooseTracks / double(c.nEvents),
0923 c.nGoodTracks / double(c.nEvents),
0924 c.nUsedHits / double(c.nEvents),
0925 c.nDupHits / double(c.nEvents),
0926 c.nFishCells / double(c.nCells),
0927 c.nKilledCells / double(c.nCells),
0928 c.nEmptyCells / double(c.nCells),
0929 c.nZeroTrackCells / double(c.nCells));
0930 }
0931
0932 }