File indexing completed on 2023-07-17 02:54:11
0001 #include "RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h"
0002
0003 #include <mutex>
0004
namespace {

  // File-local mutex. Within this file it is locked around the statistics
  // sections of classifyTuples() (the blocks guarded by params_.doStats_).
  std::mutex lock_stat;

}  // namespace
0010
0011 template <typename TrackerTraits>
0012 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::printCounters(Counters const *counters) {
0013 caHitNtupletGeneratorKernels::kernel_printCounters(counters);
0014 }
0015
0016 template <typename TrackerTraits>
0017 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::buildDoublets(const HitsConstView &hh,
0018 int32_t offsetBPIX2,
0019 cudaStream_t stream) {
0020 using namespace gpuPixelDoublets;
0021
0022 using GPUCACell = GPUCACellT<TrackerTraits>;
0023 using OuterHitOfCell = typename GPUCACell::OuterHitOfCell;
0024 using CellNeighbors = typename GPUCACell::CellNeighbors;
0025 using CellTracks = typename GPUCACell::CellTracks;
0026 using OuterHitOfCellContainer = typename GPUCACell::OuterHitOfCellContainer;
0027
0028 auto nhits = hh.nHits();
0029
0030 #ifdef NTUPLE_DEBUG
0031 std::cout << "building Doublets out of " << nhits << " Hits. BPIX2 offset is " << offsetBPIX2 << std::endl;
0032 #endif
0033
0034
0035
0036
0037
0038 this->device_isOuterHitOfCell_ = std::make_unique<OuterHitOfCellContainer[]>(std::max(1U, nhits));
0039 assert(this->device_isOuterHitOfCell_.get());
0040 this->isOuterHitOfCell_ = OuterHitOfCell{this->device_isOuterHitOfCell_.get(), offsetBPIX2};
0041
0042 auto cellStorageSize = TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors) +
0043 TrackerTraits::maxNumOfActiveDoublets * sizeof(CellTracks);
0044
0045
0046 this->cellStorage_ = std::make_unique<unsigned char[]>(cellStorageSize);
0047 this->device_theCellNeighborsContainer_ = (CellNeighbors *)this->cellStorage_.get();
0048 this->device_theCellTracksContainer_ =
0049 (CellTracks *)(this->cellStorage_.get() + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors));
0050
0051 initDoublets<TrackerTraits>(this->isOuterHitOfCell_,
0052 nhits,
0053 this->device_theCellNeighbors_.get(),
0054 this->device_theCellNeighborsContainer_,
0055 this->device_theCellTracks_.get(),
0056 this->device_theCellTracksContainer_);
0057
0058
0059 this->device_theCells_ = std::make_unique<GPUCACell[]>(this->params_.caParams_.maxNumberOfDoublets_);
0060 if (0 == nhits)
0061 return;
0062
0063
0064 auto nActualPairs = this->params_.nPairs();
0065
0066 assert(nActualPairs <= TrackerTraits::nPairs);
0067
0068 getDoubletsFromHisto<TrackerTraits>(this->device_theCells_.get(),
0069 this->device_nCells_,
0070 this->device_theCellNeighbors_.get(),
0071 this->device_theCellTracks_.get(),
0072 hh,
0073 this->isOuterHitOfCell_,
0074 nActualPairs,
0075 this->params_.caParams_.maxNumberOfDoublets_,
0076 this->device_cellCuts_.get());
0077 }
0078
0079 template <typename TrackerTraits>
0080 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::launchKernels(const HitsConstView &hh,
0081 TkSoAView &tracks_view,
0082 cudaStream_t cudaStream) {
0083 using namespace caHitNtupletGeneratorKernels;
0084
0085
0086 cms::cuda::launchZero(&tracks_view.hitIndices(), cudaStream);
0087
0088 uint32_t nhits = hh.metadata().size();
0089
0090 #ifdef NTUPLE_DEBUG
0091 std::cout << "start tuple building. N hits " << nhits << std::endl;
0092 if (nhits < 2)
0093 std::cout << "too few hits " << nhits << std::endl;
0094 #endif
0095
0096
0097
0098
0099
0100 kernel_connect<TrackerTraits>(this->device_hitTuple_apc_,
0101 this->device_hitToTuple_apc_,
0102 hh,
0103 this->device_theCells_.get(),
0104 this->device_nCells_,
0105 this->device_theCellNeighbors_.get(),
0106 this->isOuterHitOfCell_,
0107 this->params_.caParams_);
0108
0109 if (nhits > 1 && this->params_.earlyFishbone_) {
0110 gpuPixelDoublets::fishbone<TrackerTraits>(
0111 hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, false);
0112 }
0113
0114 kernel_find_ntuplets<TrackerTraits>(hh,
0115 tracks_view,
0116 this->device_theCells_.get(),
0117 this->device_nCells_,
0118 this->device_theCellTracks_.get(),
0119 this->device_hitTuple_apc_,
0120 this->params_.caParams_);
0121 if (this->params_.doStats_)
0122 kernel_mark_used(this->device_theCells_.get(), this->device_nCells_);
0123
0124 cms::cuda::finalizeBulk(this->device_hitTuple_apc_, &tracks_view.hitIndices());
0125
0126 kernel_fillHitDetIndices<TrackerTraits>(tracks_view, hh);
0127 kernel_fillNLayers<TrackerTraits>(tracks_view, this->device_hitTuple_apc_);
0128
0129
0130 kernel_earlyDuplicateRemover<TrackerTraits>(
0131 this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_);
0132
0133 kernel_countMultiplicity<TrackerTraits>(tracks_view, this->device_tupleMultiplicity_.get());
0134 cms::cuda::launchFinalize(this->device_tupleMultiplicity_.get(), cudaStream);
0135 kernel_fillMultiplicity<TrackerTraits>(tracks_view, this->device_tupleMultiplicity_.get());
0136
0137 if (nhits > 1 && this->params_.lateFishbone_) {
0138 gpuPixelDoublets::fishbone<TrackerTraits>(
0139 hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, true);
0140 }
0141 }
0142
0143 template <typename TrackerTraits>
0144 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::classifyTuples(const HitsConstView &hh,
0145 TkSoAView &tracks_view,
0146 cudaStream_t cudaStream) {
0147 using namespace caHitNtupletGeneratorKernels;
0148
0149 int32_t nhits = hh.metadata().size();
0150
0151
0152 kernel_classifyTracks<TrackerTraits>(tracks_view, this->params_.qualityCuts_);
0153 if (this->params_.lateFishbone_) {
0154
0155 kernel_fishboneCleaner<TrackerTraits>(this->device_theCells_.get(), this->device_nCells_, tracks_view);
0156 }
0157
0158
0159 kernel_fastDuplicateRemover<TrackerTraits>(
0160 this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_);
0161
0162
0163 if (this->params_.doSharedHitCut_ || this->params_.doStats_) {
0164 kernel_countHitInTracks<TrackerTraits>(tracks_view, this->device_hitToTuple_.get());
0165 cms::cuda::launchFinalize(this->hitToTupleView_, cudaStream);
0166 kernel_fillHitInTracks<TrackerTraits>(tracks_view, this->device_hitToTuple_.get());
0167 }
0168
0169
0170 if (this->params_.doSharedHitCut_) {
0171 kernel_rejectDuplicate<TrackerTraits>(tracks_view,
0172 this->params_.minHitsForSharingCut_,
0173 this->params_.dupPassThrough_,
0174 this->device_hitToTuple_.get());
0175
0176 kernel_sharedHitCleaner<TrackerTraits>(hh,
0177 tracks_view,
0178 this->params_.minHitsForSharingCut_,
0179 this->params_.dupPassThrough_,
0180 this->device_hitToTuple_.get());
0181 if (this->params_.useSimpleTripletCleaner_) {
0182 kernel_simpleTripletCleaner<TrackerTraits>(tracks_view,
0183 this->params_.minHitsForSharingCut_,
0184 this->params_.dupPassThrough_,
0185 this->device_hitToTuple_.get());
0186 } else {
0187 kernel_tripletCleaner<TrackerTraits>(tracks_view,
0188 this->params_.minHitsForSharingCut_,
0189 this->params_.dupPassThrough_,
0190 this->device_hitToTuple_.get());
0191 }
0192 }
0193
0194 if (this->params_.doStats_) {
0195 std::lock_guard guard(lock_stat);
0196 kernel_checkOverflows<TrackerTraits>(tracks_view,
0197 this->device_tupleMultiplicity_.get(),
0198 this->device_hitToTuple_.get(),
0199 this->device_hitTuple_apc_,
0200 this->device_theCells_.get(),
0201 this->device_nCells_,
0202 this->device_theCellNeighbors_.get(),
0203 this->device_theCellTracks_.get(),
0204 this->isOuterHitOfCell_,
0205 nhits,
0206 this->params_.caParams_.maxNumberOfDoublets_,
0207 this->counters_);
0208 }
0209
0210 if (this->params_.doStats_) {
0211
0212 std::lock_guard guard(lock_stat);
0213 kernel_doStatsForHitInTracks<TrackerTraits>(this->device_hitToTuple_.get(), this->counters_);
0214 kernel_doStatsForTracks<TrackerTraits>(tracks_view, this->counters_);
0215 }
0216
0217 #ifdef DUMP_GPU_TK_TUPLES
0218 static std::atomic<int> iev(0);
0219 static std::mutex lock;
0220 {
0221 std::lock_guard<std::mutex> guard(lock);
0222 ++iev;
0223 kernel_print_found_ntuplets<TrackerTraits>(hh, tracks_view, this->device_hitToTuple_.get(), 0, 1000000, iev);
0224 }
0225 #endif
0226 }
0227
0228 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase1>;
0229 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase2>;
0230 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::HIonPhase1>;