Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-07-17 02:54:11

0001 #include "RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h"
0002 
0003 #include <mutex>
0004 
0005 namespace {
0006   // CUDA atomics are NOT atomic on the CPU, so the statistics updates are protected with a mutex
0007   // while waiting for a more general solution (including multiple devices) to be proposed and implemented.
0008   std::mutex lock_stat;
0009 }  // namespace
0010 
0011 template <typename TrackerTraits>
0012 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::printCounters(Counters const *counters) {
0013   caHitNtupletGeneratorKernels::kernel_printCounters(counters);
0014 }
0015 
0016 template <typename TrackerTraits>
0017 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::buildDoublets(const HitsConstView &hh,
0018                                                                    int32_t offsetBPIX2,
0019                                                                    cudaStream_t stream) {
0020   using namespace gpuPixelDoublets;
0021 
0022   using GPUCACell = GPUCACellT<TrackerTraits>;
0023   using OuterHitOfCell = typename GPUCACell::OuterHitOfCell;
0024   using CellNeighbors = typename GPUCACell::CellNeighbors;
0025   using CellTracks = typename GPUCACell::CellTracks;
0026   using OuterHitOfCellContainer = typename GPUCACell::OuterHitOfCellContainer;
0027 
0028   auto nhits = hh.nHits();
0029 
0030 #ifdef NTUPLE_DEBUG
0031   std::cout << "building Doublets out of " << nhits << " Hits. BPIX2 offset is " << offsetBPIX2 << std::endl;
0032 #endif
0033 
0034   // use "nhits" to heuristically dimension the workspace
0035 
0036   // no need to use the Traits allocations, since we know this is being compiled for the CPU
0037   //this->device_isOuterHitOfCell_ = Traits::template make_unique<GPUCACell::OuterHitOfCell[]>(std::max(1U, nhits), stream);
0038   this->device_isOuterHitOfCell_ = std::make_unique<OuterHitOfCellContainer[]>(std::max(1U, nhits));
0039   assert(this->device_isOuterHitOfCell_.get());
0040   this->isOuterHitOfCell_ = OuterHitOfCell{this->device_isOuterHitOfCell_.get(), offsetBPIX2};
0041 
0042   auto cellStorageSize = TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors) +
0043                          TrackerTraits::maxNumOfActiveDoublets * sizeof(CellTracks);
0044   // no need to use the Traits allocations, since we know this is being compiled for the CPU
0045   //cellStorage_ = Traits::template make_unique<unsigned char[]>(cellStorageSize, stream);
0046   this->cellStorage_ = std::make_unique<unsigned char[]>(cellStorageSize);
0047   this->device_theCellNeighborsContainer_ = (CellNeighbors *)this->cellStorage_.get();
0048   this->device_theCellTracksContainer_ =
0049       (CellTracks *)(this->cellStorage_.get() + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors));
0050 
0051   initDoublets<TrackerTraits>(this->isOuterHitOfCell_,
0052                               nhits,
0053                               this->device_theCellNeighbors_.get(),
0054                               this->device_theCellNeighborsContainer_,
0055                               this->device_theCellTracks_.get(),
0056                               this->device_theCellTracksContainer_);
0057 
0058   // no need to use the Traits allocations, since we know this is being compiled for the CPU
0059   this->device_theCells_ = std::make_unique<GPUCACell[]>(this->params_.caParams_.maxNumberOfDoublets_);
0060   if (0 == nhits)
0061     return;  // protect against empty events
0062 
0063   // take all layer pairs into account
0064   auto nActualPairs = this->params_.nPairs();
0065 
0066   assert(nActualPairs <= TrackerTraits::nPairs);
0067 
0068   getDoubletsFromHisto<TrackerTraits>(this->device_theCells_.get(),
0069                                       this->device_nCells_,
0070                                       this->device_theCellNeighbors_.get(),
0071                                       this->device_theCellTracks_.get(),
0072                                       hh,
0073                                       this->isOuterHitOfCell_,
0074                                       nActualPairs,
0075                                       this->params_.caParams_.maxNumberOfDoublets_,
0076                                       this->device_cellCuts_.get());
0077 }
0078 
0079 template <typename TrackerTraits>
0080 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::launchKernels(const HitsConstView &hh,
0081                                                                    TkSoAView &tracks_view,
0082                                                                    cudaStream_t cudaStream) {
0083   using namespace caHitNtupletGeneratorKernels;
0084 
0085   // zero tuples
0086   cms::cuda::launchZero(&tracks_view.hitIndices(), cudaStream);
0087 
0088   uint32_t nhits = hh.metadata().size();
0089 
0090 #ifdef NTUPLE_DEBUG
0091   std::cout << "start tuple building. N hits " << nhits << std::endl;
0092   if (nhits < 2)
0093     std::cout << "too few hits " << nhits << std::endl;
0094 #endif
0095 
0096   //
0097   // applying conbinatoric cleaning such as fishbone at this stage is too expensive
0098   //
0099 
0100   kernel_connect<TrackerTraits>(this->device_hitTuple_apc_,
0101                                 this->device_hitToTuple_apc_,  // needed only to be reset, ready for next kernel
0102                                 hh,
0103                                 this->device_theCells_.get(),
0104                                 this->device_nCells_,
0105                                 this->device_theCellNeighbors_.get(),
0106                                 this->isOuterHitOfCell_,
0107                                 this->params_.caParams_);
0108 
0109   if (nhits > 1 && this->params_.earlyFishbone_) {
0110     gpuPixelDoublets::fishbone<TrackerTraits>(
0111         hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, false);
0112   }
0113 
0114   kernel_find_ntuplets<TrackerTraits>(hh,
0115                                       tracks_view,
0116                                       this->device_theCells_.get(),
0117                                       this->device_nCells_,
0118                                       this->device_theCellTracks_.get(),
0119                                       this->device_hitTuple_apc_,
0120                                       this->params_.caParams_);
0121   if (this->params_.doStats_)
0122     kernel_mark_used(this->device_theCells_.get(), this->device_nCells_);
0123 
0124   cms::cuda::finalizeBulk(this->device_hitTuple_apc_, &tracks_view.hitIndices());
0125 
0126   kernel_fillHitDetIndices<TrackerTraits>(tracks_view, hh);
0127   kernel_fillNLayers<TrackerTraits>(tracks_view, this->device_hitTuple_apc_);
0128 
0129   // remove duplicates (tracks that share a doublet)
0130   kernel_earlyDuplicateRemover<TrackerTraits>(
0131       this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_);
0132 
0133   kernel_countMultiplicity<TrackerTraits>(tracks_view, this->device_tupleMultiplicity_.get());
0134   cms::cuda::launchFinalize(this->device_tupleMultiplicity_.get(), cudaStream);
0135   kernel_fillMultiplicity<TrackerTraits>(tracks_view, this->device_tupleMultiplicity_.get());
0136 
0137   if (nhits > 1 && this->params_.lateFishbone_) {
0138     gpuPixelDoublets::fishbone<TrackerTraits>(
0139         hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, true);
0140   }
0141 }
0142 
0143 template <typename TrackerTraits>
0144 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::classifyTuples(const HitsConstView &hh,
0145                                                                     TkSoAView &tracks_view,
0146                                                                     cudaStream_t cudaStream) {
0147   using namespace caHitNtupletGeneratorKernels;
0148 
0149   int32_t nhits = hh.metadata().size();
0150 
0151   // classify tracks based on kinematics
0152   kernel_classifyTracks<TrackerTraits>(tracks_view, this->params_.qualityCuts_);
0153   if (this->params_.lateFishbone_) {
0154     // apply fishbone cleaning to good tracks
0155     kernel_fishboneCleaner<TrackerTraits>(this->device_theCells_.get(), this->device_nCells_, tracks_view);
0156   }
0157 
0158   // remove duplicates (tracks that share a doublet)
0159   kernel_fastDuplicateRemover<TrackerTraits>(
0160       this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_);
0161 
0162   // fill hit->track "map"
0163   if (this->params_.doSharedHitCut_ || this->params_.doStats_) {
0164     kernel_countHitInTracks<TrackerTraits>(tracks_view, this->device_hitToTuple_.get());
0165     cms::cuda::launchFinalize(this->hitToTupleView_, cudaStream);
0166     kernel_fillHitInTracks<TrackerTraits>(tracks_view, this->device_hitToTuple_.get());
0167   }
0168 
0169   // remove duplicates (tracks that share at least one hit)
0170   if (this->params_.doSharedHitCut_) {
0171     kernel_rejectDuplicate<TrackerTraits>(tracks_view,
0172                                           this->params_.minHitsForSharingCut_,
0173                                           this->params_.dupPassThrough_,
0174                                           this->device_hitToTuple_.get());
0175 
0176     kernel_sharedHitCleaner<TrackerTraits>(hh,
0177                                            tracks_view,
0178                                            this->params_.minHitsForSharingCut_,
0179                                            this->params_.dupPassThrough_,
0180                                            this->device_hitToTuple_.get());
0181     if (this->params_.useSimpleTripletCleaner_) {
0182       kernel_simpleTripletCleaner<TrackerTraits>(tracks_view,
0183                                                  this->params_.minHitsForSharingCut_,
0184                                                  this->params_.dupPassThrough_,
0185                                                  this->device_hitToTuple_.get());
0186     } else {
0187       kernel_tripletCleaner<TrackerTraits>(tracks_view,
0188                                            this->params_.minHitsForSharingCut_,
0189                                            this->params_.dupPassThrough_,
0190                                            this->device_hitToTuple_.get());
0191     }
0192   }
0193 
0194   if (this->params_.doStats_) {
0195     std::lock_guard guard(lock_stat);
0196     kernel_checkOverflows<TrackerTraits>(tracks_view,
0197                                          this->device_tupleMultiplicity_.get(),
0198                                          this->device_hitToTuple_.get(),
0199                                          this->device_hitTuple_apc_,
0200                                          this->device_theCells_.get(),
0201                                          this->device_nCells_,
0202                                          this->device_theCellNeighbors_.get(),
0203                                          this->device_theCellTracks_.get(),
0204                                          this->isOuterHitOfCell_,
0205                                          nhits,
0206                                          this->params_.caParams_.maxNumberOfDoublets_,
0207                                          this->counters_);
0208   }
0209 
0210   if (this->params_.doStats_) {
0211     // counters (add flag???)
0212     std::lock_guard guard(lock_stat);
0213     kernel_doStatsForHitInTracks<TrackerTraits>(this->device_hitToTuple_.get(), this->counters_);
0214     kernel_doStatsForTracks<TrackerTraits>(tracks_view, this->counters_);
0215   }
0216 
0217 #ifdef DUMP_GPU_TK_TUPLES
0218   static std::atomic<int> iev(0);
0219   static std::mutex lock;
0220   {
0221     std::lock_guard<std::mutex> guard(lock);
0222     ++iev;
0223     kernel_print_found_ntuplets<TrackerTraits>(hh, tracks_view, this->device_hitToTuple_.get(), 0, 1000000, iev);
0224   }
0225 #endif
0226 }
0227 
// Explicit instantiations of the CPU kernels class for the Phase1, Phase2 and HIonPhase1 pixel topologies.
0228 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase1>;
0229 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase2>;
0230 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::HIonPhase1>;