Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-07-17 02:54:11

0001 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0002 
0003 #include "CAHitNtupletGeneratorKernels.h"
0004 
0005 // #define GPU_DEBUG
// Allocates all per-event intermediate buffers used by the ntuplet (track seed)
// building, sized for `nHits` hits, on the given CUDA stream.
//
// NOTE(review): this body is deliberately compiled twice. Under nvcc
// (__CUDACC__) it defines the GPU specialization and picks GPUTraits; under the
// host compiler it defines the CPU specialization with CPUTraits. The #ifdef
// intentionally cuts through the function signature so one body serves both
// backends — presumably Traits::make_unique dispatches to device vs. host
// allocation (confirm in cudacompat).
template <typename TrackerTraits>
#ifdef __CUDACC__
void CAHitNtupletGeneratorKernelsGPU<TrackerTraits>::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
  using Traits = cms::cudacompat::GPUTraits;
#else
void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
  using Traits = cms::cudacompat::CPUTraits;
#endif

  using CellCuts = gpuPixelDoublets::CellCutsT<TrackerTraits>;

  //////////////////////////////////////////////////////////
  // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER)
  //////////////////////////////////////////////////////////

  // Per-cell adjacency containers filled during doublet/CA building.
  this->device_theCellNeighbors_ = Traits::template make_unique<CellNeighborsVector>(stream);
  this->device_theCellTracks_ = Traits::template make_unique<CellTracksVector>(stream);

#ifdef GPU_DEBUG
  std::cout << "Allocation for tuple building. N hits " << nHits << std::endl;
#endif

  nHits++;  // storage requires one more counter;
  assert(nHits > 0);
  // Hit -> tuple associator plus its external counter storage (nHits + 1
  // counters, see increment above); the view bundles the associator, its
  // off-struct storage pointer and the storage size.
  this->device_hitToTuple_ = Traits::template make_unique<HitToTuple>(stream);
  this->device_hitToTupleStorage_ = Traits::template make_unique<typename HitToTuple::Counter[]>(nHits, stream);
  this->hitToTupleView_.assoc = this->device_hitToTuple_.get();
  this->hitToTupleView_.offStorage = this->device_hitToTupleStorage_.get();
  this->hitToTupleView_.offSize = nHits;

  this->device_tupleMultiplicity_ = Traits::template make_unique<TupleMultiplicity>(stream);

  // One contiguous allocation of 3 AtomicPairCounter-sized slots:
  //   slot 0 -> hit-tuple atomic pair counter
  //   slot 1 -> hit-to-tuple atomic pair counter
  //   slot 2 -> reinterpreted as the uint32_t cell counter
  this->device_storage_ = Traits::template make_unique<cms::cuda::AtomicPairCounter::c_type[]>(3, stream);

  this->device_hitTuple_apc_ = (cms::cuda::AtomicPairCounter*)this->device_storage_.get();
  this->device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)this->device_storage_.get() + 1;
  this->device_nCells_ = (uint32_t*)(this->device_storage_.get() + 2);

  this->device_cellCuts_ = Traits::template make_unique<CellCuts>(stream);
  // FIXME: consider collapsing these 3 in one adhoc kernel
  if constexpr (std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
    // GPU backend: zero the cell counter and upload the cell cuts
    // asynchronously on the caller's stream (no sync here — later work on the
    // same stream is ordered after these operations).
    cudaCheck(cudaMemsetAsync(this->device_nCells_, 0, sizeof(uint32_t), stream));
    cudaCheck(cudaMemcpyAsync(
        this->device_cellCuts_.get(), &(this->params_.cellCuts_), sizeof(CellCuts), cudaMemcpyDefault, stream));
  } else {
    // CPU backend: the "device" pointers are host memory, so plain writes do.
    *(this->device_nCells_) = 0;
    *(this->device_cellCuts_.get()) = this->params_.cellCuts_;
  }
  // Reset the associative containers so they start empty — presumably a kernel
  // launch on GPU and a plain loop on CPU (confirm in cms::cuda::launchZero).
  cms::cuda::launchZero(this->device_tupleMultiplicity_.get(), stream);
  cms::cuda::launchZero(this->hitToTupleView_, stream);  // we may wish to keep it in the edm
#ifdef GPU_DEBUG
  // Debug-only: force completion so any async error surfaces here.
  cudaDeviceSynchronize();
  cudaCheck(cudaGetLastError());
#endif
}
0061 
// Explicit template instantiations for every supported pixel topology
// (Phase 1, Phase 2, heavy-ion Phase 1). Which set of definitions these pick
// up (GPU vs. CPU) depends on the compiler processing this file — see the
// __CUDACC__ split in allocateOnGPU above.
template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::Phase1>;
template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::Phase2>;
template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::HIonPhase1>;

template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase1>;
template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase2>;
template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::HIonPhase1>;