Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-06-24 02:11:17

0001 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0002 
0003 #include "CAHitNtupletGeneratorKernels.h"
0004 
// Allocates the intermediate buffers used for tuple building and resets the
// counters that this function owns.
//
// The same body is compiled for the GPU and the CPU flavour of the kernels
// class; the preprocessor selects the signature, and `Traits` selects how the
// memory is obtained and how the cell counter is cleared.
//
// Parameters:
//   nHits   number of hits; one extra counter is allocated on top of it.
//   stream  CUDA stream passed to every allocation, memset and zeroing call.
//
// NOTE(review): the two AtomicPairCounter slots carved out of device_storage_
// are not reset here — presumably they are cleared before first use elsewhere;
// confirm against the callers.
template <>
#ifdef __CUDACC__
void CAHitNtupletGeneratorKernelsGPU::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
#else
void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
#endif
  //////////////////////////////////////////////////////////
  // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER)
  //////////////////////////////////////////////////////////

  device_theCellNeighbors_ = Traits::template make_unique<caConstants::CellNeighborsVector>(stream);
  device_theCellTracks_ = Traits::template make_unique<caConstants::CellTracksVector>(stream);

#ifdef GPU_DEBUG
  std::cout << "Allocation for tuple building. N hits " << nHits << std::endl;
#endif

  nHits++;  // storage requires one more counter;
  assert(nHits > 0);

  // Hit -> tuple association: the container itself plus its counter storage,
  // both exposed through hitToTupleView_.
  device_hitToTuple_ = Traits::template make_unique<HitToTuple>(stream);
  device_hitToTupleStorage_ = Traits::template make_unique<HitToTuple::Counter[]>(nHits, stream);
  hitToTupleView_.assoc = device_hitToTuple_.get();
  hitToTupleView_.offStorage = device_hitToTupleStorage_.get();
  hitToTupleView_.offSize = nHits;

  device_tupleMultiplicity_ = Traits::template make_unique<TupleMultiplicity>(stream);

  // One allocation of three c_type elements backs three independent counters:
  //   [0] hit-tuple AtomicPairCounter
  //   [1] hit-to-tuple AtomicPairCounter
  //   [2] cell counter (only a uint32_t at the start of the element is used)
  device_storage_ = Traits::template make_unique<cms::cuda::AtomicPairCounter::c_type[]>(3, stream);
  auto* const counterStorage = device_storage_.get();

  device_hitTuple_apc_ = reinterpret_cast<cms::cuda::AtomicPairCounter*>(counterStorage);
  device_hitToTuple_apc_ = reinterpret_cast<cms::cuda::AtomicPairCounter*>(counterStorage) + 1;
  device_nCells_ = reinterpret_cast<uint32_t*>(counterStorage + 2);

  // FIXME: consider collapsing these 3 in one adhoc kernel
  if constexpr (std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
    // GPU traits: clear the counter through the runtime, on the caller's stream.
    cudaCheck(cudaMemsetAsync(device_nCells_, 0, sizeof(uint32_t), stream));
  } else {
    // Any other traits: the pointer is written directly from this function.
    *device_nCells_ = 0;
  }
  cms::cuda::launchZero(device_tupleMultiplicity_.get(), stream);
  cms::cuda::launchZero(hitToTupleView_, stream);  // we may wish to keep it in the edm
#ifdef GPU_DEBUG
  // Check the synchronisation result as well: an asynchronous failure of the
  // work queued above is reported by this call and must not be dropped.
  cudaCheck(cudaDeviceSynchronize());
  cudaCheck(cudaGetLastError());
#endif
}