File indexing completed on 2024-04-06 12:28:33
0001 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0002
0003 #include "CAHitNtupletGeneratorKernels.h"
0004
0005
0006
0007 template <typename TrackerTraits>
0008 #ifdef __CUDACC__
0009 void CAHitNtupletGeneratorKernelsGPU<TrackerTraits>::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
0010 using Traits = cms::cudacompat::GPUTraits;
0011 #else
0012 void CAHitNtupletGeneratorKernelsCPU<TrackerTraits>::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
0013 using Traits = cms::cudacompat::CPUTraits;
0014 #endif
0015
0016 using CellCuts = gpuPixelDoublets::CellCutsT<TrackerTraits>;
0017
0018
0019
0020
0021
0022 this->device_theCellNeighbors_ = Traits::template make_unique<CellNeighborsVector>(stream);
0023 this->device_theCellTracks_ = Traits::template make_unique<CellTracksVector>(stream);
0024
0025 #ifdef GPU_DEBUG
0026 std::cout << "Allocation for tuple building. N hits " << nHits << std::endl;
0027 #endif
0028
0029 nHits++;
0030 assert(nHits > 0);
0031 this->device_hitToTuple_ = Traits::template make_unique<HitToTuple>(stream);
0032 this->device_hitToTupleStorage_ = Traits::template make_unique<typename HitToTuple::Counter[]>(nHits, stream);
0033 this->hitToTupleView_.assoc = this->device_hitToTuple_.get();
0034 this->hitToTupleView_.offStorage = this->device_hitToTupleStorage_.get();
0035 this->hitToTupleView_.offSize = nHits;
0036
0037 this->device_tupleMultiplicity_ = Traits::template make_unique<TupleMultiplicity>(stream);
0038
0039 this->device_storage_ = Traits::template make_unique<cms::cuda::AtomicPairCounter::c_type[]>(3, stream);
0040
0041 this->device_hitTuple_apc_ = (cms::cuda::AtomicPairCounter*)this->device_storage_.get();
0042 this->device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)this->device_storage_.get() + 1;
0043 this->device_nCells_ = (uint32_t*)(this->device_storage_.get() + 2);
0044
0045 this->device_cellCuts_ = Traits::template make_unique<CellCuts>(stream);
0046
0047 if constexpr (std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
0048 cudaCheck(cudaMemsetAsync(this->device_nCells_, 0, sizeof(uint32_t), stream));
0049 cudaCheck(cudaMemcpyAsync(
0050 this->device_cellCuts_.get(), &(this->params_.cellCuts_), sizeof(CellCuts), cudaMemcpyDefault, stream));
0051 } else {
0052 *(this->device_nCells_) = 0;
0053 *(this->device_cellCuts_.get()) = this->params_.cellCuts_;
0054 }
0055 cms::cuda::launchZero(this->device_tupleMultiplicity_.get(), stream);
0056 cms::cuda::launchZero(this->hitToTupleView_, stream);
0057 #ifdef GPU_DEBUG
0058 cudaDeviceSynchronize();
0059 cudaCheck(cudaGetLastError());
0060 #endif
0061 }
0062
0063 template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::Phase1>;
0064 template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::Phase2>;
0065 template class CAHitNtupletGeneratorKernelsGPU<pixelTopology::HIonPhase1>;
0066
0067 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase1>;
0068 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::Phase2>;
0069 template class CAHitNtupletGeneratorKernelsCPU<pixelTopology::HIonPhase1>;