Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:44

0001 #ifndef HeterogeneousCore_CUDACore_TestCUDAProducerGPUKernel_h
0002 #define HeterogeneousCore_CUDACore_TestCUDAProducerGPUKernel_h
0003 
0004 #include <string>
0005 
0006 #include <cuda_runtime.h>
0007 
0008 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0009 
0010 /**
0011  * This class models the actual CUDA implementation of an algorithm.
0012  *
0013  * Memory is allocated dynamically with the allocator in cms::cuda.
0014  *
0015  * The algorithm is intended to waste time with large matrix
0016  * operations so that the asynchronous nature of the CUDA integration
0017  * becomes visible with debug prints.
0018  */
0019 class TestCUDAProducerGPUKernel {
0020 public:
0021   static constexpr int NUM_VALUES = 4000;
0022 
0023   TestCUDAProducerGPUKernel() = default;
0024   ~TestCUDAProducerGPUKernel() = default;
0025 
0026   // returns (owning) pointer to device memory
0027   cms::cuda::device::unique_ptr<float[]> runAlgo(const std::string& label, cudaStream_t stream) const {
0028     return runAlgo(label, nullptr, stream);
0029   }
0030   cms::cuda::device::unique_ptr<float[]> runAlgo(const std::string& label,
0031                                                  const float* d_input,
0032                                                  cudaStream_t stream) const;
0033 
0034   void runSimpleAlgo(float* d_data, cudaStream_t stream) const;
0035 };
0036 
0037 #endif