CUDATest/plugins/TestCUDAProducerGPUKernel.h

0001 #ifndef HeterogeneousCore_CUDACore_TestCUDAProducerGPUKernel_h
0002 #define HeterogeneousCore_CUDACore_TestCUDAProducerGPUKernel_h
0003
0004 #include <string>
0005
0006 #include <cuda_runtime.h>
0007
0008 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0009
0010 /**
0011  * This class models the actual CUDA implementation of an algorithm.
0012  *
0013  * Memory is allocated dynamically with the allocator in cms::cuda.
0014  *
0015  * The algorithm is intended to waste time with large matrix
0016  * operations so that the asynchronous nature of the CUDA integration
0017  * becomes visible with debug prints.
0018  */
0019 class TestCUDAProducerGPUKernel {
0020 public:
0021   static constexpr int NUM_VALUES = 4000;
0022
0023   TestCUDAProducerGPUKernel() = default;
0024   ~TestCUDAProducerGPUKernel() = default;
0025
0026   // returns (owning) pointer to device memory
0027   cms::cuda::device::unique_ptr<float[]> runAlgo(const std::string& label, cudaStream_t stream) const {
0028     return runAlgo(label, nullptr, stream);
0029   }
0030   cms::cuda::device::unique_ptr<float[]> runAlgo(const std::string& label,
0031                                                  const float* d_input,
0032                                                  cudaStream_t stream) const;
0033
0034   void runSimpleAlgo(float* d_data, cudaStream_t stream) const;
0035 };
0036
0037 #endif