Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:05:46

0001 #ifndef HeterogeneousCore_CUDAUtilities_memsetAsync_h
0002 #define HeterogeneousCore_CUDAUtilities_memsetAsync_h
0003 
0004 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0005 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0006 
0007 #include <type_traits>
0008 
0009 namespace cms {
0010   namespace cuda {
0011     template <typename T>
0012     inline void memsetAsync(device::unique_ptr<T>& ptr, T value, cudaStream_t stream) {
0013       // Shouldn't compile for array types because of sizeof(T), but
0014       // let's add an assert with a more helpful message
0015       static_assert(std::is_array<T>::value == false,
0016                     "For array types, use the other overload with the size parameter");
0017       cudaCheck(cudaMemsetAsync(ptr.get(), value, sizeof(T), stream));
0018     }
0019 
0020     /**
0021    * The type of `value` is `int` because of `cudaMemsetAsync()` takes
0022    * it as an `int`. Note that `cudaMemsetAsync()` sets the value of
0023    * each **byte** to `value`. This may lead to unexpected results if
0024    * `sizeof(T) > 1` and `value != 0`.
0025    */
0026     template <typename T>
0027     inline void memsetAsync(device::unique_ptr<T[]>& ptr, int value, size_t nelements, cudaStream_t stream) {
0028       cudaCheck(cudaMemsetAsync(ptr.get(), value, nelements * sizeof(T), stream));
0029     }
0030   }  // namespace cuda
0031 }  // namespace cms
0032 
0033 #endif