CUDAUtilities/interface/copyAsync.h

0001 #ifndef HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
0002 #define HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
0003
0004 #include <type_traits>
0005 #include <vector>
0006
0007 #include "FWCore/Utilities/interface/propagate_const_array.h"
0008 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0010 #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
0011 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
0012 #include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
0013
0014 namespace cms {
0015   namespace cuda {
0016
0017     // Single element
0018
0019     template <typename T>
0020     inline void copyAsync(device::unique_ptr<T>& dst, const host::unique_ptr<T>& src, cudaStream_t stream) {
0021       // Shouldn't compile for array types because of sizeof(T), but
0022       // let's add an assert with a more helpful message
0023       static_assert(std::is_array<T>::value == false,
0024                     "For array types, use the other overload with the size parameter");
0025       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
0026     }
0027
0028     template <typename T>
0029     inline void copyAsync(device::unique_ptr<T>& dst, const host::noncached::unique_ptr<T>& src, cudaStream_t stream) {
0030       // Shouldn't compile for array types because of sizeof(T), but
0031       // let's add an assert with a more helpful message
0032       static_assert(std::is_array<T>::value == false,
0033                     "For array types, use the other overload with the size parameter");
0034       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
0035     }
0036
0037     template <typename T>
0038     inline void copyAsync(host::unique_ptr<T>& dst, const device::unique_ptr<T>& src, cudaStream_t stream) {
0039       static_assert(std::is_array<T>::value == false,
0040                     "For array types, use the other overload with the size parameter");
0041       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyDeviceToHost, stream));
0042     }
0043
0044     // Multiple elements
0045
0046     template <typename T>
0047     inline void copyAsync(device::unique_ptr<T[]>& dst,
0048                           const host::unique_ptr<T[]>& src,
0049                           size_t nelements,
0050                           cudaStream_t stream) {
0051       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
0052     }
0053
0054     template <typename T>
0055     inline void copyAsync(device::unique_ptr<T[]>& dst,
0056                           const host::noncached::unique_ptr<T[]>& src,
0057                           size_t nelements,
0058                           cudaStream_t stream) {
0059       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
0060     }
0061
0062     template <typename T>
0063     inline void copyAsync(host::unique_ptr<T[]>& dst,
0064                           const device::unique_ptr<T[]>& src,
0065                           size_t nelements,
0066                           cudaStream_t stream) {
0067       cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyDeviceToHost, stream));
0068     }
0069
0070     // copy from a host vector using pinned memory
0071     template <typename T>
0072     inline void copyAsync(cms::cuda::device::unique_ptr<T[]>& dst,
0073                           const std::vector<T, cms::cuda::HostAllocator<T>>& src,
0074                           cudaStream_t stream) {
0075       cudaCheck(cudaMemcpyAsync(dst.get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));
0076     }
0077
0078     // special case used to transfer conditions data
0079     template <typename T>
0080     inline void copyAsync(edm::propagate_const_array<cms::cuda::device::unique_ptr<T[]>>& dst,
0081                           const std::vector<T, cms::cuda::HostAllocator<T>>& src,
0082                           cudaStream_t stream) {
0083       cudaCheck(cudaMemcpyAsync(
0084           get_underlying(dst).get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));
0085     }
0086   }  // namespace cuda
0087 }  // namespace cms
0088
0089 #endif  // HeterogeneousCore_CUDAUtilities_interface_copyAsync_h