File indexing completed on 2024-04-06 12:15:44
0001 #ifndef HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
0002 #define HeterogeneousCore_CUDAUtilities_interface_copyAsync_h
0003
0004 #include <type_traits>
0005 #include <vector>
0006
0007 #include "FWCore/Utilities/interface/propagate_const_array.h"
0008 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0010 #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
0011 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
0012 #include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
0013
0014 namespace cms {
0015 namespace cuda {
0016
0017
0018
0019 template <typename T>
0020 inline void copyAsync(device::unique_ptr<T>& dst, const host::unique_ptr<T>& src, cudaStream_t stream) {
0021
0022
0023 static_assert(std::is_array<T>::value == false,
0024 "For array types, use the other overload with the size parameter");
0025 cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
0026 }
0027
0028 template <typename T>
0029 inline void copyAsync(device::unique_ptr<T>& dst, const host::noncached::unique_ptr<T>& src, cudaStream_t stream) {
0030
0031
0032 static_assert(std::is_array<T>::value == false,
0033 "For array types, use the other overload with the size parameter");
0034 cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyHostToDevice, stream));
0035 }
0036
0037 template <typename T>
0038 inline void copyAsync(host::unique_ptr<T>& dst, const device::unique_ptr<T>& src, cudaStream_t stream) {
0039 static_assert(std::is_array<T>::value == false,
0040 "For array types, use the other overload with the size parameter");
0041 cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), sizeof(T), cudaMemcpyDeviceToHost, stream));
0042 }
0043
0044
0045
0046 template <typename T>
0047 inline void copyAsync(device::unique_ptr<T[]>& dst,
0048 const host::unique_ptr<T[]>& src,
0049 size_t nelements,
0050 cudaStream_t stream) {
0051 cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
0052 }
0053
0054 template <typename T>
0055 inline void copyAsync(device::unique_ptr<T[]>& dst,
0056 const host::noncached::unique_ptr<T[]>& src,
0057 size_t nelements,
0058 cudaStream_t stream) {
0059 cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyHostToDevice, stream));
0060 }
0061
0062 template <typename T>
0063 inline void copyAsync(host::unique_ptr<T[]>& dst,
0064 const device::unique_ptr<T[]>& src,
0065 size_t nelements,
0066 cudaStream_t stream) {
0067 cudaCheck(cudaMemcpyAsync(dst.get(), src.get(), nelements * sizeof(T), cudaMemcpyDeviceToHost, stream));
0068 }
0069
0070
0071 template <typename T>
0072 inline void copyAsync(cms::cuda::device::unique_ptr<T[]>& dst,
0073 const std::vector<T, cms::cuda::HostAllocator<T>>& src,
0074 cudaStream_t stream) {
0075 cudaCheck(cudaMemcpyAsync(dst.get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));
0076 }
0077
0078
0079 template <typename T>
0080 inline void copyAsync(edm::propagate_const_array<cms::cuda::device::unique_ptr<T[]>>& dst,
0081 const std::vector<T, cms::cuda::HostAllocator<T>>& src,
0082 cudaStream_t stream) {
0083 cudaCheck(cudaMemcpyAsync(
0084 get_underlying(dst).get(), src.data(), src.size() * sizeof(T), cudaMemcpyHostToDevice, stream));
0085 }
0086 }
0087 }
0088
0089 #endif