File indexing completed on 2024-09-07 04:36:35
0001 #ifndef HeterogeneousCore_CUDAUtilities_interface_device_unique_ptr_h
0002 #define HeterogeneousCore_CUDAUtilities_interface_device_unique_ptr_h
0003
#include <cstddef>
#include <functional>
#include <limits>
#include <memory>
#include <new>
#include <type_traits>

#include "FWCore/Utilities/interface/Likely.h"
#include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h"
#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
0010
0011 namespace cms {
0012 namespace cuda {
0013 namespace device {
0014 namespace impl {
0015
0016 class DeviceDeleter {
0017 public:
0018 DeviceDeleter() = default;
0019 DeviceDeleter(int device) : device_{device} {}
0020
0021 void operator()(void *ptr) {
0022 if (LIKELY(device_ >= 0)) {
0023 free_device(device_, ptr);
0024 }
0025 }
0026
0027 private:
0028 int device_ = -1;
0029 };
0030 }
0031
0032 template <typename T>
0033 using unique_ptr = std::unique_ptr<T, impl::DeviceDeleter>;
0034
0035 namespace impl {
0036 template <typename T>
0037 struct make_device_unique_selector {
0038 using non_array = cms::cuda::device::unique_ptr<T>;
0039 };
0040 template <typename T>
0041 struct make_device_unique_selector<T[]> {
0042 using unbounded_array = cms::cuda::device::unique_ptr<T[]>;
0043 };
0044 template <typename T, size_t N>
0045 struct make_device_unique_selector<T[N]> {
0046 struct bounded_array {};
0047 };
0048 }
0049 }
0050
0051 template <typename T>
0052 typename device::impl::make_device_unique_selector<T>::non_array make_device_unique(cudaStream_t stream) {
0053 static_assert(std::is_trivially_constructible<T>::value,
0054 "Allocating with non-trivial constructor on the device memory is not supported");
0055 int dev = currentDevice();
0056 void *mem = allocate_device(dev, sizeof(T), stream);
0057 return typename device::impl::make_device_unique_selector<T>::non_array{reinterpret_cast<T *>(mem),
0058 device::impl::DeviceDeleter{dev}};
0059 }
0060
0061 template <typename T>
0062 typename device::impl::make_device_unique_selector<T>::unbounded_array make_device_unique(size_t n,
0063 cudaStream_t stream) {
0064 using element_type = typename std::remove_extent<T>::type;
0065 static_assert(std::is_trivially_constructible<element_type>::value,
0066 "Allocating with non-trivial constructor on the device memory is not supported");
0067 int dev = currentDevice();
0068 void *mem = allocate_device(dev, n * sizeof(element_type), stream);
0069 return typename device::impl::make_device_unique_selector<T>::unbounded_array{
0070 reinterpret_cast<element_type *>(mem), device::impl::DeviceDeleter{dev}};
0071 }
0072
0073 template <typename T, typename... Args>
0074 typename device::impl::make_device_unique_selector<T>::bounded_array make_device_unique(Args &&...) = delete;
0075
0076
0077 template <typename T>
0078 typename device::impl::make_device_unique_selector<T>::non_array make_device_unique_uninitialized(
0079 cudaStream_t stream) {
0080 int dev = currentDevice();
0081 void *mem = allocate_device(dev, sizeof(T), stream);
0082 return typename device::impl::make_device_unique_selector<T>::non_array{reinterpret_cast<T *>(mem),
0083 device::impl::DeviceDeleter{dev}};
0084 }
0085
0086 template <typename T>
0087 typename device::impl::make_device_unique_selector<T>::unbounded_array make_device_unique_uninitialized(
0088 size_t n, cudaStream_t stream) {
0089 using element_type = typename std::remove_extent<T>::type;
0090 int dev = currentDevice();
0091 void *mem = allocate_device(dev, n * sizeof(element_type), stream);
0092 return typename device::impl::make_device_unique_selector<T>::unbounded_array{
0093 reinterpret_cast<element_type *>(mem), device::impl::DeviceDeleter{dev}};
0094 }
0095
0096 template <typename T, typename... Args>
0097 typename device::impl::make_device_unique_selector<T>::bounded_array make_device_unique_uninitialized(Args &&...) =
0098 delete;
0099 }
0100 }
0101
0102 #endif