Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-07 04:36:35

0001 #ifndef HeterogeneousCore_CUDAUtilities_interface_device_unique_ptr_h
0002 #define HeterogeneousCore_CUDAUtilities_interface_device_unique_ptr_h
0003 
#include <functional>
#include <limits>
#include <memory>
#include <new>
#include <type_traits>
0006 
0007 #include "FWCore/Utilities/interface/Likely.h"
0008 #include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
0010 
0011 namespace cms {
0012   namespace cuda {
0013     namespace device {
0014       namespace impl {
0015         // Additional layer of types to distinguish from host::unique_ptr
0016         class DeviceDeleter {
0017         public:
0018           DeviceDeleter() = default;  // for edm::Wrapper
0019           DeviceDeleter(int device) : device_{device} {}
0020 
0021           void operator()(void *ptr) {
0022             if (LIKELY(device_ >= 0)) {
0023               free_device(device_, ptr);
0024             }
0025           }
0026 
0027         private:
0028           int device_ = -1;
0029         };
0030       }  // namespace impl
0031 
0032       template <typename T>
0033       using unique_ptr = std::unique_ptr<T, impl::DeviceDeleter>;
0034 
0035       namespace impl {
0036         template <typename T>
0037         struct make_device_unique_selector {
0038           using non_array = cms::cuda::device::unique_ptr<T>;
0039         };
0040         template <typename T>
0041         struct make_device_unique_selector<T[]> {
0042           using unbounded_array = cms::cuda::device::unique_ptr<T[]>;
0043         };
0044         template <typename T, size_t N>
0045         struct make_device_unique_selector<T[N]> {
0046           struct bounded_array {};
0047         };
0048       }  // namespace impl
0049     }  // namespace device
0050 
0051     template <typename T>
0052     typename device::impl::make_device_unique_selector<T>::non_array make_device_unique(cudaStream_t stream) {
0053       static_assert(std::is_trivially_constructible<T>::value,
0054                     "Allocating with non-trivial constructor on the device memory is not supported");
0055       int dev = currentDevice();
0056       void *mem = allocate_device(dev, sizeof(T), stream);
0057       return typename device::impl::make_device_unique_selector<T>::non_array{reinterpret_cast<T *>(mem),
0058                                                                               device::impl::DeviceDeleter{dev}};
0059     }
0060 
0061     template <typename T>
0062     typename device::impl::make_device_unique_selector<T>::unbounded_array make_device_unique(size_t n,
0063                                                                                               cudaStream_t stream) {
0064       using element_type = typename std::remove_extent<T>::type;
0065       static_assert(std::is_trivially_constructible<element_type>::value,
0066                     "Allocating with non-trivial constructor on the device memory is not supported");
0067       int dev = currentDevice();
0068       void *mem = allocate_device(dev, n * sizeof(element_type), stream);
0069       return typename device::impl::make_device_unique_selector<T>::unbounded_array{
0070           reinterpret_cast<element_type *>(mem), device::impl::DeviceDeleter{dev}};
0071     }
0072 
0073     template <typename T, typename... Args>
0074     typename device::impl::make_device_unique_selector<T>::bounded_array make_device_unique(Args &&...) = delete;
0075 
0076     // No check for the trivial constructor, make it clear in the interface
0077     template <typename T>
0078     typename device::impl::make_device_unique_selector<T>::non_array make_device_unique_uninitialized(
0079         cudaStream_t stream) {
0080       int dev = currentDevice();
0081       void *mem = allocate_device(dev, sizeof(T), stream);
0082       return typename device::impl::make_device_unique_selector<T>::non_array{reinterpret_cast<T *>(mem),
0083                                                                               device::impl::DeviceDeleter{dev}};
0084     }
0085 
0086     template <typename T>
0087     typename device::impl::make_device_unique_selector<T>::unbounded_array make_device_unique_uninitialized(
0088         size_t n, cudaStream_t stream) {
0089       using element_type = typename std::remove_extent<T>::type;
0090       int dev = currentDevice();
0091       void *mem = allocate_device(dev, n * sizeof(element_type), stream);
0092       return typename device::impl::make_device_unique_selector<T>::unbounded_array{
0093           reinterpret_cast<element_type *>(mem), device::impl::DeviceDeleter{dev}};
0094     }
0095 
0096     template <typename T, typename... Args>
0097     typename device::impl::make_device_unique_selector<T>::bounded_array make_device_unique_uninitialized(Args &&...) =
0098         delete;
0099   }  // namespace cuda
0100 }  // namespace cms
0101 
0102 #endif