Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-07 04:36:35

0001 #ifndef HeterogeneousCore_CUDAUtilities_interface_host_unique_ptr_h
0002 #define HeterogeneousCore_CUDAUtilities_interface_host_unique_ptr_h
0003 
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <new>
#include <type_traits>

#include "HeterogeneousCore/CUDAUtilities/interface/allocate_host.h"
0009 
0010 namespace cms {
0011   namespace cuda {
0012     namespace host {
0013       namespace impl {
0014 
// Kind of host memory owned by a host::unique_ptr.
// The underlying bool is true for pinned memory and false for default (pageable) memory.
enum class MemoryType : bool { kDefault = false, kPinned = true };
0019 
0020         // Custom deleter for host memory, with an internal state to distinguish pageable and pinned host memory
0021         class HostDeleter {
0022         public:
0023           // The default constructor is needed by the default constructor of unique_ptr<T, HostDeleter>,
0024           // which is needed by the default constructor of HostProduct<T>, which is needed by the ROOT dictionary
0025           HostDeleter() : type_{MemoryType::kDefault} {}
0026           HostDeleter(MemoryType type) : type_{type} {}
0027 
0028           void operator()(void *ptr) {
0029             if (type_ == MemoryType::kPinned) {
0030               cms::cuda::free_host(ptr);
0031             } else {
0032               std::free(ptr);
0033             }
0034           }
0035 
0036         private:
0037           MemoryType type_;
0038         };
0039 
0040       }  // namespace impl
0041 
0042       template <typename T>
0043       using unique_ptr = std::unique_ptr<T, impl::HostDeleter>;
0044 
0045       namespace impl {
0046         template <typename T>
0047         struct make_host_unique_selector {
0048           using non_array = cms::cuda::host::unique_ptr<T>;
0049         };
0050         template <typename T>
0051         struct make_host_unique_selector<T[]> {
0052           using unbounded_array = cms::cuda::host::unique_ptr<T[]>;
0053         };
0054         template <typename T, size_t N>
0055         struct make_host_unique_selector<T[N]> {
0056           struct bounded_array {};
0057         };
0058       }  // namespace impl
0059     }  // namespace host
0060 
0061     // Allocate pageable host memory
0062     template <typename T>
0063     typename host::impl::make_host_unique_selector<T>::non_array make_host_unique() {
0064       static_assert(std::is_trivially_constructible<T>::value,
0065                     "Allocating with non-trivial constructor on the host memory is not supported");
0066       // Allocate a buffer aligned to 128 bytes, to match the CUDA cache line size
0067       const size_t alignment = 128;
0068       // std::aligned_alloc() requires the size to be a multiple of the alignment
0069       const size_t size = (sizeof(T) + alignment - 1) / alignment * alignment;
0070       void *mem = std::aligned_alloc(alignment, size);
0071       return typename host::impl::make_host_unique_selector<T>::non_array{reinterpret_cast<T *>(mem),
0072                                                                           host::impl::MemoryType::kDefault};
0073     }
0074 
0075     template <typename T>
0076     typename host::impl::make_host_unique_selector<T>::unbounded_array make_host_unique(size_t n) {
0077       using element_type = typename std::remove_extent<T>::type;
0078       static_assert(std::is_trivially_constructible<element_type>::value,
0079                     "Allocating with non-trivial constructor on the host memory is not supported");
0080       // Allocate a buffer aligned to 128 bytes, to match the CUDA cache line size
0081       const size_t alignment = 128;
0082       // std::aligned_alloc() requires the size to be a multiple of the alignment
0083       const size_t size = (n * sizeof(element_type) + alignment - 1) / alignment * alignment;
0084       void *mem = std::aligned_alloc(alignment, size);
0085       return typename host::impl::make_host_unique_selector<T>::unbounded_array{reinterpret_cast<element_type *>(mem),
0086                                                                                 host::impl::MemoryType::kDefault};
0087     }
0088 
0089     // Allocate pinned host memory
0090     template <typename T>
0091     typename host::impl::make_host_unique_selector<T>::non_array make_host_unique(cudaStream_t stream) {
0092       static_assert(std::is_trivially_constructible<T>::value,
0093                     "Allocating with non-trivial constructor on the host memory is not supported");
0094       void *mem = allocate_host(sizeof(T), stream);
0095       return typename host::impl::make_host_unique_selector<T>::non_array{reinterpret_cast<T *>(mem),  //
0096                                                                           host::impl::MemoryType::kPinned};
0097     }
0098 
0099     template <typename T>
0100     typename host::impl::make_host_unique_selector<T>::unbounded_array make_host_unique(size_t n, cudaStream_t stream) {
0101       using element_type = typename std::remove_extent<T>::type;
0102       static_assert(std::is_trivially_constructible<element_type>::value,
0103                     "Allocating with non-trivial constructor on the host memory is not supported");
0104       void *mem = allocate_host(n * sizeof(element_type), stream);
0105       return typename host::impl::make_host_unique_selector<T>::unbounded_array{reinterpret_cast<element_type *>(mem),
0106                                                                                 host::impl::MemoryType::kPinned};
0107     }
0108 
0109     // Arrays of known bounds are not supported by std::unique_ptr
0110     template <typename T, typename... Args>
0111     typename host::impl::make_host_unique_selector<T>::bounded_array make_host_unique(Args &&...) = delete;
0112 
0113     // No check for the trivial constructor, make it clear in the interface
0114     template <typename T>
0115     typename host::impl::make_host_unique_selector<T>::non_array make_host_unique_uninitialized(cudaStream_t stream) {
0116       void *mem = allocate_host(sizeof(T), stream);
0117       return typename host::impl::make_host_unique_selector<T>::non_array{reinterpret_cast<T *>(mem),  //
0118                                                                           host::impl::MemoryType::kPinned};
0119     }
0120 
0121     template <typename T>
0122     typename host::impl::make_host_unique_selector<T>::unbounded_array make_host_unique_uninitialized(
0123         size_t n, cudaStream_t stream) {
0124       using element_type = typename std::remove_extent<T>::type;
0125       void *mem = allocate_host(n * sizeof(element_type), stream);
0126       return typename host::impl::make_host_unique_selector<T>::unbounded_array{reinterpret_cast<element_type *>(mem),
0127                                                                                 host::impl::MemoryType::kPinned};
0128     }
0129 
0130     // Arrays of known bounds are not supported by std::unique_ptr
0131     template <typename T, typename... Args>
0132     typename host::impl::make_host_unique_selector<T>::bounded_array make_host_unique_uninitialized(Args &&...) = delete;
0133 
0134   }  // namespace cuda
0135 }  // namespace cms
0136 
0137 #endif