Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-08-15 01:07:43

0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
0003 
0004 #include <alpaka/alpaka.hpp>
0005 
0006 #include "HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
0009 
0010 namespace cms::alpakatools {
0011 
0012   namespace traits {
0013 
0014     //! The caching memory allocator trait.
0015     template <typename TElem,
0016               typename TDim,
0017               typename TIdx,
0018               typename TDev,
0019               typename TQueue,
0020               typename = void,
0021               typename = std::enable_if_t<cms::alpakatools::is_device_v<TDev> and cms::alpakatools::is_queue_v<TQueue>>>
0022     struct CachedBufAlloc {
0023       static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
0024     };
0025 
0026     //! The caching memory allocator implementation for the CPU device
0027     template <typename TElem, typename TDim, typename TIdx, typename TQueue>
0028     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
0029       template <typename TExtent>
0030       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev, TQueue queue, TExtent const& extent)
0031           -> alpaka::BufCpu<TElem, TDim, TIdx> {
0032         // non-cached, queue-ordered asynchronous host-only memory
0033         return alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
0034       }
0035     };
0036 
0037 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
0038 
0039     //! The caching memory allocator implementation for the pinned host memory, with a blocking queue
0040     template <typename TElem, typename TDim, typename TIdx>
0041     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtBlocking, void> {
0042       template <typename TExtent>
0043       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0044                                                 alpaka::QueueCudaRtBlocking queue,
0045                                                 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0046         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0047 
0048         auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
0049 
0050         // FIXME the BufCpu does not support a pitch ?
0051         size_t size = alpaka::getExtentProduct(extent);
0052         size_t sizeBytes = size * sizeof(TElem);
0053         void* memPtr = allocator.allocate(sizeBytes, queue);
0054 
0055         // use a custom deleter to return the buffer to the CachingAllocator
0056         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0057 
0058         return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0059       }
0060     };
0061 
0062     //! The caching memory allocator implementation for the pinned host memory, with a non-blocking queue
0063     template <typename TElem, typename TDim, typename TIdx>
0064     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
0065       template <typename TExtent>
0066       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0067                                                 alpaka::QueueCudaRtNonBlocking queue,
0068                                                 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0069         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0070 
0071         auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
0072 
0073         // FIXME the BufCpu does not support a pitch ?
0074         size_t size = alpaka::getExtentProduct(extent);
0075         size_t sizeBytes = size * sizeof(TElem);
0076         void* memPtr = allocator.allocate(sizeBytes, queue);
0077 
0078         // use a custom deleter to return the buffer to the CachingAllocator
0079         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0080 
0081         return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0082       }
0083     };
0084 
0085     //! The caching memory allocator implementation for the CUDA device
0086     template <typename TElem, typename TDim, typename TIdx, typename TQueue>
0087     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
0088       template <typename TExtent>
0089       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev, TQueue queue, TExtent const& extent)
0090           -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
0091         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0092 
0093         auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
0094 
0095         size_t width = alpaka::getWidth(extent);
0096         size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
0097         // TODO implement pitch for TDim > 1
0098         size_t pitchBytes = widthBytes;
0099         size_t size = alpaka::getExtentProduct(extent);
0100         size_t sizeBytes = size * sizeof(TElem);
0101         void* memPtr = allocator.allocate(sizeBytes, queue);
0102 
0103         // use a custom deleter to return the buffer to the CachingAllocator
0104         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0105 
0106         return alpaka::BufCudaRt<TElem, TDim, TIdx>(
0107             dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), pitchBytes, extent);
0108       }
0109     };
0110 
0111 #endif  // ALPAKA_ACC_GPU_CUDA_ENABLED
0112 
0113 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
0114 
0115     //! The caching memory allocator implementation for the pinned host memory, with a blocking queue
0116     template <typename TElem, typename TDim, typename TIdx>
0117     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtBlocking, void> {
0118       template <typename TExtent>
0119       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0120                                                 alpaka::QueueHipRtBlocking queue,
0121                                                 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0122         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0123 
0124         auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
0125 
0126         // FIXME the BufCpu does not support a pitch ?
0127         size_t size = alpaka::getExtentProduct(extent);
0128         size_t sizeBytes = size * sizeof(TElem);
0129         void* memPtr = allocator.allocate(sizeBytes, queue);
0130 
0131         // use a custom deleter to return the buffer to the CachingAllocator
0132         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0133 
0134         return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0135       }
0136     };
0137 
0138     //! The caching memory allocator implementation for the pinned host memory, with a non-blocking queue
0139     template <typename TElem, typename TDim, typename TIdx>
0140     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
0141       template <typename TExtent>
0142       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0143                                                 alpaka::QueueHipRtNonBlocking queue,
0144                                                 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0145         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0146 
0147         auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
0148 
0149         // FIXME the BufCpu does not support a pitch ?
0150         size_t size = alpaka::getExtentProduct(extent);
0151         size_t sizeBytes = size * sizeof(TElem);
0152         void* memPtr = allocator.allocate(sizeBytes, queue);
0153 
0154         // use a custom deleter to return the buffer to the CachingAllocator
0155         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0156 
0157         return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0158       }
0159     };
0160 
0161     //! The caching memory allocator implementation for the ROCm/HIP device
0162     template <typename TElem,
0163               typename TDim,
0164               typename TIdx,
0165               typename TQueue,
0166               typename = std::enable_if_t<cms::alpakatools::is_queue_v<TQueue>>>
0167     struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
0168       template <typename TExtent>
0169       ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
0170           -> alpaka::BufHipRt<TElem, TDim, TIdx> {
0171         ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0172 
0173         auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
0174 
0175         size_t width = alpaka::getWidth(extent);
0176         size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
0177         // TODO implement pitch for TDim > 1
0178         size_t pitchBytes = widthBytes;
0179         size_t size = alpaka::getExtentProduct(extent);
0180         size_t sizeBytes = size * sizeof(TElem);
0181         void* memPtr = allocator.allocate(sizeBytes, queue);
0182 
0183         // use a custom deleter to return the buffer to the CachingAllocator
0184         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0185 
0186         return alpaka::BufHipRt<TElem, TDim, TIdx>(
0187             dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), pitchBytes, extent);
0188       }
0189     };
0190 
0191 #endif  // ALPAKA_ACC_GPU_HIP_ENABLED
0192 
0193   }  // namespace traits
0194 
0195   template <typename TElem,
0196             typename TIdx,
0197             typename TExtent,
0198             typename TQueue,
0199             typename TDev,
0200             typename = std::enable_if_t<cms::alpakatools::is_device_v<TDev> and cms::alpakatools::is_queue_v<TQueue>>>
0201   ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
0202     return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
0203   }
0204 
0205 }  // namespace cms::alpakatools
0206 
0207 #endif  // HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h