File indexing completed on 2024-04-20 02:31:58
0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
0003
0004 #include <alpaka/alpaka.hpp>
0005
0006 #include "HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h"
0008
0009 namespace cms::alpakatools {
0010
0011 namespace traits {
0012
0013
0014 template <typename TElem,
0015 typename TDim,
0016 typename TIdx,
0017 typename TDev,
0018 typename TQueue,
0019 typename = void,
0020 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
0021 struct CachedBufAlloc {
0022 static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
0023 };
0024
0025
0026 template <typename TElem, typename TDim, typename TIdx, typename TQueue>
0027 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
0028 template <typename TExtent>
0029 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev, TQueue queue, TExtent const& extent)
0030 -> alpaka::BufCpu<TElem, TDim, TIdx> {
0031
0032 return alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
0033 }
0034 };
0035
0036 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
0037
0038
0039 template <typename TElem, typename TDim, typename TIdx>
0040 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtBlocking, void> {
0041 template <typename TExtent>
0042 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0043 alpaka::QueueCudaRtBlocking queue,
0044 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0045 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0046
0047 auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
0048
0049
0050 size_t size = alpaka::getExtentProduct(extent);
0051 size_t sizeBytes = size * sizeof(TElem);
0052 void* memPtr = allocator.allocate(sizeBytes, queue);
0053
0054
0055 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0056
0057 return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0058 }
0059 };
0060
0061
0062 template <typename TElem, typename TDim, typename TIdx>
0063 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
0064 template <typename TExtent>
0065 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0066 alpaka::QueueCudaRtNonBlocking queue,
0067 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0068 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0069
0070 auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
0071
0072
0073 size_t size = alpaka::getExtentProduct(extent);
0074 size_t sizeBytes = size * sizeof(TElem);
0075 void* memPtr = allocator.allocate(sizeBytes, queue);
0076
0077
0078 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0079
0080 return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0081 }
0082 };
0083
0084
0085 template <typename TElem, typename TDim, typename TIdx, typename TQueue>
0086 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
0087 template <typename TExtent>
0088 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev, TQueue queue, TExtent const& extent)
0089 -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
0090 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0091
0092 auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
0093
0094 size_t width = alpaka::getWidth(extent);
0095 size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
0096
0097 size_t pitchBytes = widthBytes;
0098 size_t size = alpaka::getExtentProduct(extent);
0099 size_t sizeBytes = size * sizeof(TElem);
0100 void* memPtr = allocator.allocate(sizeBytes, queue);
0101
0102
0103 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0104
0105 return alpaka::BufCudaRt<TElem, TDim, TIdx>(
0106 dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
0107 }
0108 };
0109
0110 #endif
0111
0112 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
0113
0114
0115 template <typename TElem, typename TDim, typename TIdx>
0116 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtBlocking, void> {
0117 template <typename TExtent>
0118 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0119 alpaka::QueueHipRtBlocking queue,
0120 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0121 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0122
0123 auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
0124
0125
0126 size_t size = alpaka::getExtentProduct(extent);
0127 size_t sizeBytes = size * sizeof(TElem);
0128 void* memPtr = allocator.allocate(sizeBytes, queue);
0129
0130
0131 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0132
0133 return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0134 }
0135 };
0136
0137
0138 template <typename TElem, typename TDim, typename TIdx>
0139 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
0140 template <typename TExtent>
0141 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0142 alpaka::QueueHipRtNonBlocking queue,
0143 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0144 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0145
0146 auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
0147
0148
0149 size_t size = alpaka::getExtentProduct(extent);
0150 size_t sizeBytes = size * sizeof(TElem);
0151 void* memPtr = allocator.allocate(sizeBytes, queue);
0152
0153
0154 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0155
0156 return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0157 }
0158 };
0159
0160
0161 template <typename TElem, typename TDim, typename TIdx, typename TQueue>
0162 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
0163 template <typename TExtent>
0164 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
0165 -> alpaka::BufHipRt<TElem, TDim, TIdx> {
0166 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0167
0168 auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
0169
0170 size_t width = alpaka::getWidth(extent);
0171 size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
0172
0173 size_t pitchBytes = widthBytes;
0174 size_t size = alpaka::getExtentProduct(extent);
0175 size_t sizeBytes = size * sizeof(TElem);
0176 void* memPtr = allocator.allocate(sizeBytes, queue);
0177
0178
0179 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0180
0181 return alpaka::BufHipRt<TElem, TDim, TIdx>(
0182 dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
0183 }
0184 };
0185
0186 #endif
0187
0188 }
0189
0190 template <typename TElem,
0191 typename TIdx,
0192 typename TExtent,
0193 typename TQueue,
0194 typename TDev,
0195 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
0196 ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
0197 return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
0198 }
0199
0200 }
0201
0202 #endif