File indexing completed on 2024-09-07 04:36:34
0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
0003
0004 #include <alpaka/alpaka.hpp>
0005
0006 #include "HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h"
0008
0009 namespace cms::alpakatools {
0010
0011 namespace traits {
0012
0013
0014 template <typename TElem,
0015 typename TDim,
0016 typename TIdx,
0017 typename TDev,
0018 typename TQueue,
0019 typename = void,
0020 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
0021 struct CachedBufAlloc {
0022 static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
0023 };
0024
0025
0026 template <typename TElem, typename TDim, typename TIdx, typename TQueue>
0027 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
0028 template <typename TExtent>
0029 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0030 TQueue queue,
0031 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0032
0033 return alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
0034 }
0035 };
0036
0037 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
0038
0039
0040 template <typename TElem, typename TDim, typename TIdx>
0041 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtBlocking, void> {
0042 template <typename TExtent>
0043 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0044 alpaka::QueueCudaRtBlocking queue,
0045 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0046 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0047
0048 auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
0049
0050
0051 size_t size = alpaka::getExtentProduct(extent);
0052 size_t sizeBytes = size * sizeof(TElem);
0053 void* memPtr = allocator.allocate(sizeBytes, queue);
0054
0055
0056 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0057
0058 return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0059 }
0060 };
0061
0062
0063 template <typename TElem, typename TDim, typename TIdx>
0064 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
0065 template <typename TExtent>
0066 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0067 alpaka::QueueCudaRtNonBlocking queue,
0068 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0069 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0070
0071 auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
0072
0073
0074 size_t size = alpaka::getExtentProduct(extent);
0075 size_t sizeBytes = size * sizeof(TElem);
0076 void* memPtr = allocator.allocate(sizeBytes, queue);
0077
0078
0079 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0080
0081 return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0082 }
0083 };
0084
0085
0086 template <typename TElem, typename TDim, typename TIdx, typename TQueue>
0087 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
0088 template <typename TExtent>
0089 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev,
0090 TQueue queue,
0091 TExtent const& extent) -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
0092 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0093
0094 auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
0095
0096 size_t width = alpaka::getWidth(extent);
0097 size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
0098
0099 size_t pitchBytes = widthBytes;
0100 size_t size = alpaka::getExtentProduct(extent);
0101 size_t sizeBytes = size * sizeof(TElem);
0102 void* memPtr = allocator.allocate(sizeBytes, queue);
0103
0104
0105 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0106
0107 return alpaka::BufCudaRt<TElem, TDim, TIdx>(
0108 dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
0109 }
0110 };
0111
0112 #endif
0113
0114 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
0115
0116
0117 template <typename TElem, typename TDim, typename TIdx>
0118 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtBlocking, void> {
0119 template <typename TExtent>
0120 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0121 alpaka::QueueHipRtBlocking queue,
0122 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0123 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0124
0125 auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
0126
0127
0128 size_t size = alpaka::getExtentProduct(extent);
0129 size_t sizeBytes = size * sizeof(TElem);
0130 void* memPtr = allocator.allocate(sizeBytes, queue);
0131
0132
0133 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0134
0135 return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0136 }
0137 };
0138
0139
0140 template <typename TElem, typename TDim, typename TIdx>
0141 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
0142 template <typename TExtent>
0143 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
0144 alpaka::QueueHipRtNonBlocking queue,
0145 TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
0146 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0147
0148 auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
0149
0150
0151 size_t size = alpaka::getExtentProduct(extent);
0152 size_t sizeBytes = size * sizeof(TElem);
0153 void* memPtr = allocator.allocate(sizeBytes, queue);
0154
0155
0156 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0157
0158 return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
0159 }
0160 };
0161
0162
0163 template <typename TElem, typename TDim, typename TIdx, typename TQueue>
0164 struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
0165 template <typename TExtent>
0166 ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev,
0167 TQueue queue,
0168 TExtent const& extent) -> alpaka::BufHipRt<TElem, TDim, TIdx> {
0169 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
0170
0171 auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
0172
0173 size_t width = alpaka::getWidth(extent);
0174 size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
0175
0176 size_t pitchBytes = widthBytes;
0177 size_t size = alpaka::getExtentProduct(extent);
0178 size_t sizeBytes = size * sizeof(TElem);
0179 void* memPtr = allocator.allocate(sizeBytes, queue);
0180
0181
0182 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
0183
0184 return alpaka::BufHipRt<TElem, TDim, TIdx>(
0185 dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
0186 }
0187 };
0188
0189 #endif
0190
0191 }
0192
0193 template <typename TElem,
0194 typename TIdx,
0195 typename TExtent,
0196 typename TQueue,
0197 typename TDev,
0198 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
0199 ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
0200 return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
0201 }
0202
0203 }
0204
0205 #endif