File indexing completed on 2025-01-22 07:34:11
0001 #include <cstdint>
0002 #include <vector>
0003
0004 #include <alpaka/alpaka.hpp>
0005
0006 #include "FWCore/Utilities/interface/stringize.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0011
0012 using namespace cms::alpakatools;
0013 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0014
0015
0016
0017
0018
0019 enum class RangeType { Default, ExtentLimited, ExtentLimitedWithShift };
0020
0021
0022 enum class LoopScope { Block, Grid };
0023
0024 template <RangeType rangeType, LoopScope loopScope>
0025 size_t constexpr expectedCount(WorkDiv1D const& workDiv, size_t skip, size_t size) {
0026 if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0027 return skip < size ? size - skip : 0;
0028 else if constexpr (rangeType == RangeType::ExtentLimited)
0029 return size;
0030 else
0031 if constexpr (loopScope == LoopScope::Block)
0032 return workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0033 else
0034 return workDiv.m_gridBlockExtent[0u] * workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0035 }
0036
0037 template <RangeType rangeType, LoopScope loopScope>
0038 struct testWordDivisionDefaultRange {
0039 ALPAKA_FN_ACC void operator()(Acc1D const& acc, size_t size, size_t skip, size_t* globalCounter) const {
0040 size_t& counter =
0041 (loopScope == LoopScope::Grid ? *globalCounter : alpaka::declareSharedVar<size_t, __COUNTER__>(acc));
0042
0043 if constexpr (loopScope == LoopScope::Block) {
0044 if (cms::alpakatools::once_per_block(acc)) {
0045 counter = 0;
0046 }
0047 alpaka::syncBlockThreads(acc);
0048 }
0049
0050 if constexpr (rangeType == RangeType::Default)
0051 for ([[maybe_unused]] auto idx : uniform_elements(acc))
0052 alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0053 else if constexpr (rangeType == RangeType::ExtentLimited)
0054 for ([[maybe_unused]] auto idx : uniform_elements(acc, size))
0055 alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0056 else if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0057 for ([[maybe_unused]] auto idx : uniform_elements(acc, skip, size))
0058 alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0059 alpaka::syncBlockThreads(acc);
0060
0061 if constexpr (loopScope == LoopScope::Block) {
0062 if (cms::alpakatools::once_per_block(acc)) {
0063 auto expected = expectedCount<rangeType, loopScope>(acc, skip, size);
0064 ALPAKA_ASSERT_ACC(counter == expected);
0065 }
0066 }
0067 }
0068 };
0069
0070 int main() {
0071
0072 auto const& devices = cms::alpakatools::devices<Platform>();
0073 if (devices.empty()) {
0074 std::cerr << "No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0075 "the test will be skipped.\n";
0076 exit(EXIT_FAILURE);
0077 }
0078
0079 for (auto const& device : devices) {
0080
0081 Queue queue(device);
0082 auto counter_d = cms::alpakatools::make_device_buffer<size_t>(queue);
0083 auto counter_h = cms::alpakatools::make_host_buffer<size_t>(queue);
0084 alpaka::memset(queue, counter_d, 0);
0085 ssize_t BlockSize = 512;
0086 size_t GridSize = 4;
0087 for (size_t blocks = 1; blocks < GridSize * 3; blocks++)
0088 for (auto sizeFuzz :
0089 std::initializer_list<ssize_t>{-10 * BlockSize / 13, -BlockSize / 2, -1, 0, 1, BlockSize / 2})
0090 for (auto skip : std::initializer_list<ssize_t>{0,
0091 1,
0092 BlockSize / 2,
0093 BlockSize - 1,
0094 BlockSize,
0095 BlockSize + 1,
0096 BlockSize + BlockSize / 2,
0097 2 * BlockSize - 1,
0098 2 * BlockSize,
0099 2 * BlockSize + 1}) {
0100
0101
0102 alpaka::memset(queue, counter_d, 0);
0103 auto workdiv = make_workdiv<Acc1D>(BlockSize, GridSize);
0104 alpaka::enqueue(
0105 queue,
0106 alpaka::createTaskKernel<Acc1D>(workdiv,
0107 testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0108 blocks * BlockSize + sizeFuzz,
0109 skip,
0110 counter_d.data()));
0111 alpaka::memcpy(queue, counter_h, counter_d);
0112 alpaka::wait(queue);
0113 auto expected =
0114 expectedCount<RangeType::Default, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0115 assert(*counter_h.data() == expected);
0116
0117
0118 alpaka::memset(queue, counter_d, 0);
0119 alpaka::enqueue(
0120 queue,
0121 alpaka::createTaskKernel<Acc1D>(workdiv,
0122 testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0123 blocks * BlockSize + sizeFuzz,
0124 skip,
0125 counter_d.data()));
0126 alpaka::memcpy(queue, counter_h, counter_d);
0127 alpaka::wait(queue);
0128 expected =
0129 expectedCount<RangeType::ExtentLimited, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0130 assert(*counter_h.data() == expected);
0131
0132
0133 alpaka::memset(queue, counter_d, 0);
0134 alpaka::enqueue(queue,
0135 alpaka::createTaskKernel<Acc1D>(
0136 workdiv,
0137 testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0138 blocks * BlockSize + sizeFuzz,
0139 skip,
0140 counter_d.data()));
0141 alpaka::memcpy(queue, counter_h, counter_d);
0142 alpaka::wait(queue);
0143 expected = expectedCount<RangeType::ExtentLimitedWithShift, LoopScope::Grid>(
0144 workdiv, skip, blocks * BlockSize + sizeFuzz);
0145 assert(*counter_h.data() == expected);
0146
0147
0148 alpaka::enqueue(
0149 queue,
0150 alpaka::createTaskKernel<Acc1D>(workdiv,
0151 testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0152 blocks * BlockSize + sizeFuzz,
0153 skip,
0154 counter_d.data()));
0155 alpaka::enqueue(
0156 queue,
0157 alpaka::createTaskKernel<Acc1D>(workdiv,
0158 testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0159 blocks * BlockSize + sizeFuzz,
0160 skip,
0161 counter_d.data()));
0162 alpaka::enqueue(queue,
0163 alpaka::createTaskKernel<Acc1D>(
0164 workdiv,
0165 testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0166 blocks * BlockSize + sizeFuzz,
0167 skip,
0168 counter_d.data()));
0169 }
0170 alpaka::wait(queue);
0171 }
0172 }