File indexing completed on 2024-04-23 22:56:18
0001 #include <cstdint>
0002 #include <vector>
0003
0004 #include <alpaka/alpaka.hpp>
0005
0006 #include "FWCore/Utilities/interface/stringize.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0011
0012 using namespace cms::alpakatools;
0013 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0014
0015
0016
0017
0018
/// Selects which uniform_elements() overload the test kernel iterates over.
enum class RangeType {
  /// uniform_elements(acc): no explicit extent is passed.
  Default,
  /// uniform_elements(acc, size): only the extent is passed.
  ExtentLimited,
  /// uniform_elements(acc, skip, size): a starting offset and the extent are passed.
  ExtentLimitedWithShift
};
0020
0021
/// Selects where the test kernel accumulates (and, for Block, verifies) its iteration count.
enum class LoopScope {
  /// Count into a variable obtained from alpaka::declareSharedVar and check it inside the kernel.
  Block,
  /// Count into the counter passed by pointer to the kernel; the caller checks it.
  Grid
};
0023
0024 template <RangeType rangeType, LoopScope loopScope, typename TAcc>
0025 size_t constexpr expectedCount(TAcc const& acc, size_t skip, size_t size) {
0026 if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0027 return skip < size ? size - skip : 0;
0028 else if constexpr (rangeType == RangeType::ExtentLimited)
0029 return size;
0030 else
0031 if constexpr (loopScope == LoopScope::Block)
0032 return alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0033 else
0034 return alpaka::getWorkDiv<alpaka::Grid, alpaka::Elems>(acc)[0u];
0035 }
0036
0037 template <RangeType rangeType, LoopScope loopScope>
0038 size_t constexpr expectedCount(WorkDiv1D const& workDiv, size_t skip, size_t size) {
0039 if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0040 return skip < size ? size - skip : 0;
0041 else if constexpr (rangeType == RangeType::ExtentLimited)
0042 return size;
0043 else
0044 if constexpr (loopScope == LoopScope::Block)
0045 return workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0046 else
0047 return workDiv.m_gridBlockExtent[0u] * workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0048 }
0049
0050 template <RangeType rangeType, LoopScope loopScope>
0051 struct testWordDivisionDefaultRange {
0052 template <typename TAcc>
0053 ALPAKA_FN_ACC void operator()(TAcc const& acc, size_t size, size_t skip, size_t* globalCounter) const {
0054 size_t& counter =
0055 (loopScope == LoopScope::Grid ? *globalCounter : alpaka::declareSharedVar<size_t, __COUNTER__>(acc));
0056
0057 if constexpr (loopScope == LoopScope::Block) {
0058 if (cms::alpakatools::once_per_block(acc)) {
0059 counter = 0;
0060 }
0061 alpaka::syncBlockThreads(acc);
0062 }
0063
0064 if constexpr (rangeType == RangeType::Default)
0065 for ([[maybe_unused]] auto idx : uniform_elements(acc))
0066 alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0067 else if constexpr (rangeType == RangeType::ExtentLimited)
0068 for ([[maybe_unused]] auto idx : uniform_elements(acc, size))
0069 alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0070 else if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0071 for ([[maybe_unused]] auto idx : uniform_elements(acc, skip, size))
0072 alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0073 alpaka::syncBlockThreads(acc);
0074
0075 if constexpr (loopScope == LoopScope::Block) {
0076 if (cms::alpakatools::once_per_block(acc)) {
0077 auto expected = expectedCount<rangeType, loopScope>(acc, skip, size);
0078 ALPAKA_ASSERT_ACC(counter == expected);
0079 }
0080 }
0081 }
0082 };
0083
0084 int main() {
0085
0086 auto const& devices = cms::alpakatools::devices<Platform>();
0087 if (devices.empty()) {
0088 std::cerr << "No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0089 "the test will be skipped.\n";
0090 exit(EXIT_FAILURE);
0091 }
0092
0093 for (auto const& device : devices) {
0094
0095 Queue queue(device);
0096 auto counter_d = cms::alpakatools::make_device_buffer<size_t>(queue);
0097 auto counter_h = cms::alpakatools::make_host_buffer<size_t>(queue);
0098 alpaka::memset(queue, counter_d, 0);
0099 ssize_t BlockSize = 512;
0100 size_t GridSize = 4;
0101 for (size_t blocks = 1; blocks < GridSize * 3; blocks++)
0102 for (auto sizeFuzz :
0103 std::initializer_list<ssize_t>{-10 * BlockSize / 13, -BlockSize / 2, -1, 0, 1, BlockSize / 2})
0104 for (auto skip : std::initializer_list<ssize_t>{0,
0105 1,
0106 BlockSize / 2,
0107 BlockSize - 1,
0108 BlockSize,
0109 BlockSize + 1,
0110 BlockSize + BlockSize / 2,
0111 2 * BlockSize - 1,
0112 2 * BlockSize,
0113 2 * BlockSize + 1}) {
0114
0115
0116 alpaka::memset(queue, counter_d, 0);
0117 auto workdiv = make_workdiv<Acc1D>(BlockSize, GridSize);
0118 alpaka::enqueue(
0119 queue,
0120 alpaka::createTaskKernel<Acc1D>(workdiv,
0121 testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0122 blocks * BlockSize + sizeFuzz,
0123 skip,
0124 counter_d.data()));
0125 alpaka::memcpy(queue, counter_h, counter_d);
0126 alpaka::wait(queue);
0127 auto expected =
0128 expectedCount<RangeType::Default, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0129 assert(*counter_h.data() == expected);
0130
0131
0132 alpaka::memset(queue, counter_d, 0);
0133 alpaka::enqueue(
0134 queue,
0135 alpaka::createTaskKernel<Acc1D>(workdiv,
0136 testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0137 blocks * BlockSize + sizeFuzz,
0138 skip,
0139 counter_d.data()));
0140 alpaka::memcpy(queue, counter_h, counter_d);
0141 alpaka::wait(queue);
0142 expected =
0143 expectedCount<RangeType::ExtentLimited, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0144 assert(*counter_h.data() == expected);
0145
0146
0147 alpaka::memset(queue, counter_d, 0);
0148 alpaka::enqueue(queue,
0149 alpaka::createTaskKernel<Acc1D>(
0150 workdiv,
0151 testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0152 blocks * BlockSize + sizeFuzz,
0153 skip,
0154 counter_d.data()));
0155 alpaka::memcpy(queue, counter_h, counter_d);
0156 alpaka::wait(queue);
0157 expected = expectedCount<RangeType::ExtentLimitedWithShift, LoopScope::Grid>(
0158 workdiv, skip, blocks * BlockSize + sizeFuzz);
0159 assert(*counter_h.data() == expected);
0160
0161
0162 alpaka::enqueue(
0163 queue,
0164 alpaka::createTaskKernel<Acc1D>(workdiv,
0165 testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0166 blocks * BlockSize + sizeFuzz,
0167 skip,
0168 counter_d.data()));
0169 alpaka::enqueue(
0170 queue,
0171 alpaka::createTaskKernel<Acc1D>(workdiv,
0172 testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0173 blocks * BlockSize + sizeFuzz,
0174 skip,
0175 counter_d.data()));
0176 alpaka::enqueue(queue,
0177 alpaka::createTaskKernel<Acc1D>(
0178 workdiv,
0179 testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0180 blocks * BlockSize + sizeFuzz,
0181 skip,
0182 counter_d.data()));
0183 }
0184 alpaka::wait(queue);
0185 }
0186 }