Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-23 22:56:18

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <vector>

#include <alpaka/alpaka.hpp>

#include "FWCore/Utilities/interface/stringize.h"
#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0011 
0012 using namespace cms::alpakatools;
0013 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0014 
0015 // Kernel running a loop over threads/elements
0016 // One function with multiple flavors
0017 
// Which uniform_elements form the test kernel loops over
enum class RangeType {
  Default,                // uniform_elements(acc)
  ExtentLimited,          // uniform_elements(acc, size)
  ExtentLimitedWithShift  // uniform_elements(acc, skip, size)
};
0020 
// The concurrency scope over which the iterations are counted
enum class LoopScope {
  Block,  // counter is a block-shared variable, verified inside the kernel
  Grid    // counter is the caller-provided pointer, verified by the host
};
0023 
0024 template <RangeType rangeType, LoopScope loopScope, typename TAcc>
0025 size_t constexpr expectedCount(TAcc const& acc, size_t skip, size_t size) {
0026   if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0027     return skip < size ? size - skip : 0;
0028   else if constexpr (rangeType == RangeType::ExtentLimited)
0029     return size;
0030   else /* rangeType == RangeType::Default */
0031     if constexpr (loopScope == LoopScope::Block)
0032       return alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0033     else
0034       return alpaka::getWorkDiv<alpaka::Grid, alpaka::Elems>(acc)[0u];
0035 }
0036 
0037 template <RangeType rangeType, LoopScope loopScope>
0038 size_t constexpr expectedCount(WorkDiv1D const& workDiv, size_t skip, size_t size) {
0039   if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0040     return skip < size ? size - skip : 0;
0041   else if constexpr (rangeType == RangeType::ExtentLimited)
0042     return size;
0043   else /* rangeType == RangeType::Default */
0044     if constexpr (loopScope == LoopScope::Block)
0045       return workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0046     else
0047       return workDiv.m_gridBlockExtent[0u] * workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0048 }
0049 
0050 template <RangeType rangeType, LoopScope loopScope>
0051 struct testWordDivisionDefaultRange {
0052   template <typename TAcc>
0053   ALPAKA_FN_ACC void operator()(TAcc const& acc, size_t size, size_t skip, size_t* globalCounter) const {
0054     size_t& counter =
0055         (loopScope == LoopScope::Grid ? *globalCounter : alpaka::declareSharedVar<size_t, __COUNTER__>(acc));
0056     // Init the counter for block range. Grid range does so my mean of memset.
0057     if constexpr (loopScope == LoopScope::Block) {
0058       if (cms::alpakatools::once_per_block(acc)) {
0059         counter = 0;
0060       }
0061       alpaka::syncBlockThreads(acc);
0062     }
0063     // The loop we are testing
0064     if constexpr (rangeType == RangeType::Default)
0065       for ([[maybe_unused]] auto idx : uniform_elements(acc))
0066         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0067     else if constexpr (rangeType == RangeType::ExtentLimited)
0068       for ([[maybe_unused]] auto idx : uniform_elements(acc, size))
0069         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0070     else if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0071       for ([[maybe_unused]] auto idx : uniform_elements(acc, skip, size))
0072         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0073     alpaka::syncBlockThreads(acc);
0074     // Check the result. Grid range will check by memcpy-ing the result.
0075     if constexpr (loopScope == LoopScope::Block) {
0076       if (cms::alpakatools::once_per_block(acc)) {
0077         auto expected = expectedCount<rangeType, loopScope>(acc, skip, size);
0078         ALPAKA_ASSERT_ACC(counter == expected);
0079       }
0080     }
0081   }
0082 };
0083 
0084 int main() {
0085   // get the list of devices on the current platform
0086   auto const& devices = cms::alpakatools::devices<Platform>();
0087   if (devices.empty()) {
0088     std::cerr << "No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0089       "the test will be skipped.\n";
0090     exit(EXIT_FAILURE);
0091   }
0092 
0093   for (auto const& device : devices) {
0094     // Get global memory
0095     Queue queue(device);
0096     auto counter_d = cms::alpakatools::make_device_buffer<size_t>(queue);
0097     auto counter_h = cms::alpakatools::make_host_buffer<size_t>(queue);
0098     alpaka::memset(queue, counter_d, 0);
0099     ssize_t BlockSize = 512;
0100     size_t GridSize = 4;
0101     for (size_t blocks = 1; blocks < GridSize * 3; blocks++)
0102       for (auto sizeFuzz :
0103            std::initializer_list<ssize_t>{-10 * BlockSize / 13, -BlockSize / 2, -1, 0, 1, BlockSize / 2})
0104         for (auto skip : std::initializer_list<ssize_t>{0,
0105                                                         1,
0106                                                         BlockSize / 2,
0107                                                         BlockSize - 1,
0108                                                         BlockSize,
0109                                                         BlockSize + 1,
0110                                                         BlockSize + BlockSize / 2,
0111                                                         2 * BlockSize - 1,
0112                                                         2 * BlockSize,
0113                                                         2 * BlockSize + 1}) {
0114           // Grid level iteration: we need to initialize/check at the grid level
0115           // Default range
0116           alpaka::memset(queue, counter_d, 0);
0117           auto workdiv = make_workdiv<Acc1D>(BlockSize, GridSize);
0118           alpaka::enqueue(
0119               queue,
0120               alpaka::createTaskKernel<Acc1D>(workdiv,
0121                                               testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0122                                               blocks * BlockSize + sizeFuzz,
0123                                               skip,
0124                                               counter_d.data()));
0125           alpaka::memcpy(queue, counter_h, counter_d);
0126           alpaka::wait(queue);
0127           auto expected =
0128               expectedCount<RangeType::Default, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0129           assert(*counter_h.data() == expected);
0130 
0131           // ExtentLimited range
0132           alpaka::memset(queue, counter_d, 0);
0133           alpaka::enqueue(
0134               queue,
0135               alpaka::createTaskKernel<Acc1D>(workdiv,
0136                                               testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0137                                               blocks * BlockSize + sizeFuzz,
0138                                               skip,
0139                                               counter_d.data()));
0140           alpaka::memcpy(queue, counter_h, counter_d);
0141           alpaka::wait(queue);
0142           expected =
0143               expectedCount<RangeType::ExtentLimited, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0144           assert(*counter_h.data() == expected);
0145 
0146           // ExtentLimitedWithShift range
0147           alpaka::memset(queue, counter_d, 0);
0148           alpaka::enqueue(queue,
0149                           alpaka::createTaskKernel<Acc1D>(
0150                               workdiv,
0151                               testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0152                               blocks * BlockSize + sizeFuzz,
0153                               skip,
0154                               counter_d.data()));
0155           alpaka::memcpy(queue, counter_h, counter_d);
0156           alpaka::wait(queue);
0157           expected = expectedCount<RangeType::ExtentLimitedWithShift, LoopScope::Grid>(
0158               workdiv, skip, blocks * BlockSize + sizeFuzz);
0159           assert(*counter_h.data() == expected);
0160 
0161           // Block level auto tests
0162           alpaka::enqueue(
0163               queue,
0164               alpaka::createTaskKernel<Acc1D>(workdiv,
0165                                               testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0166                                               blocks * BlockSize + sizeFuzz,
0167                                               skip,
0168                                               counter_d.data()));
0169           alpaka::enqueue(
0170               queue,
0171               alpaka::createTaskKernel<Acc1D>(workdiv,
0172                                               testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0173                                               blocks * BlockSize + sizeFuzz,
0174                                               skip,
0175                                               counter_d.data()));
0176           alpaka::enqueue(queue,
0177                           alpaka::createTaskKernel<Acc1D>(
0178                               workdiv,
0179                               testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0180                               blocks * BlockSize + sizeFuzz,
0181                               skip,
0182                               counter_d.data()));
0183         }
0184     alpaka::wait(queue);
0185   }
0186 }