Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:41

0001 #include <cstdint>
0002 #include <vector>
0003 
0004 #include <alpaka/alpaka.hpp>
0005 
0006 #include "FWCore/Utilities/interface/stringize.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0011 
0012 using namespace cms::alpakatools;
0013 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0014 
0015 // Kernel running a loop over threads/elements
0016 // One function with multiple flavors
0017 
0018 // The type of uniform_elements
0019 enum class RangeType { Default, ExtentLimited, ExtentLimitedWithShift };
0020 
0021 // The concurrency scope between threads
0022 enum class LoopScope { Block, Grid };
0023 
0024 // Utility for one time initializations
0025 template <LoopScope loopScope, typename TAcc>
0026 bool constexpr firstInLoopRange(TAcc const& acc) {
0027   if constexpr (loopScope == LoopScope::Block)
0028     return not alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
0029   if constexpr (loopScope == LoopScope::Grid)
0030     return not alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
0031   assert(false);
0032 }
0033 
0034 template <RangeType rangeType, LoopScope loopScope, typename TAcc>
0035 size_t constexpr expectedCount(TAcc const& acc, size_t skip, size_t size) {
0036   if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0037     return skip < size ? size - skip : 0;
0038   else if constexpr (rangeType == RangeType::ExtentLimited)
0039     return size;
0040   else /* rangeType == RangeType::Default */
0041     if constexpr (loopScope == LoopScope::Block)
0042       return alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0043     else
0044       return alpaka::getWorkDiv<alpaka::Grid, alpaka::Elems>(acc)[0u];
0045 }
0046 
0047 template <RangeType rangeType, LoopScope loopScope>
0048 size_t constexpr expectedCount(WorkDiv1D const& workDiv, size_t skip, size_t size) {
0049   if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0050     return skip < size ? size - skip : 0;
0051   else if constexpr (rangeType == RangeType::ExtentLimited)
0052     return size;
0053   else /* rangeType == RangeType::Default */
0054     if constexpr (loopScope == LoopScope::Block)
0055       return workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0056     else
0057       return workDiv.m_gridBlockExtent[0u] * workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0058 }
0059 
0060 template <RangeType rangeType, LoopScope loopScope>
0061 struct testWordDivisionDefaultRange {
0062   template <typename TAcc>
0063   ALPAKA_FN_ACC void operator()(TAcc const& acc, size_t size, size_t skip, size_t* globalCounter) const {
0064     size_t& counter =
0065         (loopScope == LoopScope::Grid ? *globalCounter : alpaka::declareSharedVar<size_t, __COUNTER__>(acc));
0066     // Init the counter for block range. Grid range does so my mean of memset.
0067     if constexpr (loopScope == LoopScope::Block) {
0068       if (firstInLoopRange<loopScope>(acc))
0069         counter = 0;
0070       alpaka::syncBlockThreads(acc);
0071     }
0072     // The loop we are testing
0073     if constexpr (rangeType == RangeType::Default)
0074       for ([[maybe_unused]] auto idx : uniform_elements(acc))
0075         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0076     else if constexpr (rangeType == RangeType::ExtentLimited)
0077       for ([[maybe_unused]] auto idx : uniform_elements(acc, size))
0078         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0079     else if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0080       for ([[maybe_unused]] auto idx : uniform_elements(acc, skip, size))
0081         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0082     alpaka::syncBlockThreads(acc);
0083     // Check the result. Grid range will check by memcpy-ing the result.
0084     if constexpr (loopScope == LoopScope::Block) {
0085       if (firstInLoopRange<loopScope>(acc)) {
0086         auto expected = expectedCount<rangeType, loopScope>(acc, skip, size);
0087         assert(counter == expected);
0088       }
0089     }
0090   }
0091 };
0092 
0093 int main() {
0094   // get the list of devices on the current platform
0095   auto const& devices = cms::alpakatools::devices<Platform>();
0096   if (devices.empty()) {
0097     std::cerr << "No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0098       "the test will be skipped.\n";
0099     exit(EXIT_FAILURE);
0100   }
0101 
0102   for (auto const& device : devices) {
0103     // Get global memory
0104     Queue queue(device);
0105     auto counter_d = cms::alpakatools::make_device_buffer<size_t>(queue);
0106     auto counter_h = cms::alpakatools::make_host_buffer<size_t>(queue);
0107     alpaka::memset(queue, counter_d, 0);
0108     ssize_t BlockSize = 512;
0109     size_t GridSize = 4;
0110     for (size_t blocks = 1; blocks < GridSize * 3; blocks++)
0111       for (auto sizeFuzz :
0112            std::initializer_list<ssize_t>{-10 * BlockSize / 13, -BlockSize / 2, -1, 0, 1, BlockSize / 2})
0113         for (auto skip : std::initializer_list<ssize_t>{0,
0114                                                         1,
0115                                                         BlockSize / 2,
0116                                                         BlockSize - 1,
0117                                                         BlockSize,
0118                                                         BlockSize + 1,
0119                                                         BlockSize + BlockSize / 2,
0120                                                         2 * BlockSize - 1,
0121                                                         2 * BlockSize,
0122                                                         2 * BlockSize + 1}) {
0123           // Grid level iteration: we need to initialize/check at the grid level
0124           // Default range
0125           alpaka::memset(queue, counter_d, 0);
0126           auto workdiv = make_workdiv<Acc1D>(BlockSize, GridSize);
0127           alpaka::enqueue(
0128               queue,
0129               alpaka::createTaskKernel<Acc1D>(workdiv,
0130                                               testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0131                                               blocks * BlockSize + sizeFuzz,
0132                                               skip,
0133                                               counter_d.data()));
0134           alpaka::memcpy(queue, counter_h, counter_d);
0135           alpaka::wait(queue);
0136           auto expected =
0137               expectedCount<RangeType::Default, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0138           assert(*counter_h.data() == expected);
0139 
0140           // ExtentLimited range
0141           alpaka::memset(queue, counter_d, 0);
0142           alpaka::enqueue(
0143               queue,
0144               alpaka::createTaskKernel<Acc1D>(workdiv,
0145                                               testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0146                                               blocks * BlockSize + sizeFuzz,
0147                                               skip,
0148                                               counter_d.data()));
0149           alpaka::memcpy(queue, counter_h, counter_d);
0150           alpaka::wait(queue);
0151           expected =
0152               expectedCount<RangeType::ExtentLimited, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0153           assert(*counter_h.data() == expected);
0154 
0155           // ExtentLimitedWithShift range
0156           alpaka::memset(queue, counter_d, 0);
0157           alpaka::enqueue(queue,
0158                           alpaka::createTaskKernel<Acc1D>(
0159                               workdiv,
0160                               testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0161                               blocks * BlockSize + sizeFuzz,
0162                               skip,
0163                               counter_d.data()));
0164           alpaka::memcpy(queue, counter_h, counter_d);
0165           alpaka::wait(queue);
0166           expected = expectedCount<RangeType::ExtentLimitedWithShift, LoopScope::Grid>(
0167               workdiv, skip, blocks * BlockSize + sizeFuzz);
0168           assert(*counter_h.data() == expected);
0169 
0170           // Block level auto tests
0171           alpaka::enqueue(
0172               queue,
0173               alpaka::createTaskKernel<Acc1D>(workdiv,
0174                                               testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0175                                               blocks * BlockSize + sizeFuzz,
0176                                               skip,
0177                                               counter_d.data()));
0178           alpaka::enqueue(
0179               queue,
0180               alpaka::createTaskKernel<Acc1D>(workdiv,
0181                                               testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0182                                               blocks * BlockSize + sizeFuzz,
0183                                               skip,
0184                                               counter_d.data()));
0185           alpaka::enqueue(queue,
0186                           alpaka::createTaskKernel<Acc1D>(
0187                               workdiv,
0188                               testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0189                               blocks * BlockSize + sizeFuzz,
0190                               skip,
0191                               counter_d.data()));
0192         }
0193     alpaka::wait(queue);
0194   }
0195 }