Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-01-22 07:34:11

0001 #include <cstdint>
0002 #include <vector>
0003 
0004 #include <alpaka/alpaka.hpp>
0005 
0006 #include "FWCore/Utilities/interface/stringize.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0011 
0012 using namespace cms::alpakatools;
0013 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0014 
0015 // Kernel running a loop over threads/elements
0016 // One function with multiple flavors
0017 
0018 // The type of uniform_elements
0019 enum class RangeType { Default, ExtentLimited, ExtentLimitedWithShift };
0020 
0021 // The concurrency scope between threads
0022 enum class LoopScope { Block, Grid };
0023 
0024 template <RangeType rangeType, LoopScope loopScope>
0025 size_t constexpr expectedCount(WorkDiv1D const& workDiv, size_t skip, size_t size) {
0026   if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0027     return skip < size ? size - skip : 0;
0028   else if constexpr (rangeType == RangeType::ExtentLimited)
0029     return size;
0030   else /* rangeType == RangeType::Default */
0031     if constexpr (loopScope == LoopScope::Block)
0032       return workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0033     else
0034       return workDiv.m_gridBlockExtent[0u] * workDiv.m_blockThreadExtent[0u] * workDiv.m_threadElemExtent[0u];
0035 }
0036 
0037 template <RangeType rangeType, LoopScope loopScope>
0038 struct testWordDivisionDefaultRange {
0039   ALPAKA_FN_ACC void operator()(Acc1D const& acc, size_t size, size_t skip, size_t* globalCounter) const {
0040     size_t& counter =
0041         (loopScope == LoopScope::Grid ? *globalCounter : alpaka::declareSharedVar<size_t, __COUNTER__>(acc));
0042     // Init the counter for block range. Grid range does so my mean of memset.
0043     if constexpr (loopScope == LoopScope::Block) {
0044       if (cms::alpakatools::once_per_block(acc)) {
0045         counter = 0;
0046       }
0047       alpaka::syncBlockThreads(acc);
0048     }
0049     // The loop we are testing
0050     if constexpr (rangeType == RangeType::Default)
0051       for ([[maybe_unused]] auto idx : uniform_elements(acc))
0052         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0053     else if constexpr (rangeType == RangeType::ExtentLimited)
0054       for ([[maybe_unused]] auto idx : uniform_elements(acc, size))
0055         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0056     else if constexpr (rangeType == RangeType::ExtentLimitedWithShift)
0057       for ([[maybe_unused]] auto idx : uniform_elements(acc, skip, size))
0058         alpaka::atomicAdd(acc, &counter, 1ul, alpaka::hierarchy::Blocks{});
0059     alpaka::syncBlockThreads(acc);
0060     // Check the result. Grid range will check by memcpy-ing the result.
0061     if constexpr (loopScope == LoopScope::Block) {
0062       if (cms::alpakatools::once_per_block(acc)) {
0063         auto expected = expectedCount<rangeType, loopScope>(acc, skip, size);
0064         ALPAKA_ASSERT_ACC(counter == expected);
0065       }
0066     }
0067   }
0068 };
0069 
0070 int main() {
0071   // get the list of devices on the current platform
0072   auto const& devices = cms::alpakatools::devices<Platform>();
0073   if (devices.empty()) {
0074     std::cerr << "No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0075       "the test will be skipped.\n";
0076     exit(EXIT_FAILURE);
0077   }
0078 
0079   for (auto const& device : devices) {
0080     // Get global memory
0081     Queue queue(device);
0082     auto counter_d = cms::alpakatools::make_device_buffer<size_t>(queue);
0083     auto counter_h = cms::alpakatools::make_host_buffer<size_t>(queue);
0084     alpaka::memset(queue, counter_d, 0);
0085     ssize_t BlockSize = 512;
0086     size_t GridSize = 4;
0087     for (size_t blocks = 1; blocks < GridSize * 3; blocks++)
0088       for (auto sizeFuzz :
0089            std::initializer_list<ssize_t>{-10 * BlockSize / 13, -BlockSize / 2, -1, 0, 1, BlockSize / 2})
0090         for (auto skip : std::initializer_list<ssize_t>{0,
0091                                                         1,
0092                                                         BlockSize / 2,
0093                                                         BlockSize - 1,
0094                                                         BlockSize,
0095                                                         BlockSize + 1,
0096                                                         BlockSize + BlockSize / 2,
0097                                                         2 * BlockSize - 1,
0098                                                         2 * BlockSize,
0099                                                         2 * BlockSize + 1}) {
0100           // Grid level iteration: we need to initialize/check at the grid level
0101           // Default range
0102           alpaka::memset(queue, counter_d, 0);
0103           auto workdiv = make_workdiv<Acc1D>(BlockSize, GridSize);
0104           alpaka::enqueue(
0105               queue,
0106               alpaka::createTaskKernel<Acc1D>(workdiv,
0107                                               testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0108                                               blocks * BlockSize + sizeFuzz,
0109                                               skip,
0110                                               counter_d.data()));
0111           alpaka::memcpy(queue, counter_h, counter_d);
0112           alpaka::wait(queue);
0113           auto expected =
0114               expectedCount<RangeType::Default, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0115           assert(*counter_h.data() == expected);
0116 
0117           // ExtentLimited range
0118           alpaka::memset(queue, counter_d, 0);
0119           alpaka::enqueue(
0120               queue,
0121               alpaka::createTaskKernel<Acc1D>(workdiv,
0122                                               testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0123                                               blocks * BlockSize + sizeFuzz,
0124                                               skip,
0125                                               counter_d.data()));
0126           alpaka::memcpy(queue, counter_h, counter_d);
0127           alpaka::wait(queue);
0128           expected =
0129               expectedCount<RangeType::ExtentLimited, LoopScope::Grid>(workdiv, skip, blocks * BlockSize + sizeFuzz);
0130           assert(*counter_h.data() == expected);
0131 
0132           // ExtentLimitedWithShift range
0133           alpaka::memset(queue, counter_d, 0);
0134           alpaka::enqueue(queue,
0135                           alpaka::createTaskKernel<Acc1D>(
0136                               workdiv,
0137                               testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0138                               blocks * BlockSize + sizeFuzz,
0139                               skip,
0140                               counter_d.data()));
0141           alpaka::memcpy(queue, counter_h, counter_d);
0142           alpaka::wait(queue);
0143           expected = expectedCount<RangeType::ExtentLimitedWithShift, LoopScope::Grid>(
0144               workdiv, skip, blocks * BlockSize + sizeFuzz);
0145           assert(*counter_h.data() == expected);
0146 
0147           // Block level auto tests
0148           alpaka::enqueue(
0149               queue,
0150               alpaka::createTaskKernel<Acc1D>(workdiv,
0151                                               testWordDivisionDefaultRange<RangeType::Default, LoopScope::Grid>{},
0152                                               blocks * BlockSize + sizeFuzz,
0153                                               skip,
0154                                               counter_d.data()));
0155           alpaka::enqueue(
0156               queue,
0157               alpaka::createTaskKernel<Acc1D>(workdiv,
0158                                               testWordDivisionDefaultRange<RangeType::ExtentLimited, LoopScope::Grid>{},
0159                                               blocks * BlockSize + sizeFuzz,
0160                                               skip,
0161                                               counter_d.data()));
0162           alpaka::enqueue(queue,
0163                           alpaka::createTaskKernel<Acc1D>(
0164                               workdiv,
0165                               testWordDivisionDefaultRange<RangeType::ExtentLimitedWithShift, LoopScope::Grid>{},
0166                               blocks * BlockSize + sizeFuzz,
0167                               skip,
0168                               counter_d.data()));
0169         }
0170     alpaka::wait(queue);
0171   }
0172 }