Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:41

0001 #include <cassert>
0002 #include <iostream>
0003 
0004 #define CATCH_CONFIG_MAIN
0005 #include <catch.hpp>
0006 
0007 #include <alpaka/alpaka.hpp>
0008 
0009 #include "FWCore/Utilities/interface/stringize.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0014 
0015 using namespace cms::alpakatools;
0016 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0017 
0018 static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair) "]";
0019 
0020 struct update {
0021   template <typename TAcc>
0022   ALPAKA_FN_ACC void operator()(
0023       const TAcc &acc, AtomicPairCounter *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const {
0024     for (auto i : uniform_elements(acc, n)) {
0025       auto m = i % 11;
0026       m = m % 6 + 1;  // max 6, no 0
0027       auto c = dc->inc_add(acc, m);
0028       assert(c.first < n);
0029       ind[c.first] = c.second;
0030       for (uint32_t j = c.second; j < c.second + m; ++j)
0031         cont[j] = i;
0032     }
0033   }
0034 };
0035 
0036 struct finalize {
0037   template <typename TAcc>
0038   ALPAKA_FN_ACC void operator()(
0039       const TAcc &acc, AtomicPairCounter const *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const {
0040     assert(dc->get().first == n);
0041     ind[n] = dc->get().second;
0042   }
0043 };
0044 
0045 TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair), s_tag) {
0046   SECTION("AtomicPairCounter") {
0047     auto const &devices = cms::alpakatools::devices<Platform>();
0048     if (devices.empty()) {
0049       FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) "backend, "
0050            "the test will be skipped.");
0051     }
0052 
0053     // run the test on each device
0054     for (auto const &device : devices) {
0055       std::cout << "Test AtomicPairCounter on " << alpaka::getName(device) << '\n';
0056       auto queue = Queue(device);
0057 
0058       auto c_d = make_device_buffer<AtomicPairCounter>(queue);
0059       alpaka::memset(queue, c_d, 0);
0060 
0061       std::cout << "- size " << sizeof(AtomicPairCounter) << std::endl;
0062 
0063       constexpr uint32_t N = 20000;
0064       constexpr uint32_t M = N * 6;
0065       auto n_d = make_device_buffer<uint32_t[]>(queue, N);
0066       auto m_d = make_device_buffer<uint32_t[]>(queue, M);
0067 
0068       constexpr uint32_t NUM_VALUES = 10000;
0069 
0070       // Update
0071       const auto blocksPerGrid = 2000u;
0072       const auto threadsPerBlockOrElementsPerThread = 512u;
0073       const auto workDiv = make_workdiv<Acc1D>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0074       alpaka::enqueue(
0075           queue, alpaka::createTaskKernel<Acc1D>(workDiv, update(), c_d.data(), n_d.data(), m_d.data(), NUM_VALUES));
0076 
0077       // Finalize
0078       const auto blocksPerGridFinalize = 1u;
0079       const auto threadsPerBlockOrElementsPerThreadFinalize = 1u;
0080       const auto workDivFinalize =
0081           make_workdiv<Acc1D>(blocksPerGridFinalize, threadsPerBlockOrElementsPerThreadFinalize);
0082       alpaka::enqueue(
0083           queue,
0084           alpaka::createTaskKernel<Acc1D>(workDivFinalize, finalize(), c_d.data(), n_d.data(), m_d.data(), NUM_VALUES));
0085 
0086       auto c_h = make_host_buffer<AtomicPairCounter>(queue);
0087       auto n_h = make_host_buffer<uint32_t[]>(queue, N);
0088       auto m_h = make_host_buffer<uint32_t[]>(queue, M);
0089 
0090       // copy the results from the device to the host
0091       alpaka::memcpy(queue, c_h, c_d);
0092       alpaka::memcpy(queue, n_h, n_d);
0093       alpaka::memcpy(queue, m_h, m_d);
0094 
0095       // wait for all the operations to complete
0096       alpaka::wait(queue);
0097 
0098       REQUIRE(c_h.data()->get().first == NUM_VALUES);
0099       REQUIRE(n_h[NUM_VALUES] == c_h.data()->get().second);
0100       REQUIRE(n_h[0] == 0);
0101 
0102       for (size_t i = 0; i < NUM_VALUES; ++i) {
0103         auto ib = n_h.data()[i];
0104         auto ie = n_h.data()[i + 1];
0105         auto k = m_h.data()[ib++];
0106         REQUIRE(k < NUM_VALUES);
0107 
0108         for (; ib < ie; ++ib)
0109           REQUIRE(m_h.data()[ib] == k);
0110       }
0111     }
0112   }
0113 }