Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-01-22 07:34:10

0001 #include <cassert>
0002 #include <iostream>
0003 
0004 #define CATCH_CONFIG_MAIN
0005 #include <catch.hpp>
0006 
0007 #include <alpaka/alpaka.hpp>
0008 
0009 #include "FWCore/Utilities/interface/stringize.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0014 
0015 using namespace cms::alpakatools;
0016 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0017 
0018 static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair) "]";
0019 
0020 struct update {
0021   ALPAKA_FN_ACC void operator()(
0022       const Acc1D &acc, AtomicPairCounter *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const {
0023     for (auto i : uniform_elements(acc, n)) {
0024       auto m = i % 11;
0025       m = m % 6 + 1;  // max 6, no 0
0026       auto c = dc->inc_add(acc, m);
0027       ALPAKA_ASSERT_ACC(c.first < n);
0028       ind[c.first] = c.second;
0029       for (uint32_t j = c.second; j < c.second + m; ++j)
0030         cont[j] = i;
0031     }
0032   }
0033 };
0034 
0035 struct finalize {
0036   ALPAKA_FN_ACC void operator()(
0037       const Acc1D &acc, AtomicPairCounter const *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const {
0038     ALPAKA_ASSERT_ACC(dc->get().first == n);
0039     ind[n] = dc->get().second;
0040   }
0041 };
0042 
0043 TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair), s_tag) {
0044   SECTION("AtomicPairCounter") {
0045     auto const &devices = cms::alpakatools::devices<Platform>();
0046     if (devices.empty()) {
0047       FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) "backend, "
0048            "the test will be skipped.");
0049     }
0050 
0051     // run the test on each device
0052     for (auto const &device : devices) {
0053       std::cout << "Test AtomicPairCounter on " << alpaka::getName(device) << '\n';
0054       auto queue = Queue(device);
0055 
0056       auto c_d = make_device_buffer<AtomicPairCounter>(queue);
0057       alpaka::memset(queue, c_d, 0);
0058 
0059       std::cout << "- size " << sizeof(AtomicPairCounter) << std::endl;
0060 
0061       constexpr uint32_t N = 20000;
0062       constexpr uint32_t M = N * 6;
0063       auto n_d = make_device_buffer<uint32_t[]>(queue, N);
0064       auto m_d = make_device_buffer<uint32_t[]>(queue, M);
0065 
0066       constexpr uint32_t NUM_VALUES = 10000;
0067 
0068       // Update
0069       const auto blocksPerGrid = 2000u;
0070       const auto threadsPerBlockOrElementsPerThread = 512u;
0071       const auto workDiv = make_workdiv<Acc1D>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0072       alpaka::enqueue(
0073           queue, alpaka::createTaskKernel<Acc1D>(workDiv, update(), c_d.data(), n_d.data(), m_d.data(), NUM_VALUES));
0074 
0075       // Finalize
0076       const auto blocksPerGridFinalize = 1u;
0077       const auto threadsPerBlockOrElementsPerThreadFinalize = 1u;
0078       const auto workDivFinalize =
0079           make_workdiv<Acc1D>(blocksPerGridFinalize, threadsPerBlockOrElementsPerThreadFinalize);
0080       alpaka::enqueue(
0081           queue,
0082           alpaka::createTaskKernel<Acc1D>(workDivFinalize, finalize(), c_d.data(), n_d.data(), m_d.data(), NUM_VALUES));
0083 
0084       auto c_h = make_host_buffer<AtomicPairCounter>(queue);
0085       auto n_h = make_host_buffer<uint32_t[]>(queue, N);
0086       auto m_h = make_host_buffer<uint32_t[]>(queue, M);
0087 
0088       // copy the results from the device to the host
0089       alpaka::memcpy(queue, c_h, c_d);
0090       alpaka::memcpy(queue, n_h, n_d);
0091       alpaka::memcpy(queue, m_h, m_d);
0092 
0093       // wait for all the operations to complete
0094       alpaka::wait(queue);
0095 
0096       REQUIRE(c_h.data()->get().first == NUM_VALUES);
0097       REQUIRE(n_h[NUM_VALUES] == c_h.data()->get().second);
0098       REQUIRE(n_h[0] == 0);
0099 
0100       for (size_t i = 0; i < NUM_VALUES; ++i) {
0101         auto ib = n_h.data()[i];
0102         auto ie = n_h.data()[i + 1];
0103         auto k = m_h.data()[ib++];
0104         REQUIRE(k < NUM_VALUES);
0105 
0106         for (; ib < ie; ++ib)
0107           REQUIRE(m_h.data()[ib] == k);
0108       }
0109     }
0110   }
0111 }