File indexing completed on 2025-01-22 07:34:10
0001 #include <cassert>
0002 #include <iostream>
0003
0004 #define CATCH_CONFIG_MAIN
0005 #include <catch.hpp>
0006
0007 #include <alpaka/alpaka.hpp>
0008
0009 #include "FWCore/Utilities/interface/stringize.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0014
0015 using namespace cms::alpakatools;
0016 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0017
0018 static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair) "]";
0019
0020 struct update {
0021 ALPAKA_FN_ACC void operator()(
0022 const Acc1D &acc, AtomicPairCounter *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const {
0023 for (auto i : uniform_elements(acc, n)) {
0024 auto m = i % 11;
0025 m = m % 6 + 1;
0026 auto c = dc->inc_add(acc, m);
0027 ALPAKA_ASSERT_ACC(c.first < n);
0028 ind[c.first] = c.second;
0029 for (uint32_t j = c.second; j < c.second + m; ++j)
0030 cont[j] = i;
0031 }
0032 }
0033 };
0034
0035 struct finalize {
0036 ALPAKA_FN_ACC void operator()(
0037 const Acc1D &acc, AtomicPairCounter const *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const {
0038 ALPAKA_ASSERT_ACC(dc->get().first == n);
0039 ind[n] = dc->get().second;
0040 }
0041 };
0042
0043 TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair), s_tag) {
0044 SECTION("AtomicPairCounter") {
0045 auto const &devices = cms::alpakatools::devices<Platform>();
0046 if (devices.empty()) {
0047 FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) "backend, "
0048 "the test will be skipped.");
0049 }
0050
0051
0052 for (auto const &device : devices) {
0053 std::cout << "Test AtomicPairCounter on " << alpaka::getName(device) << '\n';
0054 auto queue = Queue(device);
0055
0056 auto c_d = make_device_buffer<AtomicPairCounter>(queue);
0057 alpaka::memset(queue, c_d, 0);
0058
0059 std::cout << "- size " << sizeof(AtomicPairCounter) << std::endl;
0060
0061 constexpr uint32_t N = 20000;
0062 constexpr uint32_t M = N * 6;
0063 auto n_d = make_device_buffer<uint32_t[]>(queue, N);
0064 auto m_d = make_device_buffer<uint32_t[]>(queue, M);
0065
0066 constexpr uint32_t NUM_VALUES = 10000;
0067
0068
0069 const auto blocksPerGrid = 2000u;
0070 const auto threadsPerBlockOrElementsPerThread = 512u;
0071 const auto workDiv = make_workdiv<Acc1D>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0072 alpaka::enqueue(
0073 queue, alpaka::createTaskKernel<Acc1D>(workDiv, update(), c_d.data(), n_d.data(), m_d.data(), NUM_VALUES));
0074
0075
0076 const auto blocksPerGridFinalize = 1u;
0077 const auto threadsPerBlockOrElementsPerThreadFinalize = 1u;
0078 const auto workDivFinalize =
0079 make_workdiv<Acc1D>(blocksPerGridFinalize, threadsPerBlockOrElementsPerThreadFinalize);
0080 alpaka::enqueue(
0081 queue,
0082 alpaka::createTaskKernel<Acc1D>(workDivFinalize, finalize(), c_d.data(), n_d.data(), m_d.data(), NUM_VALUES));
0083
0084 auto c_h = make_host_buffer<AtomicPairCounter>(queue);
0085 auto n_h = make_host_buffer<uint32_t[]>(queue, N);
0086 auto m_h = make_host_buffer<uint32_t[]>(queue, M);
0087
0088
0089 alpaka::memcpy(queue, c_h, c_d);
0090 alpaka::memcpy(queue, n_h, n_d);
0091 alpaka::memcpy(queue, m_h, m_d);
0092
0093
0094 alpaka::wait(queue);
0095
0096 REQUIRE(c_h.data()->get().first == NUM_VALUES);
0097 REQUIRE(n_h[NUM_VALUES] == c_h.data()->get().second);
0098 REQUIRE(n_h[0] == 0);
0099
0100 for (size_t i = 0; i < NUM_VALUES; ++i) {
0101 auto ib = n_h.data()[i];
0102 auto ie = n_h.data()[i + 1];
0103 auto k = m_h.data()[ib++];
0104 REQUIRE(k < NUM_VALUES);
0105
0106 for (; ib < ie; ++ib)
0107 REQUIRE(m_h.data()[ib] == k);
0108 }
0109 }
0110 }
0111 }