File indexing completed on 2024-04-23 22:56:17
0001 #include <cassert>
0002 #include <iostream>
0003
0004 #define CATCH_CONFIG_MAIN
0005 #include <catch.hpp>
0006
0007 #include <alpaka/alpaka.hpp>
0008
0009 #include "FWCore/Utilities/interface/stringize.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0014
0015 using namespace cms::alpakatools;
0016 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0017
0018 static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair) "]";
0019
0020 struct update {
0021 template <typename TAcc>
0022 ALPAKA_FN_ACC void operator()(
0023 const TAcc &acc, AtomicPairCounter *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const {
0024 for (auto i : uniform_elements(acc, n)) {
0025 auto m = i % 11;
0026 m = m % 6 + 1;
0027 auto c = dc->inc_add(acc, m);
0028 ALPAKA_ASSERT_ACC(c.first < n);
0029 ind[c.first] = c.second;
0030 for (uint32_t j = c.second; j < c.second + m; ++j)
0031 cont[j] = i;
0032 }
0033 }
0034 };
0035
0036 struct finalize {
0037 template <typename TAcc>
0038 ALPAKA_FN_ACC void operator()(
0039 const TAcc &acc, AtomicPairCounter const *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const {
0040 ALPAKA_ASSERT_ACC(dc->get().first == n);
0041 ind[n] = dc->get().second;
0042 }
0043 };
0044
0045 TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestAtomicPair), s_tag) {
0046 SECTION("AtomicPairCounter") {
0047 auto const &devices = cms::alpakatools::devices<Platform>();
0048 if (devices.empty()) {
0049 FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) "backend, "
0050 "the test will be skipped.");
0051 }
0052
0053
0054 for (auto const &device : devices) {
0055 std::cout << "Test AtomicPairCounter on " << alpaka::getName(device) << '\n';
0056 auto queue = Queue(device);
0057
0058 auto c_d = make_device_buffer<AtomicPairCounter>(queue);
0059 alpaka::memset(queue, c_d, 0);
0060
0061 std::cout << "- size " << sizeof(AtomicPairCounter) << std::endl;
0062
0063 constexpr uint32_t N = 20000;
0064 constexpr uint32_t M = N * 6;
0065 auto n_d = make_device_buffer<uint32_t[]>(queue, N);
0066 auto m_d = make_device_buffer<uint32_t[]>(queue, M);
0067
0068 constexpr uint32_t NUM_VALUES = 10000;
0069
0070
0071 const auto blocksPerGrid = 2000u;
0072 const auto threadsPerBlockOrElementsPerThread = 512u;
0073 const auto workDiv = make_workdiv<Acc1D>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0074 alpaka::enqueue(
0075 queue, alpaka::createTaskKernel<Acc1D>(workDiv, update(), c_d.data(), n_d.data(), m_d.data(), NUM_VALUES));
0076
0077
0078 const auto blocksPerGridFinalize = 1u;
0079 const auto threadsPerBlockOrElementsPerThreadFinalize = 1u;
0080 const auto workDivFinalize =
0081 make_workdiv<Acc1D>(blocksPerGridFinalize, threadsPerBlockOrElementsPerThreadFinalize);
0082 alpaka::enqueue(
0083 queue,
0084 alpaka::createTaskKernel<Acc1D>(workDivFinalize, finalize(), c_d.data(), n_d.data(), m_d.data(), NUM_VALUES));
0085
0086 auto c_h = make_host_buffer<AtomicPairCounter>(queue);
0087 auto n_h = make_host_buffer<uint32_t[]>(queue, N);
0088 auto m_h = make_host_buffer<uint32_t[]>(queue, M);
0089
0090
0091 alpaka::memcpy(queue, c_h, c_d);
0092 alpaka::memcpy(queue, n_h, n_d);
0093 alpaka::memcpy(queue, m_h, m_d);
0094
0095
0096 alpaka::wait(queue);
0097
0098 REQUIRE(c_h.data()->get().first == NUM_VALUES);
0099 REQUIRE(n_h[NUM_VALUES] == c_h.data()->get().second);
0100 REQUIRE(n_h[0] == 0);
0101
0102 for (size_t i = 0; i < NUM_VALUES; ++i) {
0103 auto ib = n_h.data()[i];
0104 auto ie = n_h.data()[i + 1];
0105 auto k = m_h.data()[ib++];
0106 REQUIRE(k < NUM_VALUES);
0107
0108 for (; ib < ie; ++ib)
0109 REQUIRE(m_h.data()[ib] == k);
0110 }
0111 }
0112 }
0113 }