File indexing completed on 2024-04-06 12:15:41
0001 #include <cstdio>
0002 #include <random>
0003
0004 #include <alpaka/alpaka.hpp>
0005
0006 #define CATCH_CONFIG_MAIN
0007 #include <catch.hpp>
0008
0009 #include "FWCore/Utilities/interface/stringize.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013
0014
0015 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0016
0017
0018
0019
0020 struct IndependentWorkKernel {
0021 template <typename TAcc, typename T>
0022 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0023 T const* __restrict__ in,
0024 T* __restrict__ out,
0025 size_t const* __restrict__ indices,
0026 size_t groups) const {
0027 for (auto group : cms::alpakatools::independent_groups(acc, groups)) {
0028 size_t first = indices[group];
0029 size_t last = indices[group + 1];
0030 size_t size = last - first;
0031 for (auto index : cms::alpakatools::independent_group_elements(acc, size)) {
0032 out[first + index] = in[first + index] + group;
0033 }
0034 }
0035 }
0036 };
0037
0038
0039
0040 template <typename TKernel>
0041 void testIndependentWorkKernel(size_t groups, size_t grid_size, size_t block_size, TKernel kernel) {
0042
0043 std::random_device rd{};
0044 std::default_random_engine engine{rd()};
0045
0046
0047 std::uniform_int_distribution<size_t> random_size{100, 201};
0048
0049
0050 std::normal_distribution<float> dist{0., 1.};
0051
0052
0053 std::vector<size_t> sizes(groups);
0054 auto indices_h = cms::alpakatools::make_host_buffer<size_t[], Platform>(groups + 1);
0055 indices_h[0] = 0;
0056 for (size_t i = 0; i < groups; ++i) {
0057 auto size = random_size(engine);
0058 sizes[i] = size;
0059 indices_h[i + 1] = indices_h[i] + size;
0060 }
0061
0062
0063 constexpr float epsilon = 0.000001;
0064
0065
0066 const size_t size = indices_h[groups];
0067
0068
0069 auto in_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
0070 auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
0071
0072
0073 for (size_t i = 0; i < size; ++i) {
0074 in_h[i] = dist(engine);
0075 out_h[i] = 0;
0076 }
0077
0078
0079 for (auto const& device : cms::alpakatools::devices<Platform>()) {
0080 std::cout << "Test IndependentWorkKernel on " << alpaka::getName(device) << " over " << size << " elements in "
0081 << groups << " independent groups with " << grid_size << " blocks of " << block_size << " elements\n";
0082 auto queue = Queue(device);
0083
0084
0085 auto indices_d = cms::alpakatools::make_device_buffer<size_t[]>(queue, groups + 1);
0086 auto in_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0087 auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0088
0089
0090 alpaka::memcpy(queue, indices_d, indices_h);
0091 alpaka::memcpy(queue, in_d, in_h);
0092
0093
0094 alpaka::memset(queue, out_d, 0.);
0095
0096
0097 auto div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
0098 alpaka::exec<Acc1D>(queue, div, kernel, in_d.data(), out_d.data(), indices_d.data(), groups);
0099
0100
0101 alpaka::memcpy(queue, out_h, out_d);
0102
0103
0104 alpaka::wait(queue);
0105
0106
0107 for (size_t g = 0; g < groups; ++g) {
0108 size_t first = indices_h[g];
0109 size_t last = indices_h[g + 1];
0110 for (size_t i = first; i < last; ++i) {
0111 float sum = in_h[i] + g;
0112 float delta = std::max(std::fabs(sum) * epsilon, epsilon);
0113 REQUIRE(out_h[i] < sum + delta);
0114 REQUIRE(out_h[i] > sum - delta);
0115 }
0116 }
0117 }
0118 }
0119
0120 TEST_CASE("Test alpaka kernels for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend",
0121 "[" EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) "]") {
0122 SECTION("Independent work groups") {
0123
0124 auto const& devices = cms::alpakatools::devices<Platform>();
0125 if (devices.empty()) {
0126 FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0127 "the test will be skipped.");
0128 }
0129
0130
0131
0132 std::cout << "Test independent work kernel with small block size, using scalar dimensions\n";
0133 testIndependentWorkKernel(100, 32, 32, IndependentWorkKernel{});
0134
0135
0136
0137 std::cout << "Test independent work kernel with large block size, using scalar dimensions\n";
0138 testIndependentWorkKernel(100, 1, 1024, IndependentWorkKernel{});
0139
0140
0141
0142 std::cout << "Test independent work kernel with large block size, using scalar dimensions\n";
0143 testIndependentWorkKernel(100, 1024, 1024, IndependentWorkKernel{});
0144 }
0145 }