File indexing completed on 2024-04-11 23:28:00
0001 #include <cstdint>
0002 #include <random>
0003 #include <vector>
0004
0005 #define CATCH_CONFIG_MAIN
0006 #include <catch.hpp>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h"
0015
0016 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0017
0018 struct KernelAddVectorsF {
0019 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0020 const float* __restrict__ in1,
0021 const float* __restrict__ in2,
0022 float* __restrict__ out,
0023 uint32_t size) const {
0024 test::add_vectors_f(acc, in1, in2, out, size);
0025 }
0026 };
0027
0028 TEST_CASE("HeterogeneousTest/AlpakaDevice test", "[alpakaTestDeviceAddition]") {
0029 auto const& devices = cms::alpakatools::devices<Platform>();
0030 if (devices.empty()) {
0031 FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0032 "the test will be skipped.");
0033 }
0034
0035
0036 std::random_device rd{};
0037 std::default_random_engine rand{rd()};
0038 std::normal_distribution<float> dist{0., 1.};
0039
0040
0041 constexpr float epsilon = 0.000001;
0042
0043
0044 constexpr uint32_t size = 1024 * 1024;
0045
0046
0047 std::vector<float> in1_h(size);
0048 std::vector<float> in2_h(size);
0049 std::vector<float> out_h(size);
0050
0051
0052 for (uint32_t i = 0; i < size; ++i) {
0053 in1_h[i] = dist(rand);
0054 in2_h[i] = dist(rand);
0055 out_h[i] = 0.;
0056 }
0057
0058
0059 for (auto const& device : cms::alpakatools::devices<Platform>()) {
0060 SECTION("Test add_vectors_f on " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend") {
0061 REQUIRE_NOTHROW([&]() {
0062 Queue queue{device};
0063
0064
0065 auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0066 auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0067 auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0068
0069
0070
0071
0072 alpaka::memcpy(queue, in1_d, in1_h, size);
0073 alpaka::memcpy(queue, in2_d, in2_h, size);
0074
0075
0076 alpaka::memset(queue, out_d, 0);
0077
0078
0079 alpaka::exec<Acc1D>(queue,
0080 cms::alpakatools::make_workdiv<Acc1D>(32, 32),
0081 KernelAddVectorsF{},
0082 in1_d.data(),
0083 in2_d.data(),
0084 out_d.data(),
0085 size);
0086
0087
0088 alpaka::memcpy(queue, out_h, out_d, size);
0089
0090
0091 alpaka::wait(queue);
0092 }());
0093
0094
0095 for (uint32_t i = 0; i < size; ++i) {
0096 float sum = in1_h[i] + in2_h[i];
0097 CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon));
0098 }
0099 }
0100 }
0101 }