Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-11 23:28:00

0001 #include <cstdint>
0002 #include <random>
0003 #include <vector>
0004 
0005 #define CATCH_CONFIG_MAIN
0006 #include <catch.hpp>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h"
0015 
0016 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0017 
0018 struct KernelAddVectorsF {
0019   ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0020                                 const float* __restrict__ in1,
0021                                 const float* __restrict__ in2,
0022                                 float* __restrict__ out,
0023                                 uint32_t size) const {
0024     test::add_vectors_f(acc, in1, in2, out, size);
0025   }
0026 };
0027 
0028 TEST_CASE("HeterogeneousTest/AlpakaDevice test", "[alpakaTestDeviceAddition]") {
0029   auto const& devices = cms::alpakatools::devices<Platform>();
0030   if (devices.empty()) {
0031     FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0032         "the test will be skipped.");
0033   }
0034 
0035   // random number generator with a gaussian distribution
0036   std::random_device rd{};
0037   std::default_random_engine rand{rd()};
0038   std::normal_distribution<float> dist{0., 1.};
0039 
0040   // tolerance
0041   constexpr float epsilon = 0.000001;
0042 
0043   // buffer size
0044   constexpr uint32_t size = 1024 * 1024;
0045 
0046   // allocate input and output host buffers
0047   std::vector<float> in1_h(size);
0048   std::vector<float> in2_h(size);
0049   std::vector<float> out_h(size);
0050 
0051   // fill the input buffers with random data, and the output buffer with zeros
0052   for (uint32_t i = 0; i < size; ++i) {
0053     in1_h[i] = dist(rand);
0054     in2_h[i] = dist(rand);
0055     out_h[i] = 0.;
0056   }
0057 
0058   // run the test on all available devices
0059   for (auto const& device : cms::alpakatools::devices<Platform>()) {
0060     SECTION("Test add_vectors_f on " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend") {
0061       REQUIRE_NOTHROW([&]() {
0062         Queue queue{device};
0063 
0064         // allocate input and output buffers on the device
0065         auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0066         auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0067         auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0068 
0069         // copy the input data to the device
0070         // FIXME: pass the explicit size of type uint32_t to avoid compilation error
0071         // The destination view and the extent are required to have compatible index types!
0072         alpaka::memcpy(queue, in1_d, in1_h, size);
0073         alpaka::memcpy(queue, in2_d, in2_h, size);
0074 
0075         // fill the output buffer with zeros
0076         alpaka::memset(queue, out_d, 0);
0077 
0078         // launch the 1-dimensional kernel for vector addition
0079         alpaka::exec<Acc1D>(queue,
0080                             cms::alpakatools::make_workdiv<Acc1D>(32, 32),
0081                             KernelAddVectorsF{},
0082                             in1_d.data(),
0083                             in2_d.data(),
0084                             out_d.data(),
0085                             size);
0086 
0087         // copy the results from the device to the host
0088         alpaka::memcpy(queue, out_h, out_d, size);
0089 
0090         // wait for all the operations to complete
0091         alpaka::wait(queue);
0092       }());
0093 
0094       // check the results
0095       for (uint32_t i = 0; i < size; ++i) {
0096         float sum = in1_h[i] + in2_h[i];
0097         CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon));
0098       }
0099     }
0100   }
0101 }