Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-11 23:28:00

0001 #include <cstdint>
0002 #include <random>
0003 #include <vector>
0004 
0005 #define CATCH_CONFIG_MAIN
0006 #include <catch.hpp>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h"
0015 
0016 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0017 
0018 TEST_CASE("HeterogeneousTest/AlpakaKernel test", "[alpakaTestDeviceAdditionKernel]") {
0019   auto const& devices = cms::alpakatools::devices<Platform>();
0020   if (devices.empty()) {
0021     FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0022         "the test will be skipped.");
0023   }
0024 
0025   // random number generator with a gaussian distribution
0026   std::random_device rd{};
0027   std::default_random_engine rand{rd()};
0028   std::normal_distribution<float> dist{0., 1.};
0029 
0030   // tolerance
0031   constexpr float epsilon = 0.000001;
0032 
0033   // buffer size
0034   constexpr uint32_t size = 1024 * 1024;
0035 
0036   // allocate input and output host buffers
0037   std::vector<float> in1_h(size);
0038   std::vector<float> in2_h(size);
0039   std::vector<float> out_h(size);
0040 
0041   // fill the input buffers with random data, and the output buffer with zeros
0042   for (uint32_t i = 0; i < size; ++i) {
0043     in1_h[i] = dist(rand);
0044     in2_h[i] = dist(rand);
0045     out_h[i] = 0.;
0046   }
0047 
0048   // run the test on all available devices
0049   for (auto const& device : cms::alpakatools::devices<Platform>()) {
0050     SECTION("Test add_vectors_f on " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend") {
0051       REQUIRE_NOTHROW([&]() {
0052         Queue queue{device};
0053 
0054         // allocate input and output buffers on the device
0055         auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0056         auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0057         auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
0058 
0059         // copy the input data to the device
0060         // FIXME: pass the explicit size of type uint32_t to avoid compilation error
0061         // The destination view and the extent are required to have compatible index types!
0062         alpaka::memcpy(queue, in1_d, in1_h, size);
0063         alpaka::memcpy(queue, in2_d, in2_h, size);
0064 
0065         // fill the output buffer with zeros
0066         alpaka::memset(queue, out_d, 0);
0067 
0068         // launch the 1-dimensional kernel for vector addition
0069         alpaka::exec<Acc1D>(queue,
0070                             cms::alpakatools::make_workdiv<Acc1D>(32, 32),
0071                             test::KernelAddVectorsF{},
0072                             in1_d.data(),
0073                             in2_d.data(),
0074                             out_d.data(),
0075                             size);
0076 
0077         // copy the results from the device to the host
0078         alpaka::memcpy(queue, out_h, out_d, size);
0079 
0080         // wait for all the operations to complete
0081         alpaka::wait(queue);
0082       }());
0083 
0084       // check the results
0085       for (uint32_t i = 0; i < size; ++i) {
0086         float sum = in1_h[i] + in2_h[i];
0087         CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon));
0088       }
0089     }
0090   }
0091 }