File indexing completed on 2022-11-24 00:02:24
0001 #include "testESAlgoAsync.h"
0002
0003 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0004 AlpakaESTestDataDDevice testESAlgoAsync(Queue& queue,
0005 AlpakaESTestDataA const& dataA,
0006 cms::alpakatest::AlpakaESTestDataB<Device> const& dataB) {
0007 auto const size = std::min(dataA.size(), dataB.size());
0008 AlpakaESTestDataDDevice ret(size, queue);
0009
0010 auto const& deviceProperties = alpaka::getAccDevProps<Acc1D>(alpaka::getDev(queue));
0011 uint32_t maxThreadsPerBlock = deviceProperties.m_blockThreadExtentMax[0];
0012
0013 uint32_t threadsPerBlock = maxThreadsPerBlock;
0014 uint32_t blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
0015 uint32_t elementsPerThread = 1;
0016 auto workDiv = WorkDiv1D{blocksPerGrid, threadsPerBlock, elementsPerThread};
0017
0018 alpaka::exec<Acc1D>(
0019 queue,
0020 workDiv,
0021 [] ALPAKA_FN_ACC(Acc1D const& acc, int const* a, int const* b, AlpakaESTestDataDDevice::View c, int size) {
0022 const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
0023 const int32_t stride = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u];
0024 for (auto i = thread; i < size; i += stride) {
0025 c[i] = a[i] + b[i];
0026 }
0027 },
0028 dataA.data(),
0029 dataB.data(),
0030 ret.view(),
0031 size);
0032
0033 return ret;
0034 }
0035 }