File indexing completed on 2024-04-06 12:15:42
0001 #include "testESAlgoAsync.h"
0002
0003 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0004 AlpakaESTestDataDDevice testESAlgoAsync(Queue& queue,
0005 AlpakaESTestDataADevice const& dataA,
0006 cms::alpakatest::AlpakaESTestDataB<Device> const& dataB) {
0007 auto const size = std::min(dataA->metadata().size(), static_cast<int>(dataB.size()));
0008 AlpakaESTestDataDDevice ret(size, queue);
0009
0010 auto const& deviceProperties = alpaka::getAccDevProps<Acc1D>(alpaka::getDev(queue));
0011 uint32_t maxThreadsPerBlock = deviceProperties.m_blockThreadExtentMax[0];
0012
0013 uint32_t threadsPerBlock = maxThreadsPerBlock;
0014 uint32_t blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
0015 uint32_t elementsPerThread = 1;
0016 auto workDiv = WorkDiv1D{blocksPerGrid, threadsPerBlock, elementsPerThread};
0017
0018 alpaka::exec<Acc1D>(
0019 queue,
0020 workDiv,
0021 [] ALPAKA_FN_ACC(Acc1D const& acc,
0022 AlpakaESTestDataADevice::ConstView a,
0023 int const* b,
0024 AlpakaESTestDataDDevice::View c,
0025 int size) {
0026 const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
0027 const int32_t stride = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u];
0028 for (auto i = thread; i < size; i += stride) {
0029 c[i] = a.z()[i] + b[i];
0030 }
0031 },
0032 dataA.view(),
0033 dataB.data(),
0034 ret.view(),
0035 size);
0036
0037 return ret;
0038 }
0039 }