Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-11-24 00:02:24

0001 #include "testESAlgoAsync.h"
0002 
0003 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0004   AlpakaESTestDataDDevice testESAlgoAsync(Queue& queue,
0005                                           AlpakaESTestDataA const& dataA,
0006                                           cms::alpakatest::AlpakaESTestDataB<Device> const& dataB) {
0007     auto const size = std::min(dataA.size(), dataB.size());
0008     AlpakaESTestDataDDevice ret(size, queue);
0009 
0010     auto const& deviceProperties = alpaka::getAccDevProps<Acc1D>(alpaka::getDev(queue));
0011     uint32_t maxThreadsPerBlock = deviceProperties.m_blockThreadExtentMax[0];
0012 
0013     uint32_t threadsPerBlock = maxThreadsPerBlock;
0014     uint32_t blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
0015     uint32_t elementsPerThread = 1;
0016     auto workDiv = WorkDiv1D{blocksPerGrid, threadsPerBlock, elementsPerThread};
0017 
0018     alpaka::exec<Acc1D>(
0019         queue,
0020         workDiv,
0021         [] ALPAKA_FN_ACC(Acc1D const& acc, int const* a, int const* b, AlpakaESTestDataDDevice::View c, int size) {
0022           const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
0023           const int32_t stride = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u];
0024           for (auto i = thread; i < size; i += stride) {
0025             c[i] = a[i] + b[i];
0026           }
0027         },
0028         dataA.data(),
0029         dataB.data(),
0030         ret.view(),
0031         size);
0032 
0033     return ret;
0034   }
0035 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE