Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-10-25 09:50:24

0001 #include "testESAlgoAsync.h"
0002 
0003 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0004   AlpakaESTestDataDDevice testESAlgoAsync(Queue& queue,
0005                                           AlpakaESTestDataADevice const& dataA,
0006                                           cms::alpakatest::AlpakaESTestDataB<Device> const& dataB) {
0007     auto const size = std::min(dataA->metadata().size(), static_cast<int>(dataB.size()));
0008     AlpakaESTestDataDDevice ret(size, queue);
0009 
0010     auto const& deviceProperties = alpaka::getAccDevProps<Acc1D>(alpaka::getDev(queue));
0011     uint32_t maxThreadsPerBlock = deviceProperties.m_blockThreadExtentMax[0];
0012 
0013     uint32_t threadsPerBlock = maxThreadsPerBlock;
0014     uint32_t blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
0015     uint32_t elementsPerThread = 1;
0016     auto workDiv = WorkDiv1D{blocksPerGrid, threadsPerBlock, elementsPerThread};
0017 
0018     alpaka::exec<Acc1D>(
0019         queue,
0020         workDiv,
0021         [] ALPAKA_FN_ACC(Acc1D const& acc,
0022                          AlpakaESTestDataADevice::ConstView a,
0023                          int const* b,
0024                          AlpakaESTestDataDDevice::View c,
0025                          int size) {
0026           const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
0027           const int32_t stride = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u];
0028           for (auto i = thread; i < size; i += stride) {
0029             c[i] = a.z()[i] + b[i];
0030           }
0031         },
0032         dataA.view(),
0033         dataB.data(),
0034         ret.view(),
0035         size);
0036 
0037     return ret;
0038   }
0039 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE