Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-10-02 22:46:44

0001 // Check that ALPAKA_HOST_ONLY is not defined during device compilation:
0002 #ifdef ALPAKA_HOST_ONLY
0003 #error ALPAKA_HOST_ONLY defined in device compilation
0004 #endif
0005 
0006 #include <alpaka/alpaka.hpp>
0007 
0008 #include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
0011 
0012 #include "TestAlgo.h"
0013 
0014 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0015 
0016   class TestAlgoKernel {
0017   public:
0018     template <typename TAcc, typename = std::enable_if_t<cms::alpakatools::is_accelerator_v<TAcc>>>
0019     ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, int32_t size) const {
0020       // this example accepts an arbitrary number of blocks and threads, and always uses 1 element per thread
0021       const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
0022       const int32_t stride = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u];
0023       const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
0024 
0025       if (thread == 0) {
0026         view.r() = 1.;
0027       }
0028       for (auto i = thread; i < size; i += stride) {
0029         view[i] = {0., 0., 0., i, matrix * i};
0030       }
0031     }
0032   };
0033 
0034   void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection) const {
0035     auto const& deviceProperties = alpaka::getAccDevProps<Acc1D>(alpaka::getDev(queue));
0036     uint32_t maxThreadsPerBlock = deviceProperties.m_blockThreadExtentMax[0];
0037 
0038     uint32_t threadsPerBlock = maxThreadsPerBlock;
0039     uint32_t blocksPerGrid = (collection->metadata().size() + threadsPerBlock - 1) / threadsPerBlock;
0040     uint32_t elementsPerThread = 1;
0041     auto workDiv = WorkDiv1D{blocksPerGrid, threadsPerBlock, elementsPerThread};
0042 
0043     alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view(), collection->metadata().size());
0044   }
0045 
0046 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE