Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-08-15 01:07:43

0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_HostOnlyTask_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_HostOnlyTask_h
0003 
0004 #include <functional>
0005 #include <memory>
0006 
0007 #include <alpaka/alpaka.hpp>
0008 
0009 namespace alpaka {
0010 
0011   //! A task that is guaranted not to call any GPU-ralated APIs
0012   //!
0013   //! These tasks can be enqueued directly to the native GPU queues, without the use of a
0014   //! dedicated host-side worker thread.
0015   class HostOnlyTask {
0016   public:
0017     HostOnlyTask(std::function<void()> task) : task_(std::move(task)) {}
0018 
0019     void operator()() const { task_(); }
0020 
0021   private:
0022     std::function<void()> task_;
0023   };
0024 
0025   namespace trait {
0026 
0027 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
0028     //! The CUDA async queue enqueue trait specialization for "safe tasks"
0029     template <>
0030     struct Enqueue<QueueCudaRtNonBlocking, HostOnlyTask> {
0031       using TApi = ApiCudaRt;
0032 
0033       static void CUDART_CB callback(cudaStream_t /*queue*/, cudaError_t /*status*/, void* arg) {
0034         //ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(status);
0035         std::unique_ptr<HostOnlyTask> pTask(static_cast<HostOnlyTask*>(arg));
0036         (*pTask)();
0037       }
0038 
0039       ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task) -> void {
0040         auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
0041         ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
0042             cudaStreamAddCallback(alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
0043       }
0044     };
0045 #endif  // ALPAKA_ACC_GPU_CUDA_ENABLED
0046 
0047 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
0048     //! The HIP async queue enqueue trait specialization for "safe tasks"
0049     template <>
0050     struct Enqueue<QueueHipRtNonBlocking, HostOnlyTask> {
0051       using TApi = ApiHipRt;
0052 
0053       static void callback(hipStream_t /*queue*/, hipError_t /*status*/, void* arg) {
0054         //ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(status);
0055         std::unique_ptr<HostOnlyTask> pTask(static_cast<HostOnlyTask*>(arg));
0056         (*pTask)();
0057       }
0058 
0059       ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task) -> void {
0060         auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
0061         ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
0062             hipStreamAddCallback(alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
0063       }
0064     };
0065 #endif  // ALPAKA_ACC_GPU_HIP_ENABLED
0066 
0067   }  // namespace trait
0068 
0069 }  // namespace alpaka
0070 
0071 #endif  // HeterogeneousCore_AlpakaInterface_interface_HostOnlyTask_h