// HostOnlyTask.h — host-only tasks that can be enqueued directly on native Alpaka GPU queues.
#ifndef HeterogeneousCore_AlpakaInterface_interface_HostOnlyTask_h
#define HeterogeneousCore_AlpakaInterface_interface_HostOnlyTask_h

#include <functional>
#include <memory>

#include <fmt/format.h>

#include <alpaka/alpaka.hpp>

namespace alpaka {

  //! A host-side task that is guaranteed not to invoke any GPU-related APIs.
  //!
  //! Such a task can be enqueued directly on the native GPU queues, with no need
  //! for a dedicated host-side worker thread.
  class HostOnlyTask {
  public:
    HostOnlyTask(std::function<void(std::exception_ptr)> task) : task_{std::move(task)} {}

    //! Run the wrapped callable; eptr is null on success, or carries the failure to report.
    void operator()(std::exception_ptr eptr) const { task_(eptr); }

  private:
    // user-provided callable, invoked exactly once per operator() call
    std::function<void(std::exception_ptr)> task_;
  };

  namespace trait {

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
    //! Specialization of the Enqueue trait that lets a HostOnlyTask be pushed
    //! directly onto a non-blocking CUDA queue.
    template <>
    struct Enqueue<QueueCudaRtNonBlocking, HostOnlyTask> {
      using TApi = ApiCudaRt;

      //! Stream callback: reclaims ownership of the heap-allocated task and invokes it,
      //! forwarding any CUDA error to it as an std::exception_ptr (null on success).
      static void CUDART_CB callback(cudaStream_t queue, cudaError_t status, void* arg) {
        std::unique_ptr<HostOnlyTask> ownedTask{static_cast<HostOnlyTask*>(arg)};
        if (status != cudaSuccess) {
          // wrap the exception in a try-catch block to let GDB "catch throw" break on it
          try {
            throw std::runtime_error(fmt::format("CUDA error: callback of stream {} received error {}: {}.",
                                                 fmt::ptr(queue),
                                                 cudaGetErrorName(status),
                                                 cudaGetErrorString(status)));
          } catch (std::exception&) {
            // pass the exception to the task
            (*ownedTask)(std::current_exception());
          }
        } else {
          (*ownedTask)(nullptr);
        }
      }

      //! Moves the task to the heap, handing ownership to the callback, and registers
      //! the callback on the queue's native CUDA stream.
      ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task) -> void {
        auto ownedTask = std::make_unique<HostOnlyTask>(std::move(task));
        void* rawTask = static_cast<void*>(ownedTask.release());
        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaStreamAddCallback(alpaka::getNativeHandle(queue), callback, rawTask, 0u));
      }
    };
#endif  // ALPAKA_ACC_GPU_CUDA_ENABLED

#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
    //! Specialization of the Enqueue trait that lets a HostOnlyTask be pushed
    //! directly onto a non-blocking HIP queue.
    template <>
    struct Enqueue<QueueHipRtNonBlocking, HostOnlyTask> {
      using TApi = ApiHipRt;

      //! Stream callback: reclaims ownership of the heap-allocated task and invokes it,
      //! forwarding any HIP error to it as an std::exception_ptr (null on success).
      static void callback(hipStream_t queue, hipError_t status, void* arg) {
        std::unique_ptr<HostOnlyTask> ownedTask{static_cast<HostOnlyTask*>(arg)};
        if (status != hipSuccess) {
          // wrap the exception in a try-catch block to let GDB "catch throw" break on it
          try {
            throw std::runtime_error(fmt::format("HIP error: callback of stream {} received error {}: {}.",
                                                 fmt::ptr(queue),
                                                 hipGetErrorName(status),
                                                 hipGetErrorString(status)));
          } catch (std::exception&) {
            // pass the exception to the task
            (*ownedTask)(std::current_exception());
          }
        } else {
          (*ownedTask)(nullptr);
        }
      }

      //! Moves the task to the heap, handing ownership to the callback, and registers
      //! the callback on the queue's native HIP stream.
      ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task) -> void {
        auto ownedTask = std::make_unique<HostOnlyTask>(std::move(task));
        void* rawTask = static_cast<void*>(ownedTask.release());
        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamAddCallback(alpaka::getNativeHandle(queue), callback, rawTask, 0u));
      }
    };
#endif  // ALPAKA_ACC_GPU_HIP_ENABLED

  }  // namespace trait

}  // namespace alpaka

#endif  // HeterogeneousCore_AlpakaInterface_interface_HostOnlyTask_h