File indexing completed on 2024-04-06 12:15:45
0001 #ifndef HeterogeneousCore_CUDAUtilities_launch_h
0002 #define HeterogeneousCore_CUDAUtilities_launch_h
0003
0004 #include <tuple>
0005
0006 #include <cuda_runtime.h>
0007
0008 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045 namespace cms {
0046 namespace cuda {
0047
0048 struct LaunchParameters {
0049 dim3 gridDim;
0050 dim3 blockDim;
0051 size_t sharedMem;
0052 cudaStream_t stream;
0053
0054 LaunchParameters(dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, cudaStream_t stream = nullptr)
0055 : gridDim(gridDim), blockDim(blockDim), sharedMem(sharedMem), stream(stream) {}
0056
0057 LaunchParameters(int gridDim, int blockDim, size_t sharedMem = 0, cudaStream_t stream = nullptr)
0058 : gridDim(gridDim), blockDim(blockDim), sharedMem(sharedMem), stream(stream) {}
0059 };
0060
0061 namespace detail {
0062
0063 template <typename T>
0064 struct kernel_traits;
0065
0066 template <typename... Args>
0067 struct kernel_traits<void(Args...)> {
0068 static constexpr size_t arguments_size = sizeof...(Args);
0069
0070 using argument_type_tuple = std::tuple<Args...>;
0071
0072 template <size_t i>
0073 using argument_type = typename std::tuple_element<i, argument_type_tuple>::type;
0074 };
0075
0076
0077 template <int I>
0078 struct pointer_setter {
0079 template <typename Tuple>
0080 void operator()(void const* ptrs[], Tuple const& t) {
0081 pointer_setter<I - 1>()(ptrs, t);
0082 ptrs[I - 1] = &std::get<I - 1>(t);
0083 }
0084 };
0085
0086 template <>
0087 struct pointer_setter<0> {
0088 template <typename Tuple>
0089 void operator()(void const* ptrs[], Tuple const& t) {}
0090 };
0091
0092 }
0093
0094
0095
0096 inline void launch(void (*kernel)(), LaunchParameters config) {
0097 cudaCheck(cudaLaunchKernel(
0098 (const void*)kernel, config.gridDim, config.blockDim, nullptr, config.sharedMem, config.stream));
0099 }
0100
0101 template <typename F, typename... Args>
0102 #if __cplusplus >= 201703L
0103 std::enable_if_t<std::is_invocable_r<void, F, Args&&...>::value>
0104 #else
0105 std::enable_if_t<std::is_void<std::result_of_t<F && (Args && ...)> >::value>
0106 #endif
0107 launch(F* kernel, LaunchParameters config, Args&&... args) {
0108 using function_type = detail::kernel_traits<F>;
0109 typename function_type::argument_type_tuple args_copy(args...);
0110
0111 constexpr auto size = function_type::arguments_size;
0112 void const* pointers[size];
0113
0114 detail::pointer_setter<size>()(pointers, args_copy);
0115 cudaCheck(cudaLaunchKernel(
0116 (const void*)kernel, config.gridDim, config.blockDim, (void**)pointers, config.sharedMem, config.stream));
0117 }
0118
0119
0120
0121 inline void launch_cooperative(void (*kernel)(), LaunchParameters config) {
0122 cudaCheck(cudaLaunchCooperativeKernel(
0123 (const void*)kernel, config.gridDim, config.blockDim, nullptr, config.sharedMem, config.stream));
0124 }
0125
0126 template <typename F, typename... Args>
0127 #if __cplusplus >= 201703L
0128 std::enable_if_t<std::is_invocable_r<void, F, Args&&...>::value>
0129 #else
0130 std::enable_if_t<std::is_void<std::result_of_t<F && (Args && ...)> >::value>
0131 #endif
0132 launch_cooperative(F* kernel, LaunchParameters config, Args&&... args) {
0133 using function_type = detail::kernel_traits<F>;
0134 typename function_type::argument_type_tuple args_copy(args...);
0135
0136 constexpr auto size = function_type::arguments_size;
0137 void const* pointers[size];
0138
0139 detail::pointer_setter<size>()(pointers, args_copy);
0140 cudaCheck(cudaLaunchCooperativeKernel(
0141 (const void*)kernel, config.gridDim, config.blockDim, (void**)pointers, config.sharedMem, config.stream));
0142 }
0143
0144 }
0145 }
0146
0147 #endif