File indexing completed on 2023-03-17 11:05:46
0001 #ifndef HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
0002 #define HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
0003
0004
0005
0006
0007
0008 #ifndef __CUDACC__
0009
0010 #include <algorithm>
0011 #include <cstdint>
0012 #include <cstring>
0013
0014
0015 #include <cuda_runtime.h>
0016
0017
0018 #undef __global__
0019 #define __global__ inline __attribute__((always_inline))
0020
0021 #undef __forceinline__
0022 #define __forceinline__ inline __attribute__((always_inline))
0023
0024 namespace cms {
0025 namespace cudacompat {
0026
0027
0028
// Host-side stand-ins for the CUDA built-in coordinate variables.
// They describe a serial "launch" of exactly one block containing exactly
// one thread, so kernels written against threadIdx/blockIdx/blockDim/gridDim
// compile and run unchanged on the CPU.
const dim3 threadIdx = {0, 0, 0};
const dim3 blockDim = {1, 1, 1};

const dim3 blockIdx = {0, 0, 0};
const dim3 gridDim = {1, 1, 1};
0034
// Serial emulation of CUDA atomicCAS: if *address equals `compare`, store
// `val` there; in all cases return the value that was read from *address.
// No actual atomicity is needed — this code path runs single-threaded.
template <typename T1, typename T2>
T1 atomicCAS(T1* address, T1 compare, T2 val) {
  const T1 observed = *address;
  if (observed == compare) {
    *address = val;
  }
  return observed;
}

// Block-scoped variant; identical to atomicCAS in this serial emulation.
template <typename T1, typename T2>
T1 atomicCAS_block(T1* address, T1 compare, T2 val) {
  return atomicCAS(address, compare, val);
}
0046
// Serial emulation of atomicInc: increment *a if it is still below the
// bound `b`, returning the value read before the (possible) increment.
// NOTE(review): CUDA's atomicInc wraps to 0 once the old value reaches the
// bound; this emulation saturates at the bound instead — confirm callers do
// not depend on the wrap-around behavior.
template <typename T1, typename T2>
T1 atomicInc(T1* a, T2 b) {
  const T1 previous = *a;
  if (previous < T1(b)) {
    *a = previous + 1;
  }
  return previous;
}

// Block-scoped variant; identical to atomicInc in this serial emulation.
template <typename T1, typename T2>
T1 atomicInc_block(T1* a, T2 b) {
  return atomicInc(a, b);
}
0059
// Serial emulation of atomicAdd: add `b` to *a and return the prior value.
template <typename T1, typename T2>
T1 atomicAdd(T1* a, T2 b) {
  const T1 previous = *a;
  (*a) += b;
  return previous;
}

// Block-scoped variant; identical to atomicAdd in this serial emulation.
template <typename T1, typename T2>
T1 atomicAdd_block(T1* a, T2 b) {
  return atomicAdd(a, b);
}
0071
// Serial emulation of atomicSub: subtract `b` from *a, returning the prior value.
template <typename T1, typename T2>
T1 atomicSub(T1* a, T2 b) {
  const T1 previous = *a;
  (*a) -= b;
  return previous;
}

// Block-scoped variant; identical to atomicSub in this serial emulation.
template <typename T1, typename T2>
T1 atomicSub_block(T1* a, T2 b) {
  return atomicSub(a, b);
}
0083
// Serial emulation of atomicMin: lower *a to `b` if `b` is smaller,
// returning the value read before the update.
template <typename T1, typename T2>
T1 atomicMin(T1* a, T2 b) {
  const T1 previous = *a;
  *a = std::min(previous, T1(b));
  return previous;
}

// Block-scoped variant; identical to atomicMin in this serial emulation.
template <typename T1, typename T2>
T1 atomicMin_block(T1* a, T2 b) {
  return atomicMin(a, b);
}
0095
// Serial emulation of atomicMax: raise *a to `b` if `b` is larger,
// returning the value read before the update.
template <typename T1, typename T2>
T1 atomicMax(T1* a, T2 b) {
  const T1 previous = *a;
  *a = std::max(previous, T1(b));
  return previous;
}

// Block-scoped variant; identical to atomicMax in this serial emulation.
template <typename T1, typename T2>
T1 atomicMax_block(T1* a, T2 b) {
  return atomicMax(a, b);
}
0107
// Serial emulation of atomicAnd: bitwise-AND `b` into *a, returning the prior value.
template <typename T1, typename T2>
T1 atomicAnd(T1* a, T2 b) {
  const T1 previous = *a;
  (*a) &= b;
  return previous;
}

// Block-scoped variant; identical to atomicAnd in this serial emulation.
template <typename T1, typename T2>
T1 atomicAnd_block(T1* a, T2 b) {
  return atomicAnd(a, b);
}
0119
// Serial emulation of atomicOr: bitwise-OR `b` into *a, returning the prior value.
template <typename T1, typename T2>
T1 atomicOr(T1* a, T2 b) {
  const T1 previous = *a;
  (*a) |= b;
  return previous;
}

// Block-scoped variant; identical to atomicOr in this serial emulation.
template <typename T1, typename T2>
T1 atomicOr_block(T1* a, T2 b) {
  return atomicOr(a, b);
}
0131
// With a single serial thread there is nothing to synchronise or fence
// against, so the barrier and fence primitives are no-ops.
inline void __syncthreads() {}
inline void __threadfence() {}

// Block-wide predicate reductions over one thread reduce to the thread's
// own predicate.
inline bool __syncthreads_or(bool x) {
  return x;
}
inline bool __syncthreads_and(bool x) {
  return x;
}

// Read-only-cache load: on the host this is a plain dereference.
template <typename T>
inline T __ldg(T const* x) {
  return *x;
}
0140
0141 }
0142 }
0143
0144
0145 using namespace cms::cudacompat;
0146
0147 #endif
0148
0149 #endif