File indexing completed on 2024-04-06 12:15:48
0001 #include <cstddef>
0002 #include <cstdint>
0003
0004 #include <hip/hip_runtime.h>
0005
0006 #include "HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h"
0007
0008 namespace cms::rocmtest {
0009
0010 __device__ void add_vectors_f(const float* __restrict__ in1,
0011 const float* __restrict__ in2,
0012 float* __restrict__ out,
0013 size_t size) {
0014 uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x;
0015 uint32_t stride = blockDim.x * gridDim.x;
0016
0017 for (size_t i = thread; i < size; i += stride) {
0018 out[i] = in1[i] + in2[i];
0019 }
0020 }
0021
0022 __device__ void add_vectors_d(const double* __restrict__ in1,
0023 const double* __restrict__ in2,
0024 double* __restrict__ out,
0025 size_t size) {
0026 uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x;
0027 uint32_t stride = blockDim.x * gridDim.x;
0028
0029 for (size_t i = thread; i < size; i += stride) {
0030 out[i] = in1[i] + in2[i];
0031 }
0032 }
0033
0034 }