File indexing completed on 2024-04-06 12:15:48
0001 #include <cstddef>
0002
0003 #include <cuda_runtime.h>
0004
0005 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0006 #include "HeterogeneousTest/CUDAOpaque/interface/DeviceAdditionOpaque.h"
0007 #include "HeterogeneousTest/CUDAWrapper/interface/DeviceAdditionWrapper.h"
0008
0009 namespace cms::cudatest {
0010
0011 void opaque_add_vectors_f(const float* in1_h, const float* in2_h, float* out_h, size_t size) {
0012
0013 float* in1_d;
0014 float* in2_d;
0015 float* out_d;
0016 cudaCheck(cudaMalloc(&in1_d, size * sizeof(float)));
0017 cudaCheck(cudaMalloc(&in2_d, size * sizeof(float)));
0018 cudaCheck(cudaMalloc(&out_d, size * sizeof(float)));
0019
0020
0021 cudaCheck(cudaMemcpy(in1_d, in1_h, size * sizeof(float), cudaMemcpyHostToDevice));
0022 cudaCheck(cudaMemcpy(in2_d, in2_h, size * sizeof(float), cudaMemcpyHostToDevice));
0023
0024
0025 cudaCheck(cudaMemset(out_d, 0, size * sizeof(float)));
0026
0027
0028 wrapper_add_vectors_f(in1_d, in2_d, out_d, size);
0029
0030
0031 cudaCheck(cudaMemcpy(out_h, out_d, size * sizeof(float), cudaMemcpyDeviceToHost));
0032
0033
0034 cudaCheck(cudaDeviceSynchronize());
0035
0036
0037 cudaCheck(cudaFree(in1_d));
0038 cudaCheck(cudaFree(in2_d));
0039 cudaCheck(cudaFree(out_d));
0040 }
0041
0042 void opaque_add_vectors_d(const double* in1_h, const double* in2_h, double* out_h, size_t size) {
0043
0044 double* in1_d;
0045 double* in2_d;
0046 double* out_d;
0047 cudaCheck(cudaMalloc(&in1_d, size * sizeof(double)));
0048 cudaCheck(cudaMalloc(&in2_d, size * sizeof(double)));
0049 cudaCheck(cudaMalloc(&out_d, size * sizeof(double)));
0050
0051
0052 cudaCheck(cudaMemcpy(in1_d, in1_h, size * sizeof(double), cudaMemcpyHostToDevice));
0053 cudaCheck(cudaMemcpy(in2_d, in2_h, size * sizeof(double), cudaMemcpyHostToDevice));
0054
0055
0056 cudaCheck(cudaMemset(out_d, 0, size * sizeof(double)));
0057
0058
0059 wrapper_add_vectors_d(in1_d, in2_d, out_d, size);
0060
0061
0062 cudaCheck(cudaMemcpy(out_h, out_d, size * sizeof(double), cudaMemcpyDeviceToHost));
0063
0064
0065 cudaCheck(cudaDeviceSynchronize());
0066
0067
0068 cudaCheck(cudaFree(in1_d));
0069 cudaCheck(cudaFree(in2_d));
0070 cudaCheck(cudaFree(out_d));
0071 }
0072
0073 }