File indexing completed on 2024-04-09 02:22:22
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

#include <cuda_runtime.h>

#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/global/EDAnalyzer.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

#include "CUDATestKernelAdditionAlgo.h"
0020
0021 class CUDATestKernelAdditionModule : public edm::global::EDAnalyzer<> {
0022 public:
0023 explicit CUDATestKernelAdditionModule(edm::ParameterSet const& config);
0024 ~CUDATestKernelAdditionModule() override = default;
0025
0026 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0027
0028 void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;
0029
0030 private:
0031 const uint32_t size_;
0032 };
0033
0034 CUDATestKernelAdditionModule::CUDATestKernelAdditionModule(edm::ParameterSet const& config)
0035 : size_(config.getParameter<uint32_t>("size")) {}
0036
0037 void CUDATestKernelAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0038 edm::ParameterSetDescription desc;
0039 desc.add<uint32_t>("size", 1024 * 1024);
0040 descriptions.addWithDefaultLabel(desc);
0041 }
0042
0043 void CUDATestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
0044
0045 edm::Service<CUDAInterface> service;
0046 if (not service or not service->enabled()) {
0047 std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n";
0048 return;
0049 }
0050
0051
0052 std::random_device rd{};
0053 std::default_random_engine rand{rd()};
0054 std::normal_distribution<float> dist{0., 1.};
0055
0056
0057 constexpr float epsilon = 0.000001;
0058
0059
0060 std::vector<float> in1_h(size_);
0061 std::vector<float> in2_h(size_);
0062 std::vector<float> out_h(size_);
0063
0064
0065 for (size_t i = 0; i < size_; ++i) {
0066 in1_h[i] = dist(rand);
0067 in2_h[i] = dist(rand);
0068 out_h[i] = 0.;
0069 }
0070
0071
0072 float* in1_d;
0073 float* in2_d;
0074 float* out_d;
0075 cudaCheck(cudaMalloc(&in1_d, size_ * sizeof(float)));
0076 cudaCheck(cudaMalloc(&in2_d, size_ * sizeof(float)));
0077 cudaCheck(cudaMalloc(&out_d, size_ * sizeof(float)));
0078
0079
0080 cudaCheck(cudaMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice));
0081 cudaCheck(cudaMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice));
0082
0083
0084 cudaCheck(cudaMemset(out_d, 0, size_ * sizeof(float)));
0085
0086
0087 HeterogeneousTestCUDAKernelPlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_);
0088
0089
0090 cudaCheck(cudaMemcpy(out_h.data(), out_d, size_ * sizeof(float), cudaMemcpyDeviceToHost));
0091
0092
0093 cudaCheck(cudaDeviceSynchronize());
0094
0095
0096 for (size_t i = 0; i < size_; ++i) {
0097 float sum = in1_h[i] + in2_h[i];
0098 assert(out_h[i] < sum + epsilon);
0099 assert(out_h[i] > sum - epsilon);
0100 }
0101
0102 std::cout << "All tests passed.\n";
0103 }
0104
0105 #include "FWCore/Framework/interface/MakerMacros.h"
// NOTE(review): DEFINE_FWK_MODULE is presumably provided by the MakerMacros.h header included just above,
// and presumably registers CUDATestKernelAdditionModule as a framework plugin; confirm against the macro definition.
0106 DEFINE_FWK_MODULE(CUDATestKernelAdditionModule);