Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-09 02:22:22

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

#include <cuda_runtime.h>

#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/global/EDAnalyzer.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

#include "CUDATestKernelAdditionAlgo.h"
0020 
0021 class CUDATestKernelAdditionModule : public edm::global::EDAnalyzer<> {
0022 public:
0023   explicit CUDATestKernelAdditionModule(edm::ParameterSet const& config);
0024   ~CUDATestKernelAdditionModule() override = default;
0025 
0026   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0027 
0028   void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;
0029 
0030 private:
0031   const uint32_t size_;
0032 };
0033 
0034 CUDATestKernelAdditionModule::CUDATestKernelAdditionModule(edm::ParameterSet const& config)
0035     : size_(config.getParameter<uint32_t>("size")) {}
0036 
0037 void CUDATestKernelAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0038   edm::ParameterSetDescription desc;
0039   desc.add<uint32_t>("size", 1024 * 1024);
0040   descriptions.addWithDefaultLabel(desc);
0041 }
0042 
0043 void CUDATestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
0044   // require CUDA for running
0045   edm::Service<CUDAInterface> service;
0046   if (not service or not service->enabled()) {
0047     std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n";
0048     return;
0049   }
0050 
0051   // random number generator with a gaussian distribution
0052   std::random_device rd{};
0053   std::default_random_engine rand{rd()};
0054   std::normal_distribution<float> dist{0., 1.};
0055 
0056   // tolerance
0057   constexpr float epsilon = 0.000001;
0058 
0059   // allocate input and output host buffers
0060   std::vector<float> in1_h(size_);
0061   std::vector<float> in2_h(size_);
0062   std::vector<float> out_h(size_);
0063 
0064   // fill the input buffers with random data, and the output buffer with zeros
0065   for (size_t i = 0; i < size_; ++i) {
0066     in1_h[i] = dist(rand);
0067     in2_h[i] = dist(rand);
0068     out_h[i] = 0.;
0069   }
0070 
0071   // allocate input and output buffers on the device
0072   float* in1_d;
0073   float* in2_d;
0074   float* out_d;
0075   cudaCheck(cudaMalloc(&in1_d, size_ * sizeof(float)));
0076   cudaCheck(cudaMalloc(&in2_d, size_ * sizeof(float)));
0077   cudaCheck(cudaMalloc(&out_d, size_ * sizeof(float)));
0078 
0079   // copy the input data to the device
0080   cudaCheck(cudaMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice));
0081   cudaCheck(cudaMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice));
0082 
0083   // fill the output buffer with zeros
0084   cudaCheck(cudaMemset(out_d, 0, size_ * sizeof(float)));
0085 
0086   // launch the 1-dimensional kernel for vector addition
0087   HeterogeneousTestCUDAKernelPlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_);
0088 
0089   // copy the results from the device to the host
0090   cudaCheck(cudaMemcpy(out_h.data(), out_d, size_ * sizeof(float), cudaMemcpyDeviceToHost));
0091 
0092   // wait for all the operations to complete
0093   cudaCheck(cudaDeviceSynchronize());
0094 
0095   // check the results
0096   for (size_t i = 0; i < size_; ++i) {
0097     float sum = in1_h[i] + in2_h[i];
0098     assert(out_h[i] < sum + epsilon);
0099     assert(out_h[i] > sum - epsilon);
0100   }
0101 
0102   std::cout << "All tests passed.\n";
0103 }
0104 
0105 #include "FWCore/Framework/interface/MakerMacros.h"
0106 DEFINE_FWK_MODULE(CUDATestKernelAdditionModule);