Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:44

0001 #include "FWCore/Concurrency/interface/FunctorTask.h"
0002 #include "FWCore/Concurrency/interface/WaitingTask.h"
0003 #include "FWCore/Concurrency/interface/WaitingTaskHolder.h"
0004 #include "FWCore/Framework/interface/Event.h"
0005 #include "FWCore/Framework/interface/Frameworkfwd.h"
0006 #include "FWCore/Framework/interface/MakerMacros.h"
0007 #include "FWCore/Framework/interface/stream/EDProducer.h"
0008 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0009 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0010 #include "FWCore/ServiceRegistry/interface/Service.h"
0011 
0012 #include "CUDADataFormats/Common/interface/Product.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0014 #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
0015 #include "HeterogeneousCore/CUDACore/interface/ContextState.h"
0016 #include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
0017 #include "HeterogeneousCore/CUDATest/interface/Thing.h"
0018 #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
0019 
0020 #include "TestCUDAProducerGPUKernel.h"
0021 
0022 #include <thread>
0023 
0024 class TestCUDAProducerGPUEWTask : public edm::stream::EDProducer<edm::ExternalWork> {
0025 public:
0026   explicit TestCUDAProducerGPUEWTask(edm::ParameterSet const& iConfig);
0027   ~TestCUDAProducerGPUEWTask() override = default;
0028 
0029   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0030 
0031   void acquire(edm::Event const& iEvent,
0032                edm::EventSetup const& iSetup,
0033                edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
0034   void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override;
0035 
0036 private:
0037   void addSimpleWork(edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask& ctx);
0038 
0039   std::string const label_;
0040   edm::EDGetTokenT<cms::cuda::Product<cms::cudatest::Thing>> const srcToken_;
0041   edm::EDPutTokenT<cms::cuda::Product<cms::cudatest::Thing>> const dstToken_;
0042   TestCUDAProducerGPUKernel gpuAlgo_;
0043   cms::cuda::ContextState ctxState_;
0044   cms::cuda::device::unique_ptr<float[]> devicePtr_;
0045   cms::cuda::host::noncached::unique_ptr<float> hostData_;
0046 };
0047 
0048 TestCUDAProducerGPUEWTask::TestCUDAProducerGPUEWTask(edm::ParameterSet const& iConfig)
0049     : label_{iConfig.getParameter<std::string>("@module_label")},
0050       srcToken_{consumes<cms::cuda::Product<cms::cudatest::Thing>>(iConfig.getParameter<edm::InputTag>("src"))},
0051       dstToken_{produces<cms::cuda::Product<cms::cudatest::Thing>>()} {
0052   edm::Service<CUDAInterface> cuda;
0053   if (cuda and cuda->enabled()) {
0054     hostData_ = cms::cuda::make_host_noncached_unique<float>();
0055   }
0056 }
0057 
0058 void TestCUDAProducerGPUEWTask::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0059   edm::ParameterSetDescription desc;
0060   desc.add<edm::InputTag>("src", edm::InputTag());
0061   descriptions.addWithDefaultLabel(desc);
0062   descriptions.setComment(
0063       "This EDProducer is part of the TestCUDAProducer* family. It models a GPU algorithm this is not the first "
0064       "algorithm in the chain of the GPU EDProducers, and that transfers some data from GPU to CPU multiple times "
0065       "alternating the transfers and kernel executions (e.g. to decide which kernel to run next based on a value from "
0066       "GPU). A synchronization between GPU and CPU is needed after each transfer. The synchronizations are implemented "
0067       "with the ExternalWork extension and explicit TBB tasks within the module. Produces "
0068       "cms::cuda::Product<cms::cudatest::Thing>.");
0069 }
0070 
0071 void TestCUDAProducerGPUEWTask::acquire(edm::Event const& iEvent,
0072                                         edm::EventSetup const& iSetup,
0073                                         edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
0074   edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire begin event "
0075                                                 << iEvent.id().event() << " stream " << iEvent.streamID();
0076 
0077   auto const& in = iEvent.get(srcToken_);
0078   cms::cuda::ScopedContextAcquire ctx{in, waitingTaskHolder, ctxState_};
0079 
0080   cms::cudatest::Thing const& input = ctx.get(in);
0081 
0082   devicePtr_ = gpuAlgo_.runAlgo(label_, input.get(), ctx.stream());
0083   // Mimick the need to transfer some of the GPU data back to CPU to
0084   // be used for something within this module, or to be put in the
0085   // event.
0086   cudaCheck(
0087       cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
0088   // Push a task to run addSimpleWork() after the asynchronous work
0089   // (and acquire()) has finished instead of produce()
0090   ctx.pushNextTask([iev = iEvent.id().event(), istr = iEvent.streamID(), this](cms::cuda::ScopedContextTask ctx) {
0091     addSimpleWork(iev, istr, ctx);
0092   });
0093 
0094   edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire end event "
0095                                                 << iEvent.id().event() << " stream " << iEvent.streamID();
0096 }
0097 
0098 void TestCUDAProducerGPUEWTask::addSimpleWork(edm::EventNumber_t eventID,
0099                                               edm::StreamID streamID,
0100                                               cms::cuda::ScopedContextTask& ctx) {
0101   if (*hostData_ < 13) {
0102     edm::LogVerbatim("TestCUDAProducerGPUEWTask")
0103         << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork begin event " << eventID << " stream " << streamID
0104         << " 10th element " << *hostData_ << " not satisfied, queueing more work";
0105     cudaCheck(
0106         cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
0107 
0108     ctx.pushNextTask(
0109         [eventID, streamID, this](cms::cuda::ScopedContextTask ctx) { addSimpleWork(eventID, streamID, ctx); });
0110     gpuAlgo_.runSimpleAlgo(devicePtr_.get(), ctx.stream());
0111     edm::LogVerbatim("TestCUDAProducerGPUEWTask")
0112         << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork end event " << eventID << " stream " << streamID;
0113   } else {
0114     edm::LogVerbatim("TestCUDAProducerGPUEWTask")
0115         << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork event " << eventID << " stream " << streamID
0116         << " 10th element " << *hostData_ << " not queueing more work";
0117   }
0118 }
0119 
0120 void TestCUDAProducerGPUEWTask::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) {
0121   edm::LogVerbatim("TestCUDAProducerGPUEWTask")
0122       << label_ << " TestCUDAProducerGPUEWTask::produce begin event " << iEvent.id().event() << " stream "
0123       << iEvent.streamID() << " 10th element " << *hostData_;
0124   if (*hostData_ != 13) {
0125     throw cms::Exception("Assert") << "Expecting 10th element to be 13, got " << *hostData_;
0126   }
0127 
0128   cms::cuda::ScopedContextProduce ctx{ctxState_};
0129 
0130   ctx.emplace(iEvent, dstToken_, std::move(devicePtr_));
0131 
0132   edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::produce end event "
0133                                                 << iEvent.id().event() << " stream " << iEvent.streamID();
0134 }
0135 
0136 DEFINE_FWK_MODULE(TestCUDAProducerGPUEWTask);