File indexing completed on 2024-04-06 12:15:44
0001 #include "FWCore/Concurrency/interface/FunctorTask.h"
0002 #include "FWCore/Concurrency/interface/WaitingTask.h"
0003 #include "FWCore/Concurrency/interface/WaitingTaskHolder.h"
0004 #include "FWCore/Framework/interface/Event.h"
0005 #include "FWCore/Framework/interface/Frameworkfwd.h"
0006 #include "FWCore/Framework/interface/MakerMacros.h"
0007 #include "FWCore/Framework/interface/stream/EDProducer.h"
0008 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0009 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0010 #include "FWCore/ServiceRegistry/interface/Service.h"
0011
0012 #include "CUDADataFormats/Common/interface/Product.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0014 #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
0015 #include "HeterogeneousCore/CUDACore/interface/ContextState.h"
0016 #include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
0017 #include "HeterogeneousCore/CUDATest/interface/Thing.h"
0018 #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
0019
0020 #include "TestCUDAProducerGPUKernel.h"
0021
0022 #include <thread>
0023
0024 class TestCUDAProducerGPUEWTask : public edm::stream::EDProducer<edm::ExternalWork> {
0025 public:
0026 explicit TestCUDAProducerGPUEWTask(edm::ParameterSet const& iConfig);
0027 ~TestCUDAProducerGPUEWTask() override = default;
0028
0029 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0030
0031 void acquire(edm::Event const& iEvent,
0032 edm::EventSetup const& iSetup,
0033 edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
0034 void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override;
0035
0036 private:
0037 void addSimpleWork(edm::EventNumber_t eventID, edm::StreamID streamID, cms::cuda::ScopedContextTask& ctx);
0038
0039 std::string const label_;
0040 edm::EDGetTokenT<cms::cuda::Product<cms::cudatest::Thing>> const srcToken_;
0041 edm::EDPutTokenT<cms::cuda::Product<cms::cudatest::Thing>> const dstToken_;
0042 TestCUDAProducerGPUKernel gpuAlgo_;
0043 cms::cuda::ContextState ctxState_;
0044 cms::cuda::device::unique_ptr<float[]> devicePtr_;
0045 cms::cuda::host::noncached::unique_ptr<float> hostData_;
0046 };
0047
0048 TestCUDAProducerGPUEWTask::TestCUDAProducerGPUEWTask(edm::ParameterSet const& iConfig)
0049 : label_{iConfig.getParameter<std::string>("@module_label")},
0050 srcToken_{consumes<cms::cuda::Product<cms::cudatest::Thing>>(iConfig.getParameter<edm::InputTag>("src"))},
0051 dstToken_{produces<cms::cuda::Product<cms::cudatest::Thing>>()} {
0052 edm::Service<CUDAInterface> cuda;
0053 if (cuda and cuda->enabled()) {
0054 hostData_ = cms::cuda::make_host_noncached_unique<float>();
0055 }
0056 }
0057
0058 void TestCUDAProducerGPUEWTask::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0059 edm::ParameterSetDescription desc;
0060 desc.add<edm::InputTag>("src", edm::InputTag());
0061 descriptions.addWithDefaultLabel(desc);
0062 descriptions.setComment(
0063 "This EDProducer is part of the TestCUDAProducer* family. It models a GPU algorithm this is not the first "
0064 "algorithm in the chain of the GPU EDProducers, and that transfers some data from GPU to CPU multiple times "
0065 "alternating the transfers and kernel executions (e.g. to decide which kernel to run next based on a value from "
0066 "GPU). A synchronization between GPU and CPU is needed after each transfer. The synchronizations are implemented "
0067 "with the ExternalWork extension and explicit TBB tasks within the module. Produces "
0068 "cms::cuda::Product<cms::cudatest::Thing>.");
0069 }
0070
0071 void TestCUDAProducerGPUEWTask::acquire(edm::Event const& iEvent,
0072 edm::EventSetup const& iSetup,
0073 edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
0074 edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire begin event "
0075 << iEvent.id().event() << " stream " << iEvent.streamID();
0076
0077 auto const& in = iEvent.get(srcToken_);
0078 cms::cuda::ScopedContextAcquire ctx{in, waitingTaskHolder, ctxState_};
0079
0080 cms::cudatest::Thing const& input = ctx.get(in);
0081
0082 devicePtr_ = gpuAlgo_.runAlgo(label_, input.get(), ctx.stream());
0083
0084
0085
0086 cudaCheck(
0087 cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
0088
0089
0090 ctx.pushNextTask([iev = iEvent.id().event(), istr = iEvent.streamID(), this](cms::cuda::ScopedContextTask ctx) {
0091 addSimpleWork(iev, istr, ctx);
0092 });
0093
0094 edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::acquire end event "
0095 << iEvent.id().event() << " stream " << iEvent.streamID();
0096 }
0097
0098 void TestCUDAProducerGPUEWTask::addSimpleWork(edm::EventNumber_t eventID,
0099 edm::StreamID streamID,
0100 cms::cuda::ScopedContextTask& ctx) {
0101 if (*hostData_ < 13) {
0102 edm::LogVerbatim("TestCUDAProducerGPUEWTask")
0103 << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork begin event " << eventID << " stream " << streamID
0104 << " 10th element " << *hostData_ << " not satisfied, queueing more work";
0105 cudaCheck(
0106 cudaMemcpyAsync(hostData_.get(), devicePtr_.get() + 10, sizeof(float), cudaMemcpyDeviceToHost, ctx.stream()));
0107
0108 ctx.pushNextTask(
0109 [eventID, streamID, this](cms::cuda::ScopedContextTask ctx) { addSimpleWork(eventID, streamID, ctx); });
0110 gpuAlgo_.runSimpleAlgo(devicePtr_.get(), ctx.stream());
0111 edm::LogVerbatim("TestCUDAProducerGPUEWTask")
0112 << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork end event " << eventID << " stream " << streamID;
0113 } else {
0114 edm::LogVerbatim("TestCUDAProducerGPUEWTask")
0115 << label_ << " TestCUDAProducerGPUEWTask::addSimpleWork event " << eventID << " stream " << streamID
0116 << " 10th element " << *hostData_ << " not queueing more work";
0117 }
0118 }
0119
0120 void TestCUDAProducerGPUEWTask::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) {
0121 edm::LogVerbatim("TestCUDAProducerGPUEWTask")
0122 << label_ << " TestCUDAProducerGPUEWTask::produce begin event " << iEvent.id().event() << " stream "
0123 << iEvent.streamID() << " 10th element " << *hostData_;
0124 if (*hostData_ != 13) {
0125 throw cms::Exception("Assert") << "Expecting 10th element to be 13, got " << *hostData_;
0126 }
0127
0128 cms::cuda::ScopedContextProduce ctx{ctxState_};
0129
0130 ctx.emplace(iEvent, dstToken_, std::move(devicePtr_));
0131
0132 edm::LogVerbatim("TestCUDAProducerGPUEWTask") << label_ << " TestCUDAProducerGPUEWTask::produce end event "
0133 << iEvent.id().event() << " stream " << iEvent.streamID();
0134 }
0135
0136 DEFINE_FWK_MODULE(TestCUDAProducerGPUEWTask);