Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 12:49:25

0001 #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
0002 
0003 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0004 #include "FWCore/ServiceRegistry/interface/Service.h"
0005 #include "FWCore/Utilities/interface/Exception.h"
0006 #include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h"
0007 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0008 
0009 #include "chooseDevice.h"
0010 
namespace {
  // Heap-allocated payload handed to cudaStreamAddCallback through its
  // void* argument; ownership transfers to the callback itself.
  struct CallbackData {
    edm::WaitingTaskWithArenaHolder holder;
    int device;
  };

  // Host callback executed by the CUDA runtime once all work preceding it
  // in the stream has finished. On success it releases the waiting task;
  // on failure it forwards a cms::Exception carrying the CUDA error to the
  // task holder. Frees the CallbackData on every exit path.
  void CUDART_CB cudaScopedContextCallback(cudaStream_t streamId, cudaError_t status, void* data) {
    // Take ownership immediately so the payload is freed even if we throw.
    std::unique_ptr<CallbackData> payload{static_cast<CallbackData*>(data)};
    if (status != cudaSuccess) {
      // wrap the exception in a try-catch block to let GDB "catch throw" break on it
      try {
        throw cms::Exception("CUDAError") << "Callback of CUDA stream " << streamId << " in device "
                                          << payload->device << " error " << cudaGetErrorName(status) << ": "
                                          << cudaGetErrorString(status);
      } catch (cms::Exception&) {
        payload->holder.doneWaiting(std::current_exception());
      }
    } else {
      LogTrace("ScopedContext") << " GPU kernel finished (in callback) device " << payload->device << " CUDA stream "
                                << streamId;
      payload->holder.doneWaiting(nullptr);
    }
  }
}  // namespace
0038 
0039 namespace cms::cuda {
0040   namespace impl {
    // Construct a context for the given EDM stream: choose a device for the
    // stream ID, make it the current CUDA device, and check out a CUDA
    // stream from the cache.
    ScopedContextBase::ScopedContextBase(edm::StreamID streamID) : currentDevice_(chooseDevice(streamID)) {
      cudaCheck(cudaSetDevice(currentDevice_));
      // NOTE(review): the device is set before asking the cache for a
      // stream — presumably the cache serves streams for the current
      // device; confirm before reordering these statements.
      stream_ = getStreamCache().get();
    }
0045 
0046     ScopedContextBase::ScopedContextBase(const ProductBase& data) : currentDevice_(data.device()) {
0047       cudaCheck(cudaSetDevice(currentDevice_));
0048       if (data.mayReuseStream()) {
0049         stream_ = data.streamPtr();
0050       } else {
0051         stream_ = getStreamCache().get();
0052       }
0053     }
0054 
    // Construct a context on an explicitly given device with an already
    // acquired stream; shared ownership of the stream is taken via move.
    ScopedContextBase::ScopedContextBase(int device, SharedStreamPtr stream)
        : currentDevice_(device), stream_(std::move(stream)) {
      cudaCheck(cudaSetDevice(currentDevice_));
    }
0059 
0060     ////////////////////
0061 
0062     void ScopedContextGetterBase::synchronizeStreams(int dataDevice,
0063                                                      cudaStream_t dataStream,
0064                                                      bool available,
0065                                                      cudaEvent_t dataEvent) {
0066       if (dataDevice != device()) {
0067         // Eventually replace with prefetch to current device (assuming unified memory works)
0068         // If we won't go to unified memory, need to figure out something else...
0069         throw cms::Exception("LogicError") << "Handling data from multiple devices is not yet supported";
0070       }
0071 
0072       if (dataStream != stream()) {
0073         // Different streams, need to synchronize
0074         if (not available) {
0075           // Event not yet occurred, so need to add synchronization
0076           // here. Sychronization is done by making the CUDA stream to
0077           // wait for an event, so all subsequent work in the stream
0078           // will run only after the event has "occurred" (i.e. data
0079           // product became available).
0080           cudaCheck(cudaStreamWaitEvent(stream(), dataEvent, 0), "Failed to make a stream to wait for an event");
0081         }
0082       }
0083     }
0084 
    // Register cudaScopedContextCallback on `stream`, passing it a copy of
    // the held waiting task plus the device number via a heap-allocated
    // CallbackData that the callback takes ownership of and deletes.
    // NOTE(review): cudaStreamAddCallback is deprecated in recent CUDA
    // releases in favor of cudaLaunchHostFunc — migrating would also
    // require changing the callback's signature.
    void ScopedContextHolderHelper::enqueueCallback(int device, cudaStream_t stream) {
      cudaCheck(
          cudaStreamAddCallback(stream, cudaScopedContextCallback, new CallbackData{waitingTaskHolder_, device}, 0));
    }
0089   }  // namespace impl
0090 
0091   ////////////////////
0092 
  // On destruction, enqueue the host callback that signals the waiting task
  // once all work queued in this context's stream has finished, and hand
  // the device/stream over to the ContextState (if one was given) so a
  // later context can pick them up.
  // NOTE(review): cudaCheck inside enqueueCallback can throw; throwing from
  // a destructor terminates the process if CUDA is in a failed state.
  ScopedContextAcquire::~ScopedContextAcquire() {
    holderHelper_.enqueueCallback(device(), stream());
    if (contextState_) {
      contextState_->set(device(), streamPtr());
    }
  }
0099 
0100   void ScopedContextAcquire::throwNoState() {
0101     throw cms::Exception("LogicError")
0102         << "Calling ScopedContextAcquire::insertNextTask() requires ScopedContextAcquire to be constructed with "
0103            "ContextState, but that was not the case";
0104   }
0105 
0106   ////////////////////
0107 
  // On destruction, record this context's event in the stream so that
  // consumers (via synchronizeStreams()) can tell when the products queued
  // so far have become available.
  ScopedContextProduce::~ScopedContextProduce() {
    // Intentionally not checking the return value to avoid throwing
    // exceptions. If this call would fail, we should get failures
    // elsewhere as well.
    cudaEventRecord(event_.get(), stream());
  }
0114 
0115   ////////////////////
0116 
0117   ScopedContextTask::~ScopedContextTask() { holderHelper_.enqueueCallback(device(), stream()); }
0118 }  // namespace cms::cuda