Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-09-17 03:58:12

0001 // -*- C++ -*-
0002 //
0003 // Package:     HeterogeneousCore/CUDAServices
0004 // Class  :     NVProfilerService
0005 
0006 #include <algorithm>
0007 #include <iostream>
0008 #include <sstream>
0009 #include <string>
0010 #include <vector>
0011 
0012 #include <oneapi/tbb/concurrent_vector.h>
0013 
0014 #include <fmt/printf.h>
0015 
0016 #include <cuda_profiler_api.h>
0017 #include <nvToolsExt.h>
0018 
0019 #include "DataFormats/Common/interface/HLTPathStatus.h"
0020 #include "DataFormats/Provenance/interface/EventID.h"
0021 #include "DataFormats/Provenance/interface/LuminosityBlockID.h"
0022 #include "DataFormats/Provenance/interface/ModuleDescription.h"
0023 #include "DataFormats/Provenance/interface/RunID.h"
0024 #include "DataFormats/Provenance/interface/Timestamp.h"
0025 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0026 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0027 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0028 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0029 #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
0030 #include "FWCore/ServiceRegistry/interface/ConsumesInfo.h"
0031 #include "FWCore/ServiceRegistry/interface/GlobalContext.h"
0032 #include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
0033 #include "FWCore/ServiceRegistry/interface/PathContext.h"
0034 #include "FWCore/ServiceRegistry/interface/PathsAndConsumesOfModulesBase.h"
0035 #include "FWCore/ServiceRegistry/interface/ProcessContext.h"
0036 #include "FWCore/ServiceRegistry/interface/Service.h"
0037 #include "FWCore/ServiceRegistry/interface/StreamContext.h"
0038 #include "FWCore/ServiceRegistry/interface/SystemBounds.h"
0039 #include "FWCore/Utilities/interface/BranchType.h"
0040 #include "FWCore/Utilities/interface/Exception.h"
0041 #include "FWCore/Utilities/interface/ProductKindOfType.h"
0042 #include "FWCore/Utilities/interface/TimeOfDay.h"
0043 #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
0044 #include "HLTrigger/Timer/interface/ProcessCallGraph.h"
0045 
0046 using namespace std::string_literals;
0047 
0048 namespace {
0049   int nvtxDomainRangePush(nvtxDomainHandle_t domain, const char* message) {
0050     nvtxEventAttributes_t eventAttrib = {};
0051     eventAttrib.version = NVTX_VERSION;
0052     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0053     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0054     eventAttrib.message.ascii = message;
0055     return nvtxDomainRangePushEx(domain, &eventAttrib);
0056   }
0057 
0058   __attribute__((unused)) int nvtxDomainRangePushColor(nvtxDomainHandle_t domain, const char* message, uint32_t color) {
0059     nvtxEventAttributes_t eventAttrib = {};
0060     eventAttrib.version = NVTX_VERSION;
0061     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0062     eventAttrib.colorType = NVTX_COLOR_ARGB;
0063     eventAttrib.color = color;
0064     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0065     eventAttrib.message.ascii = message;
0066     return nvtxDomainRangePushEx(domain, &eventAttrib);
0067   }
0068 
0069   __attribute__((unused)) nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain, const char* message) {
0070     nvtxEventAttributes_t eventAttrib = {};
0071     eventAttrib.version = NVTX_VERSION;
0072     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0073     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0074     eventAttrib.message.ascii = message;
0075     return nvtxDomainRangeStartEx(domain, &eventAttrib);
0076   }
0077 
0078   nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain, const char* message, uint32_t color) {
0079     nvtxEventAttributes_t eventAttrib = {};
0080     eventAttrib.version = NVTX_VERSION;
0081     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0082     eventAttrib.colorType = NVTX_COLOR_ARGB;
0083     eventAttrib.color = color;
0084     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0085     eventAttrib.message.ascii = message;
0086     return nvtxDomainRangeStartEx(domain, &eventAttrib);
0087   }
0088 
0089   void nvtxDomainMark(nvtxDomainHandle_t domain, const char* message) {
0090     nvtxEventAttributes_t eventAttrib = {};
0091     eventAttrib.version = NVTX_VERSION;
0092     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0093     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0094     eventAttrib.message.ascii = message;
0095     nvtxDomainMarkEx(domain, &eventAttrib);
0096   }
0097 
0098   __attribute__((unused)) void nvtxDomainMarkColor(nvtxDomainHandle_t domain, const char* message, uint32_t color) {
0099     nvtxEventAttributes_t eventAttrib = {};
0100     eventAttrib.version = NVTX_VERSION;
0101     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0102     eventAttrib.colorType = NVTX_COLOR_ARGB;
0103     eventAttrib.color = color;
0104     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0105     eventAttrib.message.ascii = message;
0106     nvtxDomainMarkEx(domain, &eventAttrib);
0107   }
0108 
0109   enum {
0110     nvtxBlack = 0x00000000,
0111     nvtxRed = 0x00ff0000,
0112     nvtxDarkGreen = 0x00009900,
0113     nvtxGreen = 0x0000ff00,
0114     nvtxLightGreen = 0x00ccffcc,
0115     nvtxBlue = 0x000000ff,
0116     nvtxAmber = 0x00ffbf00,
0117     nvtxLightAmber = 0x00fff2cc,
0118     nvtxWhite = 0x00ffffff
0119   };
0120 
0121   constexpr nvtxRangeId_t nvtxInvalidRangeId = 0xfffffffffffffffful;
0122 }  // namespace
0123 
0124 class NVProfilerService {
0125 public:
0126   NVProfilerService(const edm::ParameterSet&, edm::ActivityRegistry&);
0127   ~NVProfilerService();
0128 
0129   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0130 
0131   void preallocate(edm::service::SystemBounds const&);
0132 
0133   // these signal pair are NOT guaranteed to be called by the same thread
0134   void preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&);
0135   void postBeginJob();
0136 
0137   // there is no preEndJob() signal
0138   void postEndJob();
0139 
0140   // these signal pair are NOT guaranteed to be called by the same thread
0141   void preGlobalBeginRun(edm::GlobalContext const&);
0142   void postGlobalBeginRun(edm::GlobalContext const&);
0143 
0144   // these signal pair are NOT guaranteed to be called by the same thread
0145   void preGlobalEndRun(edm::GlobalContext const&);
0146   void postGlobalEndRun(edm::GlobalContext const&);
0147 
0148   // these signal pair are NOT guaranteed to be called by the same thread
0149   void preStreamBeginRun(edm::StreamContext const&);
0150   void postStreamBeginRun(edm::StreamContext const&);
0151 
0152   // these signal pair are NOT guaranteed to be called by the same thread
0153   void preStreamEndRun(edm::StreamContext const&);
0154   void postStreamEndRun(edm::StreamContext const&);
0155 
0156   // these signal pair are NOT guaranteed to be called by the same thread
0157   void preGlobalBeginLumi(edm::GlobalContext const&);
0158   void postGlobalBeginLumi(edm::GlobalContext const&);
0159 
0160   // these signal pair are NOT guaranteed to be called by the same thread
0161   void preGlobalEndLumi(edm::GlobalContext const&);
0162   void postGlobalEndLumi(edm::GlobalContext const&);
0163 
0164   // these signal pair are NOT guaranteed to be called by the same thread
0165   void preStreamBeginLumi(edm::StreamContext const&);
0166   void postStreamBeginLumi(edm::StreamContext const&);
0167 
0168   // these signal pair are NOT guaranteed to be called by the same thread
0169   void preStreamEndLumi(edm::StreamContext const&);
0170   void postStreamEndLumi(edm::StreamContext const&);
0171 
0172   // these signal pair are NOT guaranteed to be called by the same thread
0173   void preEvent(edm::StreamContext const&);
0174   void postEvent(edm::StreamContext const&);
0175 
0176   // these signal pair are NOT guaranteed to be called by the same thread
0177   void prePathEvent(edm::StreamContext const&, edm::PathContext const&);
0178   void postPathEvent(edm::StreamContext const&, edm::PathContext const&, edm::HLTPathStatus const&);
0179 
0180   // these signal pair are NOT guaranteed to be called by the same thread
0181   void preModuleEventPrefetching(edm::StreamContext const&, edm::ModuleCallingContext const&);
0182   void postModuleEventPrefetching(edm::StreamContext const&, edm::ModuleCallingContext const&);
0183 
0184   // these signal pair are guaranteed to be called by the same thread
0185   void preOpenFile(std::string const&);
0186   void postOpenFile(std::string const&);
0187 
0188   // these signal pair are guaranteed to be called by the same thread
0189   void preCloseFile(std::string const&);
0190   void postCloseFile(std::string const&);
0191 
0192   // these signal pair are guaranteed to be called by the same thread
0193   void preSourceConstruction(edm::ModuleDescription const&);
0194   void postSourceConstruction(edm::ModuleDescription const&);
0195 
0196   // these signal pair are guaranteed to be called by the same thread
0197   void preSourceRun(edm::RunIndex);
0198   void postSourceRun(edm::RunIndex);
0199 
0200   // these signal pair are guaranteed to be called by the same thread
0201   void preSourceLumi(edm::LuminosityBlockIndex);
0202   void postSourceLumi(edm::LuminosityBlockIndex);
0203 
0204   // these signal pair are guaranteed to be called by the same thread
0205   void preSourceEvent(edm::StreamID);
0206   void postSourceEvent(edm::StreamID);
0207 
0208   // these signal pair are guaranteed to be called by the same thread
0209   void preModuleConstruction(edm::ModuleDescription const&);
0210   void postModuleConstruction(edm::ModuleDescription const&);
0211 
0212   // these signal pair are guaranteed to be called by the same thread
0213   void preModuleDestruction(edm::ModuleDescription const&);
0214   void postModuleDestruction(edm::ModuleDescription const&);
0215 
0216   // these signal pair are guaranteed to be called by the same thread
0217   void preModuleBeginJob(edm::ModuleDescription const&);
0218   void postModuleBeginJob(edm::ModuleDescription const&);
0219 
0220   // these signal pair are guaranteed to be called by the same thread
0221   void preModuleEndJob(edm::ModuleDescription const&);
0222   void postModuleEndJob(edm::ModuleDescription const&);
0223 
0224   // these signal pair are guaranteed to be called by the same thread
0225   void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
0226   void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
0227 
0228   // these signal pair are guaranteed to be called by the same thread
0229   void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
0230   void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
0231 
0232   // these signal pair are guaranteed to be called by the same thread
0233   void preModuleGlobalBeginRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0234   void postModuleGlobalBeginRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0235 
0236   // these signal pair are guaranteed to be called by the same thread
0237   void preModuleGlobalEndRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0238   void postModuleGlobalEndRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0239 
0240   // these signal pair are guaranteed to be called by the same thread
0241   void preModuleGlobalBeginLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0242   void postModuleGlobalBeginLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0243 
0244   // these signal pair are guaranteed to be called by the same thread
0245   void preModuleGlobalEndLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0246   void postModuleGlobalEndLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0247 
0248   // these signal pair are guaranteed to be called by the same thread
0249   void preModuleStreamBeginRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
0250   void postModuleStreamBeginRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
0251 
0252   // these signal pair are guaranteed to be called by the same thread
0253   void preModuleStreamEndRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
0254   void postModuleStreamEndRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
0255 
0256   // these signal pair are guaranteed to be called by the same thread
0257   void preModuleStreamBeginLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
0258   void postModuleStreamBeginLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
0259 
0260   // these signal pair are guaranteed to be called by the same thread
0261   void preModuleStreamEndLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
0262   void postModuleStreamEndLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
0263 
0264   // these signal pair are guaranteed to be called by the same thread
0265   void preModuleEventAcquire(edm::StreamContext const&, edm::ModuleCallingContext const&);
0266   void postModuleEventAcquire(edm::StreamContext const&, edm::ModuleCallingContext const&);
0267 
0268   // these signal pair are guaranteed to be called by the same thread
0269   void preModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const&);
0270   void postModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const&);
0271 
0272   // these signal pair are guaranteed to be called by the same thread
0273   void preModuleEventDelayedGet(edm::StreamContext const&, edm::ModuleCallingContext const&);
0274   void postModuleEventDelayedGet(edm::StreamContext const&, edm::ModuleCallingContext const&);
0275 
0276   // these signal pair are guaranteed to be called by the same thread
0277   void preEventReadFromSource(edm::StreamContext const&, edm::ModuleCallingContext const&);
0278   void postEventReadFromSource(edm::StreamContext const&, edm::ModuleCallingContext const&);
0279 
0280 private:
0281   bool highlight(std::string const& label) const {
0282     return (std::binary_search(highlightModules_.begin(), highlightModules_.end(), label));
0283   }
0284 
0285   uint32_t labelColor(std::string const& label) const { return highlight(label) ? nvtxAmber : nvtxGreen; }
0286 
0287   uint32_t labelColorLight(std::string const& label) const {
0288     return highlight(label) ? nvtxLightAmber : nvtxLightGreen;
0289   }
0290 
0291   // build a complete representation of the modules in the whole job
0292   ProcessCallGraph callgraph_;
0293 
0294   std::vector<std::string> highlightModules_;
0295   const bool showModulePrefetching_;
0296   const bool skipFirstEvent_;
0297 
0298   std::atomic<bool> globalFirstEventDone_ = false;
0299   std::vector<std::atomic<bool>> streamFirstEventDone_;
0300   std::vector<nvtxRangeId_t> event_;                        // per-stream event ranges
0301   std::vector<std::vector<nvtxRangeId_t>> stream_modules_;  // per-stream, per-module ranges
0302   // use a tbb::concurrent_vector rather than an std::vector because its final size is not known
0303   tbb::concurrent_vector<nvtxRangeId_t> global_modules_;  // global per-module events
0304 
0305   nvtxDomainHandle_t global_domain_;               // NVTX domain for global EDM transitions
0306   std::vector<nvtxDomainHandle_t> stream_domain_;  // NVTX domains for per-EDM-stream transitions
0307 };
0308 
0309 NVProfilerService::NVProfilerService(edm::ParameterSet const& config, edm::ActivityRegistry& registry)
0310     : highlightModules_(config.getUntrackedParameter<std::vector<std::string>>("highlightModules")),
0311       showModulePrefetching_(config.getUntrackedParameter<bool>("showModulePrefetching")),
0312       skipFirstEvent_(config.getUntrackedParameter<bool>("skipFirstEvent")) {
0313   // make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor
0314   edm::Service<CUDAService> cudaService;
0315 
0316   std::sort(highlightModules_.begin(), highlightModules_.end());
0317 
0318   // create the NVTX domain for global EDM transitions
0319   global_domain_ = nvtxDomainCreate("EDM Global");
0320 
0321   // enables profile collection; if profiling is already enabled it has no effect
0322   if (not skipFirstEvent_) {
0323     cudaProfilerStart();
0324   }
0325 
0326   registry.watchPreallocate(this, &NVProfilerService::preallocate);
0327 
0328   // these signal pair are NOT guaranteed to be called by the same thread
0329   registry.watchPreBeginJob(this, &NVProfilerService::preBeginJob);
0330   registry.watchPostBeginJob(this, &NVProfilerService::postBeginJob);
0331 
0332   // there is no preEndJob() signal
0333   registry.watchPostEndJob(this, &NVProfilerService::postEndJob);
0334 
0335   // these signal pair are NOT guaranteed to be called by the same thread
0336   registry.watchPreGlobalBeginRun(this, &NVProfilerService::preGlobalBeginRun);
0337   registry.watchPostGlobalBeginRun(this, &NVProfilerService::postGlobalBeginRun);
0338 
0339   // these signal pair are NOT guaranteed to be called by the same thread
0340   registry.watchPreGlobalEndRun(this, &NVProfilerService::preGlobalEndRun);
0341   registry.watchPostGlobalEndRun(this, &NVProfilerService::postGlobalEndRun);
0342 
0343   // these signal pair are NOT guaranteed to be called by the same thread
0344   registry.watchPreStreamBeginRun(this, &NVProfilerService::preStreamBeginRun);
0345   registry.watchPostStreamBeginRun(this, &NVProfilerService::postStreamBeginRun);
0346 
0347   // these signal pair are NOT guaranteed to be called by the same thread
0348   registry.watchPreStreamEndRun(this, &NVProfilerService::preStreamEndRun);
0349   registry.watchPostStreamEndRun(this, &NVProfilerService::postStreamEndRun);
0350 
0351   // these signal pair are NOT guaranteed to be called by the same thread
0352   registry.watchPreGlobalBeginLumi(this, &NVProfilerService::preGlobalBeginLumi);
0353   registry.watchPostGlobalBeginLumi(this, &NVProfilerService::postGlobalBeginLumi);
0354 
0355   // these signal pair are NOT guaranteed to be called by the same thread
0356   registry.watchPreGlobalEndLumi(this, &NVProfilerService::preGlobalEndLumi);
0357   registry.watchPostGlobalEndLumi(this, &NVProfilerService::postGlobalEndLumi);
0358 
0359   // these signal pair are NOT guaranteed to be called by the same thread
0360   registry.watchPreStreamBeginLumi(this, &NVProfilerService::preStreamBeginLumi);
0361   registry.watchPostStreamBeginLumi(this, &NVProfilerService::postStreamBeginLumi);
0362 
0363   // these signal pair are NOT guaranteed to be called by the same thread
0364   registry.watchPreStreamEndLumi(this, &NVProfilerService::preStreamEndLumi);
0365   registry.watchPostStreamEndLumi(this, &NVProfilerService::postStreamEndLumi);
0366 
0367   // these signal pair are NOT guaranteed to be called by the same thread
0368   registry.watchPreEvent(this, &NVProfilerService::preEvent);
0369   registry.watchPostEvent(this, &NVProfilerService::postEvent);
0370 
0371   // these signal pair are NOT guaranteed to be called by the same thread
0372   registry.watchPrePathEvent(this, &NVProfilerService::prePathEvent);
0373   registry.watchPostPathEvent(this, &NVProfilerService::postPathEvent);
0374 
0375   if (showModulePrefetching_) {
0376     // these signal pair are NOT guaranteed to be called by the same thread
0377     registry.watchPreModuleEventPrefetching(this, &NVProfilerService::preModuleEventPrefetching);
0378     registry.watchPostModuleEventPrefetching(this, &NVProfilerService::postModuleEventPrefetching);
0379   }
0380 
0381   // these signal pair are guaranteed to be called by the same thread
0382   registry.watchPreOpenFile(this, &NVProfilerService::preOpenFile);
0383   registry.watchPostOpenFile(this, &NVProfilerService::postOpenFile);
0384 
0385   // these signal pair are guaranteed to be called by the same thread
0386   registry.watchPreCloseFile(this, &NVProfilerService::preCloseFile);
0387   registry.watchPostCloseFile(this, &NVProfilerService::postCloseFile);
0388 
0389   // these signal pair are guaranteed to be called by the same thread
0390   registry.watchPreSourceConstruction(this, &NVProfilerService::preSourceConstruction);
0391   registry.watchPostSourceConstruction(this, &NVProfilerService::postSourceConstruction);
0392 
0393   // these signal pair are guaranteed to be called by the same thread
0394   registry.watchPreSourceRun(this, &NVProfilerService::preSourceRun);
0395   registry.watchPostSourceRun(this, &NVProfilerService::postSourceRun);
0396 
0397   // these signal pair are guaranteed to be called by the same thread
0398   registry.watchPreSourceLumi(this, &NVProfilerService::preSourceLumi);
0399   registry.watchPostSourceLumi(this, &NVProfilerService::postSourceLumi);
0400 
0401   // these signal pair are guaranteed to be called by the same thread
0402   registry.watchPreSourceEvent(this, &NVProfilerService::preSourceEvent);
0403   registry.watchPostSourceEvent(this, &NVProfilerService::postSourceEvent);
0404 
0405   // these signal pair are guaranteed to be called by the same thread
0406   registry.watchPreModuleConstruction(this, &NVProfilerService::preModuleConstruction);
0407   registry.watchPostModuleConstruction(this, &NVProfilerService::postModuleConstruction);
0408 
0409   // these signal pair are guaranteed to be called by the same thread
0410   registry.watchPreModuleDestruction(this, &NVProfilerService::preModuleDestruction);
0411   registry.watchPostModuleDestruction(this, &NVProfilerService::postModuleDestruction);
0412 
0413   // these signal pair are guaranteed to be called by the same thread
0414   registry.watchPreModuleBeginJob(this, &NVProfilerService::preModuleBeginJob);
0415   registry.watchPostModuleBeginJob(this, &NVProfilerService::postModuleBeginJob);
0416 
0417   // these signal pair are guaranteed to be called by the same thread
0418   registry.watchPreModuleEndJob(this, &NVProfilerService::preModuleEndJob);
0419   registry.watchPostModuleEndJob(this, &NVProfilerService::postModuleEndJob);
0420 
0421   // these signal pair are guaranteed to be called by the same thread
0422   registry.watchPreModuleBeginStream(this, &NVProfilerService::preModuleBeginStream);
0423   registry.watchPostModuleBeginStream(this, &NVProfilerService::postModuleBeginStream);
0424 
0425   // these signal pair are guaranteed to be called by the same thread
0426   registry.watchPreModuleEndStream(this, &NVProfilerService::preModuleEndStream);
0427   registry.watchPostModuleEndStream(this, &NVProfilerService::postModuleEndStream);
0428 
0429   // these signal pair are guaranteed to be called by the same thread
0430   registry.watchPreModuleGlobalBeginRun(this, &NVProfilerService::preModuleGlobalBeginRun);
0431   registry.watchPostModuleGlobalBeginRun(this, &NVProfilerService::postModuleGlobalBeginRun);
0432 
0433   // these signal pair are guaranteed to be called by the same thread
0434   registry.watchPreModuleGlobalEndRun(this, &NVProfilerService::preModuleGlobalEndRun);
0435   registry.watchPostModuleGlobalEndRun(this, &NVProfilerService::postModuleGlobalEndRun);
0436 
0437   // these signal pair are guaranteed to be called by the same thread
0438   registry.watchPreModuleGlobalBeginLumi(this, &NVProfilerService::preModuleGlobalBeginLumi);
0439   registry.watchPostModuleGlobalBeginLumi(this, &NVProfilerService::postModuleGlobalBeginLumi);
0440 
0441   // these signal pair are guaranteed to be called by the same thread
0442   registry.watchPreModuleGlobalEndLumi(this, &NVProfilerService::preModuleGlobalEndLumi);
0443   registry.watchPostModuleGlobalEndLumi(this, &NVProfilerService::postModuleGlobalEndLumi);
0444 
0445   // these signal pair are guaranteed to be called by the same thread
0446   registry.watchPreModuleStreamBeginRun(this, &NVProfilerService::preModuleStreamBeginRun);
0447   registry.watchPostModuleStreamBeginRun(this, &NVProfilerService::postModuleStreamBeginRun);
0448 
0449   // these signal pair are guaranteed to be called by the same thread
0450   registry.watchPreModuleStreamEndRun(this, &NVProfilerService::preModuleStreamEndRun);
0451   registry.watchPostModuleStreamEndRun(this, &NVProfilerService::postModuleStreamEndRun);
0452 
0453   // these signal pair are guaranteed to be called by the same thread
0454   registry.watchPreModuleStreamBeginLumi(this, &NVProfilerService::preModuleStreamBeginLumi);
0455   registry.watchPostModuleStreamBeginLumi(this, &NVProfilerService::postModuleStreamBeginLumi);
0456 
0457   // these signal pair are guaranteed to be called by the same thread
0458   registry.watchPreModuleStreamEndLumi(this, &NVProfilerService::preModuleStreamEndLumi);
0459   registry.watchPostModuleStreamEndLumi(this, &NVProfilerService::postModuleStreamEndLumi);
0460 
0461   // these signal pair are guaranteed to be called by the same thread
0462   registry.watchPreModuleEventAcquire(this, &NVProfilerService::preModuleEventAcquire);
0463   registry.watchPostModuleEventAcquire(this, &NVProfilerService::postModuleEventAcquire);
0464 
0465   // these signal pair are guaranteed to be called by the same thread
0466   registry.watchPreModuleEvent(this, &NVProfilerService::preModuleEvent);
0467   registry.watchPostModuleEvent(this, &NVProfilerService::postModuleEvent);
0468 
0469   // these signal pair are guaranteed to be called by the same thread
0470   registry.watchPreModuleEventDelayedGet(this, &NVProfilerService::preModuleEventDelayedGet);
0471   registry.watchPostModuleEventDelayedGet(this, &NVProfilerService::postModuleEventDelayedGet);
0472 
0473   // these signal pair are guaranteed to be called by the same thread
0474   registry.watchPreEventReadFromSource(this, &NVProfilerService::preEventReadFromSource);
0475   registry.watchPostEventReadFromSource(this, &NVProfilerService::postEventReadFromSource);
0476 }
0477 
0478 NVProfilerService::~NVProfilerService() {
0479   for (unsigned int sid = 0; sid < stream_domain_.size(); ++sid) {
0480     nvtxDomainDestroy(stream_domain_[sid]);
0481   }
0482   nvtxDomainDestroy(global_domain_);
0483   cudaProfilerStop();
0484 }
0485 
0486 void NVProfilerService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0487   edm::ParameterSetDescription desc;
0488   desc.addUntracked<std::vector<std::string>>("highlightModules", {})->setComment("");
0489   desc.addUntracked<bool>("showModulePrefetching", false)
0490       ->setComment("Show the stack of dependencies that requested to run a module.");
0491   desc.addUntracked<bool>("skipFirstEvent", false)
0492       ->setComment(
0493           "Start profiling after the first event has completed.\nWith multiple streams, ignore transitions belonging "
0494           "to events started in parallel to the first event.\nRequires running nvprof with the '--profile-from-start "
0495           "off' option.");
0496   descriptions.add("NVProfilerService", desc);
0497   descriptions.setComment(R"(This Service provides CMSSW-aware annotations to nvprof/nvvm.
0498 
0499 Notes on nvprof options:
0500   - the option '--profile-from-start off' should be used if skipFirstEvent is True.
0501   - the option '--cpu-profiling on' currently results in cmsRun being stuck at the beginning of the job.
0502   - the option '--cpu-thread-tracing on' is not compatible with jemalloc, and should only be used with cmsRunGlibC.)");
0503 }
0504 
0505 void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) {
0506   std::stringstream out;
0507   out << "preallocate: " << bounds.maxNumberOfConcurrentRuns() << " concurrent runs, "
0508       << bounds.maxNumberOfConcurrentLuminosityBlocks() << " luminosity sections, " << bounds.maxNumberOfStreams()
0509       << " streams\nrunning on " << bounds.maxNumberOfThreads() << " threads";
0510   nvtxDomainMark(global_domain_, out.str().c_str());
0511 
0512   auto concurrentStreams = bounds.maxNumberOfStreams();
0513   // create the NVTX domains for per-EDM-stream transitions
0514   stream_domain_.resize(concurrentStreams);
0515   for (unsigned int sid = 0; sid < concurrentStreams; ++sid) {
0516     stream_domain_[sid] = nvtxDomainCreate(fmt::sprintf("EDM Stream %d", sid).c_str());
0517   }
0518 
0519   event_.resize(concurrentStreams);
0520   stream_modules_.resize(concurrentStreams);
0521   if (skipFirstEvent_) {
0522     globalFirstEventDone_ = false;
0523     std::vector<std::atomic<bool>> tmp(concurrentStreams);
0524     for (auto& element : tmp)
0525       std::atomic_init(&element, false);
0526     streamFirstEventDone_ = std::move(tmp);
0527   }
0528 }
0529 
0530 void NVProfilerService::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
0531                                     edm::ProcessContext const& context) {
0532   callgraph_.preBeginJob(pathsAndConsumes, context);
0533   nvtxDomainMark(global_domain_, "preBeginJob");
0534 
0535   // this assumes that preBeginJob is not called concurrently with the modules' beginJob method
0536   // or the preBeginJob for a subprocess
0537   unsigned int modules = callgraph_.size();
0538   global_modules_.resize(modules, nvtxInvalidRangeId);
0539   for (unsigned int sid = 0; sid < stream_modules_.size(); ++sid) {
0540     stream_modules_[sid].resize(modules, nvtxInvalidRangeId);
0541   }
0542 }
0543 
0544 void NVProfilerService::postBeginJob() {
0545   if (not skipFirstEvent_ or globalFirstEventDone_) {
0546     nvtxDomainMark(global_domain_, "postBeginJob");
0547   }
0548 }
0549 
0550 void NVProfilerService::postEndJob() {
0551   if (not skipFirstEvent_ or globalFirstEventDone_) {
0552     nvtxDomainMark(global_domain_, "postEndJob");
0553   }
0554 }
0555 
0556 void NVProfilerService::preSourceEvent(edm::StreamID sid) {
0557   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0558     nvtxDomainRangePush(stream_domain_[sid], "source");
0559   }
0560 }
0561 
0562 void NVProfilerService::postSourceEvent(edm::StreamID sid) {
0563   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0564     nvtxDomainRangePop(stream_domain_[sid]);
0565   }
0566 }
0567 
0568 void NVProfilerService::preSourceLumi(edm::LuminosityBlockIndex index) {
0569   if (not skipFirstEvent_ or globalFirstEventDone_) {
0570     nvtxDomainRangePush(global_domain_, "source lumi");
0571   }
0572 }
0573 
0574 void NVProfilerService::postSourceLumi(edm::LuminosityBlockIndex index) {
0575   if (not skipFirstEvent_ or globalFirstEventDone_) {
0576     nvtxDomainRangePop(global_domain_);
0577   }
0578 }
0579 
0580 void NVProfilerService::preSourceRun(edm::RunIndex index) {
0581   if (not skipFirstEvent_ or globalFirstEventDone_) {
0582     nvtxDomainRangePush(global_domain_, "source run");
0583   }
0584 }
0585 
0586 void NVProfilerService::postSourceRun(edm::RunIndex index) {
0587   if (not skipFirstEvent_ or globalFirstEventDone_) {
0588     nvtxDomainRangePop(global_domain_);
0589   }
0590 }
0591 
0592 void NVProfilerService::preOpenFile(std::string const& lfn) {
0593   if (not skipFirstEvent_ or globalFirstEventDone_) {
0594     nvtxDomainRangePush(global_domain_, ("open file "s + lfn).c_str());
0595   }
0596 }
0597 
0598 void NVProfilerService::postOpenFile(std::string const& lfn) {
0599   if (not skipFirstEvent_ or globalFirstEventDone_) {
0600     nvtxDomainRangePop(global_domain_);
0601   }
0602 }
0603 
0604 void NVProfilerService::preCloseFile(std::string const& lfn) {
0605   if (not skipFirstEvent_ or globalFirstEventDone_) {
0606     nvtxDomainRangePush(global_domain_, ("close file "s + lfn).c_str());
0607   }
0608 }
0609 
0610 void NVProfilerService::postCloseFile(std::string const& lfn) {
0611   if (not skipFirstEvent_ or globalFirstEventDone_) {
0612     nvtxDomainRangePop(global_domain_);
0613   }
0614 }
0615 
0616 void NVProfilerService::preModuleBeginStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0617   auto sid = sc.streamID();
0618   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0619     auto mid = mcc.moduleDescription()->id();
0620     auto const& label = mcc.moduleDescription()->moduleLabel();
0621     auto const& msg = label + " begin stream";
0622     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0623     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0624   }
0625 }
0626 
0627 void NVProfilerService::postModuleBeginStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0628   auto sid = sc.streamID();
0629   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0630     auto mid = mcc.moduleDescription()->id();
0631     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0632     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0633   }
0634 }
0635 
0636 void NVProfilerService::preModuleEndStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0637   auto sid = sc.streamID();
0638   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0639     auto mid = mcc.moduleDescription()->id();
0640     auto const& label = mcc.moduleDescription()->moduleLabel();
0641     auto const& msg = label + " end stream";
0642     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0643     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0644   }
0645 }
0646 
0647 void NVProfilerService::postModuleEndStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0648   auto sid = sc.streamID();
0649   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0650     auto mid = mcc.moduleDescription()->id();
0651     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0652     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0653   }
0654 }
0655 
0656 void NVProfilerService::preGlobalBeginRun(edm::GlobalContext const& gc) {
0657   if (not skipFirstEvent_ or globalFirstEventDone_) {
0658     nvtxDomainRangePush(global_domain_, "global begin run");
0659   }
0660 }
0661 
0662 void NVProfilerService::postGlobalBeginRun(edm::GlobalContext const& gc) {
0663   if (not skipFirstEvent_ or globalFirstEventDone_) {
0664     nvtxDomainRangePop(global_domain_);
0665   }
0666 }
0667 
0668 void NVProfilerService::preGlobalEndRun(edm::GlobalContext const& gc) {
0669   if (not skipFirstEvent_ or globalFirstEventDone_) {
0670     nvtxDomainRangePush(global_domain_, "global end run");
0671   }
0672 }
0673 
0674 void NVProfilerService::postGlobalEndRun(edm::GlobalContext const& gc) {
0675   if (not skipFirstEvent_ or globalFirstEventDone_) {
0676     nvtxDomainRangePop(global_domain_);
0677   }
0678 }
0679 
0680 void NVProfilerService::preStreamBeginRun(edm::StreamContext const& sc) {
0681   auto sid = sc.streamID();
0682   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0683     nvtxDomainRangePush(stream_domain_[sid], "stream begin run");
0684   }
0685 }
0686 
0687 void NVProfilerService::postStreamBeginRun(edm::StreamContext const& sc) {
0688   auto sid = sc.streamID();
0689   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0690     nvtxDomainRangePop(stream_domain_[sid]);
0691   }
0692 }
0693 
0694 void NVProfilerService::preStreamEndRun(edm::StreamContext const& sc) {
0695   auto sid = sc.streamID();
0696   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0697     nvtxDomainRangePush(stream_domain_[sid], "stream end run");
0698   }
0699 }
0700 
0701 void NVProfilerService::postStreamEndRun(edm::StreamContext const& sc) {
0702   auto sid = sc.streamID();
0703   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0704     nvtxDomainRangePop(stream_domain_[sid]);
0705   }
0706 }
0707 
0708 void NVProfilerService::preGlobalBeginLumi(edm::GlobalContext const& gc) {
0709   if (not skipFirstEvent_ or globalFirstEventDone_) {
0710     nvtxDomainRangePush(global_domain_, "global begin lumi");
0711   }
0712 }
0713 
0714 void NVProfilerService::postGlobalBeginLumi(edm::GlobalContext const& gc) {
0715   if (not skipFirstEvent_ or globalFirstEventDone_) {
0716     nvtxDomainRangePop(global_domain_);
0717   }
0718 }
0719 
0720 void NVProfilerService::preGlobalEndLumi(edm::GlobalContext const& gc) {
0721   if (not skipFirstEvent_ or globalFirstEventDone_) {
0722     nvtxDomainRangePush(global_domain_, "global end lumi");
0723   }
0724 }
0725 
0726 void NVProfilerService::postGlobalEndLumi(edm::GlobalContext const& gc) {
0727   if (not skipFirstEvent_ or globalFirstEventDone_) {
0728     nvtxDomainRangePop(global_domain_);
0729   }
0730 }
0731 
0732 void NVProfilerService::preStreamBeginLumi(edm::StreamContext const& sc) {
0733   auto sid = sc.streamID();
0734   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0735     nvtxDomainRangePush(stream_domain_[sid], "stream begin lumi");
0736   }
0737 }
0738 
0739 void NVProfilerService::postStreamBeginLumi(edm::StreamContext const& sc) {
0740   auto sid = sc.streamID();
0741   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0742     nvtxDomainRangePop(stream_domain_[sid]);
0743   }
0744 }
0745 
0746 void NVProfilerService::preStreamEndLumi(edm::StreamContext const& sc) {
0747   auto sid = sc.streamID();
0748   nvtxDomainRangePush(stream_domain_[sid], "stream end lumi");
0749 }
0750 
0751 void NVProfilerService::postStreamEndLumi(edm::StreamContext const& sc) {
0752   auto sid = sc.streamID();
0753   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0754     nvtxDomainRangePop(stream_domain_[sid]);
0755   }
0756 }
0757 
0758 void NVProfilerService::preEvent(edm::StreamContext const& sc) {
0759   auto sid = sc.streamID();
0760   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0761     event_[sid] = nvtxDomainRangeStartColor(stream_domain_[sid], "event", nvtxDarkGreen);
0762   }
0763 }
0764 
0765 void NVProfilerService::postEvent(edm::StreamContext const& sc) {
0766   auto sid = sc.streamID();
0767   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0768     nvtxDomainRangeEnd(stream_domain_[sid], event_[sid]);
0769     event_[sid] = nvtxInvalidRangeId;
0770   } else {
0771     streamFirstEventDone_[sid] = true;
0772     auto identity = [](bool x) { return x; };
0773     if (std::all_of(streamFirstEventDone_.begin(), streamFirstEventDone_.end(), identity)) {
0774       bool expected = false;
0775       if (globalFirstEventDone_.compare_exchange_strong(expected, true))
0776         cudaProfilerStart();
0777     }
0778   }
0779 }
0780 
0781 void NVProfilerService::prePathEvent(edm::StreamContext const& sc, edm::PathContext const& pc) {
0782   auto sid = sc.streamID();
0783   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0784     nvtxDomainMark(global_domain_, ("before path "s + pc.pathName()).c_str());
0785   }
0786 }
0787 
0788 void NVProfilerService::postPathEvent(edm::StreamContext const& sc,
0789                                       edm::PathContext const& pc,
0790                                       edm::HLTPathStatus const& hlts) {
0791   auto sid = sc.streamID();
0792   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0793     nvtxDomainMark(global_domain_, ("after path "s + pc.pathName()).c_str());
0794   }
0795 }
0796 
0797 void NVProfilerService::preModuleEventPrefetching(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0798   auto sid = sc.streamID();
0799   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0800     auto mid = mcc.moduleDescription()->id();
0801     auto const& label = mcc.moduleDescription()->moduleLabel();
0802     auto const& msg = label + " prefetching";
0803     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0804     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColorLight(label));
0805   }
0806 }
0807 
0808 void NVProfilerService::postModuleEventPrefetching(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0809   auto sid = sc.streamID();
0810   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0811     auto mid = mcc.moduleDescription()->id();
0812     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0813     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0814   }
0815 }
0816 
0817 void NVProfilerService::preModuleConstruction(edm::ModuleDescription const& desc) {
0818   if (not skipFirstEvent_) {
0819     auto mid = desc.id();
0820     global_modules_.grow_to_at_least(mid + 1);
0821     auto const& label = desc.moduleLabel();
0822     auto const& msg = label + " construction";
0823     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
0824   }
0825 }
0826 
0827 void NVProfilerService::postModuleConstruction(edm::ModuleDescription const& desc) {
0828   if (not skipFirstEvent_) {
0829     auto mid = desc.id();
0830     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
0831     global_modules_[mid] = nvtxInvalidRangeId;
0832   }
0833 }
0834 
0835 void NVProfilerService::preModuleDestruction(edm::ModuleDescription const& desc) {
0836   if (not skipFirstEvent_) {
0837     auto mid = desc.id();
0838     global_modules_.grow_to_at_least(mid + 1);
0839     auto const& label = desc.moduleLabel();
0840     auto const& msg = label + " destruction";
0841     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
0842   }
0843 }
0844 
0845 void NVProfilerService::postModuleDestruction(edm::ModuleDescription const& desc) {
0846   if (not skipFirstEvent_) {
0847     auto mid = desc.id();
0848     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
0849     global_modules_[mid] = nvtxInvalidRangeId;
0850   }
0851 }
0852 
0853 void NVProfilerService::preModuleBeginJob(edm::ModuleDescription const& desc) {
0854   if (not skipFirstEvent_) {
0855     auto mid = desc.id();
0856     auto const& label = desc.moduleLabel();
0857     auto const& msg = label + " begin job";
0858     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
0859   }
0860 }
0861 
0862 void NVProfilerService::postModuleBeginJob(edm::ModuleDescription const& desc) {
0863   if (not skipFirstEvent_) {
0864     auto mid = desc.id();
0865     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
0866     global_modules_[mid] = nvtxInvalidRangeId;
0867   }
0868 }
0869 
0870 void NVProfilerService::preModuleEndJob(edm::ModuleDescription const& desc) {
0871   if (not skipFirstEvent_ or globalFirstEventDone_) {
0872     auto mid = desc.id();
0873     auto const& label = desc.moduleLabel();
0874     auto const& msg = label + " end job";
0875     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
0876   }
0877 }
0878 
0879 void NVProfilerService::postModuleEndJob(edm::ModuleDescription const& desc) {
0880   if (not skipFirstEvent_ or globalFirstEventDone_) {
0881     auto mid = desc.id();
0882     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
0883     global_modules_[mid] = nvtxInvalidRangeId;
0884   }
0885 }
0886 
0887 void NVProfilerService::preModuleEventAcquire(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0888   auto sid = sc.streamID();
0889   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0890     auto mid = mcc.moduleDescription()->id();
0891     auto const& label = mcc.moduleDescription()->moduleLabel();
0892     auto const& msg = label + " acquire";
0893     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0894     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0895   }
0896 }
0897 
0898 void NVProfilerService::postModuleEventAcquire(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0899   auto sid = sc.streamID();
0900   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0901     auto mid = mcc.moduleDescription()->id();
0902     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0903     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0904   }
0905 }
0906 
0907 void NVProfilerService::preModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0908   auto sid = sc.streamID();
0909   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0910     auto mid = mcc.moduleDescription()->id();
0911     auto const& label = mcc.moduleDescription()->moduleLabel();
0912     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0913     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], label.c_str(), labelColor(label));
0914   }
0915 }
0916 
0917 void NVProfilerService::postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0918   auto sid = sc.streamID();
0919   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0920     auto mid = mcc.moduleDescription()->id();
0921     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0922     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0923   }
0924 }
0925 
0926 void NVProfilerService::preModuleEventDelayedGet(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0927   /* FIXME
0928   auto sid = sc.streamID();
0929   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0930     auto mid = mcc.moduleDescription()->id();
0931     auto const & label = mcc.moduleDescription()->moduleLabel();
0932     auto const & msg = label + " delayed get";
0933     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0934     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], label.c_str(), labelColorLight(label));
0935   }
0936   */
0937 }
0938 
0939 void NVProfilerService::postModuleEventDelayedGet(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0940   /* FIXME
0941   auto sid = sc.streamID();
0942   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0943     auto mid = mcc.moduleDescription()->id();
0944     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0945     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0946   }
0947   */
0948 }
0949 
0950 void NVProfilerService::preEventReadFromSource(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0951   /* FIXME
0952   auto sid = sc.streamID();
0953   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0954     auto mid = mcc.moduleDescription()->id();
0955     auto const & label = mcc.moduleDescription()->moduleLabel();
0956     auto const & msg = label + " read from source";
0957     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0958     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColorLight(label));
0959   }
0960   */
0961 }
0962 
0963 void NVProfilerService::postEventReadFromSource(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0964   /* FIXME
0965   auto sid = sc.streamID();
0966   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0967     auto mid = mcc.moduleDescription()->id();
0968     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0969     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0970   }
0971   */
0972 }
0973 
0974 void NVProfilerService::preModuleStreamBeginRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0975   auto sid = sc.streamID();
0976   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0977     auto mid = mcc.moduleDescription()->id();
0978     auto const& label = mcc.moduleDescription()->moduleLabel();
0979     auto const& msg = label + " stream begin run";
0980     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0981     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0982   }
0983 }
0984 
0985 void NVProfilerService::postModuleStreamBeginRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0986   auto sid = sc.streamID();
0987   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0988     auto mid = mcc.moduleDescription()->id();
0989     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0990     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0991   }
0992 }
0993 
0994 void NVProfilerService::preModuleStreamEndRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0995   auto sid = sc.streamID();
0996   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0997     auto mid = mcc.moduleDescription()->id();
0998     auto const& label = mcc.moduleDescription()->moduleLabel();
0999     auto const& msg = label + " stream end run";
1000     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
1001     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
1002   }
1003 }
1004 
1005 void NVProfilerService::postModuleStreamEndRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1006   auto sid = sc.streamID();
1007   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1008     auto mid = mcc.moduleDescription()->id();
1009     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
1010     stream_modules_[sid][mid] = nvtxInvalidRangeId;
1011   }
1012 }
1013 
1014 void NVProfilerService::preModuleStreamBeginLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1015   auto sid = sc.streamID();
1016   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1017     auto mid = mcc.moduleDescription()->id();
1018     auto const& label = mcc.moduleDescription()->moduleLabel();
1019     auto const& msg = label + " stream begin lumi";
1020     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
1021     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
1022   }
1023 }
1024 
1025 void NVProfilerService::postModuleStreamBeginLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1026   auto sid = sc.streamID();
1027   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1028     auto mid = mcc.moduleDescription()->id();
1029     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
1030     stream_modules_[sid][mid] = nvtxInvalidRangeId;
1031   }
1032 }
1033 
1034 void NVProfilerService::preModuleStreamEndLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1035   auto sid = sc.streamID();
1036   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1037     auto mid = mcc.moduleDescription()->id();
1038     auto const& label = mcc.moduleDescription()->moduleLabel();
1039     auto const& msg = label + " stream end lumi";
1040     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
1041     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
1042   }
1043 }
1044 
1045 void NVProfilerService::postModuleStreamEndLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1046   auto sid = sc.streamID();
1047   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1048     auto mid = mcc.moduleDescription()->id();
1049     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
1050     stream_modules_[sid][mid] = nvtxInvalidRangeId;
1051   }
1052 }
1053 
1054 void NVProfilerService::preModuleGlobalBeginRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1055   if (not skipFirstEvent_ or globalFirstEventDone_) {
1056     auto mid = mcc.moduleDescription()->id();
1057     auto const& label = mcc.moduleDescription()->moduleLabel();
1058     auto const& msg = label + " global begin run";
1059     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1060   }
1061 }
1062 
1063 void NVProfilerService::postModuleGlobalBeginRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1064   if (not skipFirstEvent_ or globalFirstEventDone_) {
1065     auto mid = mcc.moduleDescription()->id();
1066     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1067     global_modules_[mid] = nvtxInvalidRangeId;
1068   }
1069 }
1070 
1071 void NVProfilerService::preModuleGlobalEndRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1072   if (not skipFirstEvent_ or globalFirstEventDone_) {
1073     auto mid = mcc.moduleDescription()->id();
1074     auto const& label = mcc.moduleDescription()->moduleLabel();
1075     auto const& msg = label + " global end run";
1076     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1077   }
1078 }
1079 
1080 void NVProfilerService::postModuleGlobalEndRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1081   if (not skipFirstEvent_ or globalFirstEventDone_) {
1082     auto mid = mcc.moduleDescription()->id();
1083     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1084     global_modules_[mid] = nvtxInvalidRangeId;
1085   }
1086 }
1087 
1088 void NVProfilerService::preModuleGlobalBeginLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1089   if (not skipFirstEvent_ or globalFirstEventDone_) {
1090     auto mid = mcc.moduleDescription()->id();
1091     auto const& label = mcc.moduleDescription()->moduleLabel();
1092     auto const& msg = label + " global begin lumi";
1093     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1094   }
1095 }
1096 
1097 void NVProfilerService::postModuleGlobalBeginLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1098   if (not skipFirstEvent_ or globalFirstEventDone_) {
1099     auto mid = mcc.moduleDescription()->id();
1100     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1101     global_modules_[mid] = nvtxInvalidRangeId;
1102   }
1103 }
1104 
1105 void NVProfilerService::preModuleGlobalEndLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1106   if (not skipFirstEvent_ or globalFirstEventDone_) {
1107     auto mid = mcc.moduleDescription()->id();
1108     auto const& label = mcc.moduleDescription()->moduleLabel();
1109     auto const& msg = label + " global end lumi";
1110     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1111   }
1112 }
1113 
1114 void NVProfilerService::postModuleGlobalEndLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1115   if (not skipFirstEvent_ or globalFirstEventDone_) {
1116     auto mid = mcc.moduleDescription()->id();
1117     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1118     global_modules_[mid] = nvtxInvalidRangeId;
1119   }
1120 }
1121 
1122 void NVProfilerService::preSourceConstruction(edm::ModuleDescription const& desc) {
1123   callgraph_.preSourceConstruction(desc);
1124 
1125   if (not skipFirstEvent_) {
1126     auto mid = desc.id();
1127     global_modules_.grow_to_at_least(mid + 1);
1128     auto const& label = desc.moduleLabel();
1129     auto const& msg = label + " construction";
1130     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1131   }
1132 }
1133 
1134 void NVProfilerService::postSourceConstruction(edm::ModuleDescription const& desc) {
1135   if (not skipFirstEvent_) {
1136     auto mid = desc.id();
1137     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1138     global_modules_[mid] = nvtxInvalidRangeId;
1139   }
1140 }
1141 
// Service registration for NVProfilerService.
// NOTE(review): DEFINE_FWK_SERVICE is presumably provided by the ServiceMaker.h header
// included immediately before its use - confirm.
1142 #include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
1143 DEFINE_FWK_SERVICE(NVProfilerService);