Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:43

0001 #include <algorithm>
0002 #include <iostream>
0003 #include <sstream>
0004 #include <string>
0005 #include <vector>
0006 
0007 #include <oneapi/tbb/concurrent_vector.h>
0008 
0009 #include <fmt/printf.h>
0010 
0011 #include <cuda_profiler_api.h>
0012 #include <nvToolsExt.h>
0013 
0014 #include "DataFormats/Common/interface/HLTPathStatus.h"
0015 #include "DataFormats/Provenance/interface/EventID.h"
0016 #include "DataFormats/Provenance/interface/LuminosityBlockID.h"
0017 #include "DataFormats/Provenance/interface/ModuleDescription.h"
0018 #include "DataFormats/Provenance/interface/RunID.h"
0019 #include "DataFormats/Provenance/interface/Timestamp.h"
0020 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0021 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0022 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0023 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0024 #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
0025 #include "FWCore/ServiceRegistry/interface/ConsumesInfo.h"
0026 #include "FWCore/ServiceRegistry/interface/GlobalContext.h"
0027 #include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
0028 #include "FWCore/ServiceRegistry/interface/PathContext.h"
0029 #include "FWCore/ServiceRegistry/interface/PathsAndConsumesOfModulesBase.h"
0030 #include "FWCore/ServiceRegistry/interface/ProcessContext.h"
0031 #include "FWCore/ServiceRegistry/interface/Service.h"
0032 #include "FWCore/ServiceRegistry/interface/StreamContext.h"
0033 #include "FWCore/ServiceRegistry/interface/SystemBounds.h"
0034 #include "FWCore/Utilities/interface/BranchType.h"
0035 #include "FWCore/Utilities/interface/Exception.h"
0036 #include "FWCore/Utilities/interface/ProductKindOfType.h"
0037 #include "FWCore/Utilities/interface/TimeOfDay.h"
0038 #include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
0039 #include "HLTrigger/Timer/interface/ProcessCallGraph.h"
0040 
0041 using namespace std::string_literals;
0042 
0043 namespace {
0044   int nvtxDomainRangePush(nvtxDomainHandle_t domain, const char* message) {
0045     nvtxEventAttributes_t eventAttrib = {};
0046     eventAttrib.version = NVTX_VERSION;
0047     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0048     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0049     eventAttrib.message.ascii = message;
0050     return nvtxDomainRangePushEx(domain, &eventAttrib);
0051   }
0052 
0053   __attribute__((unused)) int nvtxDomainRangePushColor(nvtxDomainHandle_t domain, const char* message, uint32_t color) {
0054     nvtxEventAttributes_t eventAttrib = {};
0055     eventAttrib.version = NVTX_VERSION;
0056     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0057     eventAttrib.colorType = NVTX_COLOR_ARGB;
0058     eventAttrib.color = color;
0059     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0060     eventAttrib.message.ascii = message;
0061     return nvtxDomainRangePushEx(domain, &eventAttrib);
0062   }
0063 
0064   __attribute__((unused)) nvtxRangeId_t nvtxDomainRangeStart(nvtxDomainHandle_t domain, const char* message) {
0065     nvtxEventAttributes_t eventAttrib = {};
0066     eventAttrib.version = NVTX_VERSION;
0067     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0068     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0069     eventAttrib.message.ascii = message;
0070     return nvtxDomainRangeStartEx(domain, &eventAttrib);
0071   }
0072 
0073   nvtxRangeId_t nvtxDomainRangeStartColor(nvtxDomainHandle_t domain, const char* message, uint32_t color) {
0074     nvtxEventAttributes_t eventAttrib = {};
0075     eventAttrib.version = NVTX_VERSION;
0076     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0077     eventAttrib.colorType = NVTX_COLOR_ARGB;
0078     eventAttrib.color = color;
0079     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0080     eventAttrib.message.ascii = message;
0081     return nvtxDomainRangeStartEx(domain, &eventAttrib);
0082   }
0083 
0084   void nvtxDomainMark(nvtxDomainHandle_t domain, const char* message) {
0085     nvtxEventAttributes_t eventAttrib = {};
0086     eventAttrib.version = NVTX_VERSION;
0087     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0088     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0089     eventAttrib.message.ascii = message;
0090     nvtxDomainMarkEx(domain, &eventAttrib);
0091   }
0092 
0093   __attribute__((unused)) void nvtxDomainMarkColor(nvtxDomainHandle_t domain, const char* message, uint32_t color) {
0094     nvtxEventAttributes_t eventAttrib = {};
0095     eventAttrib.version = NVTX_VERSION;
0096     eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
0097     eventAttrib.colorType = NVTX_COLOR_ARGB;
0098     eventAttrib.color = color;
0099     eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
0100     eventAttrib.message.ascii = message;
0101     nvtxDomainMarkEx(domain, &eventAttrib);
0102   }
0103 
0104   enum {
0105     nvtxBlack = 0x00000000,
0106     nvtxRed = 0x00ff0000,
0107     nvtxDarkGreen = 0x00009900,
0108     nvtxGreen = 0x0000ff00,
0109     nvtxLightGreen = 0x00ccffcc,
0110     nvtxBlue = 0x000000ff,
0111     nvtxAmber = 0x00ffbf00,
0112     nvtxLightAmber = 0x00fff2cc,
0113     nvtxWhite = 0x00ffffff
0114   };
0115 
0116   constexpr nvtxRangeId_t nvtxInvalidRangeId = 0xfffffffffffffffful;
0117 }  // namespace
0118 
0119 class NVProfilerService {
0120 public:
0121   NVProfilerService(const edm::ParameterSet&, edm::ActivityRegistry&);
0122   ~NVProfilerService();
0123 
0124   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0125 
0126   void preallocate(edm::service::SystemBounds const&);
0127 
0128   // these signal pair are NOT guaranteed to be called by the same thread
0129   void preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&);
0130   void postBeginJob();
0131 
0132   // there is no preEndJob() signal
0133   void postEndJob();
0134 
0135   // these signal pair are NOT guaranteed to be called by the same thread
0136   void preGlobalBeginRun(edm::GlobalContext const&);
0137   void postGlobalBeginRun(edm::GlobalContext const&);
0138 
0139   // these signal pair are NOT guaranteed to be called by the same thread
0140   void preGlobalEndRun(edm::GlobalContext const&);
0141   void postGlobalEndRun(edm::GlobalContext const&);
0142 
0143   // these signal pair are NOT guaranteed to be called by the same thread
0144   void preStreamBeginRun(edm::StreamContext const&);
0145   void postStreamBeginRun(edm::StreamContext const&);
0146 
0147   // these signal pair are NOT guaranteed to be called by the same thread
0148   void preStreamEndRun(edm::StreamContext const&);
0149   void postStreamEndRun(edm::StreamContext const&);
0150 
0151   // these signal pair are NOT guaranteed to be called by the same thread
0152   void preGlobalBeginLumi(edm::GlobalContext const&);
0153   void postGlobalBeginLumi(edm::GlobalContext const&);
0154 
0155   // these signal pair are NOT guaranteed to be called by the same thread
0156   void preGlobalEndLumi(edm::GlobalContext const&);
0157   void postGlobalEndLumi(edm::GlobalContext const&);
0158 
0159   // these signal pair are NOT guaranteed to be called by the same thread
0160   void preStreamBeginLumi(edm::StreamContext const&);
0161   void postStreamBeginLumi(edm::StreamContext const&);
0162 
0163   // these signal pair are NOT guaranteed to be called by the same thread
0164   void preStreamEndLumi(edm::StreamContext const&);
0165   void postStreamEndLumi(edm::StreamContext const&);
0166 
0167   // these signal pair are NOT guaranteed to be called by the same thread
0168   void preEvent(edm::StreamContext const&);
0169   void postEvent(edm::StreamContext const&);
0170 
0171   // these signal pair are NOT guaranteed to be called by the same thread
0172   void prePathEvent(edm::StreamContext const&, edm::PathContext const&);
0173   void postPathEvent(edm::StreamContext const&, edm::PathContext const&, edm::HLTPathStatus const&);
0174 
0175   // these signal pair are NOT guaranteed to be called by the same thread
0176   void preModuleEventPrefetching(edm::StreamContext const&, edm::ModuleCallingContext const&);
0177   void postModuleEventPrefetching(edm::StreamContext const&, edm::ModuleCallingContext const&);
0178 
0179   // these signal pair are guaranteed to be called by the same thread
0180   void preOpenFile(std::string const&);
0181   void postOpenFile(std::string const&);
0182 
0183   // these signal pair are guaranteed to be called by the same thread
0184   void preCloseFile(std::string const&);
0185   void postCloseFile(std::string const&);
0186 
0187   // these signal pair are guaranteed to be called by the same thread
0188   void preSourceConstruction(edm::ModuleDescription const&);
0189   void postSourceConstruction(edm::ModuleDescription const&);
0190 
0191   // these signal pair are guaranteed to be called by the same thread
0192   void preSourceRun(edm::RunIndex);
0193   void postSourceRun(edm::RunIndex);
0194 
0195   // these signal pair are guaranteed to be called by the same thread
0196   void preSourceLumi(edm::LuminosityBlockIndex);
0197   void postSourceLumi(edm::LuminosityBlockIndex);
0198 
0199   // these signal pair are guaranteed to be called by the same thread
0200   void preSourceEvent(edm::StreamID);
0201   void postSourceEvent(edm::StreamID);
0202 
0203   // these signal pair are guaranteed to be called by the same thread
0204   void preModuleConstruction(edm::ModuleDescription const&);
0205   void postModuleConstruction(edm::ModuleDescription const&);
0206 
0207   // these signal pair are guaranteed to be called by the same thread
0208   void preModuleDestruction(edm::ModuleDescription const&);
0209   void postModuleDestruction(edm::ModuleDescription const&);
0210 
0211   // these signal pair are guaranteed to be called by the same thread
0212   void preModuleBeginJob(edm::ModuleDescription const&);
0213   void postModuleBeginJob(edm::ModuleDescription const&);
0214 
0215   // these signal pair are guaranteed to be called by the same thread
0216   void preModuleEndJob(edm::ModuleDescription const&);
0217   void postModuleEndJob(edm::ModuleDescription const&);
0218 
0219   // these signal pair are guaranteed to be called by the same thread
0220   void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
0221   void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
0222 
0223   // these signal pair are guaranteed to be called by the same thread
0224   void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
0225   void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
0226 
0227   // these signal pair are guaranteed to be called by the same thread
0228   void preModuleGlobalBeginRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0229   void postModuleGlobalBeginRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0230 
0231   // these signal pair are guaranteed to be called by the same thread
0232   void preModuleGlobalEndRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0233   void postModuleGlobalEndRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0234 
0235   // these signal pair are guaranteed to be called by the same thread
0236   void preModuleGlobalBeginLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0237   void postModuleGlobalBeginLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0238 
0239   // these signal pair are guaranteed to be called by the same thread
0240   void preModuleGlobalEndLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0241   void postModuleGlobalEndLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
0242 
0243   // these signal pair are guaranteed to be called by the same thread
0244   void preModuleStreamBeginRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
0245   void postModuleStreamBeginRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
0246 
0247   // these signal pair are guaranteed to be called by the same thread
0248   void preModuleStreamEndRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
0249   void postModuleStreamEndRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
0250 
0251   // these signal pair are guaranteed to be called by the same thread
0252   void preModuleStreamBeginLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
0253   void postModuleStreamBeginLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
0254 
0255   // these signal pair are guaranteed to be called by the same thread
0256   void preModuleStreamEndLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
0257   void postModuleStreamEndLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
0258 
0259   // these signal pair are guaranteed to be called by the same thread
0260   void preModuleEventAcquire(edm::StreamContext const&, edm::ModuleCallingContext const&);
0261   void postModuleEventAcquire(edm::StreamContext const&, edm::ModuleCallingContext const&);
0262 
0263   // these signal pair are guaranteed to be called by the same thread
0264   void preModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const&);
0265   void postModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const&);
0266 
0267   // these signal pair are guaranteed to be called by the same thread
0268   void preModuleEventDelayedGet(edm::StreamContext const&, edm::ModuleCallingContext const&);
0269   void postModuleEventDelayedGet(edm::StreamContext const&, edm::ModuleCallingContext const&);
0270 
0271   // these signal pair are guaranteed to be called by the same thread
0272   void preEventReadFromSource(edm::StreamContext const&, edm::ModuleCallingContext const&);
0273   void postEventReadFromSource(edm::StreamContext const&, edm::ModuleCallingContext const&);
0274 
0275 private:
0276   bool highlight(std::string const& label) const {
0277     return (std::binary_search(highlightModules_.begin(), highlightModules_.end(), label));
0278   }
0279 
0280   uint32_t labelColor(std::string const& label) const { return highlight(label) ? nvtxAmber : nvtxGreen; }
0281 
0282   uint32_t labelColorLight(std::string const& label) const {
0283     return highlight(label) ? nvtxLightAmber : nvtxLightGreen;
0284   }
0285 
0286   // build a complete representation of the modules in the whole job
0287   ProcessCallGraph callgraph_;
0288 
0289   std::vector<std::string> highlightModules_;
0290   const bool showModulePrefetching_;
0291   const bool skipFirstEvent_;
0292 
0293   std::atomic<bool> globalFirstEventDone_ = false;
0294   std::vector<std::atomic<bool>> streamFirstEventDone_;
0295   std::vector<nvtxRangeId_t> event_;                        // per-stream event ranges
0296   std::vector<std::vector<nvtxRangeId_t>> stream_modules_;  // per-stream, per-module ranges
0297   // use a tbb::concurrent_vector rather than an std::vector because its final size is not known
0298   tbb::concurrent_vector<nvtxRangeId_t> global_modules_;  // global per-module events
0299 
0300   nvtxDomainHandle_t global_domain_;               // NVTX domain for global EDM transitions
0301   std::vector<nvtxDomainHandle_t> stream_domain_;  // NVTX domains for per-EDM-stream transitions
0302 };
0303 
0304 NVProfilerService::NVProfilerService(edm::ParameterSet const& config, edm::ActivityRegistry& registry)
0305     : highlightModules_(config.getUntrackedParameter<std::vector<std::string>>("highlightModules")),
0306       showModulePrefetching_(config.getUntrackedParameter<bool>("showModulePrefetching")),
0307       skipFirstEvent_(config.getUntrackedParameter<bool>("skipFirstEvent")) {
0308   // make sure that CUDA is initialised, and that the CUDAInterface destructor is called after this service's destructor
0309   edm::Service<CUDAInterface> cuda;
0310   if (not cuda or not cuda->enabled())
0311     return;
0312 
0313   std::sort(highlightModules_.begin(), highlightModules_.end());
0314 
0315   // create the NVTX domain for global EDM transitions
0316   global_domain_ = nvtxDomainCreate("EDM Global");
0317 
0318   // enables profile collection; if profiling is already enabled it has no effect
0319   if (not skipFirstEvent_) {
0320     cudaProfilerStart();
0321   }
0322 
0323   registry.watchPreallocate(this, &NVProfilerService::preallocate);
0324 
0325   // these signal pair are NOT guaranteed to be called by the same thread
0326   registry.watchPreBeginJob(this, &NVProfilerService::preBeginJob);
0327   registry.watchPostBeginJob(this, &NVProfilerService::postBeginJob);
0328 
0329   // there is no preEndJob() signal
0330   registry.watchPostEndJob(this, &NVProfilerService::postEndJob);
0331 
0332   // these signal pair are NOT guaranteed to be called by the same thread
0333   registry.watchPreGlobalBeginRun(this, &NVProfilerService::preGlobalBeginRun);
0334   registry.watchPostGlobalBeginRun(this, &NVProfilerService::postGlobalBeginRun);
0335 
0336   // these signal pair are NOT guaranteed to be called by the same thread
0337   registry.watchPreGlobalEndRun(this, &NVProfilerService::preGlobalEndRun);
0338   registry.watchPostGlobalEndRun(this, &NVProfilerService::postGlobalEndRun);
0339 
0340   // these signal pair are NOT guaranteed to be called by the same thread
0341   registry.watchPreStreamBeginRun(this, &NVProfilerService::preStreamBeginRun);
0342   registry.watchPostStreamBeginRun(this, &NVProfilerService::postStreamBeginRun);
0343 
0344   // these signal pair are NOT guaranteed to be called by the same thread
0345   registry.watchPreStreamEndRun(this, &NVProfilerService::preStreamEndRun);
0346   registry.watchPostStreamEndRun(this, &NVProfilerService::postStreamEndRun);
0347 
0348   // these signal pair are NOT guaranteed to be called by the same thread
0349   registry.watchPreGlobalBeginLumi(this, &NVProfilerService::preGlobalBeginLumi);
0350   registry.watchPostGlobalBeginLumi(this, &NVProfilerService::postGlobalBeginLumi);
0351 
0352   // these signal pair are NOT guaranteed to be called by the same thread
0353   registry.watchPreGlobalEndLumi(this, &NVProfilerService::preGlobalEndLumi);
0354   registry.watchPostGlobalEndLumi(this, &NVProfilerService::postGlobalEndLumi);
0355 
0356   // these signal pair are NOT guaranteed to be called by the same thread
0357   registry.watchPreStreamBeginLumi(this, &NVProfilerService::preStreamBeginLumi);
0358   registry.watchPostStreamBeginLumi(this, &NVProfilerService::postStreamBeginLumi);
0359 
0360   // these signal pair are NOT guaranteed to be called by the same thread
0361   registry.watchPreStreamEndLumi(this, &NVProfilerService::preStreamEndLumi);
0362   registry.watchPostStreamEndLumi(this, &NVProfilerService::postStreamEndLumi);
0363 
0364   // these signal pair are NOT guaranteed to be called by the same thread
0365   registry.watchPreEvent(this, &NVProfilerService::preEvent);
0366   registry.watchPostEvent(this, &NVProfilerService::postEvent);
0367 
0368   // these signal pair are NOT guaranteed to be called by the same thread
0369   registry.watchPrePathEvent(this, &NVProfilerService::prePathEvent);
0370   registry.watchPostPathEvent(this, &NVProfilerService::postPathEvent);
0371 
0372   if (showModulePrefetching_) {
0373     // these signal pair are NOT guaranteed to be called by the same thread
0374     registry.watchPreModuleEventPrefetching(this, &NVProfilerService::preModuleEventPrefetching);
0375     registry.watchPostModuleEventPrefetching(this, &NVProfilerService::postModuleEventPrefetching);
0376   }
0377 
0378   // these signal pair are guaranteed to be called by the same thread
0379   registry.watchPreOpenFile(this, &NVProfilerService::preOpenFile);
0380   registry.watchPostOpenFile(this, &NVProfilerService::postOpenFile);
0381 
0382   // these signal pair are guaranteed to be called by the same thread
0383   registry.watchPreCloseFile(this, &NVProfilerService::preCloseFile);
0384   registry.watchPostCloseFile(this, &NVProfilerService::postCloseFile);
0385 
0386   // these signal pair are guaranteed to be called by the same thread
0387   registry.watchPreSourceConstruction(this, &NVProfilerService::preSourceConstruction);
0388   registry.watchPostSourceConstruction(this, &NVProfilerService::postSourceConstruction);
0389 
0390   // these signal pair are guaranteed to be called by the same thread
0391   registry.watchPreSourceRun(this, &NVProfilerService::preSourceRun);
0392   registry.watchPostSourceRun(this, &NVProfilerService::postSourceRun);
0393 
0394   // these signal pair are guaranteed to be called by the same thread
0395   registry.watchPreSourceLumi(this, &NVProfilerService::preSourceLumi);
0396   registry.watchPostSourceLumi(this, &NVProfilerService::postSourceLumi);
0397 
0398   // these signal pair are guaranteed to be called by the same thread
0399   registry.watchPreSourceEvent(this, &NVProfilerService::preSourceEvent);
0400   registry.watchPostSourceEvent(this, &NVProfilerService::postSourceEvent);
0401 
0402   // these signal pair are guaranteed to be called by the same thread
0403   registry.watchPreModuleConstruction(this, &NVProfilerService::preModuleConstruction);
0404   registry.watchPostModuleConstruction(this, &NVProfilerService::postModuleConstruction);
0405 
0406   // these signal pair are guaranteed to be called by the same thread
0407   registry.watchPreModuleDestruction(this, &NVProfilerService::preModuleDestruction);
0408   registry.watchPostModuleDestruction(this, &NVProfilerService::postModuleDestruction);
0409 
0410   // these signal pair are guaranteed to be called by the same thread
0411   registry.watchPreModuleBeginJob(this, &NVProfilerService::preModuleBeginJob);
0412   registry.watchPostModuleBeginJob(this, &NVProfilerService::postModuleBeginJob);
0413 
0414   // these signal pair are guaranteed to be called by the same thread
0415   registry.watchPreModuleEndJob(this, &NVProfilerService::preModuleEndJob);
0416   registry.watchPostModuleEndJob(this, &NVProfilerService::postModuleEndJob);
0417 
0418   // these signal pair are guaranteed to be called by the same thread
0419   registry.watchPreModuleBeginStream(this, &NVProfilerService::preModuleBeginStream);
0420   registry.watchPostModuleBeginStream(this, &NVProfilerService::postModuleBeginStream);
0421 
0422   // these signal pair are guaranteed to be called by the same thread
0423   registry.watchPreModuleEndStream(this, &NVProfilerService::preModuleEndStream);
0424   registry.watchPostModuleEndStream(this, &NVProfilerService::postModuleEndStream);
0425 
0426   // these signal pair are guaranteed to be called by the same thread
0427   registry.watchPreModuleGlobalBeginRun(this, &NVProfilerService::preModuleGlobalBeginRun);
0428   registry.watchPostModuleGlobalBeginRun(this, &NVProfilerService::postModuleGlobalBeginRun);
0429 
0430   // these signal pair are guaranteed to be called by the same thread
0431   registry.watchPreModuleGlobalEndRun(this, &NVProfilerService::preModuleGlobalEndRun);
0432   registry.watchPostModuleGlobalEndRun(this, &NVProfilerService::postModuleGlobalEndRun);
0433 
0434   // these signal pair are guaranteed to be called by the same thread
0435   registry.watchPreModuleGlobalBeginLumi(this, &NVProfilerService::preModuleGlobalBeginLumi);
0436   registry.watchPostModuleGlobalBeginLumi(this, &NVProfilerService::postModuleGlobalBeginLumi);
0437 
0438   // these signal pair are guaranteed to be called by the same thread
0439   registry.watchPreModuleGlobalEndLumi(this, &NVProfilerService::preModuleGlobalEndLumi);
0440   registry.watchPostModuleGlobalEndLumi(this, &NVProfilerService::postModuleGlobalEndLumi);
0441 
0442   // these signal pair are guaranteed to be called by the same thread
0443   registry.watchPreModuleStreamBeginRun(this, &NVProfilerService::preModuleStreamBeginRun);
0444   registry.watchPostModuleStreamBeginRun(this, &NVProfilerService::postModuleStreamBeginRun);
0445 
0446   // these signal pair are guaranteed to be called by the same thread
0447   registry.watchPreModuleStreamEndRun(this, &NVProfilerService::preModuleStreamEndRun);
0448   registry.watchPostModuleStreamEndRun(this, &NVProfilerService::postModuleStreamEndRun);
0449 
0450   // these signal pair are guaranteed to be called by the same thread
0451   registry.watchPreModuleStreamBeginLumi(this, &NVProfilerService::preModuleStreamBeginLumi);
0452   registry.watchPostModuleStreamBeginLumi(this, &NVProfilerService::postModuleStreamBeginLumi);
0453 
0454   // these signal pair are guaranteed to be called by the same thread
0455   registry.watchPreModuleStreamEndLumi(this, &NVProfilerService::preModuleStreamEndLumi);
0456   registry.watchPostModuleStreamEndLumi(this, &NVProfilerService::postModuleStreamEndLumi);
0457 
0458   // these signal pair are guaranteed to be called by the same thread
0459   registry.watchPreModuleEventAcquire(this, &NVProfilerService::preModuleEventAcquire);
0460   registry.watchPostModuleEventAcquire(this, &NVProfilerService::postModuleEventAcquire);
0461 
0462   // these signal pair are guaranteed to be called by the same thread
0463   registry.watchPreModuleEvent(this, &NVProfilerService::preModuleEvent);
0464   registry.watchPostModuleEvent(this, &NVProfilerService::postModuleEvent);
0465 
0466   // these signal pair are guaranteed to be called by the same thread
0467   registry.watchPreModuleEventDelayedGet(this, &NVProfilerService::preModuleEventDelayedGet);
0468   registry.watchPostModuleEventDelayedGet(this, &NVProfilerService::postModuleEventDelayedGet);
0469 
0470   // these signal pair are guaranteed to be called by the same thread
0471   registry.watchPreEventReadFromSource(this, &NVProfilerService::preEventReadFromSource);
0472   registry.watchPostEventReadFromSource(this, &NVProfilerService::postEventReadFromSource);
0473 }
0474 
0475 NVProfilerService::~NVProfilerService() {
0476   for (unsigned int sid = 0; sid < stream_domain_.size(); ++sid) {
0477     nvtxDomainDestroy(stream_domain_[sid]);
0478   }
0479   nvtxDomainDestroy(global_domain_);
0480   cudaProfilerStop();
0481 }
0482 
0483 void NVProfilerService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0484   edm::ParameterSetDescription desc;
0485   desc.addUntracked<std::vector<std::string>>("highlightModules", {})->setComment("");
0486   desc.addUntracked<bool>("showModulePrefetching", false)
0487       ->setComment("Show the stack of dependencies that requested to run a module.");
0488   desc.addUntracked<bool>("skipFirstEvent", false)
0489       ->setComment(
0490           "Start profiling after the first event has completed.\nWith multiple streams, ignore transitions belonging "
0491           "to events started in parallel to the first event.\nRequires running nvprof with the '--profile-from-start "
0492           "off' option.");
0493   descriptions.add("NVProfilerService", desc);
0494   descriptions.setComment(R"(This Service provides CMSSW-aware annotations to nvprof/nvvm.
0495 
0496 Notes on nvprof options:
0497   - the option '--profile-from-start off' should be used if skipFirstEvent is True.
0498   - the option '--cpu-profiling on' currently results in cmsRun being stuck at the beginning of the job.
0499   - the option '--cpu-thread-tracing on' is not compatible with jemalloc, and should only be used with cmsRunGlibC.)");
0500 }
0501 
0502 void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) {
0503   std::stringstream out;
0504   out << "preallocate: " << bounds.maxNumberOfConcurrentRuns() << " concurrent runs, "
0505       << bounds.maxNumberOfConcurrentLuminosityBlocks() << " luminosity sections, " << bounds.maxNumberOfStreams()
0506       << " streams\nrunning on " << bounds.maxNumberOfThreads() << " threads";
0507   nvtxDomainMark(global_domain_, out.str().c_str());
0508 
0509   auto concurrentStreams = bounds.maxNumberOfStreams();
0510   // create the NVTX domains for per-EDM-stream transitions
0511   stream_domain_.resize(concurrentStreams);
0512   for (unsigned int sid = 0; sid < concurrentStreams; ++sid) {
0513     stream_domain_[sid] = nvtxDomainCreate(fmt::sprintf("EDM Stream %d", sid).c_str());
0514   }
0515 
0516   event_.resize(concurrentStreams);
0517   stream_modules_.resize(concurrentStreams);
0518   if (skipFirstEvent_) {
0519     globalFirstEventDone_ = false;
0520     std::vector<std::atomic<bool>> tmp(concurrentStreams);
0521     for (auto& element : tmp)
0522       std::atomic_init(&element, false);
0523     streamFirstEventDone_ = std::move(tmp);
0524   }
0525 }
0526 
0527 void NVProfilerService::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
0528                                     edm::ProcessContext const& context) {
0529   callgraph_.preBeginJob(pathsAndConsumes, context);
0530   nvtxDomainMark(global_domain_, "preBeginJob");
0531 
0532   // this assumes that preBeginJob is not called concurrently with the modules' beginJob method
0533   // or the preBeginJob for a subprocess
0534   unsigned int modules = callgraph_.size();
0535   global_modules_.resize(modules, nvtxInvalidRangeId);
0536   for (unsigned int sid = 0; sid < stream_modules_.size(); ++sid) {
0537     stream_modules_[sid].resize(modules, nvtxInvalidRangeId);
0538   }
0539 }
0540 
0541 void NVProfilerService::postBeginJob() {
0542   if (not skipFirstEvent_ or globalFirstEventDone_) {
0543     nvtxDomainMark(global_domain_, "postBeginJob");
0544   }
0545 }
0546 
0547 void NVProfilerService::postEndJob() {
0548   if (not skipFirstEvent_ or globalFirstEventDone_) {
0549     nvtxDomainMark(global_domain_, "postEndJob");
0550   }
0551 }
0552 
0553 void NVProfilerService::preSourceEvent(edm::StreamID sid) {
0554   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0555     nvtxDomainRangePush(stream_domain_[sid], "source");
0556   }
0557 }
0558 
0559 void NVProfilerService::postSourceEvent(edm::StreamID sid) {
0560   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0561     nvtxDomainRangePop(stream_domain_[sid]);
0562   }
0563 }
0564 
0565 void NVProfilerService::preSourceLumi(edm::LuminosityBlockIndex index) {
0566   if (not skipFirstEvent_ or globalFirstEventDone_) {
0567     nvtxDomainRangePush(global_domain_, "source lumi");
0568   }
0569 }
0570 
0571 void NVProfilerService::postSourceLumi(edm::LuminosityBlockIndex index) {
0572   if (not skipFirstEvent_ or globalFirstEventDone_) {
0573     nvtxDomainRangePop(global_domain_);
0574   }
0575 }
0576 
0577 void NVProfilerService::preSourceRun(edm::RunIndex index) {
0578   if (not skipFirstEvent_ or globalFirstEventDone_) {
0579     nvtxDomainRangePush(global_domain_, "source run");
0580   }
0581 }
0582 
0583 void NVProfilerService::postSourceRun(edm::RunIndex index) {
0584   if (not skipFirstEvent_ or globalFirstEventDone_) {
0585     nvtxDomainRangePop(global_domain_);
0586   }
0587 }
0588 
0589 void NVProfilerService::preOpenFile(std::string const& lfn) {
0590   if (not skipFirstEvent_ or globalFirstEventDone_) {
0591     nvtxDomainRangePush(global_domain_, ("open file "s + lfn).c_str());
0592   }
0593 }
0594 
0595 void NVProfilerService::postOpenFile(std::string const& lfn) {
0596   if (not skipFirstEvent_ or globalFirstEventDone_) {
0597     nvtxDomainRangePop(global_domain_);
0598   }
0599 }
0600 
0601 void NVProfilerService::preCloseFile(std::string const& lfn) {
0602   if (not skipFirstEvent_ or globalFirstEventDone_) {
0603     nvtxDomainRangePush(global_domain_, ("close file "s + lfn).c_str());
0604   }
0605 }
0606 
0607 void NVProfilerService::postCloseFile(std::string const& lfn) {
0608   if (not skipFirstEvent_ or globalFirstEventDone_) {
0609     nvtxDomainRangePop(global_domain_);
0610   }
0611 }
0612 
0613 void NVProfilerService::preModuleBeginStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0614   auto sid = sc.streamID();
0615   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0616     auto mid = mcc.moduleDescription()->id();
0617     auto const& label = mcc.moduleDescription()->moduleLabel();
0618     auto const& msg = label + " begin stream";
0619     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0620     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0621   }
0622 }
0623 
0624 void NVProfilerService::postModuleBeginStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0625   auto sid = sc.streamID();
0626   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0627     auto mid = mcc.moduleDescription()->id();
0628     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0629     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0630   }
0631 }
0632 
0633 void NVProfilerService::preModuleEndStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0634   auto sid = sc.streamID();
0635   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0636     auto mid = mcc.moduleDescription()->id();
0637     auto const& label = mcc.moduleDescription()->moduleLabel();
0638     auto const& msg = label + " end stream";
0639     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0640     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0641   }
0642 }
0643 
0644 void NVProfilerService::postModuleEndStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0645   auto sid = sc.streamID();
0646   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0647     auto mid = mcc.moduleDescription()->id();
0648     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0649     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0650   }
0651 }
0652 
0653 void NVProfilerService::preGlobalBeginRun(edm::GlobalContext const& gc) {
0654   if (not skipFirstEvent_ or globalFirstEventDone_) {
0655     nvtxDomainRangePush(global_domain_, "global begin run");
0656   }
0657 }
0658 
0659 void NVProfilerService::postGlobalBeginRun(edm::GlobalContext const& gc) {
0660   if (not skipFirstEvent_ or globalFirstEventDone_) {
0661     nvtxDomainRangePop(global_domain_);
0662   }
0663 }
0664 
0665 void NVProfilerService::preGlobalEndRun(edm::GlobalContext const& gc) {
0666   if (not skipFirstEvent_ or globalFirstEventDone_) {
0667     nvtxDomainRangePush(global_domain_, "global end run");
0668   }
0669 }
0670 
0671 void NVProfilerService::postGlobalEndRun(edm::GlobalContext const& gc) {
0672   if (not skipFirstEvent_ or globalFirstEventDone_) {
0673     nvtxDomainRangePop(global_domain_);
0674   }
0675 }
0676 
0677 void NVProfilerService::preStreamBeginRun(edm::StreamContext const& sc) {
0678   auto sid = sc.streamID();
0679   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0680     nvtxDomainRangePush(stream_domain_[sid], "stream begin run");
0681   }
0682 }
0683 
0684 void NVProfilerService::postStreamBeginRun(edm::StreamContext const& sc) {
0685   auto sid = sc.streamID();
0686   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0687     nvtxDomainRangePop(stream_domain_[sid]);
0688   }
0689 }
0690 
0691 void NVProfilerService::preStreamEndRun(edm::StreamContext const& sc) {
0692   auto sid = sc.streamID();
0693   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0694     nvtxDomainRangePush(stream_domain_[sid], "stream end run");
0695   }
0696 }
0697 
0698 void NVProfilerService::postStreamEndRun(edm::StreamContext const& sc) {
0699   auto sid = sc.streamID();
0700   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0701     nvtxDomainRangePop(stream_domain_[sid]);
0702   }
0703 }
0704 
0705 void NVProfilerService::preGlobalBeginLumi(edm::GlobalContext const& gc) {
0706   if (not skipFirstEvent_ or globalFirstEventDone_) {
0707     nvtxDomainRangePush(global_domain_, "global begin lumi");
0708   }
0709 }
0710 
0711 void NVProfilerService::postGlobalBeginLumi(edm::GlobalContext const& gc) {
0712   if (not skipFirstEvent_ or globalFirstEventDone_) {
0713     nvtxDomainRangePop(global_domain_);
0714   }
0715 }
0716 
0717 void NVProfilerService::preGlobalEndLumi(edm::GlobalContext const& gc) {
0718   if (not skipFirstEvent_ or globalFirstEventDone_) {
0719     nvtxDomainRangePush(global_domain_, "global end lumi");
0720   }
0721 }
0722 
0723 void NVProfilerService::postGlobalEndLumi(edm::GlobalContext const& gc) {
0724   if (not skipFirstEvent_ or globalFirstEventDone_) {
0725     nvtxDomainRangePop(global_domain_);
0726   }
0727 }
0728 
0729 void NVProfilerService::preStreamBeginLumi(edm::StreamContext const& sc) {
0730   auto sid = sc.streamID();
0731   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0732     nvtxDomainRangePush(stream_domain_[sid], "stream begin lumi");
0733   }
0734 }
0735 
0736 void NVProfilerService::postStreamBeginLumi(edm::StreamContext const& sc) {
0737   auto sid = sc.streamID();
0738   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0739     nvtxDomainRangePop(stream_domain_[sid]);
0740   }
0741 }
0742 
0743 void NVProfilerService::preStreamEndLumi(edm::StreamContext const& sc) {
0744   auto sid = sc.streamID();
0745   nvtxDomainRangePush(stream_domain_[sid], "stream end lumi");
0746 }
0747 
0748 void NVProfilerService::postStreamEndLumi(edm::StreamContext const& sc) {
0749   auto sid = sc.streamID();
0750   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0751     nvtxDomainRangePop(stream_domain_[sid]);
0752   }
0753 }
0754 
0755 void NVProfilerService::preEvent(edm::StreamContext const& sc) {
0756   auto sid = sc.streamID();
0757   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0758     event_[sid] = nvtxDomainRangeStartColor(stream_domain_[sid], "event", nvtxDarkGreen);
0759   }
0760 }
0761 
0762 void NVProfilerService::postEvent(edm::StreamContext const& sc) {
0763   auto sid = sc.streamID();
0764   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0765     nvtxDomainRangeEnd(stream_domain_[sid], event_[sid]);
0766     event_[sid] = nvtxInvalidRangeId;
0767   } else {
0768     streamFirstEventDone_[sid] = true;
0769     auto identity = [](bool x) { return x; };
0770     if (std::all_of(streamFirstEventDone_.begin(), streamFirstEventDone_.end(), identity)) {
0771       bool expected = false;
0772       if (globalFirstEventDone_.compare_exchange_strong(expected, true))
0773         cudaProfilerStart();
0774     }
0775   }
0776 }
0777 
0778 void NVProfilerService::prePathEvent(edm::StreamContext const& sc, edm::PathContext const& pc) {
0779   auto sid = sc.streamID();
0780   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0781     nvtxDomainMark(global_domain_, ("before path "s + pc.pathName()).c_str());
0782   }
0783 }
0784 
0785 void NVProfilerService::postPathEvent(edm::StreamContext const& sc,
0786                                       edm::PathContext const& pc,
0787                                       edm::HLTPathStatus const& hlts) {
0788   auto sid = sc.streamID();
0789   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0790     nvtxDomainMark(global_domain_, ("after path "s + pc.pathName()).c_str());
0791   }
0792 }
0793 
0794 void NVProfilerService::preModuleEventPrefetching(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0795   auto sid = sc.streamID();
0796   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0797     auto mid = mcc.moduleDescription()->id();
0798     auto const& label = mcc.moduleDescription()->moduleLabel();
0799     auto const& msg = label + " prefetching";
0800     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0801     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColorLight(label));
0802   }
0803 }
0804 
0805 void NVProfilerService::postModuleEventPrefetching(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0806   auto sid = sc.streamID();
0807   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0808     auto mid = mcc.moduleDescription()->id();
0809     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0810     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0811   }
0812 }
0813 
0814 void NVProfilerService::preModuleConstruction(edm::ModuleDescription const& desc) {
0815   if (not skipFirstEvent_) {
0816     auto mid = desc.id();
0817     global_modules_.grow_to_at_least(mid + 1);
0818     auto const& label = desc.moduleLabel();
0819     auto const& msg = label + " construction";
0820     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
0821   }
0822 }
0823 
0824 void NVProfilerService::postModuleConstruction(edm::ModuleDescription const& desc) {
0825   if (not skipFirstEvent_) {
0826     auto mid = desc.id();
0827     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
0828     global_modules_[mid] = nvtxInvalidRangeId;
0829   }
0830 }
0831 
0832 void NVProfilerService::preModuleDestruction(edm::ModuleDescription const& desc) {
0833   if (not skipFirstEvent_) {
0834     auto mid = desc.id();
0835     global_modules_.grow_to_at_least(mid + 1);
0836     auto const& label = desc.moduleLabel();
0837     auto const& msg = label + " destruction";
0838     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
0839   }
0840 }
0841 
0842 void NVProfilerService::postModuleDestruction(edm::ModuleDescription const& desc) {
0843   if (not skipFirstEvent_) {
0844     auto mid = desc.id();
0845     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
0846     global_modules_[mid] = nvtxInvalidRangeId;
0847   }
0848 }
0849 
0850 void NVProfilerService::preModuleBeginJob(edm::ModuleDescription const& desc) {
0851   if (not skipFirstEvent_) {
0852     auto mid = desc.id();
0853     auto const& label = desc.moduleLabel();
0854     auto const& msg = label + " begin job";
0855     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
0856   }
0857 }
0858 
0859 void NVProfilerService::postModuleBeginJob(edm::ModuleDescription const& desc) {
0860   if (not skipFirstEvent_) {
0861     auto mid = desc.id();
0862     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
0863     global_modules_[mid] = nvtxInvalidRangeId;
0864   }
0865 }
0866 
0867 void NVProfilerService::preModuleEndJob(edm::ModuleDescription const& desc) {
0868   if (not skipFirstEvent_ or globalFirstEventDone_) {
0869     auto mid = desc.id();
0870     auto const& label = desc.moduleLabel();
0871     auto const& msg = label + " end job";
0872     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
0873   }
0874 }
0875 
0876 void NVProfilerService::postModuleEndJob(edm::ModuleDescription const& desc) {
0877   if (not skipFirstEvent_ or globalFirstEventDone_) {
0878     auto mid = desc.id();
0879     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
0880     global_modules_[mid] = nvtxInvalidRangeId;
0881   }
0882 }
0883 
0884 void NVProfilerService::preModuleEventAcquire(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0885   auto sid = sc.streamID();
0886   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0887     auto mid = mcc.moduleDescription()->id();
0888     auto const& label = mcc.moduleDescription()->moduleLabel();
0889     auto const& msg = label + " acquire";
0890     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0891     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0892   }
0893 }
0894 
0895 void NVProfilerService::postModuleEventAcquire(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0896   auto sid = sc.streamID();
0897   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0898     auto mid = mcc.moduleDescription()->id();
0899     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0900     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0901   }
0902 }
0903 
0904 void NVProfilerService::preModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0905   auto sid = sc.streamID();
0906   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0907     auto mid = mcc.moduleDescription()->id();
0908     auto const& label = mcc.moduleDescription()->moduleLabel();
0909     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0910     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], label.c_str(), labelColor(label));
0911   }
0912 }
0913 
0914 void NVProfilerService::postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0915   auto sid = sc.streamID();
0916   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0917     auto mid = mcc.moduleDescription()->id();
0918     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0919     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0920   }
0921 }
0922 
0923 void NVProfilerService::preModuleEventDelayedGet(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0924   /* FIXME
0925   auto sid = sc.streamID();
0926   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0927     auto mid = mcc.moduleDescription()->id();
0928     auto const & label = mcc.moduleDescription()->moduleLabel();
0929     auto const & msg = label + " delayed get";
0930     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0931     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], label.c_str(), labelColorLight(label));
0932   }
0933   */
0934 }
0935 
0936 void NVProfilerService::postModuleEventDelayedGet(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0937   /* FIXME
0938   auto sid = sc.streamID();
0939   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0940     auto mid = mcc.moduleDescription()->id();
0941     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0942     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0943   }
0944   */
0945 }
0946 
0947 void NVProfilerService::preEventReadFromSource(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0948   /* FIXME
0949   auto sid = sc.streamID();
0950   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0951     auto mid = mcc.moduleDescription()->id();
0952     auto const & label = mcc.moduleDescription()->moduleLabel();
0953     auto const & msg = label + " read from source";
0954     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0955     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColorLight(label));
0956   }
0957   */
0958 }
0959 
0960 void NVProfilerService::postEventReadFromSource(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0961   /* FIXME
0962   auto sid = sc.streamID();
0963   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0964     auto mid = mcc.moduleDescription()->id();
0965     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0966     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0967   }
0968   */
0969 }
0970 
0971 void NVProfilerService::preModuleStreamBeginRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0972   auto sid = sc.streamID();
0973   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0974     auto mid = mcc.moduleDescription()->id();
0975     auto const& label = mcc.moduleDescription()->moduleLabel();
0976     auto const& msg = label + " stream begin run";
0977     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0978     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0979   }
0980 }
0981 
0982 void NVProfilerService::postModuleStreamBeginRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0983   auto sid = sc.streamID();
0984   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0985     auto mid = mcc.moduleDescription()->id();
0986     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
0987     stream_modules_[sid][mid] = nvtxInvalidRangeId;
0988   }
0989 }
0990 
0991 void NVProfilerService::preModuleStreamEndRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
0992   auto sid = sc.streamID();
0993   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
0994     auto mid = mcc.moduleDescription()->id();
0995     auto const& label = mcc.moduleDescription()->moduleLabel();
0996     auto const& msg = label + " stream end run";
0997     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
0998     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
0999   }
1000 }
1001 
1002 void NVProfilerService::postModuleStreamEndRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1003   auto sid = sc.streamID();
1004   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1005     auto mid = mcc.moduleDescription()->id();
1006     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
1007     stream_modules_[sid][mid] = nvtxInvalidRangeId;
1008   }
1009 }
1010 
1011 void NVProfilerService::preModuleStreamBeginLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1012   auto sid = sc.streamID();
1013   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1014     auto mid = mcc.moduleDescription()->id();
1015     auto const& label = mcc.moduleDescription()->moduleLabel();
1016     auto const& msg = label + " stream begin lumi";
1017     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
1018     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
1019   }
1020 }
1021 
1022 void NVProfilerService::postModuleStreamBeginLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1023   auto sid = sc.streamID();
1024   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1025     auto mid = mcc.moduleDescription()->id();
1026     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
1027     stream_modules_[sid][mid] = nvtxInvalidRangeId;
1028   }
1029 }
1030 
1031 void NVProfilerService::preModuleStreamEndLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1032   auto sid = sc.streamID();
1033   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1034     auto mid = mcc.moduleDescription()->id();
1035     auto const& label = mcc.moduleDescription()->moduleLabel();
1036     auto const& msg = label + " stream end lumi";
1037     assert(stream_modules_[sid][mid] == nvtxInvalidRangeId);
1038     stream_modules_[sid][mid] = nvtxDomainRangeStartColor(stream_domain_[sid], msg.c_str(), labelColor(label));
1039   }
1040 }
1041 
1042 void NVProfilerService::postModuleStreamEndLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1043   auto sid = sc.streamID();
1044   if (not skipFirstEvent_ or streamFirstEventDone_[sid]) {
1045     auto mid = mcc.moduleDescription()->id();
1046     nvtxDomainRangeEnd(stream_domain_[sid], stream_modules_[sid][mid]);
1047     stream_modules_[sid][mid] = nvtxInvalidRangeId;
1048   }
1049 }
1050 
1051 void NVProfilerService::preModuleGlobalBeginRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1052   if (not skipFirstEvent_ or globalFirstEventDone_) {
1053     auto mid = mcc.moduleDescription()->id();
1054     auto const& label = mcc.moduleDescription()->moduleLabel();
1055     auto const& msg = label + " global begin run";
1056     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1057   }
1058 }
1059 
1060 void NVProfilerService::postModuleGlobalBeginRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1061   if (not skipFirstEvent_ or globalFirstEventDone_) {
1062     auto mid = mcc.moduleDescription()->id();
1063     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1064     global_modules_[mid] = nvtxInvalidRangeId;
1065   }
1066 }
1067 
1068 void NVProfilerService::preModuleGlobalEndRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1069   if (not skipFirstEvent_ or globalFirstEventDone_) {
1070     auto mid = mcc.moduleDescription()->id();
1071     auto const& label = mcc.moduleDescription()->moduleLabel();
1072     auto const& msg = label + " global end run";
1073     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1074   }
1075 }
1076 
1077 void NVProfilerService::postModuleGlobalEndRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1078   if (not skipFirstEvent_ or globalFirstEventDone_) {
1079     auto mid = mcc.moduleDescription()->id();
1080     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1081     global_modules_[mid] = nvtxInvalidRangeId;
1082   }
1083 }
1084 
1085 void NVProfilerService::preModuleGlobalBeginLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1086   if (not skipFirstEvent_ or globalFirstEventDone_) {
1087     auto mid = mcc.moduleDescription()->id();
1088     auto const& label = mcc.moduleDescription()->moduleLabel();
1089     auto const& msg = label + " global begin lumi";
1090     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1091   }
1092 }
1093 
1094 void NVProfilerService::postModuleGlobalBeginLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1095   if (not skipFirstEvent_ or globalFirstEventDone_) {
1096     auto mid = mcc.moduleDescription()->id();
1097     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1098     global_modules_[mid] = nvtxInvalidRangeId;
1099   }
1100 }
1101 
1102 void NVProfilerService::preModuleGlobalEndLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1103   if (not skipFirstEvent_ or globalFirstEventDone_) {
1104     auto mid = mcc.moduleDescription()->id();
1105     auto const& label = mcc.moduleDescription()->moduleLabel();
1106     auto const& msg = label + " global end lumi";
1107     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1108   }
1109 }
1110 
1111 void NVProfilerService::postModuleGlobalEndLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1112   if (not skipFirstEvent_ or globalFirstEventDone_) {
1113     auto mid = mcc.moduleDescription()->id();
1114     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1115     global_modules_[mid] = nvtxInvalidRangeId;
1116   }
1117 }
1118 
1119 void NVProfilerService::preSourceConstruction(edm::ModuleDescription const& desc) {
1120   callgraph_.preSourceConstruction(desc);
1121 
1122   if (not skipFirstEvent_) {
1123     auto mid = desc.id();
1124     global_modules_.grow_to_at_least(mid + 1);
1125     auto const& label = desc.moduleLabel();
1126     auto const& msg = label + " construction";
1127     global_modules_[mid] = nvtxDomainRangeStartColor(global_domain_, msg.c_str(), labelColor(label));
1128   }
1129 }
1130 
1131 void NVProfilerService::postSourceConstruction(edm::ModuleDescription const& desc) {
1132   if (not skipFirstEvent_) {
1133     auto mid = desc.id();
1134     nvtxDomainRangeEnd(global_domain_, global_modules_[mid]);
1135     global_modules_[mid] = nvtxInvalidRangeId;
1136   }
1137 }
1138 
// ServiceMaker.h is included here, right before its only use in this file.
// DEFINE_FWK_SERVICE presumably registers NVProfilerService as a framework service
// plugin - see the macro's definition for the exact effect.
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
DEFINE_FWK_SERVICE(NVProfilerService);