Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-05-07 00:41:25

0001 // C++ headers
0002 #include <cmath>
0003 #include <fstream>
0004 #include <iomanip>
0005 #include <iostream>
0006 #include <limits>
0007 #include <mutex>
0008 #include <sstream>
0009 #include <string>
0010 #include <unordered_map>
0011 #include <unordered_set>
0012 
0013 // boost headers
0014 #include <boost/range/irange.hpp>
0015 
0016 // {fmt} headers
0017 #include <fmt/printf.h>
0018 
0019 // JSON headers
0020 #include <nlohmann/json.hpp>
0021 using json = nlohmann::json;
0022 
0023 // CMSSW headers
0024 #include "DQMServices/Core/interface/DQMStore.h"
0025 #include "DataFormats/Common/interface/HLTPathStatus.h"
0026 #include "DataFormats/Provenance/interface/EventID.h"
0027 #include "DataFormats/Provenance/interface/ModuleDescription.h"
0028 #include "DataFormats/Provenance/interface/Timestamp.h"
0029 #include "DataFormats/Scalers/interface/LumiScalers.h"
0030 #include "FWCore/Framework/interface/Event.h"
0031 #include "FWCore/Framework/interface/LuminosityBlock.h"
0032 #include "FWCore/Framework/interface/TriggerNamesService.h"
0033 #include "FWCore/ServiceRegistry/interface/PlaceInPathContext.h"
0034 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0035 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0036 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0037 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0038 #include "FWCore/Utilities/interface/Exception.h"
0039 #include "FWCore/Utilities/interface/StreamID.h"
0040 #include "HLTrigger/Timer/interface/memory_usage.h"
0041 #include "HLTrigger/Timer/interface/processor_model.h"
0042 #include "FastTimerService.h"
0043 
0044 using namespace std::literals;
0045 
0046 ///////////////////////////////////////////////////////////////////////////////
0047 
0048 namespace {
0049 
0050   // convert any std::chrono::duration to milliseconds
0051   template <class Rep, class Period>
0052   double ms(std::chrono::duration<Rep, Period> duration) {
0053     return std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(duration).count();
0054   }
0055 
0056   // convert any boost::chrono::duration to milliseconds
0057   template <class Rep, class Period>
0058   double ms(boost::chrono::duration<Rep, Period> duration) {
0059     return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(duration).count();
0060   }
0061 
0062   // convert a std::atomic<boost::chrono::nanoseconds::rep> to milliseconds
0063   double ms(std::atomic<boost::chrono::nanoseconds::rep> const& duration) {
0064     return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(
0065                boost::chrono::nanoseconds(duration.load()))
0066         .count();
0067   }
0068 
0069   // convert from bytes to kilobytes, rounding down
0070   uint64_t kB(uint64_t bytes) { return bytes / 1024; }
0071 
0072   // convert from bytes to kilobytes, rounding down
0073   uint64_t kB(std::atomic<uint64_t> const& bytes) { return bytes.load() / 1024; }
0074 
0075 }  // namespace
0076 
0077 ///////////////////////////////////////////////////////////////////////////////
0078 
0079 // resources being monitored by the service
0080 
0081 // Resources
0082 
0083 FastTimerService::Resources::Resources()
0084     : time_thread(boost::chrono::nanoseconds::zero()),
0085       time_real(boost::chrono::nanoseconds::zero()),
0086       allocated(0ul),
0087       deallocated(0ul) {}
0088 
0089 void FastTimerService::Resources::reset() {
0090   time_thread = boost::chrono::nanoseconds::zero();
0091   time_real = boost::chrono::nanoseconds::zero();
0092   allocated = 0ul;
0093   deallocated = 0ul;
0094 }
0095 
0096 FastTimerService::Resources& FastTimerService::Resources::operator+=(Resources const& other) {
0097   time_thread += other.time_thread;
0098   time_real += other.time_real;
0099   allocated += other.allocated;
0100   deallocated += other.deallocated;
0101   return *this;
0102 }
0103 
0104 FastTimerService::Resources& FastTimerService::Resources::operator+=(AtomicResources const& other) {
0105   time_thread += boost::chrono::nanoseconds(other.time_thread.load());
0106   time_real += boost::chrono::nanoseconds(other.time_real.load());
0107   allocated += other.allocated.load();
0108   deallocated += other.deallocated.load();
0109   return *this;
0110 }
0111 
0112 FastTimerService::Resources FastTimerService::Resources::operator+(Resources const& other) const {
0113   Resources result(*this);
0114   result += other;
0115   return result;
0116 }
0117 
0118 FastTimerService::Resources FastTimerService::Resources::operator+(AtomicResources const& other) const {
0119   Resources result(*this);
0120   result += other;
0121   return result;
0122 }
0123 
0124 // AtomicResources
0125 // operation on the whole object are not atomic, as the operations
0126 // on the individual fields could be interleaved; however, accumulation
0127 // of results should yield the correct result.
0128 
0129 FastTimerService::AtomicResources::AtomicResources()
0130     : time_thread(0ul), time_real(0ul), allocated(0ul), deallocated(0ul) {}
0131 
0132 FastTimerService::AtomicResources::AtomicResources(AtomicResources const& other)
0133     : time_thread(other.time_thread.load()),
0134       time_real(other.time_real.load()),
0135       allocated(other.allocated.load()),
0136       deallocated(other.deallocated.load()) {}
0137 
0138 void FastTimerService::AtomicResources::reset() {
0139   time_thread = 0ul;
0140   time_real = 0ul;
0141   allocated = 0ul;
0142   deallocated = 0ul;
0143 }
0144 
0145 FastTimerService::AtomicResources& FastTimerService::AtomicResources::operator=(AtomicResources const& other) {
0146   time_thread = other.time_thread.load();
0147   time_real = other.time_real.load();
0148   allocated = other.allocated.load();
0149   deallocated = other.deallocated.load();
0150   return *this;
0151 }
0152 
0153 FastTimerService::AtomicResources& FastTimerService::AtomicResources::operator+=(AtomicResources const& other) {
0154   time_thread += other.time_thread.load();
0155   time_real += other.time_real.load();
0156   allocated += other.allocated.load();
0157   deallocated += other.deallocated.load();
0158   return *this;
0159 }
0160 
0161 FastTimerService::AtomicResources& FastTimerService::AtomicResources::operator+=(Resources const& other) {
0162   time_thread += other.time_thread.count();
0163   time_real += other.time_real.count();
0164   allocated += other.allocated;
0165   deallocated += other.deallocated;
0166   return *this;
0167 }
0168 
0169 FastTimerService::AtomicResources FastTimerService::AtomicResources::operator+(AtomicResources const& other) const {
0170   AtomicResources result(*this);
0171   result += other;
0172   return result;
0173 }
0174 
0175 FastTimerService::Resources FastTimerService::AtomicResources::operator+(Resources const& other) const {
0176   return other + *this;
0177 }
0178 
0179 // ResourcesPerModule
0180 
0181 FastTimerService::ResourcesPerModule::ResourcesPerModule() noexcept : total(), events(0), has_acquire(false) {}
0182 
0183 void FastTimerService::ResourcesPerModule::reset() noexcept {
0184   total.reset();
0185   events = 0;
0186   has_acquire = false;
0187 }
0188 
0189 FastTimerService::ResourcesPerModule& FastTimerService::ResourcesPerModule::operator+=(ResourcesPerModule const& other) {
0190   total += other.total;
0191   events += other.events;
0192   has_acquire = has_acquire or other.has_acquire;
0193   return *this;
0194 }
0195 
0196 FastTimerService::ResourcesPerModule FastTimerService::ResourcesPerModule::operator+(
0197     ResourcesPerModule const& other) const {
0198   ResourcesPerModule result(*this);
0199   result += other;
0200   return result;
0201 }
0202 
0203 // ResourcesPerPath
0204 
0205 void FastTimerService::ResourcesPerPath::reset() {
0206   active.reset();
0207   total.reset();
0208   last = 0;
0209   status = false;
0210 }
0211 
0212 FastTimerService::ResourcesPerPath& FastTimerService::ResourcesPerPath::operator+=(ResourcesPerPath const& other) {
0213   active += other.active;
0214   total += other.total;
0215   last = 0;  // summing these makes no sense, reset them instead
0216   status = false;
0217   return *this;
0218 }
0219 
0220 FastTimerService::ResourcesPerPath FastTimerService::ResourcesPerPath::operator+(ResourcesPerPath const& other) const {
0221   ResourcesPerPath result(*this);
0222   result += other;
0223   return result;
0224 }
0225 
0226 // ResourcesPerProcess
0227 
0228 FastTimerService::ResourcesPerProcess::ResourcesPerProcess(ProcessCallGraph::ProcessType const& process)
0229     : total(), paths(process.paths_.size()), endpaths(process.endPaths_.size()) {}
0230 
0231 void FastTimerService::ResourcesPerProcess::reset() {
0232   total.reset();
0233   for (auto& path : paths)
0234     path.reset();
0235   for (auto& path : endpaths)
0236     path.reset();
0237 }
0238 
0239 FastTimerService::ResourcesPerProcess& FastTimerService::ResourcesPerProcess::operator+=(
0240     ResourcesPerProcess const& other) {
0241   total += other.total;
0242   assert(paths.size() == other.paths.size());
0243   for (unsigned int i : boost::irange(0ul, paths.size()))
0244     paths[i] += other.paths[i];
0245   assert(endpaths.size() == other.endpaths.size());
0246   for (unsigned int i : boost::irange(0ul, endpaths.size()))
0247     endpaths[i] += other.endpaths[i];
0248   return *this;
0249 }
0250 
0251 FastTimerService::ResourcesPerProcess FastTimerService::ResourcesPerProcess::operator+(
0252     ResourcesPerProcess const& other) const {
0253   ResourcesPerProcess result(*this);
0254   result += other;
0255   return result;
0256 }
0257 
0258 // ResourcesPerJob
0259 
0260 FastTimerService::ResourcesPerJob::ResourcesPerJob(ProcessCallGraph const& job,
0261                                                    std::vector<GroupOfModules> const& groups)
0262     : total(), overhead(), eventsetup(), event(), highlight(groups.size()), modules(job.size()), processes(), events(0) {
0263   processes.reserve(job.processes().size());
0264   for (auto const& process : job.processes())
0265     processes.emplace_back(process);
0266 }
0267 
0268 void FastTimerService::ResourcesPerJob::reset() {
0269   total.reset();
0270   overhead.reset();
0271   eventsetup.reset();
0272   event.reset();
0273   for (auto& module : highlight)
0274     module.reset();
0275   for (auto& module : modules)
0276     module.reset();
0277   for (auto& process : processes)
0278     process.reset();
0279   events = 0;
0280 }
0281 
0282 FastTimerService::ResourcesPerJob& FastTimerService::ResourcesPerJob::operator+=(ResourcesPerJob const& other) {
0283   total += other.total;
0284   overhead += other.overhead;
0285   eventsetup += other.eventsetup;
0286   event += other.event;
0287   assert(highlight.size() == other.highlight.size());
0288   for (unsigned int i : boost::irange(0ul, highlight.size()))
0289     highlight[i] += other.highlight[i];
0290   assert(modules.size() == other.modules.size());
0291   for (unsigned int i : boost::irange(0ul, modules.size()))
0292     modules[i] += other.modules[i];
0293   assert(processes.size() == other.processes.size());
0294   for (unsigned int i : boost::irange(0ul, processes.size()))
0295     processes[i] += other.processes[i];
0296   events += other.events;
0297   return *this;
0298 }
0299 
0300 FastTimerService::ResourcesPerJob FastTimerService::ResourcesPerJob::operator+(ResourcesPerJob const& other) const {
0301   ResourcesPerJob result(*this);
0302   result += other;
0303   return result;
0304 }
0305 
0306 // per-thread measurements
0307 
0308 // Measurement
0309 
0310 FastTimerService::Measurement::Measurement() noexcept { measure(); }
0311 
0312 void FastTimerService::Measurement::measure() noexcept {
0313 #ifdef DEBUG_THREAD_CONCURRENCY
0314   id = std::this_thread::get_id();
0315 #endif  // DEBUG_THREAD_CONCURRENCY
0316   time_thread = boost::chrono::thread_clock::now();
0317   time_real = boost::chrono::high_resolution_clock::now();
0318   allocated = memory_usage::allocated();
0319   deallocated = memory_usage::deallocated();
0320 }
0321 
0322 void FastTimerService::Measurement::measure_and_store(Resources& store) noexcept {
0323 #ifdef DEBUG_THREAD_CONCURRENCY
0324   assert(std::this_thread::get_id() == id);
0325 #endif  // DEBUG_THREAD_CONCURRENCY
0326   auto new_time_thread = boost::chrono::thread_clock::now();
0327   auto new_time_real = boost::chrono::high_resolution_clock::now();
0328   auto new_allocated = memory_usage::allocated();
0329   auto new_deallocated = memory_usage::deallocated();
0330   store.time_thread = new_time_thread - time_thread;
0331   store.time_real = new_time_real - time_real;
0332   store.allocated = new_allocated - allocated;
0333   store.deallocated = new_deallocated - deallocated;
0334   time_thread = new_time_thread;
0335   time_real = new_time_real;
0336   allocated = new_allocated;
0337   deallocated = new_deallocated;
0338 }
0339 
0340 void FastTimerService::Measurement::measure_and_accumulate(Resources& store) noexcept {
0341 #ifdef DEBUG_THREAD_CONCURRENCY
0342   assert(std::this_thread::get_id() == id);
0343 #endif  // DEBUG_THREAD_CONCURRENCY
0344   auto new_time_thread = boost::chrono::thread_clock::now();
0345   auto new_time_real = boost::chrono::high_resolution_clock::now();
0346   auto new_allocated = memory_usage::allocated();
0347   auto new_deallocated = memory_usage::deallocated();
0348   store.time_thread += new_time_thread - time_thread;
0349   store.time_real += new_time_real - time_real;
0350   store.allocated += new_allocated - allocated;
0351   store.deallocated += new_deallocated - deallocated;
0352   time_thread = new_time_thread;
0353   time_real = new_time_real;
0354   allocated = new_allocated;
0355   deallocated = new_deallocated;
0356 }
0357 
0358 void FastTimerService::Measurement::measure_and_accumulate(AtomicResources& store) noexcept {
0359 #ifdef DEBUG_THREAD_CONCURRENCY
0360   assert(std::this_thread::get_id() == id);
0361 #endif  // DEBUG_THREAD_CONCURRENCY
0362   auto new_time_thread = boost::chrono::thread_clock::now();
0363   auto new_time_real = boost::chrono::high_resolution_clock::now();
0364   auto new_allocated = memory_usage::allocated();
0365   auto new_deallocated = memory_usage::deallocated();
0366   store.time_thread += boost::chrono::duration_cast<boost::chrono::nanoseconds>(new_time_thread - time_thread).count();
0367   store.time_real += boost::chrono::duration_cast<boost::chrono::nanoseconds>(new_time_real - time_real).count();
0368   store.allocated += new_allocated - allocated;
0369   store.deallocated += new_deallocated - deallocated;
0370   time_thread = new_time_thread;
0371   time_real = new_time_real;
0372   allocated = new_allocated;
0373   deallocated = new_deallocated;
0374 }
0375 
0376 ///////////////////////////////////////////////////////////////////////////////
0377 
0378 void FastTimerService::PlotsPerElement::book(dqm::reco::DQMStore::IBooker& booker,
0379                                              std::string const& name,
0380                                              std::string const& title,
0381                                              PlotRanges const& ranges,
0382                                              unsigned int lumisections,
0383                                              bool byls) {
0384   int time_bins = (int)std::ceil(ranges.time_range / ranges.time_resolution);
0385   int mem_bins = (int)std::ceil(ranges.memory_range / ranges.memory_resolution);
0386   std::string y_title_ms = fmt::sprintf("events / %.1f ms", ranges.time_resolution);
0387   std::string y_title_kB = fmt::sprintf("events / %.1f kB", ranges.memory_resolution);
0388 
0389   // MonitorElement::setStatOverflows(kTRUE) includes underflows and overflows in the computation of mean and RMS
0390   time_thread_ =
0391       booker.book1D(name + " time_thread", title + " processing time (cpu)", time_bins, 0., ranges.time_range);
0392   time_thread_->setXTitle("processing time [ms]");
0393   time_thread_->setYTitle(y_title_ms);
0394   time_thread_->setStatOverflows(kTRUE);
0395 
0396   time_real_ = booker.book1D(name + " time_real", title + " processing time (real)", time_bins, 0., ranges.time_range);
0397   time_real_->setXTitle("processing time [ms]");
0398   time_real_->setYTitle(y_title_ms);
0399   time_real_->setStatOverflows(kTRUE);
0400 
0401   if (memory_usage::is_available()) {
0402     allocated_ = booker.book1D(name + " allocated", title + " allocated memory", mem_bins, 0., ranges.memory_range);
0403     allocated_->setXTitle("memory [kB]");
0404     allocated_->setYTitle(y_title_kB);
0405     allocated_->setStatOverflows(kTRUE);
0406 
0407     deallocated_ =
0408         booker.book1D(name + " deallocated", title + " deallocated memory", mem_bins, 0., ranges.memory_range);
0409     deallocated_->setXTitle("memory [kB]");
0410     deallocated_->setYTitle(y_title_kB);
0411     deallocated_->setStatOverflows(kTRUE);
0412   }
0413 
0414   if (not byls)
0415     return;
0416 
0417   time_thread_byls_ = booker.bookProfile(name + " time_thread_byls",
0418                                          title + " processing time (cpu) vs. lumisection",
0419                                          lumisections,
0420                                          0.5,
0421                                          lumisections + 0.5,
0422                                          time_bins,
0423                                          0.,
0424                                          std::numeric_limits<double>::infinity(),
0425                                          " ");
0426   time_thread_byls_->setXTitle("lumisection");
0427   time_thread_byls_->setYTitle("processing time [ms]");
0428   time_thread_byls_->setStatOverflows(kTRUE);
0429 
0430   time_real_byls_ = booker.bookProfile(name + " time_real_byls",
0431                                        title + " processing time (real) vs. lumisection",
0432                                        lumisections,
0433                                        0.5,
0434                                        lumisections + 0.5,
0435                                        time_bins,
0436                                        0.,
0437                                        std::numeric_limits<double>::infinity(),
0438                                        " ");
0439   time_real_byls_->setXTitle("lumisection");
0440   time_real_byls_->setYTitle("processing time [ms]");
0441   time_real_byls_->setStatOverflows(kTRUE);
0442 
0443   if (memory_usage::is_available()) {
0444     allocated_byls_ = booker.bookProfile(name + " allocated_byls",
0445                                          title + " allocated memory vs. lumisection",
0446                                          lumisections,
0447                                          0.5,
0448                                          lumisections + 0.5,
0449                                          mem_bins,
0450                                          0.,
0451                                          std::numeric_limits<double>::infinity(),
0452                                          " ");
0453     allocated_byls_->setXTitle("lumisection");
0454     allocated_byls_->setYTitle("memory [kB]");
0455     allocated_byls_->setStatOverflows(kTRUE);
0456 
0457     deallocated_byls_ = booker.bookProfile(name + " deallocated_byls",
0458                                            title + " deallocated memory vs. lumisection",
0459                                            lumisections,
0460                                            0.5,
0461                                            lumisections + 0.5,
0462                                            mem_bins,
0463                                            0.,
0464                                            std::numeric_limits<double>::infinity(),
0465                                            " ");
0466     deallocated_byls_->setXTitle("lumisection");
0467     deallocated_byls_->setYTitle("memory [kB]");
0468     deallocated_byls_->setStatOverflows(kTRUE);
0469   }
0470 }
0471 
0472 void FastTimerService::PlotsPerElement::fill(Resources const& data, unsigned int lumisection) {
0473   if (time_thread_)
0474     time_thread_->Fill(ms(data.time_thread));
0475 
0476   if (time_thread_byls_)
0477     time_thread_byls_->Fill(lumisection, ms(data.time_thread));
0478 
0479   if (time_real_)
0480     time_real_->Fill(ms(data.time_real));
0481 
0482   if (time_real_byls_)
0483     time_real_byls_->Fill(lumisection, ms(data.time_real));
0484 
0485   if (allocated_)
0486     allocated_->Fill(kB(data.allocated));
0487 
0488   if (allocated_byls_)
0489     allocated_byls_->Fill(lumisection, kB(data.allocated));
0490 
0491   if (deallocated_)
0492     deallocated_->Fill(kB(data.deallocated));
0493 
0494   if (deallocated_byls_)
0495     deallocated_byls_->Fill(lumisection, kB(data.deallocated));
0496 }
0497 
0498 void FastTimerService::PlotsPerElement::fill(AtomicResources const& data, unsigned int lumisection) {
0499   if (time_thread_)
0500     time_thread_->Fill(ms(boost::chrono::nanoseconds(data.time_thread.load())));
0501 
0502   if (time_thread_byls_)
0503     time_thread_byls_->Fill(lumisection, ms(boost::chrono::nanoseconds(data.time_thread.load())));
0504 
0505   if (time_real_)
0506     time_real_->Fill(ms(boost::chrono::nanoseconds(data.time_real.load())));
0507 
0508   if (time_real_byls_)
0509     time_real_byls_->Fill(lumisection, ms(boost::chrono::nanoseconds(data.time_real.load())));
0510 
0511   if (allocated_)
0512     allocated_->Fill(kB(data.allocated));
0513 
0514   if (allocated_byls_)
0515     allocated_byls_->Fill(lumisection, kB(data.allocated));
0516 
0517   if (deallocated_)
0518     deallocated_->Fill(kB(data.deallocated));
0519 
0520   if (deallocated_byls_)
0521     deallocated_byls_->Fill(lumisection, kB(data.deallocated));
0522 }
0523 
0524 void FastTimerService::PlotsPerElement::fill_fraction(Resources const& data,
0525                                                       Resources const& part,
0526                                                       unsigned int lumisection) {
0527   float total;
0528   float fraction;
0529 
0530   total = ms(data.time_thread);
0531   fraction = (total > 0.) ? (ms(part.time_thread) / total) : 0.;
0532   if (time_thread_)
0533     time_thread_->Fill(total, fraction);
0534 
0535   if (time_thread_byls_)
0536     time_thread_byls_->Fill(lumisection, total, fraction);
0537 
0538   total = ms(data.time_real);
0539   fraction = (total > 0.) ? (ms(part.time_real) / total) : 0.;
0540   if (time_real_)
0541     time_real_->Fill(total, fraction);
0542 
0543   if (time_real_byls_)
0544     time_real_byls_->Fill(lumisection, total, fraction);
0545 
0546   total = kB(data.allocated);
0547   fraction = (total > 0.) ? (kB(part.allocated) / total) : 0.;
0548   if (allocated_)
0549     allocated_->Fill(total, fraction);
0550 
0551   if (allocated_byls_)
0552     allocated_byls_->Fill(lumisection, total, fraction);
0553 
0554   total = kB(data.deallocated);
0555   fraction = (total > 0.) ? (kB(part.deallocated) / total) : 0.;
0556   if (deallocated_)
0557     deallocated_->Fill(total, fraction);
0558 
0559   if (deallocated_byls_)
0560     deallocated_byls_->Fill(lumisection, total, fraction);
0561 }
0562 
0563 void FastTimerService::PlotsPerPath::book(dqm::reco::DQMStore::IBooker& booker,
0564                                           std::string const& prefixDir,
0565                                           ProcessCallGraph const& job,
0566                                           ProcessCallGraph::PathType const& path,
0567                                           PlotRanges const& ranges,
0568                                           unsigned int lumisections,
0569                                           bool byls) {
0570   const std::string basedir = booker.pwd();
0571   booker.setCurrentFolder(basedir + "/" + prefixDir + path.name_);
0572 
0573   total_.book(booker, "path", path.name_, ranges, lumisections, byls);
0574 
0575   // MonitorElement::setStatOverflows(kTRUE) includes underflows and overflows in the computation of mean and RMS
0576   unsigned int bins = path.modules_and_dependencies_.size();
0577   module_counter_ = booker.book1DD("module_counter", "module counter", bins + 1, -0.5, bins + 0.5);
0578   module_counter_->setYTitle("events");
0579   module_counter_->setStatOverflows(kTRUE);
0580   module_time_thread_total_ =
0581       booker.book1DD("module_time_thread_total", "total module time (cpu)", bins, -0.5, bins - 0.5);
0582   module_time_thread_total_->setYTitle("processing time [ms]");
0583   module_time_thread_total_->setStatOverflows(kTRUE);
0584   module_time_real_total_ =
0585       booker.book1DD("module_time_real_total", "total module time (real)", bins, -0.5, bins - 0.5);
0586   module_time_real_total_->setYTitle("processing time [ms]");
0587   module_time_real_total_->setStatOverflows(kTRUE);
0588   if (memory_usage::is_available()) {
0589     module_allocated_total_ =
0590         booker.book1DD("module_allocated_total", "total allocated memory", bins, -0.5, bins - 0.5);
0591     module_allocated_total_->setYTitle("memory [kB]");
0592     module_allocated_total_->setStatOverflows(kTRUE);
0593     module_deallocated_total_ =
0594         booker.book1DD("module_deallocated_total", "total deallocated memory", bins, -0.5, bins - 0.5);
0595     module_deallocated_total_->setYTitle("memory [kB]");
0596     module_deallocated_total_->setStatOverflows(kTRUE);
0597   }
0598   for (unsigned int bin : boost::irange(0u, bins)) {
0599     auto const& module = job[path.modules_and_dependencies_[bin]];
0600     std::string const& label =
0601         module.scheduled_ ? module.module_.moduleLabel() : module.module_.moduleLabel() + " (unscheduled)";
0602     module_counter_->setBinLabel(bin + 1, label);
0603     module_time_thread_total_->setBinLabel(bin + 1, label);
0604     module_time_real_total_->setBinLabel(bin + 1, label);
0605     if (memory_usage::is_available()) {
0606       module_allocated_total_->setBinLabel(bin + 1, label);
0607       module_deallocated_total_->setBinLabel(bin + 1, label);
0608     }
0609   }
0610   module_counter_->setBinLabel(bins + 1, "");
0611 
0612   booker.setCurrentFolder(basedir);
0613 }
0614 
0615 void FastTimerService::PlotsPerPath::fill(ProcessCallGraph::PathType const& description,
0616                                           ResourcesPerJob const& data,
0617                                           ResourcesPerPath const& path,
0618                                           unsigned int ls) {
0619   // fill the total path time
0620   total_.fill(path.total, ls);
0621 
0622   // fill the modules that actually ran and the total time spent in each od them
0623   for (unsigned int i = 0; i < path.last; ++i) {
0624     auto const& module = data.modules[description.modules_and_dependencies_[i]];
0625     if (module_counter_)
0626       module_counter_->Fill(i);
0627 
0628     if (module_time_thread_total_)
0629       module_time_thread_total_->Fill(i, ms(module.total.time_thread));
0630 
0631     if (module_time_real_total_)
0632       module_time_real_total_->Fill(i, ms(module.total.time_real));
0633 
0634     if (module_allocated_total_)
0635       module_allocated_total_->Fill(i, kB(module.total.allocated));
0636 
0637     if (module_deallocated_total_)
0638       module_deallocated_total_->Fill(i, kB(module.total.deallocated));
0639   }
0640   if (module_counter_ and path.status)
0641     module_counter_->Fill(path.last);
0642 }
0643 
0644 FastTimerService::PlotsPerProcess::PlotsPerProcess(ProcessCallGraph::ProcessType const& process)
0645     : event_(), paths_(process.paths_.size()), endpaths_(process.endPaths_.size()) {}
0646 
0647 void FastTimerService::PlotsPerProcess::book(dqm::reco::DQMStore::IBooker& booker,
0648                                              ProcessCallGraph const& job,
0649                                              ProcessCallGraph::ProcessType const& process,
0650                                              PlotRanges const& event_ranges,
0651                                              PlotRanges const& path_ranges,
0652                                              unsigned int lumisections,
0653                                              bool bypath,
0654                                              bool byls) {
0655   const std::string basedir = booker.pwd();
0656   event_.book(booker, "process " + process.name_, "process " + process.name_, event_ranges, lumisections, byls);
0657   if (bypath) {
0658     booker.setCurrentFolder(basedir + "/process " + process.name_ + " paths");
0659     for (unsigned int id : boost::irange(0ul, paths_.size())) {
0660       paths_[id].book(booker, "path ", job, process.paths_[id], path_ranges, lumisections, byls);
0661     }
0662     for (unsigned int id : boost::irange(0ul, endpaths_.size())) {
0663       endpaths_[id].book(booker, "endpath ", job, process.endPaths_[id], path_ranges, lumisections, byls);
0664     }
0665     booker.setCurrentFolder(basedir);
0666   }
0667 }
0668 
0669 void FastTimerService::PlotsPerProcess::fill(ProcessCallGraph::ProcessType const& description,
0670                                              ResourcesPerJob const& data,
0671                                              ResourcesPerProcess const& process,
0672                                              unsigned int ls) {
0673   // fill process event plots
0674   event_.fill(process.total, ls);
0675 
0676   // fill all paths plots
0677   for (unsigned int id : boost::irange(0ul, paths_.size()))
0678     paths_[id].fill(description.paths_[id], data, process.paths[id], ls);
0679 
0680   // fill all endpaths plots
0681   for (unsigned int id : boost::irange(0ul, endpaths_.size()))
0682     endpaths_[id].fill(description.endPaths_[id], data, process.endpaths[id], ls);
0683 }
0684 
0685 FastTimerService::PlotsPerJob::PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups)
0686     : event_(), event_ex_(), overhead_(), highlight_(groups.size()), modules_(job.size()), processes_() {
0687   processes_.reserve(job.processes().size());
0688   for (auto const& process : job.processes())
0689     processes_.emplace_back(process);
0690 }
0691 
0692 void FastTimerService::PlotsPerJob::book(dqm::reco::DQMStore::IBooker& booker,
0693                                          ProcessCallGraph const& job,
0694                                          std::vector<GroupOfModules> const& groups,
0695                                          PlotRanges const& event_ranges,
0696                                          PlotRanges const& path_ranges,
0697                                          PlotRanges const& module_ranges,
0698                                          unsigned int lumisections,
0699                                          bool bymodule,
0700                                          bool bypath,
0701                                          bool byls,
0702                                          bool transitions) {
0703   const std::string basedir = booker.pwd();
0704 
0705   // event summary plots
0706   event_.book(booker, "event", "Event", event_ranges, lumisections, byls);
0707 
0708   event_ex_.book(booker, "explicit", "Event (explicit)", event_ranges, lumisections, byls);
0709 
0710   overhead_.book(booker, "overhead", "Overhead", event_ranges, lumisections, byls);
0711 
0712   modules_[job.source().id()].book(booker, "source", "Source", module_ranges, lumisections, byls);
0713 
0714   if (transitions) {
0715     lumi_.book(booker, "lumi", "LumiSection transitions", event_ranges, lumisections, byls);
0716 
0717     run_.book(booker, "run", "Run transtions", event_ranges, lumisections, false);
0718   }
0719 
0720   // plot the time spent in few given groups of modules
0721   for (unsigned int group : boost::irange(0ul, groups.size())) {
0722     auto const& label = groups[group].label;
0723     highlight_[group].book(booker, "highlight " + label, "Highlight " + label, event_ranges, lumisections, byls);
0724   }
0725 
0726   // plots per subprocess (event, modules, paths and endpaths)
0727   for (unsigned int pid : boost::irange(0ul, job.processes().size())) {
0728     auto const& process = job.processDescription(pid);
0729     processes_[pid].book(booker, job, process, event_ranges, path_ranges, lumisections, bypath, byls);
0730 
0731     if (bymodule) {
0732       booker.setCurrentFolder(basedir + "/process " + process.name_ + " modules");
0733       for (unsigned int id : process.modules_) {
0734         auto const& module_name = job.module(id).moduleLabel();
0735         modules_[id].book(booker, module_name, module_name, module_ranges, lumisections, byls);
0736       }
0737       booker.setCurrentFolder(basedir);
0738     }
0739   }
0740 }
0741 
0742 void FastTimerService::PlotsPerJob::fill(ProcessCallGraph const& job, ResourcesPerJob const& data, unsigned int ls) {
0743   // fill total event plots
0744   event_.fill(data.total, ls);
0745   event_ex_.fill(data.event, ls);
0746   overhead_.fill(data.overhead, ls);
0747 
0748   // fill highltight plots
0749   for (unsigned int group : boost::irange(0ul, highlight_.size()))
0750     highlight_[group].fill_fraction(data.total, data.highlight[group], ls);
0751 
0752   // fill modules plots
0753   for (unsigned int id : boost::irange(0ul, modules_.size()))
0754     modules_[id].fill(data.modules[id].total, ls);
0755 
0756   for (unsigned int pid : boost::irange(0ul, processes_.size()))
0757     processes_[pid].fill(job.processDescription(pid), data, data.processes[pid], ls);
0758 }
0759 
0760 void FastTimerService::PlotsPerJob::fill_run(AtomicResources const& data) {
0761   // fill run transition plots
0762   run_.fill(data, 0);
0763 }
0764 
0765 void FastTimerService::PlotsPerJob::fill_lumi(AtomicResources const& data, unsigned int ls) {
0766   // fill lumisection transition plots
0767   lumi_.fill(data, ls);
0768 }
0769 
0770 ///////////////////////////////////////////////////////////////////////////////
0771 
0772 FastTimerService::FastTimerService(const edm::ParameterSet& config, edm::ActivityRegistry& registry)
0773     :  // configuration
0774       callgraph_(),
0775       // job configuration
0776       concurrent_lumis_(0),
0777       concurrent_runs_(0),
0778       concurrent_streams_(0),
0779       concurrent_threads_(0),
0780       print_event_summary_(config.getUntrackedParameter<bool>("printEventSummary")),
0781       print_run_summary_(config.getUntrackedParameter<bool>("printRunSummary")),
0782       print_job_summary_(config.getUntrackedParameter<bool>("printJobSummary")),
0783       // JSON configuration
0784       //write_json_per_event_(config.getUntrackedParameter<bool>("writeJSONByEvent")),
0785       //write_json_per_ls_(config.getUntrackedParameter<bool>("writeJSONByLumiSection")),
0786       //write_json_per_run_(config.getUntrackedParameter<bool>("writeJSONByRun")),
0787       write_json_summary_(config.getUntrackedParameter<bool>("writeJSONSummary")),
0788       json_filename_(config.getUntrackedParameter<std::string>("jsonFileName")),
0789       // DQM configuration
0790       enable_dqm_(config.getUntrackedParameter<bool>("enableDQM")),
0791       enable_dqm_bymodule_(config.getUntrackedParameter<bool>("enableDQMbyModule")),
0792       enable_dqm_bypath_(config.getUntrackedParameter<bool>("enableDQMbyPath")),
0793       enable_dqm_byls_(config.getUntrackedParameter<bool>("enableDQMbyLumiSection")),
0794       enable_dqm_bynproc_(config.getUntrackedParameter<bool>("enableDQMbyProcesses")),
0795       enable_dqm_transitions_(config.getUntrackedParameter<bool>("enableDQMTransitions")),
0796       dqm_event_ranges_({config.getUntrackedParameter<double>("dqmTimeRange"),                  // ms
0797                          config.getUntrackedParameter<double>("dqmTimeResolution"),             // ms
0798                          config.getUntrackedParameter<double>("dqmMemoryRange"),                // kB
0799                          config.getUntrackedParameter<double>("dqmMemoryResolution")}),         // kB
0800       dqm_path_ranges_({config.getUntrackedParameter<double>("dqmPathTimeRange"),               // ms
0801                         config.getUntrackedParameter<double>("dqmPathTimeResolution"),          // ms
0802                         config.getUntrackedParameter<double>("dqmPathMemoryRange"),             // kB
0803                         config.getUntrackedParameter<double>("dqmPathMemoryResolution")}),      // kB
0804       dqm_module_ranges_({config.getUntrackedParameter<double>("dqmModuleTimeRange"),           // ms
0805                           config.getUntrackedParameter<double>("dqmModuleTimeResolution"),      // ms
0806                           config.getUntrackedParameter<double>("dqmModuleMemoryRange"),         // kB
0807                           config.getUntrackedParameter<double>("dqmModuleMemoryResolution")}),  // kB
0808       dqm_lumisections_range_(config.getUntrackedParameter<unsigned int>("dqmLumiSectionsRange")),
0809       dqm_path_(config.getUntrackedParameter<std::string>("dqmPath")),
0810       // highlight configuration
0811       highlight_module_psets_(config.getUntrackedParameter<std::vector<edm::ParameterSet>>("highlightModules")),
0812       highlight_modules_(highlight_module_psets_.size())  // filled in postBeginJob()
0813 {
0814   // start observing when a thread enters or leaves the TBB global thread arena
0815   tbb::task_scheduler_observer::observe();
0816 
0817   // register EDM call backs
0818   registry.watchPreallocate(this, &FastTimerService::preallocate);
0819   registry.watchPreBeginJob(this, &FastTimerService::preBeginJob);
0820   registry.watchPostBeginJob(this, &FastTimerService::postBeginJob);
0821   registry.watchPostEndJob(this, &FastTimerService::postEndJob);
0822   registry.watchPreGlobalBeginRun(this, &FastTimerService::preGlobalBeginRun);
0823   //registry.watchPostGlobalBeginRun(         this, & FastTimerService::postGlobalBeginRun );
0824   //registry.watchPreGlobalEndRun(            this, & FastTimerService::preGlobalEndRun );
0825   registry.watchPostGlobalEndRun(this, &FastTimerService::postGlobalEndRun);
0826   registry.watchPreStreamBeginRun(this, &FastTimerService::preStreamBeginRun);
0827   //registry.watchPostStreamBeginRun(         this, & FastTimerService::postStreamBeginRun );
0828   //registry.watchPreStreamEndRun(            this, & FastTimerService::preStreamEndRun );
0829   registry.watchPostStreamEndRun(this, &FastTimerService::postStreamEndRun);
0830   registry.watchPreGlobalBeginLumi(this, &FastTimerService::preGlobalBeginLumi);
0831   //registry.watchPostGlobalBeginLumi(        this, & FastTimerService::postGlobalBeginLumi );
0832   //registry.watchPreGlobalEndLumi(           this, & FastTimerService::preGlobalEndLumi );
0833   registry.watchPostGlobalEndLumi(this, &FastTimerService::postGlobalEndLumi);
0834   registry.watchPreStreamBeginLumi(this, &FastTimerService::preStreamBeginLumi);
0835   //registry.watchPostStreamBeginLumi(        this, & FastTimerService::postStreamBeginLumi );
0836   //registry.watchPreStreamEndLumi(           this, & FastTimerService::preStreamEndLumi );
0837   registry.watchPostStreamEndLumi(this, &FastTimerService::postStreamEndLumi);
0838   registry.watchPreEvent(this, &FastTimerService::preEvent);
0839   registry.watchPostEvent(this, &FastTimerService::postEvent);
0840   registry.watchPrePathEvent(this, &FastTimerService::prePathEvent);
0841   registry.watchPostPathEvent(this, &FastTimerService::postPathEvent);
0842   registry.watchPreSourceConstruction(this, &FastTimerService::preSourceConstruction);
0843   //registry.watchPostSourceConstruction(     this, & FastTimerService::postSourceConstruction);
0844   registry.watchPreSourceRun(this, &FastTimerService::preSourceRun);
0845   registry.watchPostSourceRun(this, &FastTimerService::postSourceRun);
0846   registry.watchPreSourceLumi(this, &FastTimerService::preSourceLumi);
0847   registry.watchPostSourceLumi(this, &FastTimerService::postSourceLumi);
0848   registry.watchPreSourceEvent(this, &FastTimerService::preSourceEvent);
0849   registry.watchPostSourceEvent(this, &FastTimerService::postSourceEvent);
0850   //registry.watchPreModuleConstruction(      this, & FastTimerService::preModuleConstruction);
0851   //registry.watchPostModuleConstruction(     this, & FastTimerService::postModuleConstruction);
0852   //registry.watchPreModuleBeginJob(          this, & FastTimerService::preModuleBeginJob );
0853   //registry.watchPostModuleBeginJob(         this, & FastTimerService::postModuleBeginJob );
0854   //registry.watchPreModuleEndJob(            this, & FastTimerService::preModuleEndJob );
0855   //registry.watchPostModuleEndJob(           this, & FastTimerService::postModuleEndJob );
0856   //registry.watchPreModuleBeginStream(       this, & FastTimerService::preModuleBeginStream );
0857   //registry.watchPostModuleBeginStream(      this, & FastTimerService::postModuleBeginStream );
0858   //registry.watchPreModuleEndStream(         this, & FastTimerService::preModuleEndStream );
0859   //registry.watchPostModuleEndStream(        this, & FastTimerService::postModuleEndStream );
0860   registry.watchPreModuleGlobalBeginRun(this, &FastTimerService::preModuleGlobalBeginRun);
0861   registry.watchPostModuleGlobalBeginRun(this, &FastTimerService::postModuleGlobalBeginRun);
0862   registry.watchPreModuleGlobalEndRun(this, &FastTimerService::preModuleGlobalEndRun);
0863   registry.watchPostModuleGlobalEndRun(this, &FastTimerService::postModuleGlobalEndRun);
0864   registry.watchPreModuleGlobalBeginLumi(this, &FastTimerService::preModuleGlobalBeginLumi);
0865   registry.watchPostModuleGlobalBeginLumi(this, &FastTimerService::postModuleGlobalBeginLumi);
0866   registry.watchPreModuleGlobalEndLumi(this, &FastTimerService::preModuleGlobalEndLumi);
0867   registry.watchPostModuleGlobalEndLumi(this, &FastTimerService::postModuleGlobalEndLumi);
0868   registry.watchPreModuleStreamBeginRun(this, &FastTimerService::preModuleStreamBeginRun);
0869   registry.watchPostModuleStreamBeginRun(this, &FastTimerService::postModuleStreamBeginRun);
0870   registry.watchPreModuleStreamEndRun(this, &FastTimerService::preModuleStreamEndRun);
0871   registry.watchPostModuleStreamEndRun(this, &FastTimerService::postModuleStreamEndRun);
0872   registry.watchPreModuleStreamBeginLumi(this, &FastTimerService::preModuleStreamBeginLumi);
0873   registry.watchPostModuleStreamBeginLumi(this, &FastTimerService::postModuleStreamBeginLumi);
0874   registry.watchPreModuleStreamEndLumi(this, &FastTimerService::preModuleStreamEndLumi);
0875   registry.watchPostModuleStreamEndLumi(this, &FastTimerService::postModuleStreamEndLumi);
0876   //registry.watchPreModuleEventPrefetching(  this, & FastTimerService::preModuleEventPrefetching );
0877   //registry.watchPostModuleEventPrefetching( this, & FastTimerService::postModuleEventPrefetching );
0878   registry.watchPreModuleEventAcquire(this, &FastTimerService::preModuleEventAcquire);
0879   registry.watchPostModuleEventAcquire(this, &FastTimerService::postModuleEventAcquire);
0880   registry.watchPreModuleEvent(this, &FastTimerService::preModuleEvent);
0881   registry.watchPostModuleEvent(this, &FastTimerService::postModuleEvent);
0882   registry.watchPreModuleEventDelayedGet(this, &FastTimerService::preModuleEventDelayedGet);
0883   registry.watchPostModuleEventDelayedGet(this, &FastTimerService::postModuleEventDelayedGet);
0884   registry.watchPreEventReadFromSource(this, &FastTimerService::preEventReadFromSource);
0885   registry.watchPostEventReadFromSource(this, &FastTimerService::postEventReadFromSource);
0886   registry.watchPreESModule(this, &FastTimerService::preESModule);
0887   registry.watchPostESModule(this, &FastTimerService::postESModule);
0888 }
0889 
0890 void FastTimerService::ignoredSignal(const std::string& signal) const {
0891   LogDebug("FastTimerService") << "The FastTimerService received is currently not monitoring the signal \"" << signal
0892                                << "\".\n";
0893 }
0894 
0895 void FastTimerService::unsupportedSignal(const std::string& signal) const {
0896   // warn about each signal only once per job
0897   if (unsupported_signals_.insert(signal).second)
0898     edm::LogWarning("FastTimerService") << "The FastTimerService received the unsupported signal \"" << signal
0899                                         << "\".\n"
0900                                         << "Please report how to reproduce the issue to cms-hlt@cern.ch .";
0901 }
0902 
0903 void FastTimerService::preGlobalBeginRun(edm::GlobalContext const& gc) {
0904   ignoredSignal(__func__);
0905 
0906   // reset the run counters only during the main process being run
0907   if (isFirstSubprocess(gc)) {
0908     auto index = gc.runIndex();
0909     subprocess_global_run_check_[index] = 0;
0910     run_transition_[index].reset();
0911     run_summary_[index].reset();
0912 
0913     // book the DQM plots
0914     if (enable_dqm_) {
0915       // define a callback to book the MonitorElements
0916       auto bookTransactionCallback = [&, this](dqm::reco::DQMStore::IBooker& booker, dqm::reco::DQMStore::IGetter&) {
0917         auto scope = dqm::reco::DQMStore::IBooker::UseRunScope(booker);
0918         // we should really do this, but only DQMStore is allowed to touch it
0919         // We could move to postGlobalBeginRun, then the DQMStore has sure set it up.
0920         //booker.setRunLumi(gc.luminosityBlockID());
0921         booker.setCurrentFolder(dqm_path_);
0922         plots_->book(booker,
0923                      callgraph_,
0924                      highlight_modules_,
0925                      dqm_event_ranges_,
0926                      dqm_path_ranges_,
0927                      dqm_module_ranges_,
0928                      dqm_lumisections_range_,
0929                      enable_dqm_bymodule_,
0930                      enable_dqm_bypath_,
0931                      enable_dqm_byls_,
0932                      enable_dqm_transitions_);
0933       };
0934 
0935       // book MonitorElements for this stream
0936       edm::Service<dqm::legacy::DQMStore>()->meBookerGetter(bookTransactionCallback);
0937     }
0938   }
0939 }
0940 
0941 void FastTimerService::postGlobalBeginRun(edm::GlobalContext const& gc) { ignoredSignal(__func__); }
0942 
0943 void FastTimerService::preStreamBeginRun(edm::StreamContext const& sc) { ignoredSignal(__func__); }
0944 
0945 void FastTimerService::preallocate(edm::service::SystemBounds const& bounds) {
0946   concurrent_lumis_ = bounds.maxNumberOfConcurrentLuminosityBlocks();
0947   concurrent_runs_ = bounds.maxNumberOfConcurrentRuns();
0948   concurrent_streams_ = bounds.maxNumberOfStreams();
0949   concurrent_threads_ = bounds.maxNumberOfThreads();
0950 
0951   if (enable_dqm_bynproc_)
0952     dqm_path_ += fmt::sprintf(
0953         "/Running on %s with %d streams on %d threads", processor_model, concurrent_streams_, concurrent_threads_);
0954 
0955   // clean characters that are deemed unsafe for DQM
0956   // see the definition of `s_safe` in DQMServices/Core/src/DQMStore.cc
0957   auto safe_for_dqm = "/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+=_()# "s;
0958   for (auto& c : dqm_path_)
0959     if (safe_for_dqm.find(c) == std::string::npos)
0960       c = '_';
0961 
0962   // allocate atomic variables to keep track of the completion of each step, process by process
0963   subprocess_event_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_streams_);
0964   for (unsigned int i = 0; i < concurrent_streams_; ++i)
0965     subprocess_event_check_[i] = 0;
0966   subprocess_global_run_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_runs_);
0967   for (unsigned int i = 0; i < concurrent_runs_; ++i)
0968     subprocess_global_run_check_[i] = 0;
0969   subprocess_global_lumi_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_lumis_);
0970   for (unsigned int i = 0; i < concurrent_lumis_; ++i)
0971     subprocess_global_lumi_check_[i] = 0;
0972 
0973   // allocate buffers to keep track of the resources spent in the lumi and run transitions
0974   lumi_transition_.resize(concurrent_lumis_);
0975   run_transition_.resize(concurrent_runs_);
0976 }
0977 
0978 void FastTimerService::preSourceConstruction(edm::ModuleDescription const& module) {
0979   callgraph_.preSourceConstruction(module);
0980 }
0981 
0982 void FastTimerService::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
0983                                    edm::ProcessContext const& context) {
0984   callgraph_.preBeginJob(pathsAndConsumes, context);
0985 }
0986 
0987 void FastTimerService::postBeginJob() {
0988   unsigned int modules = callgraph_.size();
0989 
0990   // module highlights
0991   for (unsigned int group : boost::irange(0ul, highlight_module_psets_.size())) {
0992     // copy and sort for faster search via std::binary_search
0993     auto labels = highlight_module_psets_[group].getUntrackedParameter<std::vector<std::string>>("modules");
0994     std::sort(labels.begin(), labels.end());
0995 
0996     highlight_modules_[group].label = highlight_module_psets_[group].getUntrackedParameter<std::string>("label");
0997     highlight_modules_[group].modules.reserve(labels.size());
0998     // convert the module labels in module ids
0999     for (unsigned int i = 0; i < modules; ++i) {
1000       auto const& label = callgraph_.module(i).moduleLabel();
1001       if (std::binary_search(labels.begin(), labels.end(), label))
1002         highlight_modules_[group].modules.push_back(i);
1003     }
1004   }
1005   highlight_module_psets_.clear();
1006 
1007   // allocate the resource counters for each stream, process, path and module
1008   ResourcesPerJob temp(callgraph_, highlight_modules_);
1009   streams_.resize(concurrent_streams_, temp);
1010   run_summary_.resize(concurrent_runs_, temp);
1011   job_summary_ = temp;
1012 
1013   // check that the DQMStore service is available
1014   if (enable_dqm_ and not edm::Service<dqm::legacy::DQMStore>().isAvailable()) {
1015     // the DQMStore is not available, disable all DQM plots
1016     enable_dqm_ = false;
1017     edm::LogWarning("FastTimerService") << "The DQMStore is not avalable, the DQM plots will not be generated";
1018   }
1019 
1020   // allocate the structures to hold pointers to the DQM plots
1021   if (enable_dqm_) {
1022     plots_ = std::make_unique<PlotsPerJob>(callgraph_, highlight_modules_);
1023   }
1024 }
1025 
1026 void FastTimerService::postStreamBeginRun(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1027 
1028 void FastTimerService::preStreamEndRun(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1029 
1030 void FastTimerService::postStreamEndRun(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1031 
1032 void FastTimerService::preGlobalBeginLumi(edm::GlobalContext const& gc) {
1033   ignoredSignal(__func__);
1034 
1035   // reset the lumi counters only during the main process being run
1036   if (isFirstSubprocess(gc)) {
1037     auto index = gc.luminosityBlockIndex();
1038     subprocess_global_lumi_check_[index] = 0;
1039     lumi_transition_[index].reset();
1040   }
1041 }
1042 
1043 void FastTimerService::postGlobalBeginLumi(edm::GlobalContext const& gc) { ignoredSignal(__func__); }
1044 
1045 void FastTimerService::preGlobalEndLumi(edm::GlobalContext const& gc) { ignoredSignal(__func__); }
1046 
1047 void FastTimerService::postGlobalEndLumi(edm::GlobalContext const& gc) {
1048   ignoredSignal(__func__);
1049 
1050   // handle the summaries only after the last subprocess has run
1051   auto index = gc.luminosityBlockIndex();
1052   bool last = isLastSubprocess(subprocess_global_lumi_check_[index]);
1053   if (not last)
1054     return;
1055 
1056   edm::LogVerbatim out("FastReport");
1057   auto const& label =
1058       fmt::sprintf("run %d, lumisection %d", gc.luminosityBlockID().run(), gc.luminosityBlockID().luminosityBlock());
1059   printTransition(out, lumi_transition_[index], label);
1060 
1061   if (enable_dqm_transitions_) {
1062     plots_->fill_lumi(lumi_transition_[index], gc.luminosityBlockID().luminosityBlock());
1063   }
1064 }
1065 
1066 void FastTimerService::preStreamBeginLumi(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1067 
1068 void FastTimerService::postStreamBeginLumi(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1069 
1070 void FastTimerService::preStreamEndLumi(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1071 
1072 void FastTimerService::postStreamEndLumi(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1073 
1074 void FastTimerService::preGlobalEndRun(edm::GlobalContext const& gc) { ignoredSignal(__func__); }
1075 
1076 void FastTimerService::postGlobalEndRun(edm::GlobalContext const& gc) {
1077   ignoredSignal(__func__);
1078 
1079   // handle the summaries only after the last subprocess has run
1080   auto index = gc.runIndex();
1081   bool last = isLastSubprocess(subprocess_global_run_check_[index]);
1082   if (not last)
1083     return;
1084 
1085   edm::LogVerbatim out("FastReport");
1086   auto const& label = fmt::sprintf("Run %d", gc.luminosityBlockID().run());
1087   if (print_run_summary_) {
1088     printSummary(out, run_summary_[index], label);
1089   }
1090   printTransition(out, run_transition_[index], label);
1091 
1092   if (enable_dqm_transitions_) {
1093     plots_->fill_run(run_transition_[index]);
1094   }
1095 }
1096 
1097 void FastTimerService::preSourceRun(edm::RunIndex index) { thread().measure_and_accumulate(overhead_); }
1098 
1099 void FastTimerService::postSourceRun(edm::RunIndex index) { thread().measure_and_accumulate(run_transition_[index]); }
1100 
1101 void FastTimerService::preSourceLumi(edm::LuminosityBlockIndex index) { thread().measure_and_accumulate(overhead_); }
1102 
1103 void FastTimerService::postSourceLumi(edm::LuminosityBlockIndex index) {
1104   thread().measure_and_accumulate(lumi_transition_[index]);
1105 }
1106 
1107 void FastTimerService::postEndJob() {
1108   // stop observing to avoid potential race conditions at exit
1109   tbb::task_scheduler_observer::observe(false);
1110   guard_.finalize();
1111   if (print_job_summary_) {
1112     edm::LogVerbatim out("FastReport");
1113     printSummary(out, job_summary_, "Job");
1114   }
1115   if (write_json_summary_) {
1116     writeSummaryJSON(job_summary_, json_filename_);
1117   }
1118 }
1119 
1120 template <typename T>
1121 void FastTimerService::printHeader(T& out, std::string const& label) const {
1122   out << "FastReport ";
1123   if (label.size() < 60)
1124     for (unsigned int i = (60 - label.size()) / 2; i > 0; --i)
1125       out << '-';
1126   out << ' ' << label << " Summary ";
1127   if (label.size() < 60)
1128     for (unsigned int i = (59 - label.size()) / 2; i > 0; --i)
1129       out << '-';
1130   out << '\n';
1131 }
1132 
1133 template <typename T>
1134 void FastTimerService::printEventHeader(T& out, std::string const& label) const {
1135   out << "FastReport       CPU time      Real time      Allocated    Deallocated  " << label << "\n";
1136   //      FastReport  ########.# ms  ########.# ms  +######### kB  -######### kB  ...
1137 }
1138 
1139 template <typename T>
1140 void FastTimerService::printEventLine(T& out, Resources const& data, std::string const& label) const {
1141   out << fmt::sprintf("FastReport  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %s\n",
1142                       ms(data.time_thread),
1143                       ms(data.time_real),
1144                       +static_cast<int64_t>(kB(data.allocated)),
1145                       -static_cast<int64_t>(kB(data.deallocated)),
1146                       label);
1147 }
1148 
1149 template <typename T>
1150 void FastTimerService::printEventLine(T& out, AtomicResources const& data, std::string const& label) const {
1151   out << fmt::sprintf("FastReport  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %s\n",
1152                       ms(boost::chrono::nanoseconds(data.time_thread.load())),
1153                       ms(boost::chrono::nanoseconds(data.time_real.load())),
1154                       +static_cast<int64_t>(kB(data.allocated)),
1155                       -static_cast<int64_t>(kB(data.deallocated)),
1156                       label);
1157 }
1158 
1159 template <typename T>
1160 void FastTimerService::printEvent(T& out, ResourcesPerJob const& data) const {
1161   printHeader(out, "Event");
1162   printEventHeader(out, "Modules");
1163   auto const& source_d = callgraph_.source();
1164   auto const& source = data.modules[source_d.id()];
1165   printEventLine(out, source.total, source_d.moduleLabel());
1166   for (unsigned int i = 0; i < callgraph_.processes().size(); ++i) {
1167     auto const& proc_d = callgraph_.processDescription(i);
1168     auto const& proc = data.processes[i];
1169     printEventLine(out, proc.total, "process " + proc_d.name_);
1170     for (unsigned int m : proc_d.modules_) {
1171       auto const& module_d = callgraph_.module(m);
1172       auto const& module = data.modules[m];
1173       printEventLine(out, module.total, "  " + module_d.moduleLabel());
1174     }
1175   }
1176   printEventLine(out, data.total, "total");
1177   out << '\n';
1178   printEventHeader(out, "Processes and Paths");
1179   printEventLine(out, source.total, source_d.moduleLabel());
1180   for (unsigned int i = 0; i < callgraph_.processes().size(); ++i) {
1181     auto const& proc_d = callgraph_.processDescription(i);
1182     auto const& proc = data.processes[i];
1183     printEventLine(out, proc.total, "process " + proc_d.name_);
1184     for (unsigned int p = 0; p < proc.paths.size(); ++p) {
1185       auto const& name = proc_d.paths_[p].name_;
1186       auto const& path = proc.paths[p];
1187       printEventLine(out, path.active, name + " (only scheduled modules)");
1188       printEventLine(out, path.total, name + " (including dependencies)");
1189     }
1190     for (unsigned int p = 0; p < proc.endpaths.size(); ++p) {
1191       auto const& name = proc_d.endPaths_[p].name_;
1192       auto const& path = proc.endpaths[p];
1193       printEventLine(out, path.active, name + " (only scheduled modules)");
1194       printEventLine(out, path.total, name + " (including dependencies)");
1195     }
1196   }
1197   printEventLine(out, data.total, "total");
1198   out << '\n';
1199   for (unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1200     printEventHeader(out, "Highlighted modules");
1201     for (unsigned int m : highlight_modules_[group].modules) {
1202       auto const& module_d = callgraph_.module(m);
1203       auto const& module = data.modules[m];
1204       printEventLine(out, module.total, "  " + module_d.moduleLabel());
1205     }
1206     printEventLine(out, data.highlight[group], highlight_modules_[group].label);
1207     out << '\n';
1208   }
1209 }
1210 
1211 template <typename T>
1212 void FastTimerService::printSummaryHeader(T& out, std::string const& label, bool detailed) const {
1213   // clang-format off
1214   if (detailed)
1215     out << "FastReport   CPU time avg.      when run  Real time avg.      when run     Alloc. avg.      when run   Dealloc. avg.      when run  ";
1216   //        FastReport  ########.# ms  ########.# ms  ########.# ms  ########.# ms  +######### kB  +######### kB  -######### kB  -######### kB  ...
1217   else
1218     out << "FastReport   CPU time avg.                Real time avg.                   Alloc. avg.                 Dealloc. avg.                ";
1219   //        FastReport  ########.# ms                 ########.# ms                 +######### kB                 -######### kB                 ...
1220   out << label << '\n';
1221   // clang-format on
1222 }
1223 
1224 template <typename T>
1225 void FastTimerService::printPathSummaryHeader(T& out, std::string const& label) const {
1226   // clang-format off
1227   out << "FastReport     CPU time sched. / depend.    Real time sched. / depend.       Alloc. sched. / depend.     Dealloc. sched. / depend.  ";
1228   //      FastReport  ########.# ms  ########.# ms  ########.# ms  ########.# ms  +######### kB  +######### kB  -######### kB  -######### kB  ...
1229   out << label << '\n';
1230   // clang-format on
1231 }
1232 
1233 template <typename T>
1234 void FastTimerService::printSummaryLine(T& out, Resources const& data, uint64_t events, std::string const& label) const {
1235   out << fmt::sprintf(
1236       // clang-format off
1237       "FastReport  %10.1f ms                 %10.1f ms                 %+10d kB                 %+10d kB                 %s\n",
1238       // clang-format on
1239       (events ? ms(data.time_thread) / events : 0),
1240       (events ? ms(data.time_real) / events : 0),
1241       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1242       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1243       label);
1244 }
1245 
1246 template <typename T>
1247 void FastTimerService::printSummaryLine(
1248     T& out, AtomicResources const& data, uint64_t events, uint64_t active, std::string const& label) const {
1249   out << fmt::sprintf(
1250       // clang-format off
1251       "FastReport  %10.1f ms  %10.1f ms  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %+10d kB  %+10d kB  %s\n",
1252       // clang-format on
1253       (events ? ms(data.time_thread) / events : 0),
1254       (active ? ms(data.time_thread) / active : 0),
1255       (events ? ms(data.time_real) / events : 0),
1256       (active ? ms(data.time_real) / active : 0),
1257       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1258       (active ? +static_cast<int64_t>(kB(data.allocated) / active) : 0),
1259       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1260       (active ? -static_cast<int64_t>(kB(data.deallocated) / active) : 0),
1261       label);
1262 }
1263 
1264 template <typename T>
1265 void FastTimerService::printSummaryLine(T& out,
1266                                         AtomicResources const& data,
1267                                         uint64_t events,
1268                                         std::string const& label) const {
1269   out << fmt::sprintf(
1270       // clang-format off
1271       "FastReport  %10.1f ms                 %10.1f ms                 %+10d kB                 %+10d kB                 %s\n",
1272       // clang-format on
1273       (events ? ms(data.time_thread) / events : 0),
1274       (events ? ms(data.time_real) / events : 0),
1275       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1276       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1277       label);
1278 }
1279 
1280 template <typename T>
1281 void FastTimerService::printSummaryLine(
1282     T& out, Resources const& data, uint64_t events, uint64_t active, std::string const& label) const {
1283   out << fmt::sprintf(
1284       "FastReport  %10.1f ms  %10.1f ms  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %+10d kB  %+10d kB  %s\n",
1285       (events ? ms(data.time_thread) / events : 0),
1286       (active ? ms(data.time_thread) / active : 0),
1287       (events ? ms(data.time_real) / events : 0),
1288       (active ? ms(data.time_real) / active : 0),
1289       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1290       (active ? +static_cast<int64_t>(kB(data.allocated) / active) : 0),
1291       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1292       (active ? -static_cast<int64_t>(kB(data.deallocated) / active) : 0),
1293       label);
1294 }
1295 
1296 template <typename T>
1297 void FastTimerService::printPathSummaryLine(
1298     T& out, Resources const& data, Resources const& total, uint64_t events, std::string const& label) const {
1299   out << fmt::sprintf(
1300       "FastReport  %10.1f ms  %10.1f ms  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %+10d kB  %+10d kB  %s\n",
1301       (events ? ms(data.time_thread) / events : 0),
1302       (events ? ms(total.time_thread) / events : 0),
1303       (events ? ms(data.time_real) / events : 0),
1304       (events ? ms(total.time_real) / events : 0),
1305       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1306       (events ? +static_cast<int64_t>(kB(total.allocated) / events) : 0),
1307       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1308       (events ? -static_cast<int64_t>(kB(total.deallocated) / events) : 0),
1309       label);
1310 }
1311 
1312 template <typename T>
1313 void FastTimerService::printSummary(T& out, ResourcesPerJob const& data, std::string const& label) const {
1314   printHeader(out, label);
1315   printSummaryHeader(out, "Modules", true);
1316   auto const& source_d = callgraph_.source();
1317   auto const& source = data.modules[source_d.id()];
1318   printSummaryLine(out, source.total, data.events, source.events, source_d.moduleLabel());
1319   for (unsigned int i = 0; i < callgraph_.processes().size(); ++i) {
1320     auto const& proc_d = callgraph_.processDescription(i);
1321     auto const& proc = data.processes[i];
1322     printSummaryLine(out, proc.total, data.events, "process " + proc_d.name_);
1323     for (unsigned int m : proc_d.modules_) {
1324       auto const& module_d = callgraph_.module(m);
1325       auto const& module = data.modules[m];
1326       printSummaryLine(out, module.total, data.events, module.events, "  " + module_d.moduleLabel());
1327     }
1328   }
1329   printSummaryLine(out, data.total, data.events, "total");
1330   printSummaryLine(out, data.overhead, data.events, "other");
1331   printSummaryLine(out, data.eventsetup, data.events, "eventsetup");
1332   out << '\n';
1333   printPathSummaryHeader(out, "Processes and Paths");
1334   printSummaryLine(out, source.total, data.events, source_d.moduleLabel());
1335   for (unsigned int i = 0; i < callgraph_.processes().size(); ++i) {
1336     auto const& proc_d = callgraph_.processDescription(i);
1337     auto const& proc = data.processes[i];
1338     printSummaryLine(out, proc.total, data.events, "process " + proc_d.name_);
1339     for (unsigned int p = 0; p < proc.paths.size(); ++p) {
1340       auto const& name = proc_d.paths_[p].name_;
1341       auto const& path = proc.paths[p];
1342       printPathSummaryLine(out, path.active, path.total, data.events, "  " + name);
1343     }
1344     for (unsigned int p = 0; p < proc.endpaths.size(); ++p) {
1345       auto const& name = proc_d.endPaths_[p].name_;
1346       auto const& path = proc.endpaths[p];
1347       printPathSummaryLine(out, path.active, path.total, data.events, "  " + name);
1348     }
1349   }
1350   printSummaryLine(out, data.total, data.events, "total");
1351   printSummaryLine(out, data.overhead, data.events, "other");
1352   printSummaryLine(out, data.eventsetup, data.events, "eventsetup");
1353   out << '\n';
1354   for (unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1355     printSummaryHeader(out, "Highlighted modules", true);
1356     for (unsigned int m : highlight_modules_[group].modules) {
1357       auto const& module_d = callgraph_.module(m);
1358       auto const& module = data.modules[m];
1359       printSummaryLine(out, module.total, data.events, module.events, module_d.moduleLabel());
1360     }
1361     printSummaryLine(out, data.highlight[group], data.events, highlight_modules_[group].label);
1362     out << '\n';
1363   }
1364 }
1365 
1366 template <typename T>
1367 void FastTimerService::printTransition(T& out, AtomicResources const& data, std::string const& label) const {
1368   printEventHeader(out, "Transition");
1369   printEventLine(out, data, label);
1370 }
1371 
1372 template <typename T>
1373 json FastTimerService::encodeToJSON(std::string const& type,
1374                                     std::string const& label,
1375                                     unsigned int events,
1376                                     T const& data) const {
1377   return json{{"type", type},
1378               {"label", label},
1379               {"events", events},
1380               {"time_thread", ms(data.time_thread)},
1381               {"time_real", ms(data.time_real)},
1382               {"mem_alloc", kB(data.allocated)},
1383               {"mem_free", kB(data.deallocated)}};
1384 }
1385 
1386 json FastTimerService::encodeToJSON(edm::ModuleDescription const& module, ResourcesPerModule const& data) const {
1387   return encodeToJSON(module.moduleName(), module.moduleLabel(), data.events, data.total);
1388 }
1389 
1390 void FastTimerService::writeSummaryJSON(ResourcesPerJob const& data, std::string const& filename) const {
1391   json j;
1392 
1393   // write a description of the resources
1394   j["resources"] = json::array({json{{"time_real", "real time"}},
1395                                 json{{"time_thread", "cpu time"}},
1396                                 json{{"mem_alloc", "allocated memory"}},
1397                                 json{{"mem_free", "deallocated memory"}}});
1398 
1399   // write the resources used by the job
1400   j["total"] = encodeToJSON(
1401       "Job", callgraph_.processDescription(0).name_, data.events, data.total + data.overhead + data.eventsetup);
1402 
1403   // write the resources used by every module
1404   j["modules"] = json::array();
1405   for (unsigned int i = 0; i < callgraph_.size(); ++i) {
1406     auto const& module = callgraph_.module(i);
1407     auto const& data_m = data.modules[i];
1408     j["modules"].push_back(encodeToJSON(module, data_m));
1409   }
1410 
1411   // add an entry for the "overhead"
1412   j["modules"].push_back(encodeToJSON("other", "other", data.events, data.overhead));
1413   j["modules"].push_back(encodeToJSON("eventsetup", "eventsetup", data.events, data.eventsetup));
1414 
1415   std::ofstream out(filename);
1416   out << std::setw(2) << j << std::flush;
1417 }
1418 
1419 // check if this is the first process being signalled
1420 bool FastTimerService::isFirstSubprocess(edm::StreamContext const& sc) {
1421   return (not sc.processContext()->isSubProcess());
1422 }
1423 
1424 bool FastTimerService::isFirstSubprocess(edm::GlobalContext const& gc) {
1425   return (not gc.processContext()->isSubProcess());
1426 }
1427 
1428 // check if this is the last process being signalled
1429 bool FastTimerService::isLastSubprocess(std::atomic<unsigned int>& check) {
1430   // release-acquire semantic guarantees that all writes in this and other threads are visible
1431   // after this operation; full sequentially-consistent ordering is (probably) not needed.
1432   unsigned int old_value = check.fetch_add(1, std::memory_order_acq_rel);
1433   return (old_value == callgraph_.processes().size() - 1);
1434 }
1435 
1436 void FastTimerService::preEvent(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1437 
1438 void FastTimerService::postEvent(edm::StreamContext const& sc) {
1439   ignoredSignal(__func__);
1440 
1441   unsigned int pid = callgraph_.processId(*sc.processContext());
1442   unsigned int sid = sc.streamID();
1443   auto& stream = streams_[sid];
1444   auto& process = callgraph_.processDescription(pid);
1445 
1446   // measure the event resources as the sum of all modules' resources
1447   auto& data = stream.processes[pid].total;
1448   for (unsigned int id : process.modules_)
1449     data += stream.modules[id].total;
1450   stream.total += data;
1451 
1452   // handle the summaries and fill the plots only after the last subprocess has run
1453   bool last = isLastSubprocess(subprocess_event_check_[sid]);
1454   if (not last)
1455     return;
1456 
1457   // measure the event resources explicitly
1458   stream.event_measurement.measure_and_store(stream.event);
1459 
1460   // add to the event resources those used by source (which is not part of any process)
1461   unsigned int id = 0;
1462   stream.total += stream.modules[id].total;
1463 
1464   // highlighted modules
1465   for (unsigned int group : boost::irange(0ul, highlight_modules_.size()))
1466     for (unsigned int i : highlight_modules_[group].modules)
1467       stream.highlight[group] += stream.modules[i].total;
1468 
1469   // avoid concurrent access to the summary objects
1470   {
1471     std::lock_guard<std::mutex> guard(summary_mutex_);
1472     job_summary_ += stream;
1473     run_summary_[sc.runIndex()] += stream;
1474   }
1475 
1476   if (print_event_summary_) {
1477     edm::LogVerbatim out("FastReport");
1478     printEvent(out, stream);
1479   }
1480 
1481   if (enable_dqm_) {
1482     plots_->fill(callgraph_, stream, sc.eventID().luminosityBlock());
1483   }
1484 }
1485 
1486 void FastTimerService::preSourceEvent(edm::StreamID sid) {
1487   // clear the event counters
1488   auto& stream = streams_[sid];
1489   stream.reset();
1490   ++stream.events;
1491 
1492   subprocess_event_check_[sid] = 0;
1493 
1494   // reuse the same measurement for the Source module and for the explicit begin of the Event
1495   auto& measurement = thread();
1496   measurement.measure_and_accumulate(stream.overhead);
1497   stream.event_measurement = measurement;
1498 }
1499 
1500 void FastTimerService::postSourceEvent(edm::StreamID sid) {
1501   edm::ModuleDescription const& md = callgraph_.source();
1502   unsigned int id = md.id();
1503   auto& stream = streams_[sid];
1504   auto& module = stream.modules[id];
1505 
1506   thread().measure_and_store(module.total);
1507   ++stream.modules[id].events;
1508 }
1509 
1510 void FastTimerService::prePathEvent(edm::StreamContext const& sc, edm::PathContext const& pc) {
1511   unsigned int sid = sc.streamID().value();
1512   unsigned int pid = callgraph_.processId(*sc.processContext());
1513   unsigned int id = pc.pathID();
1514   auto& stream = streams_[sid];
1515   auto& data = pc.isEndPath() ? stream.processes[pid].endpaths[id] : stream.processes[pid].paths[id];
1516   data.status = false;
1517   data.last = 0;
1518 }
1519 
1520 void FastTimerService::postPathEvent(edm::StreamContext const& sc,
1521                                      edm::PathContext const& pc,
1522                                      edm::HLTPathStatus const& status) {
1523   unsigned int sid = sc.streamID().value();
1524   unsigned int pid = callgraph_.processId(*sc.processContext());
1525   unsigned int id = pc.pathID();
1526   auto& stream = streams_[sid];
1527   auto& data = pc.isEndPath() ? stream.processes[pid].endpaths[id] : stream.processes[pid].paths[id];
1528 
1529   auto const& path =
1530       pc.isEndPath() ? callgraph_.processDescription(pid).endPaths_[id] : callgraph_.processDescription(pid).paths_[id];
1531   unsigned int index = path.modules_on_path_.empty() ? 0 : status.index() + 1;
1532   data.last = path.modules_on_path_.empty() ? 0 : path.last_dependency_of_module_[status.index()];
1533 
1534   for (unsigned int i = 0; i < index; ++i) {
1535     auto const& module = stream.modules[path.modules_on_path_[i]];
1536     data.active += module.total;
1537   }
1538   for (unsigned int i = 0; i < data.last; ++i) {
1539     auto const& module = stream.modules[path.modules_and_dependencies_[i]];
1540     data.total += module.total;
1541   }
1542 }
1543 
1544 void FastTimerService::preModuleEventAcquire(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1545   unsigned int sid = sc.streamID().value();
1546   auto& stream = streams_[sid];
1547   thread().measure_and_accumulate(stream.overhead);
1548 }
1549 
1550 void FastTimerService::postModuleEventAcquire(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1551   edm::ModuleDescription const& md = *mcc.moduleDescription();
1552   unsigned int id = md.id();
1553   unsigned int sid = sc.streamID().value();
1554   auto& stream = streams_[sid];
1555   auto& module = stream.modules[id];
1556 
1557   module.has_acquire = true;
1558   thread().measure_and_store(module.total);
1559 }
1560 
1561 void FastTimerService::preModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1562   unsigned int sid = sc.streamID().value();
1563   auto& stream = streams_[sid];
1564   thread().measure_and_accumulate(stream.overhead);
1565 }
1566 
1567 void FastTimerService::postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1568   edm::ModuleDescription const& md = *mcc.moduleDescription();
1569   unsigned int id = md.id();
1570   unsigned int sid = sc.streamID().value();
1571   auto& stream = streams_[sid];
1572   auto& module = stream.modules[id];
1573 
1574   if (module.has_acquire) {
1575     thread().measure_and_accumulate(module.total);
1576   } else {
1577     thread().measure_and_store(module.total);
1578   }
1579   ++module.events;
1580 }
1581 
1582 void FastTimerService::preModuleEventDelayedGet(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1583   unsupportedSignal(__func__);
1584 }
1585 
1586 void FastTimerService::postModuleEventDelayedGet(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1587   unsupportedSignal(__func__);
1588 }
1589 
1590 void FastTimerService::preModuleEventPrefetching(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1591   ignoredSignal(__func__);
1592 }
1593 
1594 void FastTimerService::postModuleEventPrefetching(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1595   ignoredSignal(__func__);
1596 }
1597 
1598 void FastTimerService::preEventReadFromSource(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1599   if (mcc.state() == edm::ModuleCallingContext::State::kPrefetching) {
1600     auto& stream = streams_[sc.streamID()];
1601     thread().measure_and_accumulate(stream.overhead);
1602   }
1603 }
1604 
1605 void FastTimerService::postEventReadFromSource(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1606   if (mcc.state() == edm::ModuleCallingContext::State::kPrefetching) {
1607     edm::ModuleDescription const& md = callgraph_.source();
1608     unsigned int id = md.id();
1609     auto& stream = streams_[sc.streamID()];
1610     auto& module = stream.modules[id];
1611 
1612     thread().measure_and_accumulate(module.total);
1613   }
1614 }
1615 
1616 void FastTimerService::preModuleGlobalBeginRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1617   thread().measure_and_accumulate(overhead_);
1618 }
1619 
1620 void FastTimerService::postModuleGlobalBeginRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1621   auto index = gc.runIndex();
1622   thread().measure_and_accumulate(run_transition_[index]);
1623 }
1624 
1625 void FastTimerService::preModuleGlobalEndRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1626   thread().measure_and_accumulate(overhead_);
1627 }
1628 
1629 void FastTimerService::postModuleGlobalEndRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1630   auto index = gc.runIndex();
1631   thread().measure_and_accumulate(run_transition_[index]);
1632 }
1633 
1634 void FastTimerService::preModuleGlobalBeginLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1635   thread().measure_and_accumulate(overhead_);
1636 }
1637 
1638 void FastTimerService::postModuleGlobalBeginLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1639   auto index = gc.luminosityBlockIndex();
1640   thread().measure_and_accumulate(lumi_transition_[index]);
1641 }
1642 
1643 void FastTimerService::preModuleGlobalEndLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1644   thread().measure_and_accumulate(overhead_);
1645 }
1646 
1647 void FastTimerService::postModuleGlobalEndLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1648   auto index = gc.luminosityBlockIndex();
1649   thread().measure_and_accumulate(lumi_transition_[index]);
1650 }
1651 
1652 void FastTimerService::preModuleStreamBeginRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1653   thread().measure_and_accumulate(overhead_);
1654 }
1655 
1656 void FastTimerService::postModuleStreamBeginRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1657   auto index = sc.runIndex();
1658   thread().measure_and_accumulate(run_transition_[index]);
1659 }
1660 
1661 void FastTimerService::preModuleStreamEndRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1662   thread().measure_and_accumulate(overhead_);
1663 }
1664 
1665 void FastTimerService::postModuleStreamEndRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1666   auto index = sc.runIndex();
1667   thread().measure_and_accumulate(run_transition_[index]);
1668 }
1669 
1670 void FastTimerService::preModuleStreamBeginLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1671   thread().measure_and_accumulate(overhead_);
1672 }
1673 
1674 void FastTimerService::postModuleStreamBeginLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1675   auto index = sc.luminosityBlockIndex();
1676   thread().measure_and_accumulate(lumi_transition_[index]);
1677 }
1678 
1679 void FastTimerService::preModuleStreamEndLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1680   thread().measure_and_accumulate(overhead_);
1681 }
1682 
1683 void FastTimerService::postModuleStreamEndLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1684   auto index = sc.luminosityBlockIndex();
1685   thread().measure_and_accumulate(lumi_transition_[index]);
1686 }
1687 
1688 void FastTimerService::preESModule(edm::eventsetup::EventSetupRecordKey const&, edm::ESModuleCallingContext const& cc) {
1689   auto top = cc.getTopModuleCallingContext();
1690   if (top->type() == edm::ParentContext::Type::kPlaceInPath) {
1691     //Paths are only used when processing an Event
1692     unsigned int sid = top->parent().placeInPathContext()->pathContext()->streamContext()->streamID().value();
1693     auto& stream = streams_[sid];
1694     thread().measure_and_accumulate(stream.overhead);
1695   }
1696 }
1697 void FastTimerService::postESModule(edm::eventsetup::EventSetupRecordKey const&,
1698                                     edm::ESModuleCallingContext const& cc) {
1699   auto top = cc.getTopModuleCallingContext();
1700   if (top->type() == edm::ParentContext::Type::kPlaceInPath) {
1701     unsigned int sid = top->parent().placeInPathContext()->pathContext()->streamContext()->streamID().value();
1702     auto& stream = streams_[sid];
1703     thread().measure_and_accumulate(stream.eventsetup);
1704   }
1705 }
1706 
1707 FastTimerService::ThreadGuard::ThreadGuard() {
1708   auto err = ::pthread_key_create(&key_, retire_thread);
1709   if (err) {
1710     throw cms::Exception("FastTimerService") << "ThreadGuard key creation failed: " << ::strerror(err);
1711   }
1712 }
1713 
1714 // If this is a new thread, register it and return true
1715 bool FastTimerService::ThreadGuard::register_thread(FastTimerService::AtomicResources& r) {
1716   auto ptr = ::pthread_getspecific(key_);
1717 
1718   if (not ptr) {
1719     auto p = thread_resources_.emplace_back(std::make_shared<specific_t>(r));
1720     auto pp = new std::shared_ptr<specific_t>(*p);
1721     auto err = ::pthread_setspecific(key_, pp);
1722     if (err) {
1723       throw cms::Exception("FastTimerService") << "ThreadGuard pthread_setspecific failed: " << ::strerror(err);
1724     }
1725     return true;
1726   }
1727   return false;
1728 }
1729 
1730 std::shared_ptr<FastTimerService::ThreadGuard::specific_t>* FastTimerService::ThreadGuard::ptr(void* p) {
1731   return static_cast<std::shared_ptr<specific_t>*>(p);
1732 }
1733 
1734 // called when a thread exits
1735 void FastTimerService::ThreadGuard::retire_thread(void* p) {
1736   auto ps = ptr(p);
1737   auto expected = true;
1738   if ((*ps)->live_.compare_exchange_strong(expected, false)) {
1739     // account any resources used or freed by the thread before leaving the TBB pool
1740     (*ps)->measurement_.measure_and_accumulate((*ps)->resource_);
1741   }
1742   delete ps;
1743 }
1744 
1745 // finalize all threads that have not retired
1746 void FastTimerService::ThreadGuard::finalize() {
1747   for (auto& p : thread_resources_) {
1748     auto expected = true;
1749     if (p->live_.compare_exchange_strong(expected, false)) {
1750       p->measurement_.measure_and_accumulate(p->resource_);
1751     }
1752   }
1753 }
1754 
1755 FastTimerService::Measurement& FastTimerService::ThreadGuard::thread() {
1756   return (*ptr(::pthread_getspecific(key_)))->measurement_;
1757 }
1758 
1759 void FastTimerService::on_scheduler_entry(bool worker) {
1760   if (guard_.register_thread(overhead_)) {
1761     // initialise the measurement point for a thread that has newly joined the TBB pool
1762     thread().measure();
1763   }
1764 }
1765 
1766 void FastTimerService::on_scheduler_exit(bool worker) {}
1767 
1768 FastTimerService::Measurement& FastTimerService::thread() { return guard_.thread(); }
1769 
1770 // describe the module's configuration
1771 void FastTimerService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
1772   edm::ParameterSetDescription desc;
1773   desc.addUntracked<bool>("printEventSummary", false);
1774   desc.addUntracked<bool>("printRunSummary", true);
1775   desc.addUntracked<bool>("printJobSummary", true);
1776   // JSON configuration
1777   //desc.addUntracked<bool>("writeJSONByEvent", false);
1778   //desc.addUntracked<bool>("writeJSONByLumiSection", false);
1779   //desc.addUntracked<bool>("writeJSONByRun", false);
1780   desc.addUntracked<bool>("writeJSONSummary", false);
1781   desc.addUntracked<std::string>("jsonFileName", "resources.json");
1782   // DQM configuration
1783   desc.addUntracked<bool>("enableDQM", true);
1784   desc.addUntracked<bool>("enableDQMbyModule", false);
1785   desc.addUntracked<bool>("enableDQMbyPath", false);
1786   desc.addUntracked<bool>("enableDQMbyLumiSection", false);
1787   desc.addUntracked<bool>("enableDQMbyProcesses", false);
1788   desc.addUntracked<bool>("enableDQMTransitions", false);
1789   desc.addUntracked<double>("dqmTimeRange", 1000.);              // ms
1790   desc.addUntracked<double>("dqmTimeResolution", 5.);            // ms
1791   desc.addUntracked<double>("dqmMemoryRange", 1000000.);         // kB
1792   desc.addUntracked<double>("dqmMemoryResolution", 5000.);       // kB
1793   desc.addUntracked<double>("dqmPathTimeRange", 100.);           // ms
1794   desc.addUntracked<double>("dqmPathTimeResolution", 0.5);       // ms
1795   desc.addUntracked<double>("dqmPathMemoryRange", 1000000.);     // kB
1796   desc.addUntracked<double>("dqmPathMemoryResolution", 5000.);   // kB
1797   desc.addUntracked<double>("dqmModuleTimeRange", 40.);          // ms
1798   desc.addUntracked<double>("dqmModuleTimeResolution", 0.2);     // ms
1799   desc.addUntracked<double>("dqmModuleMemoryRange", 100000.);    // kB
1800   desc.addUntracked<double>("dqmModuleMemoryResolution", 500.);  // kB
1801   desc.addUntracked<unsigned>("dqmLumiSectionsRange", 2500);     // ~ 16 hours
1802   desc.addUntracked<std::string>("dqmPath", "HLT/TimerService");
1803 
1804   edm::ParameterSetDescription highlightModulesDescription;
1805   highlightModulesDescription.addUntracked<std::vector<std::string>>("modules", {});
1806   highlightModulesDescription.addUntracked<std::string>("label", "producers");
1807   desc.addVPSetUntracked("highlightModules", highlightModulesDescription, {});
1808 
1809   // # OBSOLETE - these parameters are ignored, they are left only not to break old configurations
1810   // they will not be printed in the generated cfi.py file
1811   desc.addOptionalNode(edm::ParameterDescription<bool>("useRealTimeClock", true, false), false)
1812       ->setComment("This parameter is obsolete and will be ignored.");
1813   desc.addOptionalNode(edm::ParameterDescription<bool>("enableTimingPaths", true, false), false)
1814       ->setComment("This parameter is obsolete and will be ignored.");
1815   desc.addOptionalNode(edm::ParameterDescription<bool>("enableTimingModules", true, false), false)
1816       ->setComment("This parameter is obsolete and will be ignored.");
1817   desc.addOptionalNode(edm::ParameterDescription<bool>("enableTimingExclusive", false, false), false)
1818       ->setComment("This parameter is obsolete and will be ignored.");
1819   desc.addOptionalNode(edm::ParameterDescription<bool>("enableTimingSummary", false, false), false)
1820       ->setComment("This parameter is obsolete and will be ignored.");
1821   desc.addOptionalNode(edm::ParameterDescription<bool>("skipFirstPath", false, false), false)
1822       ->setComment("This parameter is obsolete and will be ignored.");
1823   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathActive", false, false), false)
1824       ->setComment("This parameter is obsolete and will be ignored.");
1825   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathTotal", true, false), false)
1826       ->setComment("This parameter is obsolete and will be ignored.");
1827   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathOverhead", false, false), false)
1828       ->setComment("This parameter is obsolete and will be ignored.");
1829   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathDetails", false, false), false)
1830       ->setComment("This parameter is obsolete and will be ignored.");
1831   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathCounters", true, false), false)
1832       ->setComment("This parameter is obsolete and will be ignored.");
1833   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathExclusive", false, false), false)
1834       ->setComment("This parameter is obsolete and will be ignored.");
1835   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyModuleType", false, false), false)
1836       ->setComment("This parameter is obsolete and will be ignored.");
1837   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMSummary", false, false), false)
1838       ->setComment("This parameter is obsolete and will be ignored.");
1839 
1840   descriptions.add("FastTimerService", desc);
1841 }
1842 
1843 // declare FastTimerService as a framework Service
1844 #include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
1845 DEFINE_FWK_SERVICE(FastTimerService);