Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:18:44

0001 // C++ headers
0002 #include <cmath>
0003 #include <fstream>
0004 #include <iomanip>
0005 #include <iostream>
0006 #include <limits>
0007 #include <mutex>
0008 #include <sstream>
0009 #include <string>
0010 #include <unordered_map>
0011 #include <unordered_set>
0012 
0013 // boost headers
0014 #include <boost/range/irange.hpp>
0015 
0016 // {fmt} headers
0017 #include <fmt/printf.h>
0018 
0019 // JSON headers
0020 #include <nlohmann/json.hpp>
0021 using json = nlohmann::json;
0022 
0023 // CMSSW headers
0024 #include "DQMServices/Core/interface/DQMStore.h"
0025 #include "DataFormats/Common/interface/HLTPathStatus.h"
0026 #include "DataFormats/Provenance/interface/EventID.h"
0027 #include "DataFormats/Provenance/interface/ModuleDescription.h"
0028 #include "DataFormats/Provenance/interface/Timestamp.h"
0029 #include "DataFormats/Scalers/interface/LumiScalers.h"
0030 #include "FWCore/Framework/interface/Event.h"
0031 #include "FWCore/Framework/interface/LuminosityBlock.h"
0032 #include "FWCore/Framework/interface/TriggerNamesService.h"
0033 #include "FWCore/ServiceRegistry/interface/PlaceInPathContext.h"
0034 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0035 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0036 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0037 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0038 #include "FWCore/Utilities/interface/Exception.h"
0039 #include "FWCore/Utilities/interface/StreamID.h"
0040 #include "HLTrigger/Timer/interface/memory_usage.h"
0041 #include "HLTrigger/Timer/interface/processor_model.h"
0042 #include "FastTimerService.h"
0043 
0044 using namespace std::literals;
0045 
0046 ///////////////////////////////////////////////////////////////////////////////
0047 
0048 namespace {
0049 
0050   // convert any std::chrono::duration to milliseconds
0051   template <class Rep, class Period>
0052   double ms(std::chrono::duration<Rep, Period> duration) {
0053     return std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(duration).count();
0054   }
0055 
0056   // convert any boost::chrono::duration to milliseconds
0057   template <class Rep, class Period>
0058   double ms(boost::chrono::duration<Rep, Period> duration) {
0059     return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(duration).count();
0060   }
0061 
0062   // convert a std::atomic<boost::chrono::nanoseconds::rep> to milliseconds
0063   double ms(std::atomic<boost::chrono::nanoseconds::rep> const& duration) {
0064     return boost::chrono::duration_cast<boost::chrono::duration<double, boost::milli>>(
0065                boost::chrono::nanoseconds(duration.load()))
0066         .count();
0067   }
0068 
0069   // convert from bytes to kilobytes, rounding down
0070   uint64_t kB(uint64_t bytes) { return bytes / 1024; }
0071 
0072   // convert from bytes to kilobytes, rounding down
0073   uint64_t kB(std::atomic<uint64_t> const& bytes) { return bytes.load() / 1024; }
0074 
0075 }  // namespace
0076 
0077 ///////////////////////////////////////////////////////////////////////////////
0078 
0079 // resources being monitored by the service
0080 
0081 // Resources
0082 
0083 FastTimerService::Resources::Resources()
0084     : time_thread(boost::chrono::nanoseconds::zero()),
0085       time_real(boost::chrono::nanoseconds::zero()),
0086       allocated(0ul),
0087       deallocated(0ul) {}
0088 
0089 void FastTimerService::Resources::reset() {
0090   time_thread = boost::chrono::nanoseconds::zero();
0091   time_real = boost::chrono::nanoseconds::zero();
0092   allocated = 0ul;
0093   deallocated = 0ul;
0094 }
0095 
0096 FastTimerService::Resources& FastTimerService::Resources::operator+=(Resources const& other) {
0097   time_thread += other.time_thread;
0098   time_real += other.time_real;
0099   allocated += other.allocated;
0100   deallocated += other.deallocated;
0101   return *this;
0102 }
0103 
0104 FastTimerService::Resources& FastTimerService::Resources::operator+=(AtomicResources const& other) {
0105   time_thread += boost::chrono::nanoseconds(other.time_thread.load());
0106   time_real += boost::chrono::nanoseconds(other.time_real.load());
0107   allocated += other.allocated.load();
0108   deallocated += other.deallocated.load();
0109   return *this;
0110 }
0111 
0112 FastTimerService::Resources FastTimerService::Resources::operator+(Resources const& other) const {
0113   Resources result(*this);
0114   result += other;
0115   return result;
0116 }
0117 
0118 FastTimerService::Resources FastTimerService::Resources::operator+(AtomicResources const& other) const {
0119   Resources result(*this);
0120   result += other;
0121   return result;
0122 }
0123 
0124 // AtomicResources
0125 // operation on the whole object are not atomic, as the operations
0126 // on the individual fields could be interleaved; however, accumulation
0127 // of results should yield the correct result.
0128 
0129 FastTimerService::AtomicResources::AtomicResources()
0130     : time_thread(0ul), time_real(0ul), allocated(0ul), deallocated(0ul) {}
0131 
0132 FastTimerService::AtomicResources::AtomicResources(AtomicResources const& other)
0133     : time_thread(other.time_thread.load()),
0134       time_real(other.time_real.load()),
0135       allocated(other.allocated.load()),
0136       deallocated(other.deallocated.load()) {}
0137 
0138 void FastTimerService::AtomicResources::reset() {
0139   time_thread = 0ul;
0140   time_real = 0ul;
0141   allocated = 0ul;
0142   deallocated = 0ul;
0143 }
0144 
0145 FastTimerService::AtomicResources& FastTimerService::AtomicResources::operator=(AtomicResources const& other) {
0146   time_thread = other.time_thread.load();
0147   time_real = other.time_real.load();
0148   allocated = other.allocated.load();
0149   deallocated = other.deallocated.load();
0150   return *this;
0151 }
0152 
0153 FastTimerService::AtomicResources& FastTimerService::AtomicResources::operator+=(AtomicResources const& other) {
0154   time_thread += other.time_thread.load();
0155   time_real += other.time_real.load();
0156   allocated += other.allocated.load();
0157   deallocated += other.deallocated.load();
0158   return *this;
0159 }
0160 
0161 FastTimerService::AtomicResources& FastTimerService::AtomicResources::operator+=(Resources const& other) {
0162   time_thread += other.time_thread.count();
0163   time_real += other.time_real.count();
0164   allocated += other.allocated;
0165   deallocated += other.deallocated;
0166   return *this;
0167 }
0168 
0169 FastTimerService::AtomicResources FastTimerService::AtomicResources::operator+(AtomicResources const& other) const {
0170   AtomicResources result(*this);
0171   result += other;
0172   return result;
0173 }
0174 
0175 FastTimerService::Resources FastTimerService::AtomicResources::operator+(Resources const& other) const {
0176   return other + *this;
0177 }
0178 
0179 // ResourcesPerModule
0180 
0181 FastTimerService::ResourcesPerModule::ResourcesPerModule() noexcept : total(), events(0), has_acquire(false) {}
0182 
0183 void FastTimerService::ResourcesPerModule::reset() noexcept {
0184   total.reset();
0185   events = 0;
0186   has_acquire = false;
0187 }
0188 
0189 FastTimerService::ResourcesPerModule& FastTimerService::ResourcesPerModule::operator+=(ResourcesPerModule const& other) {
0190   total += other.total;
0191   events += other.events;
0192   has_acquire = has_acquire or other.has_acquire;
0193   return *this;
0194 }
0195 
0196 FastTimerService::ResourcesPerModule FastTimerService::ResourcesPerModule::operator+(
0197     ResourcesPerModule const& other) const {
0198   ResourcesPerModule result(*this);
0199   result += other;
0200   return result;
0201 }
0202 
0203 // ResourcesPerPath
0204 
0205 void FastTimerService::ResourcesPerPath::reset() {
0206   active.reset();
0207   total.reset();
0208   last = 0;
0209   status = false;
0210 }
0211 
0212 FastTimerService::ResourcesPerPath& FastTimerService::ResourcesPerPath::operator+=(ResourcesPerPath const& other) {
0213   active += other.active;
0214   total += other.total;
0215   last = 0;  // summing these makes no sense, reset them instead
0216   status = false;
0217   return *this;
0218 }
0219 
0220 FastTimerService::ResourcesPerPath FastTimerService::ResourcesPerPath::operator+(ResourcesPerPath const& other) const {
0221   ResourcesPerPath result(*this);
0222   result += other;
0223   return result;
0224 }
0225 
0226 // ResourcesPerProcess
0227 
0228 FastTimerService::ResourcesPerProcess::ResourcesPerProcess(ProcessCallGraph::ProcessType const& process)
0229     : total(), paths(process.paths_.size()), endpaths(process.endPaths_.size()) {}
0230 
0231 void FastTimerService::ResourcesPerProcess::reset() {
0232   total.reset();
0233   for (auto& path : paths)
0234     path.reset();
0235   for (auto& path : endpaths)
0236     path.reset();
0237 }
0238 
0239 FastTimerService::ResourcesPerProcess& FastTimerService::ResourcesPerProcess::operator+=(
0240     ResourcesPerProcess const& other) {
0241   total += other.total;
0242   assert(paths.size() == other.paths.size());
0243   for (unsigned int i : boost::irange(0ul, paths.size()))
0244     paths[i] += other.paths[i];
0245   assert(endpaths.size() == other.endpaths.size());
0246   for (unsigned int i : boost::irange(0ul, endpaths.size()))
0247     endpaths[i] += other.endpaths[i];
0248   return *this;
0249 }
0250 
0251 FastTimerService::ResourcesPerProcess FastTimerService::ResourcesPerProcess::operator+(
0252     ResourcesPerProcess const& other) const {
0253   ResourcesPerProcess result(*this);
0254   result += other;
0255   return result;
0256 }
0257 
0258 // ResourcesPerJob
0259 
0260 FastTimerService::ResourcesPerJob::ResourcesPerJob(ProcessCallGraph const& job,
0261                                                    std::vector<GroupOfModules> const& groups)
0262     : highlight{groups.size()}, modules{job.size()}, events{0} {
0263   processes.reserve(job.processes().size());
0264   for (auto const& process : job.processes())
0265     processes.emplace_back(process);
0266 }
0267 
0268 void FastTimerService::ResourcesPerJob::reset() {
0269   total.reset();
0270   overhead.reset();
0271   idle.reset();
0272   eventsetup.reset();
0273   event.reset();
0274   for (auto& module : highlight)
0275     module.reset();
0276   for (auto& module : modules)
0277     module.reset();
0278   for (auto& process : processes)
0279     process.reset();
0280   events = 0;
0281 }
0282 
0283 FastTimerService::ResourcesPerJob& FastTimerService::ResourcesPerJob::operator+=(ResourcesPerJob const& other) {
0284   total += other.total;
0285   overhead += other.overhead;
0286   idle += other.idle;
0287   eventsetup += other.eventsetup;
0288   event += other.event;
0289   assert(highlight.size() == other.highlight.size());
0290   for (unsigned int i : boost::irange(0ul, highlight.size()))
0291     highlight[i] += other.highlight[i];
0292   assert(modules.size() == other.modules.size());
0293   for (unsigned int i : boost::irange(0ul, modules.size()))
0294     modules[i] += other.modules[i];
0295   assert(processes.size() == other.processes.size());
0296   for (unsigned int i : boost::irange(0ul, processes.size()))
0297     processes[i] += other.processes[i];
0298   events += other.events;
0299   return *this;
0300 }
0301 
0302 FastTimerService::ResourcesPerJob FastTimerService::ResourcesPerJob::operator+(ResourcesPerJob const& other) const {
0303   ResourcesPerJob result(*this);
0304   result += other;
0305   return result;
0306 }
0307 
0308 // per-thread measurements
0309 
0310 // Measurement
0311 
0312 FastTimerService::Measurement::Measurement() noexcept { measure(); }
0313 
0314 void FastTimerService::Measurement::measure() noexcept {
0315 #ifdef DEBUG_THREAD_CONCURRENCY
0316   id = std::this_thread::get_id();
0317 #endif  // DEBUG_THREAD_CONCURRENCY
0318   time_thread = boost::chrono::thread_clock::now();
0319   time_real = boost::chrono::high_resolution_clock::now();
0320   allocated = memory_usage::allocated();
0321   deallocated = memory_usage::deallocated();
0322 }
0323 
0324 void FastTimerService::Measurement::measure_and_store(Resources& store) noexcept {
0325 #ifdef DEBUG_THREAD_CONCURRENCY
0326   assert(std::this_thread::get_id() == id);
0327 #endif  // DEBUG_THREAD_CONCURRENCY
0328   auto new_time_thread = boost::chrono::thread_clock::now();
0329   auto new_time_real = boost::chrono::high_resolution_clock::now();
0330   auto new_allocated = memory_usage::allocated();
0331   auto new_deallocated = memory_usage::deallocated();
0332   store.time_thread = new_time_thread - time_thread;
0333   store.time_real = new_time_real - time_real;
0334   store.allocated = new_allocated - allocated;
0335   store.deallocated = new_deallocated - deallocated;
0336   time_thread = new_time_thread;
0337   time_real = new_time_real;
0338   allocated = new_allocated;
0339   deallocated = new_deallocated;
0340 }
0341 
0342 void FastTimerService::Measurement::measure_and_accumulate(Resources& store) noexcept {
0343 #ifdef DEBUG_THREAD_CONCURRENCY
0344   assert(std::this_thread::get_id() == id);
0345 #endif  // DEBUG_THREAD_CONCURRENCY
0346   auto new_time_thread = boost::chrono::thread_clock::now();
0347   auto new_time_real = boost::chrono::high_resolution_clock::now();
0348   auto new_allocated = memory_usage::allocated();
0349   auto new_deallocated = memory_usage::deallocated();
0350   store.time_thread += new_time_thread - time_thread;
0351   store.time_real += new_time_real - time_real;
0352   store.allocated += new_allocated - allocated;
0353   store.deallocated += new_deallocated - deallocated;
0354   time_thread = new_time_thread;
0355   time_real = new_time_real;
0356   allocated = new_allocated;
0357   deallocated = new_deallocated;
0358 }
0359 
0360 void FastTimerService::Measurement::measure_and_accumulate(AtomicResources& store) noexcept {
0361 #ifdef DEBUG_THREAD_CONCURRENCY
0362   assert(std::this_thread::get_id() == id);
0363 #endif  // DEBUG_THREAD_CONCURRENCY
0364   auto new_time_thread = boost::chrono::thread_clock::now();
0365   auto new_time_real = boost::chrono::high_resolution_clock::now();
0366   auto new_allocated = memory_usage::allocated();
0367   auto new_deallocated = memory_usage::deallocated();
0368   store.time_thread += boost::chrono::duration_cast<boost::chrono::nanoseconds>(new_time_thread - time_thread).count();
0369   store.time_real += boost::chrono::duration_cast<boost::chrono::nanoseconds>(new_time_real - time_real).count();
0370   store.allocated += new_allocated - allocated;
0371   store.deallocated += new_deallocated - deallocated;
0372   time_thread = new_time_thread;
0373   time_real = new_time_real;
0374   allocated = new_allocated;
0375   deallocated = new_deallocated;
0376 }
0377 
0378 ///////////////////////////////////////////////////////////////////////////////
0379 
0380 void FastTimerService::PlotsPerElement::book(dqm::reco::DQMStore::IBooker& booker,
0381                                              std::string const& name,
0382                                              std::string const& title,
0383                                              PlotRanges const& ranges,
0384                                              unsigned int lumisections,
0385                                              bool byls) {
0386   int time_bins = (int)std::ceil(ranges.time_range / ranges.time_resolution);
0387   int mem_bins = (int)std::ceil(ranges.memory_range / ranges.memory_resolution);
0388   std::string y_title_ms = fmt::sprintf("events / %.1f ms", ranges.time_resolution);
0389   std::string y_title_kB = fmt::sprintf("events / %.1f kB", ranges.memory_resolution);
0390 
0391   // MonitorElement::setStatOverflows(kTRUE) includes underflows and overflows in the computation of mean and RMS
0392   time_thread_ =
0393       booker.book1D(name + " time_thread", title + " processing time (cpu)", time_bins, 0., ranges.time_range);
0394   time_thread_->setXTitle("processing time [ms]");
0395   time_thread_->setYTitle(y_title_ms);
0396   time_thread_->setStatOverflows(kTRUE);
0397 
0398   time_real_ = booker.book1D(name + " time_real", title + " processing time (real)", time_bins, 0., ranges.time_range);
0399   time_real_->setXTitle("processing time [ms]");
0400   time_real_->setYTitle(y_title_ms);
0401   time_real_->setStatOverflows(kTRUE);
0402 
0403   if (memory_usage::is_available()) {
0404     allocated_ = booker.book1D(name + " allocated", title + " allocated memory", mem_bins, 0., ranges.memory_range);
0405     allocated_->setXTitle("memory [kB]");
0406     allocated_->setYTitle(y_title_kB);
0407     allocated_->setStatOverflows(kTRUE);
0408 
0409     deallocated_ =
0410         booker.book1D(name + " deallocated", title + " deallocated memory", mem_bins, 0., ranges.memory_range);
0411     deallocated_->setXTitle("memory [kB]");
0412     deallocated_->setYTitle(y_title_kB);
0413     deallocated_->setStatOverflows(kTRUE);
0414   }
0415 
0416   if (not byls)
0417     return;
0418 
0419   time_thread_byls_ = booker.bookProfile(name + " time_thread_byls",
0420                                          title + " processing time (cpu) vs. lumisection",
0421                                          lumisections,
0422                                          0.5,
0423                                          lumisections + 0.5,
0424                                          time_bins,
0425                                          0.,
0426                                          std::numeric_limits<double>::infinity(),
0427                                          " ");
0428   time_thread_byls_->setXTitle("lumisection");
0429   time_thread_byls_->setYTitle("processing time [ms]");
0430   time_thread_byls_->setStatOverflows(kTRUE);
0431 
0432   time_real_byls_ = booker.bookProfile(name + " time_real_byls",
0433                                        title + " processing time (real) vs. lumisection",
0434                                        lumisections,
0435                                        0.5,
0436                                        lumisections + 0.5,
0437                                        time_bins,
0438                                        0.,
0439                                        std::numeric_limits<double>::infinity(),
0440                                        " ");
0441   time_real_byls_->setXTitle("lumisection");
0442   time_real_byls_->setYTitle("processing time [ms]");
0443   time_real_byls_->setStatOverflows(kTRUE);
0444 
0445   if (memory_usage::is_available()) {
0446     allocated_byls_ = booker.bookProfile(name + " allocated_byls",
0447                                          title + " allocated memory vs. lumisection",
0448                                          lumisections,
0449                                          0.5,
0450                                          lumisections + 0.5,
0451                                          mem_bins,
0452                                          0.,
0453                                          std::numeric_limits<double>::infinity(),
0454                                          " ");
0455     allocated_byls_->setXTitle("lumisection");
0456     allocated_byls_->setYTitle("memory [kB]");
0457     allocated_byls_->setStatOverflows(kTRUE);
0458 
0459     deallocated_byls_ = booker.bookProfile(name + " deallocated_byls",
0460                                            title + " deallocated memory vs. lumisection",
0461                                            lumisections,
0462                                            0.5,
0463                                            lumisections + 0.5,
0464                                            mem_bins,
0465                                            0.,
0466                                            std::numeric_limits<double>::infinity(),
0467                                            " ");
0468     deallocated_byls_->setXTitle("lumisection");
0469     deallocated_byls_->setYTitle("memory [kB]");
0470     deallocated_byls_->setStatOverflows(kTRUE);
0471   }
0472 }
0473 
0474 void FastTimerService::PlotsPerElement::fill(Resources const& data, unsigned int lumisection) {
0475   if (time_thread_)
0476     time_thread_->Fill(ms(data.time_thread));
0477 
0478   if (time_thread_byls_)
0479     time_thread_byls_->Fill(lumisection, ms(data.time_thread));
0480 
0481   if (time_real_)
0482     time_real_->Fill(ms(data.time_real));
0483 
0484   if (time_real_byls_)
0485     time_real_byls_->Fill(lumisection, ms(data.time_real));
0486 
0487   if (allocated_)
0488     allocated_->Fill(kB(data.allocated));
0489 
0490   if (allocated_byls_)
0491     allocated_byls_->Fill(lumisection, kB(data.allocated));
0492 
0493   if (deallocated_)
0494     deallocated_->Fill(kB(data.deallocated));
0495 
0496   if (deallocated_byls_)
0497     deallocated_byls_->Fill(lumisection, kB(data.deallocated));
0498 }
0499 
0500 void FastTimerService::PlotsPerElement::fill(AtomicResources const& data, unsigned int lumisection) {
0501   if (time_thread_)
0502     time_thread_->Fill(ms(boost::chrono::nanoseconds(data.time_thread.load())));
0503 
0504   if (time_thread_byls_)
0505     time_thread_byls_->Fill(lumisection, ms(boost::chrono::nanoseconds(data.time_thread.load())));
0506 
0507   if (time_real_)
0508     time_real_->Fill(ms(boost::chrono::nanoseconds(data.time_real.load())));
0509 
0510   if (time_real_byls_)
0511     time_real_byls_->Fill(lumisection, ms(boost::chrono::nanoseconds(data.time_real.load())));
0512 
0513   if (allocated_)
0514     allocated_->Fill(kB(data.allocated));
0515 
0516   if (allocated_byls_)
0517     allocated_byls_->Fill(lumisection, kB(data.allocated));
0518 
0519   if (deallocated_)
0520     deallocated_->Fill(kB(data.deallocated));
0521 
0522   if (deallocated_byls_)
0523     deallocated_byls_->Fill(lumisection, kB(data.deallocated));
0524 }
0525 
0526 void FastTimerService::PlotsPerElement::fill_fraction(Resources const& data,
0527                                                       Resources const& part,
0528                                                       unsigned int lumisection) {
0529   float total;
0530   float fraction;
0531 
0532   total = ms(data.time_thread);
0533   fraction = (total > 0.) ? (ms(part.time_thread) / total) : 0.;
0534   if (time_thread_)
0535     time_thread_->Fill(total, fraction);
0536 
0537   if (time_thread_byls_)
0538     time_thread_byls_->Fill(lumisection, total, fraction);
0539 
0540   total = ms(data.time_real);
0541   fraction = (total > 0.) ? (ms(part.time_real) / total) : 0.;
0542   if (time_real_)
0543     time_real_->Fill(total, fraction);
0544 
0545   if (time_real_byls_)
0546     time_real_byls_->Fill(lumisection, total, fraction);
0547 
0548   total = kB(data.allocated);
0549   fraction = (total > 0.) ? (kB(part.allocated) / total) : 0.;
0550   if (allocated_)
0551     allocated_->Fill(total, fraction);
0552 
0553   if (allocated_byls_)
0554     allocated_byls_->Fill(lumisection, total, fraction);
0555 
0556   total = kB(data.deallocated);
0557   fraction = (total > 0.) ? (kB(part.deallocated) / total) : 0.;
0558   if (deallocated_)
0559     deallocated_->Fill(total, fraction);
0560 
0561   if (deallocated_byls_)
0562     deallocated_byls_->Fill(lumisection, total, fraction);
0563 }
0564 
0565 void FastTimerService::PlotsPerPath::book(dqm::reco::DQMStore::IBooker& booker,
0566                                           std::string const& prefixDir,
0567                                           ProcessCallGraph const& job,
0568                                           ProcessCallGraph::PathType const& path,
0569                                           PlotRanges const& ranges,
0570                                           unsigned int lumisections,
0571                                           bool byls) {
0572   const std::string basedir = booker.pwd();
0573   std::string folderName = basedir + "/" + prefixDir + path.name_;
0574   fixForDQM(folderName);
0575   booker.setCurrentFolder(folderName);
0576 
0577   total_.book(booker, "path", path.name_, ranges, lumisections, byls);
0578 
0579   // MonitorElement::setStatOverflows(kTRUE) includes underflows and overflows in the computation of mean and RMS
0580   unsigned int bins = path.modules_and_dependencies_.size();
0581   module_counter_ = booker.book1DD("module_counter", "module counter", bins + 1, -0.5, bins + 0.5);
0582   module_counter_->setYTitle("events");
0583   module_counter_->setStatOverflows(kTRUE);
0584   module_time_thread_total_ =
0585       booker.book1DD("module_time_thread_total", "total module time (cpu)", bins, -0.5, bins - 0.5);
0586   module_time_thread_total_->setYTitle("processing time [ms]");
0587   module_time_thread_total_->setStatOverflows(kTRUE);
0588   module_time_real_total_ =
0589       booker.book1DD("module_time_real_total", "total module time (real)", bins, -0.5, bins - 0.5);
0590   module_time_real_total_->setYTitle("processing time [ms]");
0591   module_time_real_total_->setStatOverflows(kTRUE);
0592   if (memory_usage::is_available()) {
0593     module_allocated_total_ =
0594         booker.book1DD("module_allocated_total", "total allocated memory", bins, -0.5, bins - 0.5);
0595     module_allocated_total_->setYTitle("memory [kB]");
0596     module_allocated_total_->setStatOverflows(kTRUE);
0597     module_deallocated_total_ =
0598         booker.book1DD("module_deallocated_total", "total deallocated memory", bins, -0.5, bins - 0.5);
0599     module_deallocated_total_->setYTitle("memory [kB]");
0600     module_deallocated_total_->setStatOverflows(kTRUE);
0601   }
0602   for (unsigned int bin : boost::irange(0u, bins)) {
0603     auto const& module = job[path.modules_and_dependencies_[bin]];
0604     std::string const& label =
0605         module.scheduled_ ? module.module_.moduleLabel() : module.module_.moduleLabel() + " (unscheduled)";
0606     module_counter_->setBinLabel(bin + 1, label);
0607     module_time_thread_total_->setBinLabel(bin + 1, label);
0608     module_time_real_total_->setBinLabel(bin + 1, label);
0609     if (memory_usage::is_available()) {
0610       module_allocated_total_->setBinLabel(bin + 1, label);
0611       module_deallocated_total_->setBinLabel(bin + 1, label);
0612     }
0613   }
0614   module_counter_->setBinLabel(bins + 1, "");
0615 
0616   booker.setCurrentFolder(basedir);
0617 }
0618 
0619 void FastTimerService::PlotsPerPath::fill(ProcessCallGraph::PathType const& description,
0620                                           ResourcesPerJob const& data,
0621                                           ResourcesPerPath const& path,
0622                                           unsigned int ls) {
0623   // fill the total path time
0624   total_.fill(path.total, ls);
0625 
0626   // fill the modules that actually ran and the total time spent in each od them
0627   for (unsigned int i = 0; i < path.last; ++i) {
0628     auto const& module = data.modules[description.modules_and_dependencies_[i]];
0629     if (module_counter_)
0630       module_counter_->Fill(i);
0631 
0632     if (module_time_thread_total_)
0633       module_time_thread_total_->Fill(i, ms(module.total.time_thread));
0634 
0635     if (module_time_real_total_)
0636       module_time_real_total_->Fill(i, ms(module.total.time_real));
0637 
0638     if (module_allocated_total_)
0639       module_allocated_total_->Fill(i, kB(module.total.allocated));
0640 
0641     if (module_deallocated_total_)
0642       module_deallocated_total_->Fill(i, kB(module.total.deallocated));
0643   }
0644   if (module_counter_ and path.status)
0645     module_counter_->Fill(path.last);
0646 }
0647 
0648 FastTimerService::PlotsPerProcess::PlotsPerProcess(ProcessCallGraph::ProcessType const& process)
0649     : event_(), paths_(process.paths_.size()), endpaths_(process.endPaths_.size()) {}
0650 
0651 void FastTimerService::PlotsPerProcess::book(dqm::reco::DQMStore::IBooker& booker,
0652                                              ProcessCallGraph const& job,
0653                                              ProcessCallGraph::ProcessType const& process,
0654                                              PlotRanges const& event_ranges,
0655                                              PlotRanges const& path_ranges,
0656                                              unsigned int lumisections,
0657                                              bool bypath,
0658                                              bool byls) {
0659   const std::string basedir = booker.pwd();
0660   event_.book(booker, "process " + process.name_, "process " + process.name_, event_ranges, lumisections, byls);
0661   if (bypath) {
0662     booker.setCurrentFolder(basedir + "/process " + process.name_ + " paths");
0663     for (unsigned int id : boost::irange(0ul, paths_.size())) {
0664       paths_[id].book(booker, "path ", job, process.paths_[id], path_ranges, lumisections, byls);
0665     }
0666     for (unsigned int id : boost::irange(0ul, endpaths_.size())) {
0667       endpaths_[id].book(booker, "endpath ", job, process.endPaths_[id], path_ranges, lumisections, byls);
0668     }
0669     booker.setCurrentFolder(basedir);
0670   }
0671 }
0672 
0673 void FastTimerService::PlotsPerProcess::fill(ProcessCallGraph::ProcessType const& description,
0674                                              ResourcesPerJob const& data,
0675                                              ResourcesPerProcess const& process,
0676                                              unsigned int ls) {
0677   // fill process event plots
0678   event_.fill(process.total, ls);
0679 
0680   // fill all paths plots
0681   for (unsigned int id : boost::irange(0ul, paths_.size()))
0682     paths_[id].fill(description.paths_[id], data, process.paths[id], ls);
0683 
0684   // fill all endpaths plots
0685   for (unsigned int id : boost::irange(0ul, endpaths_.size()))
0686     endpaths_[id].fill(description.endPaths_[id], data, process.endpaths[id], ls);
0687 }
0688 
0689 FastTimerService::PlotsPerJob::PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups)
0690     : highlight_{groups.size()}, modules_{job.size()} {
0691   processes_.reserve(job.processes().size());
0692   for (auto const& process : job.processes())
0693     processes_.emplace_back(process);
0694 }
0695 
0696 void FastTimerService::PlotsPerJob::book(dqm::reco::DQMStore::IBooker& booker,
0697                                          ProcessCallGraph const& job,
0698                                          std::vector<GroupOfModules> const& groups,
0699                                          PlotRanges const& event_ranges,
0700                                          PlotRanges const& path_ranges,
0701                                          PlotRanges const& module_ranges,
0702                                          unsigned int lumisections,
0703                                          bool bymodule,
0704                                          bool bypath,
0705                                          bool byls,
0706                                          bool transitions) {
0707   const std::string basedir = booker.pwd();
0708 
0709   // event summary plots
0710   event_.book(booker, "event", "Event", event_ranges, lumisections, byls);
0711   event_ex_.book(booker, "explicit", "Event (explicit)", event_ranges, lumisections, byls);
0712   overhead_.book(booker, "overhead", "Overhead", event_ranges, lumisections, byls);
0713   idle_.book(booker, "idle", "Idle", event_ranges, lumisections, byls);
0714 
0715   modules_[job.source().id()].book(booker, "source", "Source", module_ranges, lumisections, byls);
0716 
0717   if (transitions) {
0718     lumi_.book(booker, "lumi", "LumiSection transitions", event_ranges, lumisections, byls);
0719 
0720     run_.book(booker, "run", "Run transtions", event_ranges, lumisections, false);
0721   }
0722 
0723   // plot the time spent in few given groups of modules
0724   for (unsigned int group : boost::irange(0ul, groups.size())) {
0725     auto const& label = groups[group].label;
0726     highlight_[group].book(booker, "highlight " + label, "Highlight " + label, event_ranges, lumisections, byls);
0727   }
0728 
0729   // plots per subprocess (event, modules, paths and endpaths)
0730   for (unsigned int pid : boost::irange(0ul, job.processes().size())) {
0731     auto const& process = job.processDescription(pid);
0732     processes_[pid].book(booker, job, process, event_ranges, path_ranges, lumisections, bypath, byls);
0733 
0734     if (bymodule) {
0735       booker.setCurrentFolder(basedir + "/process " + process.name_ + " modules");
0736       for (unsigned int id : process.modules_) {
0737         std::string const& module_label = job.module(id).moduleLabel();
0738         std::string safe_label = module_label;
0739         fixForDQM(safe_label);
0740         modules_[id].book(booker, safe_label, module_label, module_ranges, lumisections, byls);
0741       }
0742       booker.setCurrentFolder(basedir);
0743     }
0744   }
0745 }
0746 
0747 void FastTimerService::PlotsPerJob::fill(ProcessCallGraph const& job, ResourcesPerJob const& data, unsigned int ls) {
0748   // fill total event plots
0749   event_.fill(data.total, ls);
0750   event_ex_.fill(data.event, ls);
0751   overhead_.fill(data.overhead, ls);
0752   idle_.fill(data.idle, ls);
0753 
0754   // fill highltight plots
0755   for (unsigned int group : boost::irange(0ul, highlight_.size()))
0756     highlight_[group].fill_fraction(data.total, data.highlight[group], ls);
0757 
0758   // fill modules plots
0759   for (unsigned int id : boost::irange(0ul, modules_.size()))
0760     modules_[id].fill(data.modules[id].total, ls);
0761 
0762   for (unsigned int pid : boost::irange(0ul, processes_.size()))
0763     processes_[pid].fill(job.processDescription(pid), data, data.processes[pid], ls);
0764 }
0765 
0766 void FastTimerService::PlotsPerJob::fill_run(AtomicResources const& data) {
0767   // fill run transition plots
0768   run_.fill(data, 0);
0769 }
0770 
0771 void FastTimerService::PlotsPerJob::fill_lumi(AtomicResources const& data, unsigned int ls) {
0772   // fill lumisection transition plots
0773   lumi_.fill(data, ls);
0774 }
0775 
0776 ///////////////////////////////////////////////////////////////////////////////
0777 
0778 FastTimerService::FastTimerService(const edm::ParameterSet& config, edm::ActivityRegistry& registry)
0779     :  // configuration
0780       callgraph_(),
0781       // job configuration
0782       concurrent_lumis_(0),
0783       concurrent_runs_(0),
0784       concurrent_streams_(0),
0785       concurrent_threads_(0),
0786       print_event_summary_(config.getUntrackedParameter<bool>("printEventSummary")),
0787       print_run_summary_(config.getUntrackedParameter<bool>("printRunSummary")),
0788       print_job_summary_(config.getUntrackedParameter<bool>("printJobSummary")),
0789       // JSON configuration
0790       //write_json_per_event_(config.getUntrackedParameter<bool>("writeJSONByEvent")),
0791       //write_json_per_ls_(config.getUntrackedParameter<bool>("writeJSONByLumiSection")),
0792       //write_json_per_run_(config.getUntrackedParameter<bool>("writeJSONByRun")),
0793       write_json_summary_(config.getUntrackedParameter<bool>("writeJSONSummary")),
0794       json_filename_(config.getUntrackedParameter<std::string>("jsonFileName")),
0795       // DQM configuration
0796       enable_dqm_(config.getUntrackedParameter<bool>("enableDQM")),
0797       enable_dqm_bymodule_(config.getUntrackedParameter<bool>("enableDQMbyModule")),
0798       enable_dqm_bypath_(config.getUntrackedParameter<bool>("enableDQMbyPath")),
0799       enable_dqm_byls_(config.getUntrackedParameter<bool>("enableDQMbyLumiSection")),
0800       enable_dqm_bynproc_(config.getUntrackedParameter<bool>("enableDQMbyProcesses")),
0801       enable_dqm_transitions_(config.getUntrackedParameter<bool>("enableDQMTransitions")),
0802       dqm_event_ranges_({config.getUntrackedParameter<double>("dqmTimeRange"),                  // ms
0803                          config.getUntrackedParameter<double>("dqmTimeResolution"),             // ms
0804                          config.getUntrackedParameter<double>("dqmMemoryRange"),                // kB
0805                          config.getUntrackedParameter<double>("dqmMemoryResolution")}),         // kB
0806       dqm_path_ranges_({config.getUntrackedParameter<double>("dqmPathTimeRange"),               // ms
0807                         config.getUntrackedParameter<double>("dqmPathTimeResolution"),          // ms
0808                         config.getUntrackedParameter<double>("dqmPathMemoryRange"),             // kB
0809                         config.getUntrackedParameter<double>("dqmPathMemoryResolution")}),      // kB
0810       dqm_module_ranges_({config.getUntrackedParameter<double>("dqmModuleTimeRange"),           // ms
0811                           config.getUntrackedParameter<double>("dqmModuleTimeResolution"),      // ms
0812                           config.getUntrackedParameter<double>("dqmModuleMemoryRange"),         // kB
0813                           config.getUntrackedParameter<double>("dqmModuleMemoryResolution")}),  // kB
0814       dqm_lumisections_range_(config.getUntrackedParameter<unsigned int>("dqmLumiSectionsRange")),
0815       dqm_path_(config.getUntrackedParameter<std::string>("dqmPath")),
0816       // highlight configuration
0817       highlight_module_psets_(config.getUntrackedParameter<std::vector<edm::ParameterSet>>("highlightModules")),
0818       highlight_modules_(highlight_module_psets_.size())  // filled in postBeginJob()
0819 {
0820   // start observing when a thread enters or leaves the TBB global thread arena
0821   tbb::task_scheduler_observer::observe();
0822 
0823   // register EDM call backs
0824   registry.watchPreallocate(this, &FastTimerService::preallocate);
0825   registry.watchPreBeginJob(this, &FastTimerService::preBeginJob);
0826   registry.watchPostBeginJob(this, &FastTimerService::postBeginJob);
0827   registry.watchPostEndJob(this, &FastTimerService::postEndJob);
0828   registry.watchPreGlobalBeginRun(this, &FastTimerService::preGlobalBeginRun);
0829   //registry.watchPostGlobalBeginRun(         this, & FastTimerService::postGlobalBeginRun );
0830   //registry.watchPreGlobalEndRun(            this, & FastTimerService::preGlobalEndRun );
0831   registry.watchPostGlobalEndRun(this, &FastTimerService::postGlobalEndRun);
0832   registry.watchPreStreamBeginRun(this, &FastTimerService::preStreamBeginRun);
0833   //registry.watchPostStreamBeginRun(         this, & FastTimerService::postStreamBeginRun );
0834   //registry.watchPreStreamEndRun(            this, & FastTimerService::preStreamEndRun );
0835   registry.watchPostStreamEndRun(this, &FastTimerService::postStreamEndRun);
0836   registry.watchPreGlobalBeginLumi(this, &FastTimerService::preGlobalBeginLumi);
0837   //registry.watchPostGlobalBeginLumi(        this, & FastTimerService::postGlobalBeginLumi );
0838   //registry.watchPreGlobalEndLumi(           this, & FastTimerService::preGlobalEndLumi );
0839   registry.watchPostGlobalEndLumi(this, &FastTimerService::postGlobalEndLumi);
0840   registry.watchPreStreamBeginLumi(this, &FastTimerService::preStreamBeginLumi);
0841   //registry.watchPostStreamBeginLumi(        this, & FastTimerService::postStreamBeginLumi );
0842   //registry.watchPreStreamEndLumi(           this, & FastTimerService::preStreamEndLumi );
0843   registry.watchPostStreamEndLumi(this, &FastTimerService::postStreamEndLumi);
0844   registry.watchPreEvent(this, &FastTimerService::preEvent);
0845   registry.watchPostEvent(this, &FastTimerService::postEvent);
0846   registry.watchPrePathEvent(this, &FastTimerService::prePathEvent);
0847   registry.watchPostPathEvent(this, &FastTimerService::postPathEvent);
0848   registry.watchPreSourceConstruction(this, &FastTimerService::preSourceConstruction);
0849   //registry.watchPostSourceConstruction(     this, & FastTimerService::postSourceConstruction);
0850   registry.watchPreSourceRun(this, &FastTimerService::preSourceRun);
0851   registry.watchPostSourceRun(this, &FastTimerService::postSourceRun);
0852   registry.watchPreSourceLumi(this, &FastTimerService::preSourceLumi);
0853   registry.watchPostSourceLumi(this, &FastTimerService::postSourceLumi);
0854   registry.watchPreSourceEvent(this, &FastTimerService::preSourceEvent);
0855   registry.watchPostSourceEvent(this, &FastTimerService::postSourceEvent);
0856   //registry.watchPreModuleConstruction(      this, & FastTimerService::preModuleConstruction);
0857   //registry.watchPostModuleConstruction(     this, & FastTimerService::postModuleConstruction);
0858   //registry.watchPreModuleBeginJob(          this, & FastTimerService::preModuleBeginJob );
0859   //registry.watchPostModuleBeginJob(         this, & FastTimerService::postModuleBeginJob );
0860   //registry.watchPreModuleEndJob(            this, & FastTimerService::preModuleEndJob );
0861   //registry.watchPostModuleEndJob(           this, & FastTimerService::postModuleEndJob );
0862   //registry.watchPreModuleBeginStream(       this, & FastTimerService::preModuleBeginStream );
0863   //registry.watchPostModuleBeginStream(      this, & FastTimerService::postModuleBeginStream );
0864   //registry.watchPreModuleEndStream(         this, & FastTimerService::preModuleEndStream );
0865   //registry.watchPostModuleEndStream(        this, & FastTimerService::postModuleEndStream );
0866   registry.watchPreModuleGlobalBeginRun(this, &FastTimerService::preModuleGlobalBeginRun);
0867   registry.watchPostModuleGlobalBeginRun(this, &FastTimerService::postModuleGlobalBeginRun);
0868   registry.watchPreModuleGlobalEndRun(this, &FastTimerService::preModuleGlobalEndRun);
0869   registry.watchPostModuleGlobalEndRun(this, &FastTimerService::postModuleGlobalEndRun);
0870   registry.watchPreModuleGlobalBeginLumi(this, &FastTimerService::preModuleGlobalBeginLumi);
0871   registry.watchPostModuleGlobalBeginLumi(this, &FastTimerService::postModuleGlobalBeginLumi);
0872   registry.watchPreModuleGlobalEndLumi(this, &FastTimerService::preModuleGlobalEndLumi);
0873   registry.watchPostModuleGlobalEndLumi(this, &FastTimerService::postModuleGlobalEndLumi);
0874   registry.watchPreModuleStreamBeginRun(this, &FastTimerService::preModuleStreamBeginRun);
0875   registry.watchPostModuleStreamBeginRun(this, &FastTimerService::postModuleStreamBeginRun);
0876   registry.watchPreModuleStreamEndRun(this, &FastTimerService::preModuleStreamEndRun);
0877   registry.watchPostModuleStreamEndRun(this, &FastTimerService::postModuleStreamEndRun);
0878   registry.watchPreModuleStreamBeginLumi(this, &FastTimerService::preModuleStreamBeginLumi);
0879   registry.watchPostModuleStreamBeginLumi(this, &FastTimerService::postModuleStreamBeginLumi);
0880   registry.watchPreModuleStreamEndLumi(this, &FastTimerService::preModuleStreamEndLumi);
0881   registry.watchPostModuleStreamEndLumi(this, &FastTimerService::postModuleStreamEndLumi);
0882   //registry.watchPreModuleEventPrefetching(  this, & FastTimerService::preModuleEventPrefetching );
0883   //registry.watchPostModuleEventPrefetching( this, & FastTimerService::postModuleEventPrefetching );
0884   registry.watchPreModuleEventAcquire(this, &FastTimerService::preModuleEventAcquire);
0885   registry.watchPostModuleEventAcquire(this, &FastTimerService::postModuleEventAcquire);
0886   registry.watchPreModuleEvent(this, &FastTimerService::preModuleEvent);
0887   registry.watchPostModuleEvent(this, &FastTimerService::postModuleEvent);
0888   registry.watchPreModuleEventDelayedGet(this, &FastTimerService::preModuleEventDelayedGet);
0889   registry.watchPostModuleEventDelayedGet(this, &FastTimerService::postModuleEventDelayedGet);
0890   registry.watchPreEventReadFromSource(this, &FastTimerService::preEventReadFromSource);
0891   registry.watchPostEventReadFromSource(this, &FastTimerService::postEventReadFromSource);
0892   registry.watchPreESModule(this, &FastTimerService::preESModule);
0893   registry.watchPostESModule(this, &FastTimerService::postESModule);
0894 }
0895 
0896 void FastTimerService::ignoredSignal(const std::string& signal) const {
0897   LogDebug("FastTimerService") << "The FastTimerService received is currently not monitoring the signal \"" << signal
0898                                << "\".\n";
0899 }
0900 
0901 void FastTimerService::unsupportedSignal(const std::string& signal) const {
0902   // warn about each signal only once per job
0903   if (unsupported_signals_.insert(signal).second)
0904     edm::LogWarning("FastTimerService") << "The FastTimerService received the unsupported signal \"" << signal
0905                                         << "\".\n"
0906                                         << "Please report how to reproduce the issue to cms-hlt@cern.ch .";
0907 }
0908 
0909 void FastTimerService::preGlobalBeginRun(edm::GlobalContext const& gc) {
0910   ignoredSignal(__func__);
0911 
0912   // reset the run counters only during the main process being run
0913   if (isFirstSubprocess(gc)) {
0914     auto index = gc.runIndex();
0915     subprocess_global_run_check_[index] = 0;
0916     run_transition_[index].reset();
0917     run_summary_[index].reset();
0918 
0919     // book the DQM plots
0920     if (enable_dqm_) {
0921       // define a callback to book the MonitorElements
0922       auto bookTransactionCallback = [&, this](dqm::reco::DQMStore::IBooker& booker, dqm::reco::DQMStore::IGetter&) {
0923         auto scope = dqm::reco::DQMStore::IBooker::UseRunScope(booker);
0924         // we should really do this, but only DQMStore is allowed to touch it
0925         // We could move to postGlobalBeginRun, then the DQMStore has sure set it up.
0926         //booker.setRunLumi(gc.luminosityBlockID());
0927         booker.setCurrentFolder(dqm_path_);
0928         plots_->book(booker,
0929                      callgraph_,
0930                      highlight_modules_,
0931                      dqm_event_ranges_,
0932                      dqm_path_ranges_,
0933                      dqm_module_ranges_,
0934                      dqm_lumisections_range_,
0935                      enable_dqm_bymodule_,
0936                      enable_dqm_bypath_,
0937                      enable_dqm_byls_,
0938                      enable_dqm_transitions_);
0939       };
0940 
0941       // book MonitorElements for this stream
0942       edm::Service<dqm::legacy::DQMStore>()->meBookerGetter(bookTransactionCallback);
0943     }
0944   }
0945 }
0946 
0947 void FastTimerService::postGlobalBeginRun(edm::GlobalContext const& gc) { ignoredSignal(__func__); }
0948 
0949 void FastTimerService::preStreamBeginRun(edm::StreamContext const& sc) { ignoredSignal(__func__); }
0950 
0951 void FastTimerService::fixForDQM(std::string& label) {
0952   // clean characters that are deemed unsafe for DQM
0953   // see the definition of `s_safe` in DQMServices/Core/src/DQMStore.cc
0954   static const auto safe_for_dqm = "/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+=_()# "s;
0955   for (auto& c : label)
0956     if (safe_for_dqm.find(c) == std::string::npos)
0957       c = '_';
0958 }
0959 
0960 void FastTimerService::preallocate(edm::service::SystemBounds const& bounds) {
0961   concurrent_lumis_ = bounds.maxNumberOfConcurrentLuminosityBlocks();
0962   concurrent_runs_ = bounds.maxNumberOfConcurrentRuns();
0963   concurrent_streams_ = bounds.maxNumberOfStreams();
0964   concurrent_threads_ = bounds.maxNumberOfThreads();
0965 
0966   if (enable_dqm_bynproc_)
0967     dqm_path_ += fmt::sprintf(
0968         "/Running on %s with %d streams on %d threads", processor_model, concurrent_streams_, concurrent_threads_);
0969 
0970   // fix the DQM path to avoid invalid characters
0971   fixForDQM(dqm_path_);
0972 
0973   // allocate atomic variables to keep track of the completion of each step, process by process
0974   subprocess_event_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_streams_);
0975   for (unsigned int i = 0; i < concurrent_streams_; ++i)
0976     subprocess_event_check_[i] = 0;
0977   subprocess_global_run_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_runs_);
0978   for (unsigned int i = 0; i < concurrent_runs_; ++i)
0979     subprocess_global_run_check_[i] = 0;
0980   subprocess_global_lumi_check_ = std::make_unique<std::atomic<unsigned int>[]>(concurrent_lumis_);
0981   for (unsigned int i = 0; i < concurrent_lumis_; ++i)
0982     subprocess_global_lumi_check_[i] = 0;
0983 
0984   // allocate buffers to keep track of the resources spent in the lumi and run transitions
0985   lumi_transition_.resize(concurrent_lumis_);
0986   run_transition_.resize(concurrent_runs_);
0987 }
0988 
0989 void FastTimerService::preSourceConstruction(edm::ModuleDescription const& module) {
0990   callgraph_.preSourceConstruction(module);
0991 }
0992 
0993 void FastTimerService::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
0994                                    edm::ProcessContext const& context) {
0995   callgraph_.preBeginJob(pathsAndConsumes, context);
0996 }
0997 
0998 void FastTimerService::postBeginJob() {
0999   unsigned int modules = callgraph_.size();
1000 
1001   // module highlights
1002   for (unsigned int group : boost::irange(0ul, highlight_module_psets_.size())) {
1003     // copy and sort for faster search via std::binary_search
1004     auto labels = highlight_module_psets_[group].getUntrackedParameter<std::vector<std::string>>("modules");
1005     std::sort(labels.begin(), labels.end());
1006 
1007     highlight_modules_[group].label = highlight_module_psets_[group].getUntrackedParameter<std::string>("label");
1008     highlight_modules_[group].modules.reserve(labels.size());
1009     // convert the module labels in module ids
1010     for (unsigned int i = 0; i < modules; ++i) {
1011       auto const& label = callgraph_.module(i).moduleLabel();
1012       if (std::binary_search(labels.begin(), labels.end(), label))
1013         highlight_modules_[group].modules.push_back(i);
1014     }
1015   }
1016   highlight_module_psets_.clear();
1017 
1018   // allocate the resource counters for each stream, process, path and module
1019   ResourcesPerJob temp(callgraph_, highlight_modules_);
1020   streams_.resize(concurrent_streams_, temp);
1021   run_summary_.resize(concurrent_runs_, temp);
1022   job_summary_ = temp;
1023 
1024   // check that the DQMStore service is available
1025   if (enable_dqm_ and not edm::Service<dqm::legacy::DQMStore>().isAvailable()) {
1026     // the DQMStore is not available, disable all DQM plots
1027     enable_dqm_ = false;
1028     edm::LogWarning("FastTimerService") << "The DQMStore is not avalable, the DQM plots will not be generated";
1029   }
1030 
1031   // allocate the structures to hold pointers to the DQM plots
1032   if (enable_dqm_) {
1033     plots_ = std::make_unique<PlotsPerJob>(callgraph_, highlight_modules_);
1034   }
1035 }
1036 
1037 void FastTimerService::postStreamBeginRun(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1038 
1039 void FastTimerService::preStreamEndRun(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1040 
1041 void FastTimerService::postStreamEndRun(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1042 
1043 void FastTimerService::preGlobalBeginLumi(edm::GlobalContext const& gc) {
1044   ignoredSignal(__func__);
1045 
1046   // reset the lumi counters only during the main process being run
1047   if (isFirstSubprocess(gc)) {
1048     auto index = gc.luminosityBlockIndex();
1049     subprocess_global_lumi_check_[index] = 0;
1050     lumi_transition_[index].reset();
1051   }
1052 }
1053 
1054 void FastTimerService::postGlobalBeginLumi(edm::GlobalContext const& gc) { ignoredSignal(__func__); }
1055 
1056 void FastTimerService::preGlobalEndLumi(edm::GlobalContext const& gc) { ignoredSignal(__func__); }
1057 
1058 void FastTimerService::postGlobalEndLumi(edm::GlobalContext const& gc) {
1059   ignoredSignal(__func__);
1060 
1061   // handle the summaries only after the last subprocess has run
1062   auto index = gc.luminosityBlockIndex();
1063   bool last = isLastSubprocess(subprocess_global_lumi_check_[index]);
1064   if (not last)
1065     return;
1066 
1067   edm::LogVerbatim out("FastReport");
1068   auto const& label =
1069       fmt::sprintf("run %d, lumisection %d", gc.luminosityBlockID().run(), gc.luminosityBlockID().luminosityBlock());
1070   printTransition(out, lumi_transition_[index], label);
1071 
1072   if (enable_dqm_ and enable_dqm_transitions_) {
1073     plots_->fill_lumi(lumi_transition_[index], gc.luminosityBlockID().luminosityBlock());
1074   }
1075 }
1076 
1077 void FastTimerService::preStreamBeginLumi(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1078 
1079 void FastTimerService::postStreamBeginLumi(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1080 
1081 void FastTimerService::preStreamEndLumi(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1082 
1083 void FastTimerService::postStreamEndLumi(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1084 
1085 void FastTimerService::preGlobalEndRun(edm::GlobalContext const& gc) { ignoredSignal(__func__); }
1086 
1087 void FastTimerService::postGlobalEndRun(edm::GlobalContext const& gc) {
1088   ignoredSignal(__func__);
1089 
1090   // handle the summaries only after the last subprocess has run
1091   auto index = gc.runIndex();
1092   bool last = isLastSubprocess(subprocess_global_run_check_[index]);
1093   if (not last)
1094     return;
1095 
1096   edm::LogVerbatim out("FastReport");
1097   auto const& label = fmt::sprintf("run %d", gc.luminosityBlockID().run());
1098   if (print_run_summary_) {
1099     printSummary(out, run_summary_[index], label);
1100   }
1101   printTransition(out, run_transition_[index], label);
1102 
1103   if (enable_dqm_ and enable_dqm_transitions_) {
1104     plots_->fill_run(run_transition_[index]);
1105   }
1106 }
1107 
1108 void FastTimerService::preSourceRun(edm::RunIndex index) { thread().measure_and_accumulate(job_summary_.overhead); }
1109 
1110 void FastTimerService::postSourceRun(edm::RunIndex index) { thread().measure_and_accumulate(run_transition_[index]); }
1111 
1112 void FastTimerService::preSourceLumi(edm::LuminosityBlockIndex index) {
1113   thread().measure_and_accumulate(job_summary_.overhead);
1114 }
1115 
1116 void FastTimerService::postSourceLumi(edm::LuminosityBlockIndex index) {
1117   thread().measure_and_accumulate(lumi_transition_[index]);
1118 }
1119 
1120 void FastTimerService::postEndJob() {
1121   // stop observing to avoid potential race conditions at exit
1122   tbb::task_scheduler_observer::observe(false);
1123   guard_.finalize();
1124   // print the job summaries
1125   if (print_job_summary_) {
1126     edm::LogVerbatim out("FastReport");
1127     printSummary(out, job_summary_, "Job");
1128   }
1129   if (write_json_summary_) {
1130     writeSummaryJSON(job_summary_, json_filename_);
1131   }
1132 }
1133 
1134 template <typename T>
1135 void FastTimerService::printHeader(T& out, std::string const& label) const {
1136   out << "FastReport ";
1137   if (label.size() < 60)
1138     for (unsigned int i = (60 - label.size()) / 2; i > 0; --i)
1139       out << '-';
1140   out << ' ' << label << " Summary ";
1141   if (label.size() < 60)
1142     for (unsigned int i = (59 - label.size()) / 2; i > 0; --i)
1143       out << '-';
1144   out << '\n';
1145 }
1146 
1147 template <typename T>
1148 void FastTimerService::printEventHeader(T& out, std::string const& label) const {
1149   out << "FastReport       CPU time      Real time      Allocated    Deallocated  " << label << "\n";
1150   //      FastReport  ########.# ms  ########.# ms  +######### kB  -######### kB  ...
1151 }
1152 
1153 template <typename T>
1154 void FastTimerService::printEventLine(T& out, Resources const& data, std::string const& label) const {
1155   out << fmt::sprintf("FastReport  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %s\n",
1156                       ms(data.time_thread),
1157                       ms(data.time_real),
1158                       +static_cast<int64_t>(kB(data.allocated)),
1159                       -static_cast<int64_t>(kB(data.deallocated)),
1160                       label);
1161 }
1162 
1163 template <typename T>
1164 void FastTimerService::printEventLine(T& out, AtomicResources const& data, std::string const& label) const {
1165   out << fmt::sprintf("FastReport  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %s\n",
1166                       ms(boost::chrono::nanoseconds(data.time_thread.load())),
1167                       ms(boost::chrono::nanoseconds(data.time_real.load())),
1168                       +static_cast<int64_t>(kB(data.allocated)),
1169                       -static_cast<int64_t>(kB(data.deallocated)),
1170                       label);
1171 }
1172 
1173 template <typename T>
1174 void FastTimerService::printEvent(T& out, ResourcesPerJob const& data) const {
1175   printHeader(out, "Event");
1176   printEventHeader(out, "Modules");
1177   auto const& source_d = callgraph_.source();
1178   auto const& source = data.modules[source_d.id()];
1179   printEventLine(out, source.total, source_d.moduleLabel());
1180   for (unsigned int i = 0; i < callgraph_.processes().size(); ++i) {
1181     auto const& proc_d = callgraph_.processDescription(i);
1182     auto const& proc = data.processes[i];
1183     printEventLine(out, proc.total, "process " + proc_d.name_);
1184     for (unsigned int m : proc_d.modules_) {
1185       auto const& module_d = callgraph_.module(m);
1186       auto const& module = data.modules[m];
1187       printEventLine(out, module.total, "  " + module_d.moduleLabel());
1188     }
1189   }
1190   printEventLine(out, data.total, "total");
1191   out << '\n';
1192   printEventHeader(out, "Processes and Paths");
1193   printEventLine(out, source.total, source_d.moduleLabel());
1194   for (unsigned int i = 0; i < callgraph_.processes().size(); ++i) {
1195     auto const& proc_d = callgraph_.processDescription(i);
1196     auto const& proc = data.processes[i];
1197     printEventLine(out, proc.total, "process " + proc_d.name_);
1198     for (unsigned int p = 0; p < proc.paths.size(); ++p) {
1199       auto const& name = proc_d.paths_[p].name_;
1200       auto const& path = proc.paths[p];
1201       printEventLine(out, path.active, name + " (only scheduled modules)");
1202       printEventLine(out, path.total, name + " (including dependencies)");
1203     }
1204     for (unsigned int p = 0; p < proc.endpaths.size(); ++p) {
1205       auto const& name = proc_d.endPaths_[p].name_;
1206       auto const& path = proc.endpaths[p];
1207       printEventLine(out, path.active, name + " (only scheduled modules)");
1208       printEventLine(out, path.total, name + " (including dependencies)");
1209     }
1210   }
1211   printEventLine(out, data.total, "total");
1212   out << '\n';
1213   for (unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1214     printEventHeader(out, "Highlighted modules");
1215     for (unsigned int m : highlight_modules_[group].modules) {
1216       auto const& module_d = callgraph_.module(m);
1217       auto const& module = data.modules[m];
1218       printEventLine(out, module.total, "  " + module_d.moduleLabel());
1219     }
1220     printEventLine(out, data.highlight[group], highlight_modules_[group].label);
1221     out << '\n';
1222   }
1223 }
1224 
1225 template <typename T>
1226 void FastTimerService::printSummaryHeader(T& out, std::string const& label, bool detailed) const {
1227   // clang-format off
1228   if (detailed)
1229     out << "FastReport   CPU time avg.      when run  Real time avg.      when run     Alloc. avg.      when run   Dealloc. avg.      when run  ";
1230   //        FastReport  ########.# ms  ########.# ms  ########.# ms  ########.# ms  +######### kB  +######### kB  -######### kB  -######### kB  ...
1231   else
1232     out << "FastReport   CPU time avg.                Real time avg.                   Alloc. avg.                 Dealloc. avg.                ";
1233   //        FastReport  ########.# ms                 ########.# ms                 +######### kB                 -######### kB                 ...
1234   out << label << '\n';
1235   // clang-format on
1236 }
1237 
1238 template <typename T>
1239 void FastTimerService::printPathSummaryHeader(T& out, std::string const& label) const {
1240   // clang-format off
1241   out << "FastReport     CPU time sched. / depend.    Real time sched. / depend.       Alloc. sched. / depend.     Dealloc. sched. / depend.  ";
1242   //      FastReport  ########.# ms  ########.# ms  ########.# ms  ########.# ms  +######### kB  +######### kB  -######### kB  -######### kB  ...
1243   out << label << '\n';
1244   // clang-format on
1245 }
1246 
1247 template <typename T>
1248 void FastTimerService::printSummaryLine(T& out, Resources const& data, uint64_t events, std::string const& label) const {
1249   out << fmt::sprintf(
1250       // clang-format off
1251       "FastReport  %10.1f ms                 %10.1f ms                 %+10d kB                 %+10d kB                 %s\n",
1252       // clang-format on
1253       (events ? ms(data.time_thread) / events : 0),
1254       (events ? ms(data.time_real) / events : 0),
1255       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1256       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1257       label);
1258 }
1259 
1260 template <typename T>
1261 void FastTimerService::printSummaryLine(
1262     T& out, AtomicResources const& data, uint64_t events, uint64_t active, std::string const& label) const {
1263   out << fmt::sprintf(
1264       // clang-format off
1265       "FastReport  %10.1f ms  %10.1f ms  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %+10d kB  %+10d kB  %s\n",
1266       // clang-format on
1267       (events ? ms(data.time_thread) / events : 0),
1268       (active ? ms(data.time_thread) / active : 0),
1269       (events ? ms(data.time_real) / events : 0),
1270       (active ? ms(data.time_real) / active : 0),
1271       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1272       (active ? +static_cast<int64_t>(kB(data.allocated) / active) : 0),
1273       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1274       (active ? -static_cast<int64_t>(kB(data.deallocated) / active) : 0),
1275       label);
1276 }
1277 
1278 template <typename T>
1279 void FastTimerService::printSummaryLine(T& out,
1280                                         AtomicResources const& data,
1281                                         uint64_t events,
1282                                         std::string const& label) const {
1283   out << fmt::sprintf(
1284       // clang-format off
1285       "FastReport  %10.1f ms                 %10.1f ms                 %+10d kB                 %+10d kB                 %s\n",
1286       // clang-format on
1287       (events ? ms(data.time_thread) / events : 0),
1288       (events ? ms(data.time_real) / events : 0),
1289       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1290       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1291       label);
1292 }
1293 
1294 template <typename T>
1295 void FastTimerService::printSummaryLine(
1296     T& out, Resources const& data, uint64_t events, uint64_t active, std::string const& label) const {
1297   out << fmt::sprintf(
1298       "FastReport  %10.1f ms  %10.1f ms  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %+10d kB  %+10d kB  %s\n",
1299       (events ? ms(data.time_thread) / events : 0),
1300       (active ? ms(data.time_thread) / active : 0),
1301       (events ? ms(data.time_real) / events : 0),
1302       (active ? ms(data.time_real) / active : 0),
1303       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1304       (active ? +static_cast<int64_t>(kB(data.allocated) / active) : 0),
1305       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1306       (active ? -static_cast<int64_t>(kB(data.deallocated) / active) : 0),
1307       label);
1308 }
1309 
1310 template <typename T>
1311 void FastTimerService::printPathSummaryLine(
1312     T& out, Resources const& data, Resources const& total, uint64_t events, std::string const& label) const {
1313   out << fmt::sprintf(
1314       "FastReport  %10.1f ms  %10.1f ms  %10.1f ms  %10.1f ms  %+10d kB  %+10d kB  %+10d kB  %+10d kB  %s\n",
1315       (events ? ms(data.time_thread) / events : 0),
1316       (events ? ms(total.time_thread) / events : 0),
1317       (events ? ms(data.time_real) / events : 0),
1318       (events ? ms(total.time_real) / events : 0),
1319       (events ? +static_cast<int64_t>(kB(data.allocated) / events) : 0),
1320       (events ? +static_cast<int64_t>(kB(total.allocated) / events) : 0),
1321       (events ? -static_cast<int64_t>(kB(data.deallocated) / events) : 0),
1322       (events ? -static_cast<int64_t>(kB(total.deallocated) / events) : 0),
1323       label);
1324 }
1325 
1326 template <typename T>
1327 void FastTimerService::printSummary(T& out, ResourcesPerJob const& data, std::string const& label) const {
1328   printHeader(out, label);
1329   printSummaryHeader(out, "Modules", true);
1330   auto const& source_d = callgraph_.source();
1331   auto const& source = data.modules[source_d.id()];
1332   printSummaryLine(out, source.total, data.events, source.events, source_d.moduleLabel());
1333   for (unsigned int i = 0; i < callgraph_.processes().size(); ++i) {
1334     auto const& proc_d = callgraph_.processDescription(i);
1335     auto const& proc = data.processes[i];
1336     printSummaryLine(out, proc.total, data.events, "process " + proc_d.name_);
1337     for (unsigned int m : proc_d.modules_) {
1338       auto const& module_d = callgraph_.module(m);
1339       auto const& module = data.modules[m];
1340       printSummaryLine(out, module.total, data.events, module.events, "  " + module_d.moduleLabel());
1341     }
1342   }
1343   printSummaryLine(out, data.total, data.events, "total");
1344   printSummaryLine(out, data.eventsetup, data.events, "eventsetup");
1345   printSummaryLine(out, data.overhead, data.events, "other");
1346   printSummaryLine(out, data.idle, data.events, "idle");
1347   out << '\n';
1348   printPathSummaryHeader(out, "Processes and Paths");
1349   printSummaryLine(out, source.total, data.events, source_d.moduleLabel());
1350   for (unsigned int i = 0; i < callgraph_.processes().size(); ++i) {
1351     auto const& proc_d = callgraph_.processDescription(i);
1352     auto const& proc = data.processes[i];
1353     printSummaryLine(out, proc.total, data.events, "process " + proc_d.name_);
1354     for (unsigned int p = 0; p < proc.paths.size(); ++p) {
1355       auto const& name = proc_d.paths_[p].name_;
1356       auto const& path = proc.paths[p];
1357       printPathSummaryLine(out, path.active, path.total, data.events, "  " + name);
1358     }
1359     for (unsigned int p = 0; p < proc.endpaths.size(); ++p) {
1360       auto const& name = proc_d.endPaths_[p].name_;
1361       auto const& path = proc.endpaths[p];
1362       printPathSummaryLine(out, path.active, path.total, data.events, "  " + name);
1363     }
1364   }
1365   printSummaryLine(out, data.total, data.events, "total");
1366   printSummaryLine(out, data.eventsetup, data.events, "eventsetup");
1367   printSummaryLine(out, data.overhead, data.events, "other");
1368   printSummaryLine(out, data.idle, data.events, "idle");
1369   out << '\n';
1370   for (unsigned int group : boost::irange(0ul, highlight_modules_.size())) {
1371     printSummaryHeader(out, "Highlighted modules", true);
1372     for (unsigned int m : highlight_modules_[group].modules) {
1373       auto const& module_d = callgraph_.module(m);
1374       auto const& module = data.modules[m];
1375       printSummaryLine(out, module.total, data.events, module.events, module_d.moduleLabel());
1376     }
1377     printSummaryLine(out, data.highlight[group], data.events, highlight_modules_[group].label);
1378     out << '\n';
1379   }
1380 }
1381 
1382 template <typename T>
1383 void FastTimerService::printTransition(T& out, AtomicResources const& data, std::string const& label) const {
1384   printEventHeader(out, "Transition");
1385   printEventLine(out, data, label);
1386 }
1387 
1388 template <typename T>
1389 json FastTimerService::encodeToJSON(std::string const& type,
1390                                     std::string const& label,
1391                                     unsigned int events,
1392                                     T const& data) const {
1393   return json{{"type", type},
1394               {"label", label},
1395               {"events", events},
1396               {"time_thread", ms(data.time_thread)},
1397               {"time_real", ms(data.time_real)},
1398               {"mem_alloc", kB(data.allocated)},
1399               {"mem_free", kB(data.deallocated)}};
1400 }
1401 
1402 json FastTimerService::encodeToJSON(edm::ModuleDescription const& module, ResourcesPerModule const& data) const {
1403   return encodeToJSON(module.moduleName(), module.moduleLabel(), data.events, data.total);
1404 }
1405 
1406 void FastTimerService::writeSummaryJSON(ResourcesPerJob const& data, std::string const& filename) const {
1407   json j;
1408 
1409   // write a description of the resources
1410   j["resources"] = json::array({json{{"time_real", "real time"}},
1411                                 json{{"time_thread", "cpu time"}},
1412                                 json{{"mem_alloc", "allocated memory"}},
1413                                 json{{"mem_free", "deallocated memory"}}});
1414 
1415   // write the resources used by the job
1416   j["total"] = encodeToJSON("Job",
1417                             callgraph_.processDescription(0).name_,
1418                             data.events,
1419                             data.total + data.eventsetup + data.overhead + data.idle);
1420 
1421   // write the resources used by every module
1422   j["modules"] = json::array();
1423   for (unsigned int i = 0; i < callgraph_.size(); ++i) {
1424     auto const& module = callgraph_.module(i);
1425     auto const& data_m = data.modules[i];
1426     j["modules"].push_back(encodeToJSON(module, data_m));
1427   }
1428 
1429   // add an entry for the non-event transitions, modules, and idle states
1430   j["modules"].push_back(encodeToJSON("other", "other", data.events, data.overhead));
1431   j["modules"].push_back(encodeToJSON("eventsetup", "eventsetup", data.events, data.eventsetup));
1432   j["modules"].push_back(encodeToJSON("idle", "idle", data.events, data.idle));
1433 
1434   std::ofstream out(filename);
1435   out << std::setw(2) << j << std::flush;
1436 }
1437 
1438 // check if this is the first process being signalled
1439 bool FastTimerService::isFirstSubprocess(edm::StreamContext const& sc) {
1440   return (not sc.processContext()->isSubProcess());
1441 }
1442 
1443 bool FastTimerService::isFirstSubprocess(edm::GlobalContext const& gc) {
1444   return (not gc.processContext()->isSubProcess());
1445 }
1446 
1447 // check if this is the last process being signalled
1448 bool FastTimerService::isLastSubprocess(std::atomic<unsigned int>& check) {
1449   // release-acquire semantic guarantees that all writes in this and other threads are visible
1450   // after this operation; full sequentially-consistent ordering is (probably) not needed.
1451   unsigned int old_value = check.fetch_add(1, std::memory_order_acq_rel);
1452   return (old_value == callgraph_.processes().size() - 1);
1453 }
1454 
1455 void FastTimerService::preEvent(edm::StreamContext const& sc) { ignoredSignal(__func__); }
1456 
1457 void FastTimerService::postEvent(edm::StreamContext const& sc) {
1458   ignoredSignal(__func__);
1459 
1460   unsigned int pid = callgraph_.processId(*sc.processContext());
1461   unsigned int sid = sc.streamID();
1462   auto& stream = streams_[sid];
1463   auto& process = callgraph_.processDescription(pid);
1464 
1465   // measure the event resources as the sum of all modules' resources
1466   auto& data = stream.processes[pid].total;
1467   for (unsigned int id : process.modules_)
1468     data += stream.modules[id].total;
1469   stream.total += data;
1470 
1471   // handle the summaries and fill the plots only after the last subprocess has run
1472   bool last = isLastSubprocess(subprocess_event_check_[sid]);
1473   if (not last)
1474     return;
1475 
1476   // measure the event resources explicitly
1477   stream.event_measurement.measure_and_store(stream.event);
1478 
1479   // add to the event resources those used by source (which is not part of any process)
1480   unsigned int id = 0;
1481   stream.total += stream.modules[id].total;
1482 
1483   // highlighted modules
1484   for (unsigned int group : boost::irange(0ul, highlight_modules_.size()))
1485     for (unsigned int i : highlight_modules_[group].modules)
1486       stream.highlight[group] += stream.modules[i].total;
1487 
1488   // avoid concurrent access to the summary objects
1489   {
1490     std::lock_guard<std::mutex> guard(summary_mutex_);
1491     job_summary_ += stream;
1492     run_summary_[sc.runIndex()] += stream;
1493   }
1494 
1495   if (print_event_summary_) {
1496     edm::LogVerbatim out("FastReport");
1497     printEvent(out, stream);
1498   }
1499 
1500   if (enable_dqm_) {
1501     plots_->fill(callgraph_, stream, sc.eventID().luminosityBlock());
1502   }
1503 }
1504 
1505 void FastTimerService::preSourceEvent(edm::StreamID sid) {
1506   // clear the event counters
1507   auto& stream = streams_[sid];
1508   stream.reset();
1509   ++stream.events;
1510 
1511   subprocess_event_check_[sid] = 0;
1512 
1513   // reuse the same measurement for the Source module and for the explicit begin of the Event
1514   auto& measurement = thread();
1515   measurement.measure_and_accumulate(stream.overhead);
1516   stream.event_measurement = measurement;
1517 }
1518 
1519 void FastTimerService::postSourceEvent(edm::StreamID sid) {
1520   edm::ModuleDescription const& md = callgraph_.source();
1521   unsigned int id = md.id();
1522   auto& stream = streams_[sid];
1523   auto& module = stream.modules[id];
1524 
1525   thread().measure_and_store(module.total);
1526   ++stream.modules[id].events;
1527 }
1528 
1529 void FastTimerService::prePathEvent(edm::StreamContext const& sc, edm::PathContext const& pc) {
1530   unsigned int sid = sc.streamID().value();
1531   unsigned int pid = callgraph_.processId(*sc.processContext());
1532   unsigned int id = pc.pathID();
1533   auto& stream = streams_[sid];
1534   auto& data = pc.isEndPath() ? stream.processes[pid].endpaths[id] : stream.processes[pid].paths[id];
1535   data.status = false;
1536   data.last = 0;
1537 }
1538 
1539 void FastTimerService::postPathEvent(edm::StreamContext const& sc,
1540                                      edm::PathContext const& pc,
1541                                      edm::HLTPathStatus const& status) {
1542   unsigned int sid = sc.streamID().value();
1543   unsigned int pid = callgraph_.processId(*sc.processContext());
1544   unsigned int id = pc.pathID();
1545   auto& stream = streams_[sid];
1546   auto& data = pc.isEndPath() ? stream.processes[pid].endpaths[id] : stream.processes[pid].paths[id];
1547 
1548   auto const& path =
1549       pc.isEndPath() ? callgraph_.processDescription(pid).endPaths_[id] : callgraph_.processDescription(pid).paths_[id];
1550   unsigned int index = path.modules_on_path_.empty() ? 0 : status.index() + 1;
1551   data.last = path.modules_on_path_.empty() ? 0 : path.last_dependency_of_module_[status.index()];
1552 
1553   for (unsigned int i = 0; i < index; ++i) {
1554     auto const& module = stream.modules[path.modules_on_path_[i]];
1555     data.active += module.total;
1556   }
1557   for (unsigned int i = 0; i < data.last; ++i) {
1558     auto const& module = stream.modules[path.modules_and_dependencies_[i]];
1559     data.total += module.total;
1560   }
1561 }
1562 
1563 void FastTimerService::preModuleEventAcquire(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1564   unsigned int sid = sc.streamID().value();
1565   auto& stream = streams_[sid];
1566   thread().measure_and_accumulate(stream.overhead);
1567 }
1568 
1569 void FastTimerService::postModuleEventAcquire(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1570   edm::ModuleDescription const& md = *mcc.moduleDescription();
1571   unsigned int id = md.id();
1572   unsigned int sid = sc.streamID().value();
1573   auto& stream = streams_[sid];
1574   auto& module = stream.modules[id];
1575 
1576   module.has_acquire = true;
1577   thread().measure_and_store(module.total);
1578 }
1579 
1580 void FastTimerService::preModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1581   unsigned int sid = sc.streamID().value();
1582   auto& stream = streams_[sid];
1583   thread().measure_and_accumulate(stream.overhead);
1584 }
1585 
1586 void FastTimerService::postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1587   edm::ModuleDescription const& md = *mcc.moduleDescription();
1588   unsigned int id = md.id();
1589   unsigned int sid = sc.streamID().value();
1590   auto& stream = streams_[sid];
1591   auto& module = stream.modules[id];
1592 
1593   if (module.has_acquire) {
1594     thread().measure_and_accumulate(module.total);
1595   } else {
1596     thread().measure_and_store(module.total);
1597   }
1598   ++module.events;
1599 }
1600 
1601 void FastTimerService::preModuleEventDelayedGet(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1602   unsupportedSignal(__func__);
1603 }
1604 
1605 void FastTimerService::postModuleEventDelayedGet(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1606   unsupportedSignal(__func__);
1607 }
1608 
1609 void FastTimerService::preModuleEventPrefetching(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1610   ignoredSignal(__func__);
1611 }
1612 
1613 void FastTimerService::postModuleEventPrefetching(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1614   ignoredSignal(__func__);
1615 }
1616 
1617 void FastTimerService::preEventReadFromSource(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1618   if (mcc.state() == edm::ModuleCallingContext::State::kPrefetching) {
1619     auto& stream = streams_[sc.streamID()];
1620     thread().measure_and_accumulate(stream.overhead);
1621   }
1622 }
1623 
1624 void FastTimerService::postEventReadFromSource(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1625   if (mcc.state() == edm::ModuleCallingContext::State::kPrefetching) {
1626     edm::ModuleDescription const& md = callgraph_.source();
1627     unsigned int id = md.id();
1628     auto& stream = streams_[sc.streamID()];
1629     auto& module = stream.modules[id];
1630 
1631     thread().measure_and_accumulate(module.total);
1632   }
1633 }
1634 
1635 void FastTimerService::preModuleGlobalBeginRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1636   thread().measure_and_accumulate(job_summary_.overhead);
1637 }
1638 
1639 void FastTimerService::postModuleGlobalBeginRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1640   auto index = gc.runIndex();
1641   thread().measure_and_accumulate(run_transition_[index]);
1642 }
1643 
1644 void FastTimerService::preModuleGlobalEndRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1645   thread().measure_and_accumulate(job_summary_.overhead);
1646 }
1647 
1648 void FastTimerService::postModuleGlobalEndRun(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1649   auto index = gc.runIndex();
1650   thread().measure_and_accumulate(run_transition_[index]);
1651 }
1652 
1653 void FastTimerService::preModuleGlobalBeginLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1654   thread().measure_and_accumulate(job_summary_.overhead);
1655 }
1656 
1657 void FastTimerService::postModuleGlobalBeginLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1658   auto index = gc.luminosityBlockIndex();
1659   thread().measure_and_accumulate(lumi_transition_[index]);
1660 }
1661 
1662 void FastTimerService::preModuleGlobalEndLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1663   thread().measure_and_accumulate(job_summary_.overhead);
1664 }
1665 
1666 void FastTimerService::postModuleGlobalEndLumi(edm::GlobalContext const& gc, edm::ModuleCallingContext const& mcc) {
1667   auto index = gc.luminosityBlockIndex();
1668   thread().measure_and_accumulate(lumi_transition_[index]);
1669 }
1670 
1671 void FastTimerService::preModuleStreamBeginRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1672   thread().measure_and_accumulate(job_summary_.overhead);
1673 }
1674 
1675 void FastTimerService::postModuleStreamBeginRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1676   auto index = sc.runIndex();
1677   thread().measure_and_accumulate(run_transition_[index]);
1678 }
1679 
1680 void FastTimerService::preModuleStreamEndRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1681   thread().measure_and_accumulate(job_summary_.overhead);
1682 }
1683 
1684 void FastTimerService::postModuleStreamEndRun(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1685   auto index = sc.runIndex();
1686   thread().measure_and_accumulate(run_transition_[index]);
1687 }
1688 
1689 void FastTimerService::preModuleStreamBeginLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1690   thread().measure_and_accumulate(job_summary_.overhead);
1691 }
1692 
1693 void FastTimerService::postModuleStreamBeginLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1694   auto index = sc.luminosityBlockIndex();
1695   thread().measure_and_accumulate(lumi_transition_[index]);
1696 }
1697 
1698 void FastTimerService::preModuleStreamEndLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1699   thread().measure_and_accumulate(job_summary_.overhead);
1700 }
1701 
1702 void FastTimerService::postModuleStreamEndLumi(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc) {
1703   auto index = sc.luminosityBlockIndex();
1704   thread().measure_and_accumulate(lumi_transition_[index]);
1705 }
1706 
1707 void FastTimerService::preESModule(edm::eventsetup::EventSetupRecordKey const&, edm::ESModuleCallingContext const& cc) {
1708   auto top = cc.getTopModuleCallingContext();
1709   if (top->type() == edm::ParentContext::Type::kPlaceInPath) {
1710     //Paths are only used when processing an Event
1711     unsigned int sid = top->parent().placeInPathContext()->pathContext()->streamContext()->streamID().value();
1712     auto& stream = streams_[sid];
1713     thread().measure_and_accumulate(stream.overhead);
1714   }
1715 }
1716 void FastTimerService::postESModule(edm::eventsetup::EventSetupRecordKey const&,
1717                                     edm::ESModuleCallingContext const& cc) {
1718   auto top = cc.getTopModuleCallingContext();
1719   if (top->type() == edm::ParentContext::Type::kPlaceInPath) {
1720     unsigned int sid = top->parent().placeInPathContext()->pathContext()->streamContext()->streamID().value();
1721     auto& stream = streams_[sid];
1722     thread().measure_and_accumulate(stream.eventsetup);
1723   }
1724 }
1725 
1726 FastTimerService::ThreadGuard::ThreadGuard() {
1727   auto err = ::pthread_key_create(&key_, retire_thread);
1728   if (err) {
1729     throw cms::Exception("FastTimerService") << "ThreadGuard key creation failed: " << ::strerror(err);
1730   }
1731 }
1732 
1733 // If this is a new thread, register it and return true
1734 bool FastTimerService::ThreadGuard::register_thread(FastTimerService::AtomicResources& r) {
1735   auto ptr = ::pthread_getspecific(key_);
1736 
1737   if (not ptr) {
1738     auto it = thread_resources_.emplace_back(std::make_shared<specific_t>(r));
1739     auto pp = new std::shared_ptr<specific_t>(*it);
1740     auto err = ::pthread_setspecific(key_, pp);
1741     if (err) {
1742       throw cms::Exception("FastTimerService") << "ThreadGuard pthread_setspecific failed: " << ::strerror(err);
1743     }
1744     return true;
1745   }
1746   return false;
1747 }
1748 
1749 std::shared_ptr<FastTimerService::ThreadGuard::specific_t>* FastTimerService::ThreadGuard::ptr(void* p) {
1750   return static_cast<std::shared_ptr<specific_t>*>(p);
1751 }
1752 
1753 // called when a thread exits
1754 void FastTimerService::ThreadGuard::retire_thread(void* p) {
1755   auto ps = ptr(p);
1756   auto expected = true;
1757   if ((*ps)->live_.compare_exchange_strong(expected, false)) {
1758     // account any resources used or freed by the thread before leaving the TBB pool
1759     (*ps)->measurement_.measure_and_accumulate((*ps)->resource_);
1760   }
1761   delete ps;
1762 }
1763 
1764 // finalize all threads that have not retired
1765 void FastTimerService::ThreadGuard::finalize() {
1766   for (auto& p : thread_resources_) {
1767     auto expected = true;
1768     if (p->live_.compare_exchange_strong(expected, false)) {
1769       p->measurement_.measure_and_accumulate(p->resource_);
1770     }
1771   }
1772 }
1773 
1774 FastTimerService::Measurement& FastTimerService::ThreadGuard::thread() {
1775   return (*ptr(::pthread_getspecific(key_)))->measurement_;
1776 }
1777 
1778 void FastTimerService::on_scheduler_entry(bool worker) {
1779   // The AtomicResources passed to register_thread are used to account the resources
1780   // used or freed by the thread after the last active measurement and before leaving the TBB pool.
1781   if (guard_.register_thread(job_summary_.idle)) {
1782     // initialise the measurement point for a thread that has newly joined the TBB pool
1783     thread().measure();
1784   } else {
1785     // An existing thread is re-joining the TBB pool.
1786     // Note: unsure whether the resources used outside of the TBB pool should be
1787     //   - not accounted:       thread().measure()
1788     //   - considered as idle:  thread().measure_and_accumulate(job_summary_.idle)
1789     //   - considered as other: thread().measure_and_accumulate(job_summary_.overhead)
1790     // FIXME "considered as other" has been seen to produce unreliable results; revert to "not accounted" for the time being.
1791     thread().measure();
1792   }
1793 }
1794 
1795 void FastTimerService::on_scheduler_exit(bool worker) {
1796   // Account for the resources used or freed by the thread after the last active measurement and before leaving the TBB pool.
1797   thread().measure_and_accumulate(job_summary_.idle);
1798 }
1799 
1800 FastTimerService::Measurement& FastTimerService::thread() { return guard_.thread(); }
1801 
1802 // describe the module's configuration
1803 void FastTimerService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
1804   edm::ParameterSetDescription desc;
1805   desc.addUntracked<bool>("printEventSummary", false);
1806   desc.addUntracked<bool>("printRunSummary", true);
1807   desc.addUntracked<bool>("printJobSummary", true);
1808   // JSON configuration
1809   //desc.addUntracked<bool>("writeJSONByEvent", false);
1810   //desc.addUntracked<bool>("writeJSONByLumiSection", false);
1811   //desc.addUntracked<bool>("writeJSONByRun", false);
1812   desc.addUntracked<bool>("writeJSONSummary", false);
1813   desc.addUntracked<std::string>("jsonFileName", "resources.json");
1814   // DQM configuration
1815   desc.addUntracked<bool>("enableDQM", true);
1816   desc.addUntracked<bool>("enableDQMbyModule", false);
1817   desc.addUntracked<bool>("enableDQMbyPath", false);
1818   desc.addUntracked<bool>("enableDQMbyLumiSection", false);
1819   desc.addUntracked<bool>("enableDQMbyProcesses", false);
1820   desc.addUntracked<bool>("enableDQMTransitions", false);
1821   desc.addUntracked<double>("dqmTimeRange", 1000.);              // ms
1822   desc.addUntracked<double>("dqmTimeResolution", 5.);            // ms
1823   desc.addUntracked<double>("dqmMemoryRange", 1000000.);         // kB
1824   desc.addUntracked<double>("dqmMemoryResolution", 5000.);       // kB
1825   desc.addUntracked<double>("dqmPathTimeRange", 100.);           // ms
1826   desc.addUntracked<double>("dqmPathTimeResolution", 0.5);       // ms
1827   desc.addUntracked<double>("dqmPathMemoryRange", 1000000.);     // kB
1828   desc.addUntracked<double>("dqmPathMemoryResolution", 5000.);   // kB
1829   desc.addUntracked<double>("dqmModuleTimeRange", 40.);          // ms
1830   desc.addUntracked<double>("dqmModuleTimeResolution", 0.2);     // ms
1831   desc.addUntracked<double>("dqmModuleMemoryRange", 100000.);    // kB
1832   desc.addUntracked<double>("dqmModuleMemoryResolution", 500.);  // kB
1833   desc.addUntracked<unsigned>("dqmLumiSectionsRange", 2500);     // ~ 16 hours
1834   desc.addUntracked<std::string>("dqmPath", "HLT/TimerService");
1835 
1836   edm::ParameterSetDescription highlightModulesDescription;
1837   highlightModulesDescription.addUntracked<std::vector<std::string>>("modules", {});
1838   highlightModulesDescription.addUntracked<std::string>("label", "producers");
1839   desc.addVPSetUntracked("highlightModules", highlightModulesDescription, {});
1840 
1841   // # OBSOLETE - these parameters are ignored, they are left only not to break old configurations
1842   // they will not be printed in the generated cfi.py file
1843   desc.addOptionalNode(edm::ParameterDescription<bool>("useRealTimeClock", true, false), false)
1844       ->setComment("This parameter is obsolete and will be ignored.");
1845   desc.addOptionalNode(edm::ParameterDescription<bool>("enableTimingPaths", true, false), false)
1846       ->setComment("This parameter is obsolete and will be ignored.");
1847   desc.addOptionalNode(edm::ParameterDescription<bool>("enableTimingModules", true, false), false)
1848       ->setComment("This parameter is obsolete and will be ignored.");
1849   desc.addOptionalNode(edm::ParameterDescription<bool>("enableTimingExclusive", false, false), false)
1850       ->setComment("This parameter is obsolete and will be ignored.");
1851   desc.addOptionalNode(edm::ParameterDescription<bool>("enableTimingSummary", false, false), false)
1852       ->setComment("This parameter is obsolete and will be ignored.");
1853   desc.addOptionalNode(edm::ParameterDescription<bool>("skipFirstPath", false, false), false)
1854       ->setComment("This parameter is obsolete and will be ignored.");
1855   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathActive", false, false), false)
1856       ->setComment("This parameter is obsolete and will be ignored.");
1857   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathTotal", true, false), false)
1858       ->setComment("This parameter is obsolete and will be ignored.");
1859   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathOverhead", false, false), false)
1860       ->setComment("This parameter is obsolete and will be ignored.");
1861   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathDetails", false, false), false)
1862       ->setComment("This parameter is obsolete and will be ignored.");
1863   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathCounters", true, false), false)
1864       ->setComment("This parameter is obsolete and will be ignored.");
1865   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyPathExclusive", false, false), false)
1866       ->setComment("This parameter is obsolete and will be ignored.");
1867   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMbyModuleType", false, false), false)
1868       ->setComment("This parameter is obsolete and will be ignored.");
1869   desc.addOptionalNode(edm::ParameterDescription<bool>("enableDQMSummary", false, false), false)
1870       ->setComment("This parameter is obsolete and will be ignored.");
1871 
1872   descriptions.add("FastTimerService", desc);
1873 }
1874 
1875 // declare FastTimerService as a framework Service
1876 #include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
1877 DEFINE_FWK_SERVICE(FastTimerService);