1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
|
#ifndef FastTimerService_h
#define FastTimerService_h
// system headers
#include <unistd.h>
#include <pthread.h>
// C++ headers
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
// boost headers
#include <boost/chrono.hpp>
// tbb headers
#include <oneapi/tbb/concurrent_unordered_set.h>
#include <oneapi/tbb/concurrent_vector.h>
#include <oneapi/tbb/enumerable_thread_specific.h>
#include <oneapi/tbb/task_scheduler_observer.h>
// JSON headers
#include <nlohmann/json_fwd.hpp>
using json = nlohmann::json;
// CMSSW headers
#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/ServiceRegistry/interface/SystemBounds.h"
#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
#include "FWCore/ServiceRegistry/interface/PathContext.h"
#include "FWCore/ServiceRegistry/interface/StreamContext.h"
#include "FWCore/ServiceRegistry/interface/ProcessContext.h"
#include "FWCore/ServiceRegistry/interface/GlobalContext.h"
#include "FWCore/ServiceRegistry/interface/ESModuleCallingContext.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/Framework/interface/TriggerNamesService.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "DataFormats/Common/interface/HLTPathStatus.h"
#include "DataFormats/Provenance/interface/EventID.h"
#include "DataFormats/Provenance/interface/Timestamp.h"
#include "DataFormats/Provenance/interface/ModuleDescription.h"
#include "DQMServices/Core/interface/DQMStore.h"
#include "HLTrigger/Timer/interface/ProcessCallGraph.h"
/*
processing time is divided into
- source
- event processing, sum of the time spent in all the modules
*/
class FastTimerService : public tbb::task_scheduler_observer {
public:
// construct the service from its configuration (edm::ParameterSet) and the edm::ActivityRegistry
// NOTE(review): presumably the registry is used to connect the signal handlers declared below - confirm in the implementation
FastTimerService(const edm::ParameterSet&, edm::ActivityRegistry&);
// defaulted destructor, overriding the one of tbb::task_scheduler_observer
~FastTimerService() override = default;
private:
// helpers taking the name of a signal
// NOTE(review): presumably used to report signals that are ignored or not supported, see unsupported_signals_ - confirm
void ignoredSignal(const std::string& signal) const;
void unsupportedSignal(const std::string& signal) const;
// these signal pairs are not guaranteed to happen in the same thread
// start-up and end-of-job signals
void preallocate(edm::service::SystemBounds const&);
void lookupInitializationComplete(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&);
void postEndJob();
// global and stream run transitions
void preGlobalBeginRun(edm::GlobalContext const&);
void postGlobalBeginRun(edm::GlobalContext const&);
void preGlobalEndRun(edm::GlobalContext const&);
void postGlobalEndRun(edm::GlobalContext const&);
void preStreamBeginRun(edm::StreamContext const&);
void postStreamBeginRun(edm::StreamContext const&);
void preStreamEndRun(edm::StreamContext const&);
void postStreamEndRun(edm::StreamContext const&);
// global and stream luminosity block transitions
void preGlobalBeginLumi(edm::GlobalContext const&);
void postGlobalBeginLumi(edm::GlobalContext const&);
void preGlobalEndLumi(edm::GlobalContext const&);
void postGlobalEndLumi(edm::GlobalContext const&);
void preStreamBeginLumi(edm::StreamContext const&);
void postStreamBeginLumi(edm::StreamContext const&);
void preStreamEndLumi(edm::StreamContext const&);
void postStreamEndLumi(edm::StreamContext const&);
// event, path and module prefetching signals
void preEvent(edm::StreamContext const&);
void postEvent(edm::StreamContext const&);
void prePathEvent(edm::StreamContext const&, edm::PathContext const&);
void postPathEvent(edm::StreamContext const&, edm::PathContext const&, edm::HLTPathStatus const&);
void preModuleEventPrefetching(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postModuleEventPrefetching(edm::StreamContext const&, edm::ModuleCallingContext const&);
// these signal pairs are guaranteed to be called from the same thread
// Note: the commented-out declarations below are signals for which this class declares no handler
//void preOpenFile(std::string const&, bool);
//void postOpenFile(std::string const&, bool);
//void preCloseFile(std::string const&, bool);
//void postCloseFile(std::string const&, bool);
// source signals; only the "pre" half of the source construction is handled
void preSourceConstruction(edm::ModuleDescription const&);
//void postSourceConstruction(edm::ModuleDescription const&);
void preSourceRun(edm::RunIndex);
void postSourceRun(edm::RunIndex);
void preSourceLumi(edm::LuminosityBlockIndex);
void postSourceLumi(edm::LuminosityBlockIndex);
void preSourceEvent(edm::StreamID);
void postSourceEvent(edm::StreamID);
//void preModuleConstruction(edm::ModuleDescription const&);
//void postModuleConstruction(edm::ModuleDescription const&);
//void preModuleBeginJob(edm::ModuleDescription const&);
//void postModuleBeginJob(edm::ModuleDescription const&);
//void preModuleEndJob(edm::ModuleDescription const&);
//void postModuleEndJob(edm::ModuleDescription const&);
//void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
//void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
//void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
//void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
// per-module global run and lumi transitions
void preModuleGlobalBeginRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
void postModuleGlobalBeginRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
void preModuleGlobalEndRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
void postModuleGlobalEndRun(edm::GlobalContext const&, edm::ModuleCallingContext const&);
void preModuleGlobalBeginLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
void postModuleGlobalBeginLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
void preModuleGlobalEndLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
void postModuleGlobalEndLumi(edm::GlobalContext const&, edm::ModuleCallingContext const&);
// per-module stream run and lumi transitions
void preModuleStreamBeginRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postModuleStreamBeginRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
void preModuleStreamEndRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postModuleStreamEndRun(edm::StreamContext const&, edm::ModuleCallingContext const&);
void preModuleStreamBeginLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postModuleStreamBeginLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
void preModuleStreamEndLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postModuleStreamEndLumi(edm::StreamContext const&, edm::ModuleCallingContext const&);
// per-module event processing: acquire, event, delayed get and read from source
void preModuleEventAcquire(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postModuleEventAcquire(edm::StreamContext const&, edm::ModuleCallingContext const&);
void preModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const&);
void preModuleEventDelayedGet(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postModuleEventDelayedGet(edm::StreamContext const&, edm::ModuleCallingContext const&);
void preEventReadFromSource(edm::StreamContext const&, edm::ModuleCallingContext const&);
void postEventReadFromSource(edm::StreamContext const&, edm::ModuleCallingContext const&);
// EventSetup modules
void preESModule(edm::eventsetup::EventSetupRecordKey const&, edm::ESModuleCallingContext const&);
void postESModule(edm::eventsetup::EventSetupRecordKey const&, edm::ESModuleCallingContext const&);
// inherited from TBB task_scheduler_observer
// declared final, so they cannot be overridden by a class deriving from FastTimerService;
// see the oneTBB task_scheduler_observer documentation for when they are invoked and for the meaning of `worker`
void on_scheduler_entry(bool worker) final;
void on_scheduler_exit(bool worker) final;
public:
// static hook receiving the edm::ConfigurationDescriptions to fill
// NOTE(review): presumably describes the configuration parameters accepted by the service - confirm in the implementation
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
// modifies `label` in place (it is taken by non-const reference)
// NOTE(review): presumably makes the label usable as a DQM name - confirm in the implementation
static void fixForDQM(std::string& label);
private:
// forward declarations
// needed because Measurement, declared next, refers to both types before they are defined
struct Resources;
struct AtomicResources;
// per-thread measurements
// holds the values of the last measurement, used as the reference for the deltas computed by the measure_and_* methods
struct Measurement {
public:
Measurement() noexcept;
// take per-thread measurements
void measure() noexcept;
// take per-thread measurements, compute the delta with respect to the previous measurement, and store them in the argument
void measure_and_store(Resources& store) noexcept;
// take per-thread measurements, compute the delta with respect to the previous measurement, and add them to the argument
void measure_and_accumulate(Resources& store) noexcept;
// same name and signature as above, overloaded for the atomic version of the resources
void measure_and_accumulate(AtomicResources& store) noexcept;
public:
// the thread id is only part of the structure when DEBUG_THREAD_CONCURRENCY is defined
#ifdef DEBUG_THREAD_CONCURRENCY
std::thread::id id;
#endif // DEBUG_THREAD_CONCURRENCY
// time point of boost::chrono::thread_clock
boost::chrono::thread_clock::time_point time_thread;
// time point of boost::chrono::high_resolution_clock
boost::chrono::high_resolution_clock::time_point time_real;
// NOTE(review): presumably the memory allocated and deallocated by the thread so far - confirm units and source in the implementation
uint64_t allocated;
uint64_t deallocated;
};
// highlight a group of modules
struct GroupOfModules {
public:
// name of the group
std::string label;
// modules that belong to the group
// NOTE(review): presumably module ids as used by the ProcessCallGraph - confirm
std::vector<unsigned int> modules;
};
// resources being monitored by the service
// non-atomic version; see AtomicResources for the variant with atomic fields
struct Resources {
public:
Resources();
void reset();
// accumulate `other` into this object; overloaded for both the plain and the atomic version
Resources& operator+=(Resources const& other);
Resources& operator+=(struct AtomicResources const& other);
// binary sums; both overloads return a plain (non-atomic) Resources by value
Resources operator+(Resources const& other) const;
Resources operator+(struct AtomicResources const& other) const;
public:
// durations, with nanosecond resolution
boost::chrono::nanoseconds time_thread;
boost::chrono::nanoseconds time_real;
// NOTE(review): presumably amounts of memory allocated and deallocated - confirm units in the implementation
uint64_t allocated;
uint64_t deallocated;
};
// atomic version of Resources
// Note: the structure as a whole is *not* atomic, only the individual fields are
struct AtomicResources {
public:
AtomicResources();
// std::atomic is not copyable, so the copy constructor and copy assignment are declared explicitly
AtomicResources(AtomicResources const& other);
void reset();
AtomicResources& operator=(AtomicResources const& other);
// accumulate `other` into this object; overloaded for both the atomic and the plain version
AtomicResources& operator+=(AtomicResources const& other);
AtomicResources& operator+=(Resources const& other);
// binary sums; note that adding a plain Resources returns a plain Resources
AtomicResources operator+(AtomicResources const& other) const;
Resources operator+(Resources const& other) const;
public:
// the times are stored as the underlying representation (rep) of boost::chrono::nanoseconds, not as durations
std::atomic<boost::chrono::nanoseconds::rep> time_thread;
std::atomic<boost::chrono::nanoseconds::rep> time_real;
// NOTE(review): presumably amounts of memory allocated and deallocated - confirm units in the implementation
std::atomic<uint64_t> allocated;
std::atomic<uint64_t> deallocated;
};
// resources associated to each module, path, process and job
// this first structure holds the per-module values
struct ResourcesPerModule {
public:
ResourcesPerModule() noexcept;
void reset() noexcept;
ResourcesPerModule& operator+=(ResourcesPerModule const& other);
ResourcesPerModule operator+(ResourcesPerModule const& other) const;
public:
// resources used by the module
Resources total;
// event counter
// NOTE(review): presumably the number of events in which the module ran - confirm
unsigned events;
bool has_acquire; // whether this module has an acquire() method
};
// resources associated to a single path
struct ResourcesPerPath {
public:
  void reset();
  // accumulate `other` into this object, and the corresponding binary sum
  ResourcesPerPath& operator+=(ResourcesPerPath const& other);
  ResourcesPerPath operator+(ResourcesPerPath const& other) const;

public:
  Resources active;  // resources used by all modules on this path
  Resources total;   // resources used by all modules on this path, and their dependencies
  // The structure declares no constructor, so the built-in members get default member
  // initializers: without them a default-initialised object would hold indeterminate values.
  unsigned last = 0;    // one-past-the last module that ran on this path
  bool status = false;  // whether the path accepted or rejected the event
};
// resources associated to a single process
struct ResourcesPerProcess {
public:
// NOTE(review): presumably sizes `paths` and `endpaths` according to the process description - confirm in the implementation
ResourcesPerProcess(ProcessCallGraph::ProcessType const& process);
void reset();
// accumulate `other` into this object, and the corresponding binary sum
ResourcesPerProcess& operator+=(ResourcesPerProcess const& other);
ResourcesPerProcess operator+(ResourcesPerProcess const& other) const;
public:
// resources used by the process as a whole
Resources total;
// per-path and per-endpath resources
std::vector<ResourcesPerPath> paths;
std::vector<ResourcesPerPath> endpaths;
};
// resources associated to a whole job
struct ResourcesPerJob {
public:
  // defaulted: the vectors start empty and `events` starts at zero (see its initializer below)
  ResourcesPerJob() = default;
  // NOTE(review): presumably sizes `highlight`, `modules` and `processes` from the call graph
  // and the groups of highlighted modules - confirm in the implementation
  ResourcesPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
  void reset();
  // accumulate `other` into this object, and the corresponding binary sum
  ResourcesPerJob& operator+=(ResourcesPerJob const& other);
  ResourcesPerJob operator+(ResourcesPerJob const& other) const;

public:
  Resources total;
  // these three are atomic versions of Resources (only the individual fields are atomic)
  AtomicResources overhead;
  AtomicResources eventsetup;
  AtomicResources idle;
  Resources event;  // total time etc. spent between preSourceEvent and postEvent
  Measurement event_measurement;
  // one entry per group of highlighted modules, per module and per process
  // NOTE(review): the sizes are set outside this header - confirm
  std::vector<Resources> highlight;
  std::vector<ResourcesPerModule> modules;
  std::vector<ResourcesPerProcess> processes;
  // event counter; initialised here because the defaulted default constructor
  // would otherwise leave it indeterminate in a default-initialised object
  unsigned events = 0;
};
// plot ranges and resolution
// plain aggregate of four doubles, one range/resolution pair for the time plots and one for the memory plots
// NOTE(review): the units are not visible in this header - confirm against the configuration description
struct PlotRanges {
double time_range;
double time_resolution;
double memory_range;
double memory_resolution;
};
// plots associated to each module or other element (path, process, etc)
class PlotsPerElement {
public:
PlotsPerElement() = default;
// book the plots for one element through the given IBooker
// NOTE(review): `byls` presumably controls whether the *_byls_ profiles are booked, and `lumisections` their range - confirm
void book(dqm::reco::DQMStore::IBooker&,
std::string const& name,
std::string const& title,
PlotRanges const& ranges,
unsigned int lumisections,
bool byls);
// fill the plots from the given resources; overloaded for the plain and the atomic version
// NOTE(review): `lumisection` is presumably the coordinate used for the *_byls_ profiles - confirm
void fill(Resources const&, unsigned int lumisection);
void fill(AtomicResources const&, unsigned int lumisection);
// NOTE(review): presumably fills the plots with the ratio between the two sets of resources - confirm which is the denominator
void fill_fraction(Resources const&, Resources const&, unsigned int lumisection);
private:
// resources spent in the module
// all the pointers are initialised to nullptr; the trailing comment gives the type of each plot
dqm::reco::MonitorElement* time_thread_ = nullptr; // TH1F
dqm::reco::MonitorElement* time_thread_byls_ = nullptr; // TProfile
dqm::reco::MonitorElement* time_real_ = nullptr; // TH1F
dqm::reco::MonitorElement* time_real_byls_ = nullptr; // TProfile
dqm::reco::MonitorElement* allocated_ = nullptr; // TH1F
dqm::reco::MonitorElement* allocated_byls_ = nullptr; // TProfile
dqm::reco::MonitorElement* deallocated_ = nullptr; // TH1F
dqm::reco::MonitorElement* deallocated_byls_ = nullptr; // TProfile
};
// plots associated to each path or endpath
class PlotsPerPath {
public:
PlotsPerPath() = default;
// book the plots for one path through the given IBooker
// NOTE(review): the unnamed std::string is presumably a prefix or directory for the plots - confirm in the implementation
void book(dqm::reco::DQMStore::IBooker&,
std::string const&,
ProcessCallGraph const&,
ProcessCallGraph::PathType const&,
PlotRanges const& ranges,
unsigned int lumisections,
bool byls);
// fill the plots for one path, given its description, the per-job resources and the per-path resources
void fill(ProcessCallGraph::PathType const&,
ResourcesPerJob const&,
ResourcesPerPath const&,
unsigned int lumisection);
private:
// resources spent in all the modules in the path, including their dependencies
PlotsPerElement total_;
// Note:
// a TH1F has 7 significant digits, while a 24-hour long run could process
// order of 10 billion events; a 64-bit long integer would work and might
// be better suited than a double, but there is no "TH1L" in ROOT.
// how many times each module and their dependencies has run
dqm::reco::MonitorElement* module_counter_ = nullptr; // TH1D
// resources spent in each module and their dependencies
// all the pointers are initialised to nullptr
dqm::reco::MonitorElement* module_time_thread_total_ = nullptr; // TH1D
dqm::reco::MonitorElement* module_time_real_total_ = nullptr; // TH1D
dqm::reco::MonitorElement* module_allocated_total_ = nullptr; // TH1D
dqm::reco::MonitorElement* module_deallocated_total_ = nullptr; // TH1D
};
// plots associated to each (sub)process
class PlotsPerProcess {
public:
// NOTE(review): presumably sizes paths_ and endpaths_ according to the process description - confirm in the implementation
PlotsPerProcess(ProcessCallGraph::ProcessType const&);
// book the plots for one process through the given IBooker
// separate ranges are given for the event-level plots and for the path-level plots
// NOTE(review): `bypath` presumably enables the per-path plots and `byls` the per-lumisection profiles - confirm
void book(dqm::reco::DQMStore::IBooker&,
ProcessCallGraph const&,
ProcessCallGraph::ProcessType const&,
PlotRanges const& event_ranges,
PlotRanges const& path_ranges,
unsigned int lumisections,
bool bypath,
bool byls);
// fill the plots for one process, given its description, the per-job resources and the per-process resources
void fill(ProcessCallGraph::ProcessType const&, ResourcesPerJob const&, ResourcesPerProcess const&, unsigned int ls);
private:
// resources spent in all the modules of the (sub)process
PlotsPerElement event_;
// resources spent in each path and endpath
std::vector<PlotsPerPath> paths_;
std::vector<PlotsPerPath> endpaths_;
};
// plots associated to the whole job
class PlotsPerJob {
public:
// NOTE(review): presumably sizes highlight_, modules_ and processes_ from the call graph and the groups - confirm in the implementation
PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
// book the plots for the job through the given IBooker
// separate ranges are given for the event-level, path-level and module-level plots
// NOTE(review): the boolean flags presumably enable the per-module, per-path, per-lumisection and transition plots - confirm
void book(dqm::reco::DQMStore::IBooker&,
ProcessCallGraph const&,
std::vector<GroupOfModules> const&,
PlotRanges const& event_ranges,
PlotRanges const& path_ranges,
PlotRanges const& module_ranges,
unsigned int lumisections,
bool bymodule,
bool bypath,
bool byls,
bool transitions);
// fill the plots from the per-job resources
void fill(ProcessCallGraph const&, ResourcesPerJob const&, unsigned int ls);
// fill the run and lumi transition plots
// NOTE(review): presumably these fill run_ and lumi_ respectively - confirm
void fill_run(AtomicResources const&);
void fill_lumi(AtomicResources const&, unsigned int lumisection);
private:
// resources spent in all the modules of the job
PlotsPerElement event_;
// NOTE(review): the meaning of the "_ex" suffix is not visible in this header - confirm in the implementation
PlotsPerElement event_ex_;
PlotsPerElement overhead_;
PlotsPerElement idle_;
// resources spent in the modules' lumi and run transitions
PlotsPerElement lumi_;
PlotsPerElement run_;
// resources spent in the highlighted modules
std::vector<PlotsPerElement> highlight_;
// resources spent in each module
std::vector<PlotsPerElement> modules_;
// resources spent in each (sub)process
std::vector<PlotsPerProcess> processes_;
};
// keep track of the dependencies among modules
ProcessCallGraph callgraph_;
// per-stream information
// NOTE(review): presumably one entry per concurrent stream, see concurrent_streams_ - confirm
std::vector<ResourcesPerJob> streams_;
// concurrent histograms and profiles
// owned through a unique_ptr
// NOTE(review): presumably left empty when the DQM plots are disabled, see enable_dqm_ - confirm
std::unique_ptr<PlotsPerJob> plots_;
// per-lumi and per-run information
std::vector<AtomicResources> lumi_transition_; // resources spent in the modules' global and stream lumi transitions
std::vector<AtomicResources> run_transition_; // resources spent in the modules' global and stream run transitions
// summary data
ResourcesPerJob job_summary_; // whole event time accounting per-job
std::vector<ResourcesPerJob> run_summary_; // whole event time accounting per-run
std::mutex summary_mutex_; // synchronise access to the summary objects across different threads
// per-thread bookkeeping, based on a pthread key and a concurrent vector of per-thread data
struct ThreadGuard {
// per-thread data: a Measurement, a reference to the AtomicResources passed at construction, and a "live" flag
struct specific_t {
// measurement_ is default-constructed, resource_ is bound to `r`, and live_ starts as true
specific_t(AtomicResources& r) : resource_(r), live_(true) {}
~specific_t() = default;
Measurement measurement_;
AtomicResources& resource_;
std::atomic<bool> live_;
};
ThreadGuard();
~ThreadGuard() = default;
// NOTE(review): the signature matches a pthread key destructor; presumably registered for key_ - confirm in the implementation
static void retire_thread(void* t);
// reinterpret an opaque pointer as a pointer to the shared_ptr holding the per-thread data
// NOTE(review): presumably `p` is the value stored under key_ - confirm
static std::shared_ptr<specific_t>* ptr(void* p);
// NOTE(review): presumably registers the calling thread, with `r` as the resources it reports to;
// the meaning of the returned bool is not visible in this header - confirm
bool register_thread(FastTimerService::AtomicResources& r);
// NOTE(review): presumably returns the Measurement of the calling thread - confirm
Measurement& thread();
void finalize();
// per-thread data, held through shared_ptr in a TBB concurrent vector
tbb::concurrent_vector<std::shared_ptr<specific_t>> thread_resources_;
// pthread key
// NOTE(review): presumably created in the ThreadGuard constructor - confirm
pthread_key_t key_;
};
// the single ThreadGuard instance of the service
ThreadGuard guard_;
// atomic variables to keep track of the completion of each step, process by process
// each one is a dynamically allocated array of atomic counters
// NOTE(review): the array sizes are set outside this header - presumably one entry per stream, lumi or run being processed concurrently; confirm
std::unique_ptr<std::atomic<unsigned int>[]> subprocess_event_check_;
std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_lumi_check_;
std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_run_check_;
// retrieve the current thread's per-thread quantities
Measurement& thread();
// job configuration
// non-const
// NOTE(review): presumably set from the edm::service::SystemBounds received by preallocate() - confirm
unsigned int concurrent_lumis_;
unsigned int concurrent_runs_;
unsigned int concurrent_streams_;
unsigned int concurrent_threads_;
// logging configuration
const bool print_event_summary_; // print the time spent in each process, path and module after every event
const bool print_run_summary_; // print the time spent in each process, path and module for each run
const bool print_job_summary_; // print the time spent in each process, path and module for the whole job
// JSON configuration
// only the job summary option is active; the per-event, per-ls and per-run options are commented out
//const bool write_json_per_event_;
//const bool write_json_per_ls_;
//const bool write_json_per_run_;
const bool write_json_summary_;
const std::string json_filename_;
// dqm configuration
bool enable_dqm_; // non const, depends on the availability of the DQMStore
const bool enable_dqm_bymodule_;
const bool enable_dqm_bypath_;
const bool enable_dqm_byls_;
const bool enable_dqm_bynproc_;
const bool enable_dqm_transitions_;
const PlotRanges dqm_event_ranges_;
const PlotRanges dqm_path_ranges_;
const PlotRanges dqm_module_ranges_;
const unsigned int dqm_lumisections_range_;
std::string dqm_path_;
// NOTE(review): the two comments below refer to postBeginJob(), but this class declares no such method;
// presumably they predate lookupInitializationComplete() - confirm and update
std::vector<edm::ParameterSet> highlight_module_psets_; // non-const, cleared in postBeginJob()
std::vector<GroupOfModules> highlight_modules_; // non-const, filled in postBeginJob()
// log unsupported signals
// mutable, so that it can be modified from const member functions
mutable tbb::concurrent_unordered_set<std::string> unsupported_signals_; // keep track of unsupported signals received
// print the resource usage summary for an event, a run, or the whole job
// all the print* helpers are templated on the type of the output object `out`, and are const
template <typename T>
void printHeader(T& out, std::string const& label) const;
// per-event report: header, one line per set of resources (plain or atomic), and the full event
template <typename T>
void printEventHeader(T& out, std::string const& label) const;
template <typename T>
void printEventLine(T& out, Resources const& data, std::string const& label) const;
template <typename T>
void printEventLine(T& out, AtomicResources const& data, std::string const& label) const;
template <typename T>
void printEvent(T& out, ResourcesPerJob const&) const;
// summary report: headers, one line per set of resources (plain or atomic), and the full summary
// NOTE(review): `events` and `active` are presumably the event counts used to normalise the values - confirm
template <typename T>
void printSummaryHeader(T& out, std::string const& label, bool detailed) const;
template <typename T>
void printPathSummaryHeader(T& out, std::string const& label) const;
template <typename T>
void printSummaryLine(T& out, Resources const& data, uint64_t events, std::string const& label) const;
template <typename T>
void printSummaryLine(T& out, Resources const& data, uint64_t events, uint64_t active, std::string const& label) const;
template <typename T>
void printSummaryLine(T& out, AtomicResources const& data, uint64_t events, std::string const& label) const;
template <typename T>
void printSummaryLine(
T& out, AtomicResources const& data, uint64_t events, uint64_t active, std::string const& label) const;
template <typename T>
void printPathSummaryLine(
T& out, Resources const& data, Resources const& total, uint64_t events, std::string const& label) const;
template <typename T>
void printSummary(T& out, ResourcesPerJob const& data, std::string const& label) const;
// report for the resources spent in a transition
template <typename T>
void printTransition(T& out, AtomicResources const& data, std::string const& label) const;
// JSON output: encode a generic element or a module, and write the summary to the file `filename`
template <typename T>
json encodeToJSON(std::string const& type, std::string const& label, unsigned int events, T const& data) const;
json encodeToJSON(edm::ModuleDescription const& module, ResourcesPerModule const& data) const;
void writeSummaryJSON(ResourcesPerJob const& data, std::string const& filename) const;
// check if this is the first process being signalled
bool isFirstSubprocess(edm::StreamContext const&);
bool isFirstSubprocess(edm::GlobalContext const&);
// check if this is the last process being signalled
// NOTE(review): `check` is taken by non-const reference, so it is presumably updated by the call - confirm
bool isLastSubprocess(std::atomic<unsigned int>& check);
};
#endif // ! FastTimerService_h
|