File indexing completed on 2025-01-31 02:19:50
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #include <algorithm>
0015 #include <memory>
0016
0017 #include "Compression.h"
0018 #include "TFile.h"
0019 #include "TObjString.h"
0020 #include "TROOT.h"
0021 #include "TTree.h"
0022 #include <string>
0023
0024
0025 #include "FWCore/Framework/interface/one/OutputModule.h"
0026 #include "FWCore/Framework/interface/RunForOutput.h"
0027 #include "FWCore/Framework/interface/LuminosityBlockForOutput.h"
0028 #include "FWCore/Framework/interface/EventForOutput.h"
0029 #include "FWCore/ServiceRegistry/interface/Service.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0032 #include "FWCore/MessageLogger/interface/JobReport.h"
0033 #include "FWCore/Utilities/interface/GlobalIdentifier.h"
0034 #include "FWCore/Utilities/interface/Digest.h"
0035 #include "IOPool/Provenance/interface/CommonProvenanceFiller.h"
0036 #include "DataFormats/Provenance/interface/BranchType.h"
0037 #include "DataFormats/Provenance/interface/ProductDescription.h"
0038 #include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h"
0039 #include "DataFormats/NanoAOD/interface/FlatTable.h"
0040 #include "DataFormats/NanoAOD/interface/UniqueString.h"
0041 #include "PhysicsTools/NanoAOD/plugins/TableOutputBranches.h"
0042 #include "PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h"
0043 #include "PhysicsTools/NanoAOD/plugins/TriggerOutputBranches.h"
0044 #include "PhysicsTools/NanoAOD/plugins/EventStringOutputBranches.h"
0045 #include "PhysicsTools/NanoAOD/plugins/SummaryTableOutputBranches.h"
0046
0047 #include <iostream>
0048
0049 #include "oneapi/tbb/task_arena.h"
0050
0051 class NanoAODOutputModule : public edm::one::OutputModule<> {
0052 public:
0053 NanoAODOutputModule(edm::ParameterSet const& pset);
0054 ~NanoAODOutputModule() override;
0055
0056 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0057
0058 private:
0059 void write(edm::EventForOutput const& e) override;
0060 void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override;
0061 void writeRun(edm::RunForOutput const&) override;
0062 bool isFileOpen() const override;
0063 void openFile(edm::FileBlock const&) override;
0064 void reallyCloseFile() override;
0065
0066 std::string m_fileName;
0067 std::string m_logicalFileName;
0068 int m_compressionLevel;
0069 int m_eventsSinceFlush{0};
0070 std::string m_compressionAlgorithm;
0071 bool m_writeProvenance;
0072 bool m_fakeName;
0073 int m_autoFlush;
0074 edm::ProcessHistoryRegistry m_processHistoryRegistry;
0075 edm::JobReport::Token m_jrToken;
0076 std::unique_ptr<TFile> m_file;
0077 std::unique_ptr<TTree> m_tree, m_lumiTree, m_runTree, m_metaDataTree, m_parameterSetsTree;
0078
0079 static constexpr int m_firstFlush{1000};
0080
0081 class CommonEventBranches {
0082 public:
0083 void branch(TTree& tree) {
0084 tree.Branch("run", &m_run, "run/i");
0085 tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0086 tree.Branch("event", &m_event, "event/l");
0087 tree.Branch("bunchCrossing", &m_bunchCrossing, "bunchCrossing/i");
0088 tree.Branch("orbitNumber", &m_orbitNumber, "orbitNumber/i");
0089 }
0090 void fill(const edm::EventAuxiliary& aux) {
0091 m_run = aux.id().run();
0092 m_luminosityBlock = aux.id().luminosityBlock();
0093 m_event = aux.id().event();
0094 m_bunchCrossing = aux.bunchCrossing();
0095 m_orbitNumber = aux.orbitNumber();
0096 }
0097
0098 private:
0099 UInt_t m_run;
0100 UInt_t m_luminosityBlock;
0101 ULong64_t m_event;
0102 UInt_t m_bunchCrossing;
0103 UInt_t m_orbitNumber;
0104 } m_commonBranches;
0105
0106 class CommonLumiBranches {
0107 public:
0108 void branch(TTree& tree) {
0109 tree.Branch("run", &m_run, "run/i");
0110 tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0111 }
0112 void fill(const edm::LuminosityBlockID& id) {
0113 m_run = id.run();
0114 m_luminosityBlock = id.value();
0115 }
0116
0117 private:
0118 UInt_t m_run;
0119 UInt_t m_luminosityBlock;
0120 } m_commonLumiBranches;
0121
0122 class CommonRunBranches {
0123 public:
0124 void branch(TTree& tree) { tree.Branch("run", &m_run, "run/i"); }
0125 void fill(const edm::RunID& id) { m_run = id.run(); }
0126
0127 private:
0128 UInt_t m_run;
0129 } m_commonRunBranches;
0130
0131 std::vector<TableOutputBranches> m_tables;
0132 std::vector<TriggerOutputBranches> m_triggers;
0133 bool m_triggers_areSorted = false;
0134 std::vector<EventStringOutputBranches> m_evstrings;
0135
0136 std::vector<SummaryTableOutputBranches> m_runTables;
0137 std::vector<SummaryTableOutputBranches> m_lumiTables;
0138 std::vector<LumiOutputBranches> m_lumiTables2;
0139 std::vector<TableOutputBranches> m_runFlatTables;
0140
0141 std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
0142 };
0143
0144
0145
0146
0147
0148
0149
0150
0151
0152
0153
0154
0155 NanoAODOutputModule::NanoAODOutputModule(edm::ParameterSet const& pset)
0156 : edm::one::OutputModuleBase::OutputModuleBase(pset),
0157 edm::one::OutputModule<>(pset),
0158 m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
0159 m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
0160 m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
0161 m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
0162 m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
0163 m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
0164 m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
0165 m_processHistoryRegistry() {}
0166
0167 NanoAODOutputModule::~NanoAODOutputModule() {}
0168
0169 void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) {
0170
0171 edm::Service<edm::JobReport> jr;
0172 jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
0173
0174 if (m_autoFlush) {
0175 int64_t events = m_tree->GetEntriesFast();
0176 if (events == m_firstFlush) {
0177 m_tree->FlushBaskets();
0178 float maxMemory;
0179 if (m_autoFlush > 0) {
0180
0181
0182 float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
0183 maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
0184 } else if (m_tree->GetZipBytes() == 0) {
0185 maxMemory = 100 * 1024 * 1024;
0186 } else {
0187
0188 float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
0189 maxMemory = -m_autoFlush * cxnRatio;
0190 float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
0191 m_autoFlush = m_firstFlush / percentBytesDone;
0192 }
0193
0194
0195
0196
0197
0198 m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
0199 }
0200 if (m_eventsSinceFlush == m_autoFlush) {
0201 m_tree->FlushBaskets();
0202 m_eventsSinceFlush = 0;
0203 }
0204 m_eventsSinceFlush++;
0205 }
0206
0207 m_commonBranches.fill(iEvent.eventAuxiliary());
0208
0209 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0210 for (auto& t : m_tables)
0211 t.fill(iEvent, *m_tree, extensions);
0212 }
0213 if (!m_triggers_areSorted) {
0214 std::vector<std::string> pnames;
0215 for (auto& p : iEvent.processHistory())
0216 pnames.push_back(p.processName());
0217 std::sort(m_triggers.begin(), m_triggers.end(), [pnames](TriggerOutputBranches& a, TriggerOutputBranches& b) {
0218 return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
0219 (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
0220 });
0221 m_triggers_areSorted = true;
0222 }
0223
0224 for (auto& t : m_triggers)
0225 t.fill(iEvent, *m_tree);
0226
0227 for (auto& t : m_evstrings)
0228 t.fill(iEvent, *m_tree);
0229 tbb::this_task_arena::isolate([&] { m_tree->Fill(); });
0230
0231 m_processHistoryRegistry.registerProcessHistory(iEvent.processHistory());
0232 }
0233
0234 void NanoAODOutputModule::writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) {
0235 edm::Service<edm::JobReport> jr;
0236 jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
0237
0238 m_commonLumiBranches.fill(iLumi.id());
0239
0240 for (auto& t : m_lumiTables)
0241 t.fill(iLumi, *m_lumiTree);
0242
0243 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0244 for (auto& t : m_lumiTables2)
0245 t.fill(iLumi, *m_lumiTree, extensions);
0246 }
0247
0248 tbb::this_task_arena::isolate([&] { m_lumiTree->Fill(); });
0249
0250 m_processHistoryRegistry.registerProcessHistory(iLumi.processHistory());
0251 }
0252
0253 void NanoAODOutputModule::writeRun(edm::RunForOutput const& iRun) {
0254 edm::Service<edm::JobReport> jr;
0255 jr->reportRunNumber(m_jrToken, iRun.id().run());
0256
0257 m_commonRunBranches.fill(iRun.id());
0258
0259 for (auto& t : m_runTables)
0260 t.fill(iRun, *m_runTree);
0261
0262 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0263 for (auto& t : m_runFlatTables)
0264 t.fill(iRun, *m_runTree, extensions);
0265 }
0266
0267 edm::Handle<nanoaod::UniqueString> hstring;
0268 for (const auto& p : m_nanoMetadata) {
0269 iRun.getByToken(p.second, hstring);
0270 TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
0271 if (tos) {
0272 if (hstring->str() != tos->GetString())
0273 throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
0274 } else {
0275 auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
0276 m_file->WriteTObject(ostr.release(), p.first.c_str());
0277 }
0278 }
0279
0280 tbb::this_task_arena::isolate([&] { m_runTree->Fill(); });
0281
0282 m_processHistoryRegistry.registerProcessHistory(iRun.processHistory());
0283 }
0284
0285 bool NanoAODOutputModule::isFileOpen() const { return nullptr != m_file.get(); }
0286
0287 void NanoAODOutputModule::openFile(edm::FileBlock const&) {
0288 m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
0289 edm::Service<edm::JobReport> jr;
0290 cms::Digest branchHash;
0291 m_jrToken = jr->outputFileOpened(m_fileName,
0292 m_logicalFileName,
0293 std::string(),
0294 m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
0295 description().moduleLabel(),
0296 edm::createGlobalIdentifier(),
0297 std::string(),
0298 branchHash.digest().toString(),
0299 std::vector<std::string>());
0300
0301 if (m_compressionAlgorithm == std::string("ZLIB")) {
0302 m_file->SetCompressionAlgorithm(ROOT::RCompressionSetting::EAlgorithm::kZLIB);
0303 } else if (m_compressionAlgorithm == std::string("LZMA")) {
0304 m_file->SetCompressionAlgorithm(ROOT::RCompressionSetting::EAlgorithm::kLZMA);
0305 } else if (m_compressionAlgorithm == std::string("ZSTD")) {
0306 m_file->SetCompressionAlgorithm(ROOT::RCompressionSetting::EAlgorithm::kZSTD);
0307 } else if (m_compressionAlgorithm == std::string("LZ4")) {
0308 m_file->SetCompressionAlgorithm(ROOT::RCompressionSetting::EAlgorithm::kLZ4);
0309 } else {
0310 throw cms::Exception("Configuration")
0311 << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
0312 << "Allowed compression algorithms are ZLIB, LZMA, ZSTD, and LZ4\n";
0313 }
0314
0315 m_tables.clear();
0316 m_triggers.clear();
0317 m_triggers_areSorted = false;
0318 m_evstrings.clear();
0319 m_runTables.clear();
0320 m_lumiTables.clear();
0321 m_lumiTables2.clear();
0322 m_runFlatTables.clear();
0323 const auto& keeps = keptProducts();
0324 for (const auto& keep : keeps[edm::InEvent]) {
0325 if (keep.first->className() == "nanoaod::FlatTable")
0326 m_tables.emplace_back(keep.first, keep.second);
0327 else if (keep.first->className() == "edm::TriggerResults") {
0328 m_triggers.emplace_back(keep.first, keep.second);
0329 } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
0330 keep.first->productInstanceName() == "genModel") {
0331 m_evstrings.emplace_back(keep.first, keep.second, true);
0332 } else
0333 throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
0334 }
0335
0336 for (const auto& keep : keeps[edm::InLumi]) {
0337 if (keep.first->className() == "nanoaod::MergeableCounterTable")
0338 m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0339 else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0340 m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0341 else if (keep.first->className() == "nanoaod::FlatTable")
0342 m_lumiTables2.push_back(LumiOutputBranches(keep.first, keep.second));
0343 else
0344 throw cms::Exception(
0345 "Configuration",
0346 "NanoAODOutputModule cannot handle class " + keep.first->className() + " in LuminosityBlock branch");
0347 }
0348
0349 for (const auto& keep : keeps[edm::InRun]) {
0350 if (keep.first->className() == "nanoaod::MergeableCounterTable")
0351 m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0352 else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0353 m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0354 else if (keep.first->className() == "nanoaod::FlatTable")
0355 m_runFlatTables.emplace_back(keep.first, keep.second);
0356 else
0357 throw cms::Exception("Configuration",
0358 "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
0359 }
0360
0361
0362 m_tree = std::make_unique<TTree>("Events", "Events");
0363 m_tree->SetAutoSave(0);
0364 m_tree->SetAutoFlush(0);
0365 m_commonBranches.branch(*m_tree);
0366
0367 m_lumiTree = std::make_unique<TTree>("LuminosityBlocks", "LuminosityBlocks");
0368 m_lumiTree->SetAutoSave(0);
0369 m_commonLumiBranches.branch(*m_lumiTree);
0370
0371 m_runTree = std::make_unique<TTree>("Runs", "Runs");
0372 m_runTree->SetAutoSave(0);
0373 m_commonRunBranches.branch(*m_runTree);
0374
0375 if (m_writeProvenance) {
0376 m_metaDataTree = std::make_unique<TTree>(edm::poolNames::metaDataTreeName().c_str(), "Job metadata");
0377 m_metaDataTree->SetAutoSave(0);
0378 m_parameterSetsTree = std::make_unique<TTree>(edm::poolNames::parameterSetsTreeName().c_str(), "Parameter sets");
0379 m_parameterSetsTree->SetAutoSave(0);
0380 }
0381 }
0382 void NanoAODOutputModule::reallyCloseFile() {
0383 if (m_writeProvenance) {
0384 int basketSize = 16384;
0385 edm::fillParameterSetBranch(m_parameterSetsTree.get(), basketSize);
0386 edm::fillProcessHistoryBranch(m_metaDataTree.get(), basketSize, m_processHistoryRegistry);
0387 if (m_metaDataTree->GetNbranches() != 0) {
0388 m_metaDataTree->SetEntries(-1);
0389 }
0390 if (m_parameterSetsTree->GetNbranches() != 0) {
0391 m_parameterSetsTree->SetEntries(-1);
0392 }
0393 }
0394 m_file->Write();
0395 m_file->Close();
0396 m_file.reset();
0397 m_tree.release();
0398 m_lumiTree.release();
0399 m_runTree.release();
0400 m_metaDataTree.release();
0401 m_parameterSetsTree.release();
0402 edm::Service<edm::JobReport> jr;
0403 jr->outputFileClosed(m_jrToken);
0404 }
0405
0406 void NanoAODOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0407 edm::ParameterSetDescription desc;
0408
0409 desc.addUntracked<std::string>("fileName");
0410 desc.addUntracked<std::string>("logicalFileName", "");
0411
0412 desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
0413 desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
0414 ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
0415 desc.addUntracked<bool>("saveProvenance", true)
0416 ->setComment("Save process provenance information, e.g. for edmProvDump");
0417 desc.addUntracked<bool>("fakeNameForCrab", false)
0418 ->setComment(
0419 "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran "
0420 "(and publish) till crab is fixed");
0421 desc.addUntracked<int>("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file");
0422
0423
0424 const std::vector<std::string> keep = {"drop *",
0425 "keep nanoaodFlatTable_*Table_*_*",
0426 "keep edmTriggerResults_*_*_*",
0427 "keep String_*_genModel_*",
0428 "keep nanoaodMergeableCounterTable_*Table_*_*",
0429 "keep nanoaodUniqueString_nanoMetadata_*_*"};
0430 edm::one::OutputModule<>::fillDescription(desc, keep);
0431
0432
0433 edm::ParameterSetDescription dataSet;
0434 dataSet.setAllowAnything();
0435 desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
0436 ->setComment("PSet is only used by Data Operations and not by this module.");
0437
0438 edm::ParameterSetDescription branchSet;
0439 branchSet.setAllowAnything();
0440 desc.add<edm::ParameterSetDescription>("branches", branchSet);
0441
0442 descriptions.addDefault(desc);
0443 }
0444
// Register NanoAODOutputModule as a framework module plugin.
DEFINE_FWK_MODULE(NanoAODOutputModule);