File indexing completed on 2023-03-17 11:16:07
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #include <algorithm>
0015 #include <memory>
0016
0017 #include "Compression.h"
0018 #include "TFile.h"
0019 #include "TObjString.h"
0020 #include "TROOT.h"
0021 #include "TTree.h"
0022 #include <string>
0023
0024
0025 #include "FWCore/Framework/interface/one/OutputModule.h"
0026 #include "FWCore/Framework/interface/RunForOutput.h"
0027 #include "FWCore/Framework/interface/LuminosityBlockForOutput.h"
0028 #include "FWCore/Framework/interface/EventForOutput.h"
0029 #include "FWCore/ServiceRegistry/interface/Service.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0032 #include "FWCore/MessageLogger/interface/JobReport.h"
0033 #include "FWCore/Utilities/interface/GlobalIdentifier.h"
0034 #include "FWCore/Utilities/interface/Digest.h"
0035 #include "IOPool/Provenance/interface/CommonProvenanceFiller.h"
0036 #include "DataFormats/Provenance/interface/BranchType.h"
0037 #include "DataFormats/Provenance/interface/BranchDescription.h"
0038 #include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h"
0039 #include "DataFormats/NanoAOD/interface/FlatTable.h"
0040 #include "DataFormats/NanoAOD/interface/UniqueString.h"
0041 #include "PhysicsTools/NanoAOD/plugins/TableOutputBranches.h"
0042 #include "PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h"
0043 #include "PhysicsTools/NanoAOD/plugins/TriggerOutputBranches.h"
0044 #include "PhysicsTools/NanoAOD/plugins/EventStringOutputBranches.h"
0045 #include "PhysicsTools/NanoAOD/plugins/SummaryTableOutputBranches.h"
0046
0047 #include <iostream>
0048
0049 #include "oneapi/tbb/task_arena.h"
0050
0051 class NanoAODOutputModule : public edm::one::OutputModule<> {
0052 public:
0053 NanoAODOutputModule(edm::ParameterSet const& pset);
0054 ~NanoAODOutputModule() override;
0055
0056 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0057
0058 private:
0059 void write(edm::EventForOutput const& e) override;
0060 void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override;
0061 void writeRun(edm::RunForOutput const&) override;
0062 bool isFileOpen() const override;
0063 void openFile(edm::FileBlock const&) override;
0064 void reallyCloseFile() override;
0065
0066 std::string m_fileName;
0067 std::string m_logicalFileName;
0068 int m_compressionLevel;
0069 int m_eventsSinceFlush{0};
0070 std::string m_compressionAlgorithm;
0071 bool m_writeProvenance;
0072 bool m_fakeName;
0073 int m_autoFlush;
0074 edm::ProcessHistoryRegistry m_processHistoryRegistry;
0075 edm::JobReport::Token m_jrToken;
0076 std::unique_ptr<TFile> m_file;
0077 std::unique_ptr<TTree> m_tree, m_lumiTree, m_runTree, m_metaDataTree, m_parameterSetsTree;
0078
0079 static constexpr int m_firstFlush{1000};
0080
0081 class CommonEventBranches {
0082 public:
0083 void branch(TTree& tree) {
0084 tree.Branch("run", &m_run, "run/i");
0085 tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0086 tree.Branch("event", &m_event, "event/l");
0087 tree.Branch("bunchCrossing", &m_bunchCrossing, "bunchCrossing/i");
0088 }
0089 void fill(const edm::EventAuxiliary& aux) {
0090 m_run = aux.id().run();
0091 m_luminosityBlock = aux.id().luminosityBlock();
0092 m_event = aux.id().event();
0093 m_bunchCrossing = aux.bunchCrossing();
0094 }
0095
0096 private:
0097 UInt_t m_run;
0098 UInt_t m_luminosityBlock;
0099 ULong64_t m_event;
0100 UInt_t m_bunchCrossing;
0101 } m_commonBranches;
0102
0103 class CommonLumiBranches {
0104 public:
0105 void branch(TTree& tree) {
0106 tree.Branch("run", &m_run, "run/i");
0107 tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0108 }
0109 void fill(const edm::LuminosityBlockID& id) {
0110 m_run = id.run();
0111 m_luminosityBlock = id.value();
0112 }
0113
0114 private:
0115 UInt_t m_run;
0116 UInt_t m_luminosityBlock;
0117 } m_commonLumiBranches;
0118
0119 class CommonRunBranches {
0120 public:
0121 void branch(TTree& tree) { tree.Branch("run", &m_run, "run/i"); }
0122 void fill(const edm::RunID& id) { m_run = id.run(); }
0123
0124 private:
0125 UInt_t m_run;
0126 } m_commonRunBranches;
0127
0128 std::vector<TableOutputBranches> m_tables;
0129 std::vector<TriggerOutputBranches> m_triggers;
0130 bool m_triggers_areSorted = false;
0131 std::vector<EventStringOutputBranches> m_evstrings;
0132
0133 std::vector<SummaryTableOutputBranches> m_runTables;
0134 std::vector<SummaryTableOutputBranches> m_lumiTables;
0135 std::vector<LumiOutputBranches> m_lumiTables2;
0136 std::vector<TableOutputBranches> m_runFlatTables;
0137
0138 std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
0139 };
0140
0141
0142
0143
0144
0145
0146
0147
0148
0149
0150
0151
0152 NanoAODOutputModule::NanoAODOutputModule(edm::ParameterSet const& pset)
0153 : edm::one::OutputModuleBase::OutputModuleBase(pset),
0154 edm::one::OutputModule<>(pset),
0155 m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
0156 m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
0157 m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
0158 m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
0159 m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
0160 m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
0161 m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
0162 m_processHistoryRegistry() {}
0163
0164 NanoAODOutputModule::~NanoAODOutputModule() {}
0165
0166 void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) {
0167
0168 edm::Service<edm::JobReport> jr;
0169 jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
0170
0171 if (m_autoFlush) {
0172 int64_t events = m_tree->GetEntriesFast();
0173 if (events == m_firstFlush) {
0174 m_tree->FlushBaskets();
0175 float maxMemory;
0176 if (m_autoFlush > 0) {
0177
0178
0179 float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
0180 maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
0181 } else if (m_tree->GetZipBytes() == 0) {
0182 maxMemory = 100 * 1024 * 1024;
0183 } else {
0184
0185 float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
0186 maxMemory = -m_autoFlush * cxnRatio;
0187 float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
0188 m_autoFlush = m_firstFlush / percentBytesDone;
0189 }
0190
0191
0192
0193
0194
0195 m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
0196 }
0197 if (m_eventsSinceFlush == m_autoFlush) {
0198 m_tree->FlushBaskets();
0199 m_eventsSinceFlush = 0;
0200 }
0201 m_eventsSinceFlush++;
0202 }
0203
0204 m_commonBranches.fill(iEvent.eventAuxiliary());
0205
0206 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0207 for (auto& t : m_tables)
0208 t.fill(iEvent, *m_tree, extensions);
0209 }
0210 if (!m_triggers_areSorted) {
0211 std::vector<std::string> pnames;
0212 for (auto& p : iEvent.processHistory())
0213 pnames.push_back(p.processName());
0214 std::sort(m_triggers.begin(), m_triggers.end(), [pnames](TriggerOutputBranches& a, TriggerOutputBranches& b) {
0215 return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
0216 (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
0217 });
0218 m_triggers_areSorted = true;
0219 }
0220
0221 for (auto& t : m_triggers)
0222 t.fill(iEvent, *m_tree);
0223
0224 for (auto& t : m_evstrings)
0225 t.fill(iEvent, *m_tree);
0226 tbb::this_task_arena::isolate([&] { m_tree->Fill(); });
0227
0228 m_processHistoryRegistry.registerProcessHistory(iEvent.processHistory());
0229 }
0230
0231 void NanoAODOutputModule::writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) {
0232 edm::Service<edm::JobReport> jr;
0233 jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
0234
0235 m_commonLumiBranches.fill(iLumi.id());
0236
0237 for (auto& t : m_lumiTables)
0238 t.fill(iLumi, *m_lumiTree);
0239
0240 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0241 for (auto& t : m_lumiTables2)
0242 t.fill(iLumi, *m_lumiTree, extensions);
0243 }
0244
0245 tbb::this_task_arena::isolate([&] { m_lumiTree->Fill(); });
0246
0247 m_processHistoryRegistry.registerProcessHistory(iLumi.processHistory());
0248 }
0249
0250 void NanoAODOutputModule::writeRun(edm::RunForOutput const& iRun) {
0251 edm::Service<edm::JobReport> jr;
0252 jr->reportRunNumber(m_jrToken, iRun.id().run());
0253
0254 m_commonRunBranches.fill(iRun.id());
0255
0256 for (auto& t : m_runTables)
0257 t.fill(iRun, *m_runTree);
0258
0259 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0260 for (auto& t : m_runFlatTables)
0261 t.fill(iRun, *m_runTree, extensions);
0262 }
0263
0264 edm::Handle<nanoaod::UniqueString> hstring;
0265 for (const auto& p : m_nanoMetadata) {
0266 iRun.getByToken(p.second, hstring);
0267 TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
0268 if (tos) {
0269 if (hstring->str() != tos->GetString())
0270 throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
0271 } else {
0272 auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
0273 m_file->WriteTObject(ostr.release(), p.first.c_str());
0274 }
0275 }
0276
0277 tbb::this_task_arena::isolate([&] { m_runTree->Fill(); });
0278
0279 m_processHistoryRegistry.registerProcessHistory(iRun.processHistory());
0280 }
0281
0282 bool NanoAODOutputModule::isFileOpen() const { return nullptr != m_file.get(); }
0283
0284 void NanoAODOutputModule::openFile(edm::FileBlock const&) {
0285 m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
0286 edm::Service<edm::JobReport> jr;
0287 cms::Digest branchHash;
0288 m_jrToken = jr->outputFileOpened(m_fileName,
0289 m_logicalFileName,
0290 std::string(),
0291 m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
0292 description().moduleLabel(),
0293 edm::createGlobalIdentifier(),
0294 std::string(),
0295 branchHash.digest().toString(),
0296 std::vector<std::string>());
0297
0298 if (m_compressionAlgorithm == std::string("ZLIB")) {
0299 m_file->SetCompressionAlgorithm(ROOT::kZLIB);
0300 } else if (m_compressionAlgorithm == std::string("LZMA")) {
0301 m_file->SetCompressionAlgorithm(ROOT::kLZMA);
0302 } else if (m_compressionAlgorithm == std::string("ZSTD")) {
0303 m_file->SetCompressionAlgorithm(ROOT::kZSTD);
0304 } else if (m_compressionAlgorithm == std::string("LZ4")) {
0305 m_file->SetCompressionAlgorithm(ROOT::kLZ4);
0306 } else {
0307 throw cms::Exception("Configuration")
0308 << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
0309 << "Allowed compression algorithms are ZLIB, LZMA, ZSTD, and LZ4\n";
0310 }
0311
0312 m_tables.clear();
0313 m_triggers.clear();
0314 m_triggers_areSorted = false;
0315 m_evstrings.clear();
0316 m_runTables.clear();
0317 m_lumiTables.clear();
0318 m_lumiTables2.clear();
0319 m_runFlatTables.clear();
0320 const auto& keeps = keptProducts();
0321 for (const auto& keep : keeps[edm::InEvent]) {
0322 if (keep.first->className() == "nanoaod::FlatTable")
0323 m_tables.emplace_back(keep.first, keep.second);
0324 else if (keep.first->className() == "edm::TriggerResults") {
0325 m_triggers.emplace_back(keep.first, keep.second);
0326 } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
0327 keep.first->productInstanceName() == "genModel") {
0328 m_evstrings.emplace_back(keep.first, keep.second, true);
0329 } else
0330 throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
0331 }
0332
0333 for (const auto& keep : keeps[edm::InLumi]) {
0334 if (keep.first->className() == "nanoaod::MergeableCounterTable")
0335 m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0336 else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0337 m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0338 else if (keep.first->className() == "nanoaod::FlatTable")
0339 m_lumiTables2.push_back(LumiOutputBranches(keep.first, keep.second));
0340 else
0341 throw cms::Exception(
0342 "Configuration",
0343 "NanoAODOutputModule cannot handle class " + keep.first->className() + " in LuminosityBlock branch");
0344 }
0345
0346 for (const auto& keep : keeps[edm::InRun]) {
0347 if (keep.first->className() == "nanoaod::MergeableCounterTable")
0348 m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0349 else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0350 m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0351 else if (keep.first->className() == "nanoaod::FlatTable")
0352 m_runFlatTables.emplace_back(keep.first, keep.second);
0353 else
0354 throw cms::Exception("Configuration",
0355 "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
0356 }
0357
0358
0359 m_tree = std::make_unique<TTree>("Events", "Events");
0360 m_tree->SetAutoSave(0);
0361 m_tree->SetAutoFlush(0);
0362 m_commonBranches.branch(*m_tree);
0363
0364 m_lumiTree = std::make_unique<TTree>("LuminosityBlocks", "LuminosityBlocks");
0365 m_lumiTree->SetAutoSave(0);
0366 m_commonLumiBranches.branch(*m_lumiTree);
0367
0368 m_runTree = std::make_unique<TTree>("Runs", "Runs");
0369 m_runTree->SetAutoSave(0);
0370 m_commonRunBranches.branch(*m_runTree);
0371
0372 if (m_writeProvenance) {
0373 m_metaDataTree = std::make_unique<TTree>(edm::poolNames::metaDataTreeName().c_str(), "Job metadata");
0374 m_metaDataTree->SetAutoSave(0);
0375 m_parameterSetsTree = std::make_unique<TTree>(edm::poolNames::parameterSetsTreeName().c_str(), "Parameter sets");
0376 m_parameterSetsTree->SetAutoSave(0);
0377 }
0378 }
0379 void NanoAODOutputModule::reallyCloseFile() {
0380 if (m_writeProvenance) {
0381 int basketSize = 16384;
0382 edm::fillParameterSetBranch(m_parameterSetsTree.get(), basketSize);
0383 edm::fillProcessHistoryBranch(m_metaDataTree.get(), basketSize, m_processHistoryRegistry);
0384 if (m_metaDataTree->GetNbranches() != 0) {
0385 m_metaDataTree->SetEntries(-1);
0386 }
0387 if (m_parameterSetsTree->GetNbranches() != 0) {
0388 m_parameterSetsTree->SetEntries(-1);
0389 }
0390 }
0391 m_file->Write();
0392 m_file->Close();
0393 m_file.reset();
0394 m_tree.release();
0395 m_lumiTree.release();
0396 m_runTree.release();
0397 m_metaDataTree.release();
0398 m_parameterSetsTree.release();
0399 edm::Service<edm::JobReport> jr;
0400 jr->outputFileClosed(m_jrToken);
0401 }
0402
0403 void NanoAODOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0404 edm::ParameterSetDescription desc;
0405
0406 desc.addUntracked<std::string>("fileName");
0407 desc.addUntracked<std::string>("logicalFileName", "");
0408
0409 desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
0410 desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
0411 ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
0412 desc.addUntracked<bool>("saveProvenance", true)
0413 ->setComment("Save process provenance information, e.g. for edmProvDump");
0414 desc.addUntracked<bool>("fakeNameForCrab", false)
0415 ->setComment(
0416 "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran "
0417 "(and publish) till crab is fixed");
0418 desc.addUntracked<int>("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file");
0419
0420
0421 const std::vector<std::string> keep = {"drop *",
0422 "keep nanoaodFlatTable_*Table_*_*",
0423 "keep edmTriggerResults_*_*_*",
0424 "keep String_*_genModel_*",
0425 "keep nanoaodMergeableCounterTable_*Table_*_*",
0426 "keep nanoaodUniqueString_nanoMetadata_*_*"};
0427 edm::one::OutputModule<>::fillDescription(desc, keep);
0428
0429
0430 edm::ParameterSetDescription dataSet;
0431 dataSet.setAllowAnything();
0432 desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
0433 ->setComment("PSet is only used by Data Operations and not by this module.");
0434
0435 edm::ParameterSetDescription branchSet;
0436 branchSet.setAllowAnything();
0437 desc.add<edm::ParameterSetDescription>("branches", branchSet);
0438
0439 descriptions.addDefault(desc);
0440 }
0441
// Framework plugin registration for NanoAODOutputModule (the macro presumably comes from MakerMacros.h, included above — confirm).
0442 DEFINE_FWK_MODULE(NanoAODOutputModule);