File indexing completed on 2022-06-13 09:27:10
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #include <algorithm>
0015 #include <memory>
0016
0017 #include "Compression.h"
0018 #include "TFile.h"
0019 #include "TObjString.h"
0020 #include "TROOT.h"
0021 #include "TTree.h"
0022 #include <string>
0023
0024
0025 #include "FWCore/Framework/interface/one/OutputModule.h"
0026 #include "FWCore/Framework/interface/RunForOutput.h"
0027 #include "FWCore/Framework/interface/LuminosityBlockForOutput.h"
0028 #include "FWCore/Framework/interface/EventForOutput.h"
0029 #include "FWCore/ServiceRegistry/interface/Service.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0032 #include "FWCore/MessageLogger/interface/JobReport.h"
0033 #include "FWCore/Utilities/interface/GlobalIdentifier.h"
0034 #include "FWCore/Utilities/interface/Digest.h"
0035 #include "IOPool/Provenance/interface/CommonProvenanceFiller.h"
0036 #include "DataFormats/Provenance/interface/BranchType.h"
0037 #include "DataFormats/Provenance/interface/BranchDescription.h"
0038 #include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h"
0039 #include "DataFormats/NanoAOD/interface/FlatTable.h"
0040 #include "DataFormats/NanoAOD/interface/UniqueString.h"
0041 #include "PhysicsTools/NanoAOD/plugins/TableOutputBranches.h"
0042 #include "PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h"
0043 #include "PhysicsTools/NanoAOD/plugins/TriggerOutputBranches.h"
0044 #include "PhysicsTools/NanoAOD/plugins/EventStringOutputBranches.h"
0045 #include "PhysicsTools/NanoAOD/plugins/SummaryTableOutputBranches.h"
0046
0047 #include <iostream>
0048
0049 #include "oneapi/tbb/task_arena.h"
0050
0051 class NanoAODOutputModule : public edm::one::OutputModule<> {
0052 public:
0053 NanoAODOutputModule(edm::ParameterSet const& pset);
0054 ~NanoAODOutputModule() override;
0055
0056 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0057
0058 private:
0059 void write(edm::EventForOutput const& e) override;
0060 void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override;
0061 void writeRun(edm::RunForOutput const&) override;
0062 bool isFileOpen() const override;
0063 void openFile(edm::FileBlock const&) override;
0064 void reallyCloseFile() override;
0065
0066 std::string m_fileName;
0067 std::string m_logicalFileName;
0068 int m_compressionLevel;
0069 int m_eventsSinceFlush{0};
0070 std::string m_compressionAlgorithm;
0071 bool m_writeProvenance;
0072 bool m_fakeName;
0073 int m_autoFlush;
0074 edm::ProcessHistoryRegistry m_processHistoryRegistry;
0075 edm::JobReport::Token m_jrToken;
0076 std::unique_ptr<TFile> m_file;
0077 std::unique_ptr<TTree> m_tree, m_lumiTree, m_runTree, m_metaDataTree, m_parameterSetsTree;
0078
0079 static constexpr int m_firstFlush{1000};
0080
0081 class CommonEventBranches {
0082 public:
0083 void branch(TTree& tree) {
0084 tree.Branch("run", &m_run, "run/i");
0085 tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0086 tree.Branch("event", &m_event, "event/l");
0087 }
0088 void fill(const edm::EventID& id) {
0089 m_run = id.run();
0090 m_luminosityBlock = id.luminosityBlock();
0091 m_event = id.event();
0092 }
0093
0094 private:
0095 UInt_t m_run;
0096 UInt_t m_luminosityBlock;
0097 ULong64_t m_event;
0098 } m_commonBranches;
0099
0100 class CommonLumiBranches {
0101 public:
0102 void branch(TTree& tree) {
0103 tree.Branch("run", &m_run, "run/i");
0104 tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0105 }
0106 void fill(const edm::LuminosityBlockID& id) {
0107 m_run = id.run();
0108 m_luminosityBlock = id.value();
0109 }
0110
0111 private:
0112 UInt_t m_run;
0113 UInt_t m_luminosityBlock;
0114 } m_commonLumiBranches;
0115
0116 class CommonRunBranches {
0117 public:
0118 void branch(TTree& tree) { tree.Branch("run", &m_run, "run/i"); }
0119 void fill(const edm::RunID& id) { m_run = id.run(); }
0120
0121 private:
0122 UInt_t m_run;
0123 } m_commonRunBranches;
0124
0125 std::vector<TableOutputBranches> m_tables;
0126 std::vector<TriggerOutputBranches> m_triggers;
0127 bool m_triggers_areSorted = false;
0128 std::vector<EventStringOutputBranches> m_evstrings;
0129
0130 std::vector<SummaryTableOutputBranches> m_runTables;
0131 std::vector<SummaryTableOutputBranches> m_lumiTables;
0132 std::vector<LumiOutputBranches> m_lumiTables2;
0133 std::vector<TableOutputBranches> m_runFlatTables;
0134
0135 std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
0136 };
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146
0147
0148
0149 NanoAODOutputModule::NanoAODOutputModule(edm::ParameterSet const& pset)
0150 : edm::one::OutputModuleBase::OutputModuleBase(pset),
0151 edm::one::OutputModule<>(pset),
0152 m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
0153 m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
0154 m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
0155 m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
0156 m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
0157 m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
0158 m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
0159 m_processHistoryRegistry() {}
0160
0161 NanoAODOutputModule::~NanoAODOutputModule() {}
0162
0163 void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) {
0164
0165 edm::Service<edm::JobReport> jr;
0166 jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
0167
0168 if (m_autoFlush) {
0169 int64_t events = m_tree->GetEntriesFast();
0170 if (events == m_firstFlush) {
0171 m_tree->FlushBaskets();
0172 float maxMemory;
0173 if (m_autoFlush > 0) {
0174
0175
0176 float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
0177 maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
0178 } else if (m_tree->GetZipBytes() == 0) {
0179 maxMemory = 100 * 1024 * 1024;
0180 } else {
0181
0182 float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
0183 maxMemory = -m_autoFlush * cxnRatio;
0184 float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
0185 m_autoFlush = m_firstFlush / percentBytesDone;
0186 }
0187
0188
0189
0190
0191
0192 m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
0193 }
0194 if (m_eventsSinceFlush == m_autoFlush) {
0195 m_tree->FlushBaskets();
0196 m_eventsSinceFlush = 0;
0197 }
0198 m_eventsSinceFlush++;
0199 }
0200
0201 m_commonBranches.fill(iEvent.id());
0202
0203 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0204 for (auto& t : m_tables)
0205 t.fill(iEvent, *m_tree, extensions);
0206 }
0207 if (!m_triggers_areSorted) {
0208 std::vector<std::string> pnames;
0209 for (auto& p : iEvent.processHistory())
0210 pnames.push_back(p.processName());
0211 std::sort(m_triggers.begin(), m_triggers.end(), [pnames](TriggerOutputBranches& a, TriggerOutputBranches& b) {
0212 return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
0213 (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
0214 });
0215 m_triggers_areSorted = true;
0216 }
0217
0218 for (auto& t : m_triggers)
0219 t.fill(iEvent, *m_tree);
0220
0221 for (auto& t : m_evstrings)
0222 t.fill(iEvent, *m_tree);
0223 tbb::this_task_arena::isolate([&] { m_tree->Fill(); });
0224
0225 m_processHistoryRegistry.registerProcessHistory(iEvent.processHistory());
0226 }
0227
0228 void NanoAODOutputModule::writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) {
0229 edm::Service<edm::JobReport> jr;
0230 jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
0231
0232 m_commonLumiBranches.fill(iLumi.id());
0233
0234 for (auto& t : m_lumiTables)
0235 t.fill(iLumi, *m_lumiTree);
0236
0237 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0238 for (auto& t : m_lumiTables2)
0239 t.fill(iLumi, *m_lumiTree, extensions);
0240 }
0241
0242 tbb::this_task_arena::isolate([&] { m_lumiTree->Fill(); });
0243
0244 m_processHistoryRegistry.registerProcessHistory(iLumi.processHistory());
0245 }
0246
0247 void NanoAODOutputModule::writeRun(edm::RunForOutput const& iRun) {
0248 edm::Service<edm::JobReport> jr;
0249 jr->reportRunNumber(m_jrToken, iRun.id().run());
0250
0251 m_commonRunBranches.fill(iRun.id());
0252
0253 for (auto& t : m_runTables)
0254 t.fill(iRun, *m_runTree);
0255
0256 for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0257 for (auto& t : m_runFlatTables)
0258 t.fill(iRun, *m_runTree, extensions);
0259 }
0260
0261 edm::Handle<nanoaod::UniqueString> hstring;
0262 for (const auto& p : m_nanoMetadata) {
0263 iRun.getByToken(p.second, hstring);
0264 TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
0265 if (tos) {
0266 if (hstring->str() != tos->GetString())
0267 throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
0268 } else {
0269 auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
0270 m_file->WriteTObject(ostr.release(), p.first.c_str());
0271 }
0272 }
0273
0274 tbb::this_task_arena::isolate([&] { m_runTree->Fill(); });
0275
0276 m_processHistoryRegistry.registerProcessHistory(iRun.processHistory());
0277 }
0278
0279 bool NanoAODOutputModule::isFileOpen() const { return nullptr != m_file.get(); }
0280
0281 void NanoAODOutputModule::openFile(edm::FileBlock const&) {
0282 m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
0283 edm::Service<edm::JobReport> jr;
0284 cms::Digest branchHash;
0285 m_jrToken = jr->outputFileOpened(m_fileName,
0286 m_logicalFileName,
0287 std::string(),
0288 m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
0289 description().moduleLabel(),
0290 edm::createGlobalIdentifier(),
0291 std::string(),
0292 branchHash.digest().toString(),
0293 std::vector<std::string>());
0294
0295 if (m_compressionAlgorithm == std::string("ZLIB")) {
0296 m_file->SetCompressionAlgorithm(ROOT::kZLIB);
0297 } else if (m_compressionAlgorithm == std::string("LZMA")) {
0298 m_file->SetCompressionAlgorithm(ROOT::kLZMA);
0299 } else if (m_compressionAlgorithm == std::string("ZSTD")) {
0300 m_file->SetCompressionAlgorithm(ROOT::kZSTD);
0301 } else if (m_compressionAlgorithm == std::string("LZ4")) {
0302 m_file->SetCompressionAlgorithm(ROOT::kLZ4);
0303 } else {
0304 throw cms::Exception("Configuration")
0305 << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
0306 << "Allowed compression algorithms are ZLIB, LZMA, ZSTD, and LZ4\n";
0307 }
0308
0309 m_tables.clear();
0310 m_triggers.clear();
0311 m_triggers_areSorted = false;
0312 m_evstrings.clear();
0313 m_runTables.clear();
0314 m_lumiTables.clear();
0315 m_lumiTables2.clear();
0316 m_runFlatTables.clear();
0317 const auto& keeps = keptProducts();
0318 for (const auto& keep : keeps[edm::InEvent]) {
0319 if (keep.first->className() == "nanoaod::FlatTable")
0320 m_tables.emplace_back(keep.first, keep.second);
0321 else if (keep.first->className() == "edm::TriggerResults") {
0322 m_triggers.emplace_back(keep.first, keep.second);
0323 } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
0324 keep.first->productInstanceName() == "genModel") {
0325 m_evstrings.emplace_back(keep.first, keep.second, true);
0326 } else
0327 throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
0328 }
0329
0330 for (const auto& keep : keeps[edm::InLumi]) {
0331 if (keep.first->className() == "nanoaod::MergeableCounterTable")
0332 m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0333 else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0334 m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0335 else if (keep.first->className() == "nanoaod::FlatTable")
0336 m_lumiTables2.push_back(LumiOutputBranches(keep.first, keep.second));
0337 else
0338 throw cms::Exception(
0339 "Configuration",
0340 "NanoAODOutputModule cannot handle class " + keep.first->className() + " in LuminosityBlock branch");
0341 }
0342
0343 for (const auto& keep : keeps[edm::InRun]) {
0344 if (keep.first->className() == "nanoaod::MergeableCounterTable")
0345 m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0346 else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0347 m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0348 else if (keep.first->className() == "nanoaod::FlatTable")
0349 m_runFlatTables.emplace_back(keep.first, keep.second);
0350 else
0351 throw cms::Exception("Configuration",
0352 "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
0353 }
0354
0355
0356 m_tree = std::make_unique<TTree>("Events", "Events");
0357 m_tree->SetAutoSave(0);
0358 m_tree->SetAutoFlush(0);
0359 m_commonBranches.branch(*m_tree);
0360
0361 m_lumiTree = std::make_unique<TTree>("LuminosityBlocks", "LuminosityBlocks");
0362 m_lumiTree->SetAutoSave(0);
0363 m_commonLumiBranches.branch(*m_lumiTree);
0364
0365 m_runTree = std::make_unique<TTree>("Runs", "Runs");
0366 m_runTree->SetAutoSave(0);
0367 m_commonRunBranches.branch(*m_runTree);
0368
0369 if (m_writeProvenance) {
0370 m_metaDataTree = std::make_unique<TTree>(edm::poolNames::metaDataTreeName().c_str(), "Job metadata");
0371 m_metaDataTree->SetAutoSave(0);
0372 m_parameterSetsTree = std::make_unique<TTree>(edm::poolNames::parameterSetsTreeName().c_str(), "Parameter sets");
0373 m_parameterSetsTree->SetAutoSave(0);
0374 }
0375 }
0376 void NanoAODOutputModule::reallyCloseFile() {
0377 if (m_writeProvenance) {
0378 int basketSize = 16384;
0379 edm::fillParameterSetBranch(m_parameterSetsTree.get(), basketSize);
0380 edm::fillProcessHistoryBranch(m_metaDataTree.get(), basketSize, m_processHistoryRegistry);
0381 if (m_metaDataTree->GetNbranches() != 0) {
0382 m_metaDataTree->SetEntries(-1);
0383 }
0384 if (m_parameterSetsTree->GetNbranches() != 0) {
0385 m_parameterSetsTree->SetEntries(-1);
0386 }
0387 }
0388 m_file->Write();
0389 m_file->Close();
0390 m_file.reset();
0391 m_tree.release();
0392 m_lumiTree.release();
0393 m_runTree.release();
0394 m_metaDataTree.release();
0395 m_parameterSetsTree.release();
0396 edm::Service<edm::JobReport> jr;
0397 jr->outputFileClosed(m_jrToken);
0398 }
0399
0400 void NanoAODOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0401 edm::ParameterSetDescription desc;
0402
0403 desc.addUntracked<std::string>("fileName");
0404 desc.addUntracked<std::string>("logicalFileName", "");
0405
0406 desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
0407 desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
0408 ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
0409 desc.addUntracked<bool>("saveProvenance", true)
0410 ->setComment("Save process provenance information, e.g. for edmProvDump");
0411 desc.addUntracked<bool>("fakeNameForCrab", false)
0412 ->setComment(
0413 "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran "
0414 "(and publish) till crab is fixed");
0415 desc.addUntracked<int>("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file");
0416
0417
0418 const std::vector<std::string> keep = {"drop *",
0419 "keep nanoaodFlatTable_*Table_*_*",
0420 "keep edmTriggerResults_*_*_*",
0421 "keep String_*_genModel_*",
0422 "keep nanoaodMergeableCounterTable_*Table_*_*",
0423 "keep nanoaodUniqueString_nanoMetadata_*_*"};
0424 edm::one::OutputModule<>::fillDescription(desc, keep);
0425
0426
0427 edm::ParameterSetDescription dataSet;
0428 dataSet.setAllowAnything();
0429 desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
0430 ->setComment("PSet is only used by Data Operations and not by this module.");
0431
0432 edm::ParameterSetDescription branchSet;
0433 branchSet.setAllowAnything();
0434 desc.add<edm::ParameterSetDescription>("branches", branchSet);
0435
0436 descriptions.addDefault(desc);
0437 }
0438
// Module registration through the macro from FWCore/Framework/interface/MakerMacros.h.
// NOTE(review): presumably this makes NanoAODOutputModule loadable by name from a
// configuration -- confirm against the macro definition.
0439 DEFINE_FWK_MODULE(NanoAODOutputModule);