Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-11 23:28:06

0001 // -*- C++ -*-
0002 //
0003 // Package:     PhysicsTools/NanoAODOutput
0004 // Class  :     NanoAODOutputModule
0005 //
0006 // Implementation:
0007 //     [Notes on implementation]
0008 //
0009 // Original Author:  Christopher Jones
0010 //         Created:  Mon, 07 Aug 2017 14:21:41 GMT
0011 //
0012 
0013 // system include files
0014 #include <algorithm>
0015 #include <memory>
0016 
0017 #include "Compression.h"
0018 #include "TFile.h"
0019 #include "TObjString.h"
0020 #include "TROOT.h"
0021 #include "TTree.h"
0022 #include <string>
0023 
0024 // user include files
0025 #include "FWCore/Framework/interface/one/OutputModule.h"
0026 #include "FWCore/Framework/interface/RunForOutput.h"
0027 #include "FWCore/Framework/interface/LuminosityBlockForOutput.h"
0028 #include "FWCore/Framework/interface/EventForOutput.h"
0029 #include "FWCore/ServiceRegistry/interface/Service.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0032 #include "FWCore/MessageLogger/interface/JobReport.h"
0033 #include "FWCore/Utilities/interface/GlobalIdentifier.h"
0034 #include "FWCore/Utilities/interface/Digest.h"
0035 #include "IOPool/Provenance/interface/CommonProvenanceFiller.h"
0036 #include "DataFormats/Provenance/interface/BranchType.h"
0037 #include "DataFormats/Provenance/interface/BranchDescription.h"
0038 #include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h"
0039 #include "DataFormats/NanoAOD/interface/FlatTable.h"
0040 #include "DataFormats/NanoAOD/interface/UniqueString.h"
0041 #include "PhysicsTools/NanoAOD/plugins/TableOutputBranches.h"
0042 #include "PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h"
0043 #include "PhysicsTools/NanoAOD/plugins/TriggerOutputBranches.h"
0044 #include "PhysicsTools/NanoAOD/plugins/EventStringOutputBranches.h"
0045 #include "PhysicsTools/NanoAOD/plugins/SummaryTableOutputBranches.h"
0046 
0047 #include <iostream>
0048 
0049 #include "oneapi/tbb/task_arena.h"
0050 
0051 class NanoAODOutputModule : public edm::one::OutputModule<> {
0052 public:
0053   NanoAODOutputModule(edm::ParameterSet const& pset);
0054   ~NanoAODOutputModule() override;
0055 
0056   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0057 
0058 private:
0059   void write(edm::EventForOutput const& e) override;
0060   void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override;
0061   void writeRun(edm::RunForOutput const&) override;
0062   bool isFileOpen() const override;
0063   void openFile(edm::FileBlock const&) override;
0064   void reallyCloseFile() override;
0065 
0066   std::string m_fileName;
0067   std::string m_logicalFileName;
0068   int m_compressionLevel;
0069   int m_eventsSinceFlush{0};
0070   std::string m_compressionAlgorithm;
0071   bool m_writeProvenance;
0072   bool m_fakeName;  //crab workaround, remove after crab is fixed
0073   int m_autoFlush;
0074   edm::ProcessHistoryRegistry m_processHistoryRegistry;
0075   edm::JobReport::Token m_jrToken;
0076   std::unique_ptr<TFile> m_file;
0077   std::unique_ptr<TTree> m_tree, m_lumiTree, m_runTree, m_metaDataTree, m_parameterSetsTree;
0078 
0079   static constexpr int m_firstFlush{1000};
0080 
0081   class CommonEventBranches {
0082   public:
0083     void branch(TTree& tree) {
0084       tree.Branch("run", &m_run, "run/i");
0085       tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0086       tree.Branch("event", &m_event, "event/l");
0087       tree.Branch("bunchCrossing", &m_bunchCrossing, "bunchCrossing/i");
0088       tree.Branch("orbitNumber", &m_orbitNumber, "orbitNumber/i");
0089     }
0090     void fill(const edm::EventAuxiliary& aux) {
0091       m_run = aux.id().run();
0092       m_luminosityBlock = aux.id().luminosityBlock();
0093       m_event = aux.id().event();
0094       m_bunchCrossing = aux.bunchCrossing();
0095       m_orbitNumber = aux.orbitNumber();
0096     }
0097 
0098   private:
0099     UInt_t m_run;
0100     UInt_t m_luminosityBlock;
0101     ULong64_t m_event;
0102     UInt_t m_bunchCrossing;
0103     UInt_t m_orbitNumber;
0104   } m_commonBranches;
0105 
0106   class CommonLumiBranches {
0107   public:
0108     void branch(TTree& tree) {
0109       tree.Branch("run", &m_run, "run/i");
0110       tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0111     }
0112     void fill(const edm::LuminosityBlockID& id) {
0113       m_run = id.run();
0114       m_luminosityBlock = id.value();
0115     }
0116 
0117   private:
0118     UInt_t m_run;
0119     UInt_t m_luminosityBlock;
0120   } m_commonLumiBranches;
0121 
0122   class CommonRunBranches {
0123   public:
0124     void branch(TTree& tree) { tree.Branch("run", &m_run, "run/i"); }
0125     void fill(const edm::RunID& id) { m_run = id.run(); }
0126 
0127   private:
0128     UInt_t m_run;
0129   } m_commonRunBranches;
0130 
0131   std::vector<TableOutputBranches> m_tables;
0132   std::vector<TriggerOutputBranches> m_triggers;
0133   bool m_triggers_areSorted = false;
0134   std::vector<EventStringOutputBranches> m_evstrings;
0135 
0136   std::vector<SummaryTableOutputBranches> m_runTables;
0137   std::vector<SummaryTableOutputBranches> m_lumiTables;
0138   std::vector<LumiOutputBranches> m_lumiTables2;
0139   std::vector<TableOutputBranches> m_runFlatTables;
0140 
0141   std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
0142 };
0143 
0144 //
0145 // constants, enums and typedefs
0146 //
0147 
0148 //
0149 // static data member definitions
0150 //
0151 
0152 //
0153 // constructors and destructor
0154 //
0155 NanoAODOutputModule::NanoAODOutputModule(edm::ParameterSet const& pset)
0156     : edm::one::OutputModuleBase::OutputModuleBase(pset),
0157       edm::one::OutputModule<>(pset),
0158       m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
0159       m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
0160       m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
0161       m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
0162       m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
0163       m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
0164       m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
0165       m_processHistoryRegistry() {}
0166 
0167 NanoAODOutputModule::~NanoAODOutputModule() {}
0168 
0169 void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) {
0170   //Get data from 'e' and write it to the file
0171   edm::Service<edm::JobReport> jr;
0172   jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
0173 
0174   if (m_autoFlush) {
0175     int64_t events = m_tree->GetEntriesFast();
0176     if (events == m_firstFlush) {
0177       m_tree->FlushBaskets();
0178       float maxMemory;
0179       if (m_autoFlush > 0) {
0180         // Estimate the memory we'll be using at the first full flush by
0181         // linearly scaling the number of events.
0182         float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
0183         maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
0184       } else if (m_tree->GetZipBytes() == 0) {
0185         maxMemory = 100 * 1024 * 1024;  // Degenerate case of no information in the tree; arbitrary value
0186       } else {
0187         // Estimate the memory we'll be using by scaling the current compression ratio.
0188         float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
0189         maxMemory = -m_autoFlush * cxnRatio;
0190         float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
0191         m_autoFlush = m_firstFlush / percentBytesDone;
0192       }
0193       //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
0194       //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
0195       //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
0196       //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
0197       //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
0198       m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
0199     }
0200     if (m_eventsSinceFlush == m_autoFlush) {
0201       m_tree->FlushBaskets();
0202       m_eventsSinceFlush = 0;
0203     }
0204     m_eventsSinceFlush++;
0205   }
0206 
0207   m_commonBranches.fill(iEvent.eventAuxiliary());
0208   // fill all tables, starting from main tables and then doing extension tables
0209   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0210     for (auto& t : m_tables)
0211       t.fill(iEvent, *m_tree, extensions);
0212   }
0213   if (!m_triggers_areSorted) {  // sort triggers/flags in inverse processHistory order, to save without any special label the most recent ones
0214     std::vector<std::string> pnames;
0215     for (auto& p : iEvent.processHistory())
0216       pnames.push_back(p.processName());
0217     std::sort(m_triggers.begin(), m_triggers.end(), [pnames](TriggerOutputBranches& a, TriggerOutputBranches& b) {
0218       return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
0219               (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
0220     });
0221     m_triggers_areSorted = true;
0222   }
0223   // fill triggers
0224   for (auto& t : m_triggers)
0225     t.fill(iEvent, *m_tree);
0226   // fill event branches
0227   for (auto& t : m_evstrings)
0228     t.fill(iEvent, *m_tree);
0229   tbb::this_task_arena::isolate([&] { m_tree->Fill(); });
0230 
0231   m_processHistoryRegistry.registerProcessHistory(iEvent.processHistory());
0232 }
0233 
0234 void NanoAODOutputModule::writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) {
0235   edm::Service<edm::JobReport> jr;
0236   jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
0237 
0238   m_commonLumiBranches.fill(iLumi.id());
0239 
0240   for (auto& t : m_lumiTables)
0241     t.fill(iLumi, *m_lumiTree);
0242 
0243   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0244     for (auto& t : m_lumiTables2)
0245       t.fill(iLumi, *m_lumiTree, extensions);
0246   }
0247 
0248   tbb::this_task_arena::isolate([&] { m_lumiTree->Fill(); });
0249 
0250   m_processHistoryRegistry.registerProcessHistory(iLumi.processHistory());
0251 }
0252 
0253 void NanoAODOutputModule::writeRun(edm::RunForOutput const& iRun) {
0254   edm::Service<edm::JobReport> jr;
0255   jr->reportRunNumber(m_jrToken, iRun.id().run());
0256 
0257   m_commonRunBranches.fill(iRun.id());
0258 
0259   for (auto& t : m_runTables)
0260     t.fill(iRun, *m_runTree);
0261 
0262   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0263     for (auto& t : m_runFlatTables)
0264       t.fill(iRun, *m_runTree, extensions);
0265   }
0266 
0267   edm::Handle<nanoaod::UniqueString> hstring;
0268   for (const auto& p : m_nanoMetadata) {
0269     iRun.getByToken(p.second, hstring);
0270     TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
0271     if (tos) {
0272       if (hstring->str() != tos->GetString())
0273         throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
0274     } else {
0275       auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
0276       m_file->WriteTObject(ostr.release(), p.first.c_str());
0277     }
0278   }
0279 
0280   tbb::this_task_arena::isolate([&] { m_runTree->Fill(); });
0281 
0282   m_processHistoryRegistry.registerProcessHistory(iRun.processHistory());
0283 }
0284 
0285 bool NanoAODOutputModule::isFileOpen() const { return nullptr != m_file.get(); }
0286 
0287 void NanoAODOutputModule::openFile(edm::FileBlock const&) {
0288   m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
0289   edm::Service<edm::JobReport> jr;
0290   cms::Digest branchHash;
0291   m_jrToken = jr->outputFileOpened(m_fileName,
0292                                    m_logicalFileName,
0293                                    std::string(),
0294                                    m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
0295                                    description().moduleLabel(),
0296                                    edm::createGlobalIdentifier(),
0297                                    std::string(),
0298                                    branchHash.digest().toString(),
0299                                    std::vector<std::string>());
0300 
0301   if (m_compressionAlgorithm == std::string("ZLIB")) {
0302     m_file->SetCompressionAlgorithm(ROOT::kZLIB);
0303   } else if (m_compressionAlgorithm == std::string("LZMA")) {
0304     m_file->SetCompressionAlgorithm(ROOT::kLZMA);
0305   } else if (m_compressionAlgorithm == std::string("ZSTD")) {
0306     m_file->SetCompressionAlgorithm(ROOT::kZSTD);
0307   } else if (m_compressionAlgorithm == std::string("LZ4")) {
0308     m_file->SetCompressionAlgorithm(ROOT::kLZ4);
0309   } else {
0310     throw cms::Exception("Configuration")
0311         << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
0312         << "Allowed compression algorithms are ZLIB, LZMA, ZSTD, and LZ4\n";
0313   }
0314   /* Setup file structure here */
0315   m_tables.clear();
0316   m_triggers.clear();
0317   m_triggers_areSorted = false;
0318   m_evstrings.clear();
0319   m_runTables.clear();
0320   m_lumiTables.clear();
0321   m_lumiTables2.clear();
0322   m_runFlatTables.clear();
0323   const auto& keeps = keptProducts();
0324   for (const auto& keep : keeps[edm::InEvent]) {
0325     if (keep.first->className() == "nanoaod::FlatTable")
0326       m_tables.emplace_back(keep.first, keep.second);
0327     else if (keep.first->className() == "edm::TriggerResults") {
0328       m_triggers.emplace_back(keep.first, keep.second);
0329     } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
0330                keep.first->productInstanceName() == "genModel") {  // friendlyClassName == "String"
0331       m_evstrings.emplace_back(keep.first, keep.second, true);     // update only at lumiBlock transitions
0332     } else
0333       throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
0334   }
0335 
0336   for (const auto& keep : keeps[edm::InLumi]) {
0337     if (keep.first->className() == "nanoaod::MergeableCounterTable")
0338       m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0339     else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0340       m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0341     else if (keep.first->className() == "nanoaod::FlatTable")
0342       m_lumiTables2.push_back(LumiOutputBranches(keep.first, keep.second));
0343     else
0344       throw cms::Exception(
0345           "Configuration",
0346           "NanoAODOutputModule cannot handle class " + keep.first->className() + " in LuminosityBlock branch");
0347   }
0348 
0349   for (const auto& keep : keeps[edm::InRun]) {
0350     if (keep.first->className() == "nanoaod::MergeableCounterTable")
0351       m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0352     else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0353       m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0354     else if (keep.first->className() == "nanoaod::FlatTable")
0355       m_runFlatTables.emplace_back(keep.first, keep.second);
0356     else
0357       throw cms::Exception("Configuration",
0358                            "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
0359   }
0360 
0361   // create the trees
0362   m_tree = std::make_unique<TTree>("Events", "Events");
0363   m_tree->SetAutoSave(0);
0364   m_tree->SetAutoFlush(0);
0365   m_commonBranches.branch(*m_tree);
0366 
0367   m_lumiTree = std::make_unique<TTree>("LuminosityBlocks", "LuminosityBlocks");
0368   m_lumiTree->SetAutoSave(0);
0369   m_commonLumiBranches.branch(*m_lumiTree);
0370 
0371   m_runTree = std::make_unique<TTree>("Runs", "Runs");
0372   m_runTree->SetAutoSave(0);
0373   m_commonRunBranches.branch(*m_runTree);
0374 
0375   if (m_writeProvenance) {
0376     m_metaDataTree = std::make_unique<TTree>(edm::poolNames::metaDataTreeName().c_str(), "Job metadata");
0377     m_metaDataTree->SetAutoSave(0);
0378     m_parameterSetsTree = std::make_unique<TTree>(edm::poolNames::parameterSetsTreeName().c_str(), "Parameter sets");
0379     m_parameterSetsTree->SetAutoSave(0);
0380   }
0381 }
0382 void NanoAODOutputModule::reallyCloseFile() {
0383   if (m_writeProvenance) {
0384     int basketSize = 16384;  // fixme configurable?
0385     edm::fillParameterSetBranch(m_parameterSetsTree.get(), basketSize);
0386     edm::fillProcessHistoryBranch(m_metaDataTree.get(), basketSize, m_processHistoryRegistry);
0387     if (m_metaDataTree->GetNbranches() != 0) {
0388       m_metaDataTree->SetEntries(-1);
0389     }
0390     if (m_parameterSetsTree->GetNbranches() != 0) {
0391       m_parameterSetsTree->SetEntries(-1);
0392     }
0393   }
0394   m_file->Write();
0395   m_file->Close();
0396   m_file.reset();
0397   m_tree.release();               // apparently root has ownership
0398   m_lumiTree.release();           //
0399   m_runTree.release();            //
0400   m_metaDataTree.release();       //
0401   m_parameterSetsTree.release();  //
0402   edm::Service<edm::JobReport> jr;
0403   jr->outputFileClosed(m_jrToken);
0404 }
0405 
0406 void NanoAODOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0407   edm::ParameterSetDescription desc;
0408 
0409   desc.addUntracked<std::string>("fileName");
0410   desc.addUntracked<std::string>("logicalFileName", "");
0411 
0412   desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
0413   desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
0414       ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
0415   desc.addUntracked<bool>("saveProvenance", true)
0416       ->setComment("Save process provenance information, e.g. for edmProvDump");
0417   desc.addUntracked<bool>("fakeNameForCrab", false)
0418       ->setComment(
0419           "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran "
0420           "(and publish) till crab is fixed");
0421   desc.addUntracked<int>("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file");
0422 
0423   //replace with whatever you want to get from the EDM by default
0424   const std::vector<std::string> keep = {"drop *",
0425                                          "keep nanoaodFlatTable_*Table_*_*",
0426                                          "keep edmTriggerResults_*_*_*",
0427                                          "keep String_*_genModel_*",
0428                                          "keep nanoaodMergeableCounterTable_*Table_*_*",
0429                                          "keep nanoaodUniqueString_nanoMetadata_*_*"};
0430   edm::one::OutputModule<>::fillDescription(desc, keep);
0431 
0432   //Used by Workflow management for their own meta data
0433   edm::ParameterSetDescription dataSet;
0434   dataSet.setAllowAnything();
0435   desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
0436       ->setComment("PSet is only used by Data Operations and not by this module.");
0437 
0438   edm::ParameterSetDescription branchSet;
0439   branchSet.setAllowAnything();
0440   desc.add<edm::ParameterSetDescription>("branches", branchSet);
0441 
0442   descriptions.addDefault(desc);
0443 }
0444 
0445 DEFINE_FWK_MODULE(NanoAODOutputModule);