Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:16:07

0001 // -*- C++ -*-
0002 //
0003 // Package:     PhysicsTools/NanoAODOutput
0004 // Class  :     NanoAODOutputModule
0005 //
0006 // Implementation:
0007 //     [Notes on implementation]
0008 //
0009 // Original Author:  Christopher Jones
0010 //         Created:  Mon, 07 Aug 2017 14:21:41 GMT
0011 //
0012 
0013 // system include files
0014 #include <algorithm>
0015 #include <memory>
0016 
0017 #include "Compression.h"
0018 #include "TFile.h"
0019 #include "TObjString.h"
0020 #include "TROOT.h"
0021 #include "TTree.h"
0022 #include <string>
0023 
0024 // user include files
0025 #include "FWCore/Framework/interface/one/OutputModule.h"
0026 #include "FWCore/Framework/interface/RunForOutput.h"
0027 #include "FWCore/Framework/interface/LuminosityBlockForOutput.h"
0028 #include "FWCore/Framework/interface/EventForOutput.h"
0029 #include "FWCore/ServiceRegistry/interface/Service.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0032 #include "FWCore/MessageLogger/interface/JobReport.h"
0033 #include "FWCore/Utilities/interface/GlobalIdentifier.h"
0034 #include "FWCore/Utilities/interface/Digest.h"
0035 #include "IOPool/Provenance/interface/CommonProvenanceFiller.h"
0036 #include "DataFormats/Provenance/interface/BranchType.h"
0037 #include "DataFormats/Provenance/interface/BranchDescription.h"
0038 #include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h"
0039 #include "DataFormats/NanoAOD/interface/FlatTable.h"
0040 #include "DataFormats/NanoAOD/interface/UniqueString.h"
0041 #include "PhysicsTools/NanoAOD/plugins/TableOutputBranches.h"
0042 #include "PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h"
0043 #include "PhysicsTools/NanoAOD/plugins/TriggerOutputBranches.h"
0044 #include "PhysicsTools/NanoAOD/plugins/EventStringOutputBranches.h"
0045 #include "PhysicsTools/NanoAOD/plugins/SummaryTableOutputBranches.h"
0046 
0047 #include <iostream>
0048 
0049 #include "oneapi/tbb/task_arena.h"
0050 
0051 class NanoAODOutputModule : public edm::one::OutputModule<> {
0052 public:
0053   NanoAODOutputModule(edm::ParameterSet const& pset);
0054   ~NanoAODOutputModule() override;
0055 
0056   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0057 
0058 private:
0059   void write(edm::EventForOutput const& e) override;
0060   void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override;
0061   void writeRun(edm::RunForOutput const&) override;
0062   bool isFileOpen() const override;
0063   void openFile(edm::FileBlock const&) override;
0064   void reallyCloseFile() override;
0065 
0066   std::string m_fileName;
0067   std::string m_logicalFileName;
0068   int m_compressionLevel;
0069   int m_eventsSinceFlush{0};
0070   std::string m_compressionAlgorithm;
0071   bool m_writeProvenance;
0072   bool m_fakeName;  //crab workaround, remove after crab is fixed
0073   int m_autoFlush;
0074   edm::ProcessHistoryRegistry m_processHistoryRegistry;
0075   edm::JobReport::Token m_jrToken;
0076   std::unique_ptr<TFile> m_file;
0077   std::unique_ptr<TTree> m_tree, m_lumiTree, m_runTree, m_metaDataTree, m_parameterSetsTree;
0078 
0079   static constexpr int m_firstFlush{1000};
0080 
0081   class CommonEventBranches {
0082   public:
0083     void branch(TTree& tree) {
0084       tree.Branch("run", &m_run, "run/i");
0085       tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0086       tree.Branch("event", &m_event, "event/l");
0087       tree.Branch("bunchCrossing", &m_bunchCrossing, "bunchCrossing/i");
0088     }
0089     void fill(const edm::EventAuxiliary& aux) {
0090       m_run = aux.id().run();
0091       m_luminosityBlock = aux.id().luminosityBlock();
0092       m_event = aux.id().event();
0093       m_bunchCrossing = aux.bunchCrossing();
0094     }
0095 
0096   private:
0097     UInt_t m_run;
0098     UInt_t m_luminosityBlock;
0099     ULong64_t m_event;
0100     UInt_t m_bunchCrossing;
0101   } m_commonBranches;
0102 
0103   class CommonLumiBranches {
0104   public:
0105     void branch(TTree& tree) {
0106       tree.Branch("run", &m_run, "run/i");
0107       tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0108     }
0109     void fill(const edm::LuminosityBlockID& id) {
0110       m_run = id.run();
0111       m_luminosityBlock = id.value();
0112     }
0113 
0114   private:
0115     UInt_t m_run;
0116     UInt_t m_luminosityBlock;
0117   } m_commonLumiBranches;
0118 
0119   class CommonRunBranches {
0120   public:
0121     void branch(TTree& tree) { tree.Branch("run", &m_run, "run/i"); }
0122     void fill(const edm::RunID& id) { m_run = id.run(); }
0123 
0124   private:
0125     UInt_t m_run;
0126   } m_commonRunBranches;
0127 
0128   std::vector<TableOutputBranches> m_tables;
0129   std::vector<TriggerOutputBranches> m_triggers;
0130   bool m_triggers_areSorted = false;
0131   std::vector<EventStringOutputBranches> m_evstrings;
0132 
0133   std::vector<SummaryTableOutputBranches> m_runTables;
0134   std::vector<SummaryTableOutputBranches> m_lumiTables;
0135   std::vector<LumiOutputBranches> m_lumiTables2;
0136   std::vector<TableOutputBranches> m_runFlatTables;
0137 
0138   std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
0139 };
0140 
0141 //
0142 // constants, enums and typedefs
0143 //
0144 
0145 //
0146 // static data member definitions
0147 //
0148 
0149 //
0150 // constructors and destructor
0151 //
0152 NanoAODOutputModule::NanoAODOutputModule(edm::ParameterSet const& pset)
0153     : edm::one::OutputModuleBase::OutputModuleBase(pset),
0154       edm::one::OutputModule<>(pset),
0155       m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
0156       m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
0157       m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
0158       m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
0159       m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
0160       m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
0161       m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
0162       m_processHistoryRegistry() {}
0163 
0164 NanoAODOutputModule::~NanoAODOutputModule() {}
0165 
0166 void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) {
0167   //Get data from 'e' and write it to the file
0168   edm::Service<edm::JobReport> jr;
0169   jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
0170 
0171   if (m_autoFlush) {
0172     int64_t events = m_tree->GetEntriesFast();
0173     if (events == m_firstFlush) {
0174       m_tree->FlushBaskets();
0175       float maxMemory;
0176       if (m_autoFlush > 0) {
0177         // Estimate the memory we'll be using at the first full flush by
0178         // linearly scaling the number of events.
0179         float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
0180         maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
0181       } else if (m_tree->GetZipBytes() == 0) {
0182         maxMemory = 100 * 1024 * 1024;  // Degenerate case of no information in the tree; arbitrary value
0183       } else {
0184         // Estimate the memory we'll be using by scaling the current compression ratio.
0185         float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
0186         maxMemory = -m_autoFlush * cxnRatio;
0187         float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
0188         m_autoFlush = m_firstFlush / percentBytesDone;
0189       }
0190       //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
0191       //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
0192       //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
0193       //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
0194       //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
0195       m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
0196     }
0197     if (m_eventsSinceFlush == m_autoFlush) {
0198       m_tree->FlushBaskets();
0199       m_eventsSinceFlush = 0;
0200     }
0201     m_eventsSinceFlush++;
0202   }
0203 
0204   m_commonBranches.fill(iEvent.eventAuxiliary());
0205   // fill all tables, starting from main tables and then doing extension tables
0206   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0207     for (auto& t : m_tables)
0208       t.fill(iEvent, *m_tree, extensions);
0209   }
0210   if (!m_triggers_areSorted) {  // sort triggers/flags in inverse processHistory order, to save without any special label the most recent ones
0211     std::vector<std::string> pnames;
0212     for (auto& p : iEvent.processHistory())
0213       pnames.push_back(p.processName());
0214     std::sort(m_triggers.begin(), m_triggers.end(), [pnames](TriggerOutputBranches& a, TriggerOutputBranches& b) {
0215       return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
0216               (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
0217     });
0218     m_triggers_areSorted = true;
0219   }
0220   // fill triggers
0221   for (auto& t : m_triggers)
0222     t.fill(iEvent, *m_tree);
0223   // fill event branches
0224   for (auto& t : m_evstrings)
0225     t.fill(iEvent, *m_tree);
0226   tbb::this_task_arena::isolate([&] { m_tree->Fill(); });
0227 
0228   m_processHistoryRegistry.registerProcessHistory(iEvent.processHistory());
0229 }
0230 
0231 void NanoAODOutputModule::writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) {
0232   edm::Service<edm::JobReport> jr;
0233   jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
0234 
0235   m_commonLumiBranches.fill(iLumi.id());
0236 
0237   for (auto& t : m_lumiTables)
0238     t.fill(iLumi, *m_lumiTree);
0239 
0240   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0241     for (auto& t : m_lumiTables2)
0242       t.fill(iLumi, *m_lumiTree, extensions);
0243   }
0244 
0245   tbb::this_task_arena::isolate([&] { m_lumiTree->Fill(); });
0246 
0247   m_processHistoryRegistry.registerProcessHistory(iLumi.processHistory());
0248 }
0249 
0250 void NanoAODOutputModule::writeRun(edm::RunForOutput const& iRun) {
0251   edm::Service<edm::JobReport> jr;
0252   jr->reportRunNumber(m_jrToken, iRun.id().run());
0253 
0254   m_commonRunBranches.fill(iRun.id());
0255 
0256   for (auto& t : m_runTables)
0257     t.fill(iRun, *m_runTree);
0258 
0259   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0260     for (auto& t : m_runFlatTables)
0261       t.fill(iRun, *m_runTree, extensions);
0262   }
0263 
0264   edm::Handle<nanoaod::UniqueString> hstring;
0265   for (const auto& p : m_nanoMetadata) {
0266     iRun.getByToken(p.second, hstring);
0267     TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
0268     if (tos) {
0269       if (hstring->str() != tos->GetString())
0270         throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
0271     } else {
0272       auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
0273       m_file->WriteTObject(ostr.release(), p.first.c_str());
0274     }
0275   }
0276 
0277   tbb::this_task_arena::isolate([&] { m_runTree->Fill(); });
0278 
0279   m_processHistoryRegistry.registerProcessHistory(iRun.processHistory());
0280 }
0281 
0282 bool NanoAODOutputModule::isFileOpen() const { return nullptr != m_file.get(); }
0283 
0284 void NanoAODOutputModule::openFile(edm::FileBlock const&) {
0285   m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
0286   edm::Service<edm::JobReport> jr;
0287   cms::Digest branchHash;
0288   m_jrToken = jr->outputFileOpened(m_fileName,
0289                                    m_logicalFileName,
0290                                    std::string(),
0291                                    m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
0292                                    description().moduleLabel(),
0293                                    edm::createGlobalIdentifier(),
0294                                    std::string(),
0295                                    branchHash.digest().toString(),
0296                                    std::vector<std::string>());
0297 
0298   if (m_compressionAlgorithm == std::string("ZLIB")) {
0299     m_file->SetCompressionAlgorithm(ROOT::kZLIB);
0300   } else if (m_compressionAlgorithm == std::string("LZMA")) {
0301     m_file->SetCompressionAlgorithm(ROOT::kLZMA);
0302   } else if (m_compressionAlgorithm == std::string("ZSTD")) {
0303     m_file->SetCompressionAlgorithm(ROOT::kZSTD);
0304   } else if (m_compressionAlgorithm == std::string("LZ4")) {
0305     m_file->SetCompressionAlgorithm(ROOT::kLZ4);
0306   } else {
0307     throw cms::Exception("Configuration")
0308         << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
0309         << "Allowed compression algorithms are ZLIB, LZMA, ZSTD, and LZ4\n";
0310   }
0311   /* Setup file structure here */
0312   m_tables.clear();
0313   m_triggers.clear();
0314   m_triggers_areSorted = false;
0315   m_evstrings.clear();
0316   m_runTables.clear();
0317   m_lumiTables.clear();
0318   m_lumiTables2.clear();
0319   m_runFlatTables.clear();
0320   const auto& keeps = keptProducts();
0321   for (const auto& keep : keeps[edm::InEvent]) {
0322     if (keep.first->className() == "nanoaod::FlatTable")
0323       m_tables.emplace_back(keep.first, keep.second);
0324     else if (keep.first->className() == "edm::TriggerResults") {
0325       m_triggers.emplace_back(keep.first, keep.second);
0326     } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
0327                keep.first->productInstanceName() == "genModel") {  // friendlyClassName == "String"
0328       m_evstrings.emplace_back(keep.first, keep.second, true);     // update only at lumiBlock transitions
0329     } else
0330       throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
0331   }
0332 
0333   for (const auto& keep : keeps[edm::InLumi]) {
0334     if (keep.first->className() == "nanoaod::MergeableCounterTable")
0335       m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0336     else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0337       m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0338     else if (keep.first->className() == "nanoaod::FlatTable")
0339       m_lumiTables2.push_back(LumiOutputBranches(keep.first, keep.second));
0340     else
0341       throw cms::Exception(
0342           "Configuration",
0343           "NanoAODOutputModule cannot handle class " + keep.first->className() + " in LuminosityBlock branch");
0344   }
0345 
0346   for (const auto& keep : keeps[edm::InRun]) {
0347     if (keep.first->className() == "nanoaod::MergeableCounterTable")
0348       m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0349     else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0350       m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0351     else if (keep.first->className() == "nanoaod::FlatTable")
0352       m_runFlatTables.emplace_back(keep.first, keep.second);
0353     else
0354       throw cms::Exception("Configuration",
0355                            "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
0356   }
0357 
0358   // create the trees
0359   m_tree = std::make_unique<TTree>("Events", "Events");
0360   m_tree->SetAutoSave(0);
0361   m_tree->SetAutoFlush(0);
0362   m_commonBranches.branch(*m_tree);
0363 
0364   m_lumiTree = std::make_unique<TTree>("LuminosityBlocks", "LuminosityBlocks");
0365   m_lumiTree->SetAutoSave(0);
0366   m_commonLumiBranches.branch(*m_lumiTree);
0367 
0368   m_runTree = std::make_unique<TTree>("Runs", "Runs");
0369   m_runTree->SetAutoSave(0);
0370   m_commonRunBranches.branch(*m_runTree);
0371 
0372   if (m_writeProvenance) {
0373     m_metaDataTree = std::make_unique<TTree>(edm::poolNames::metaDataTreeName().c_str(), "Job metadata");
0374     m_metaDataTree->SetAutoSave(0);
0375     m_parameterSetsTree = std::make_unique<TTree>(edm::poolNames::parameterSetsTreeName().c_str(), "Parameter sets");
0376     m_parameterSetsTree->SetAutoSave(0);
0377   }
0378 }
0379 void NanoAODOutputModule::reallyCloseFile() {
0380   if (m_writeProvenance) {
0381     int basketSize = 16384;  // fixme configurable?
0382     edm::fillParameterSetBranch(m_parameterSetsTree.get(), basketSize);
0383     edm::fillProcessHistoryBranch(m_metaDataTree.get(), basketSize, m_processHistoryRegistry);
0384     if (m_metaDataTree->GetNbranches() != 0) {
0385       m_metaDataTree->SetEntries(-1);
0386     }
0387     if (m_parameterSetsTree->GetNbranches() != 0) {
0388       m_parameterSetsTree->SetEntries(-1);
0389     }
0390   }
0391   m_file->Write();
0392   m_file->Close();
0393   m_file.reset();
0394   m_tree.release();               // apparently root has ownership
0395   m_lumiTree.release();           //
0396   m_runTree.release();            //
0397   m_metaDataTree.release();       //
0398   m_parameterSetsTree.release();  //
0399   edm::Service<edm::JobReport> jr;
0400   jr->outputFileClosed(m_jrToken);
0401 }
0402 
0403 void NanoAODOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0404   edm::ParameterSetDescription desc;
0405 
0406   desc.addUntracked<std::string>("fileName");
0407   desc.addUntracked<std::string>("logicalFileName", "");
0408 
0409   desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
0410   desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
0411       ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
0412   desc.addUntracked<bool>("saveProvenance", true)
0413       ->setComment("Save process provenance information, e.g. for edmProvDump");
0414   desc.addUntracked<bool>("fakeNameForCrab", false)
0415       ->setComment(
0416           "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran "
0417           "(and publish) till crab is fixed");
0418   desc.addUntracked<int>("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file");
0419 
0420   //replace with whatever you want to get from the EDM by default
0421   const std::vector<std::string> keep = {"drop *",
0422                                          "keep nanoaodFlatTable_*Table_*_*",
0423                                          "keep edmTriggerResults_*_*_*",
0424                                          "keep String_*_genModel_*",
0425                                          "keep nanoaodMergeableCounterTable_*Table_*_*",
0426                                          "keep nanoaodUniqueString_nanoMetadata_*_*"};
0427   edm::one::OutputModule<>::fillDescription(desc, keep);
0428 
0429   //Used by Workflow management for their own meta data
0430   edm::ParameterSetDescription dataSet;
0431   dataSet.setAllowAnything();
0432   desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
0433       ->setComment("PSet is only used by Data Operations and not by this module.");
0434 
0435   edm::ParameterSetDescription branchSet;
0436   branchSet.setAllowAnything();
0437   desc.add<edm::ParameterSetDescription>("branches", branchSet);
0438 
0439   descriptions.addDefault(desc);
0440 }
0441 
// Presumably registers NanoAODOutputModule as a framework plugin; the macro is
// defined outside this file (likely FWCore/Framework/interface/MakerMacros.h) - confirm.
DEFINE_FWK_MODULE(NanoAODOutputModule);