Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-06-13 09:27:10

0001 // -*- C++ -*-
0002 //
0003 // Package:     PhysicsTools/NanoAODOutput
0004 // Class  :     NanoAODOutputModule
0005 //
0006 // Implementation:
0007 //     [Notes on implementation]
0008 //
0009 // Original Author:  Christopher Jones
0010 //         Created:  Mon, 07 Aug 2017 14:21:41 GMT
0011 //
0012 
0013 // system include files
0014 #include <algorithm>
0015 #include <memory>
0016 
0017 #include "Compression.h"
0018 #include "TFile.h"
0019 #include "TObjString.h"
0020 #include "TROOT.h"
0021 #include "TTree.h"
0022 #include <string>
0023 
0024 // user include files
0025 #include "FWCore/Framework/interface/one/OutputModule.h"
0026 #include "FWCore/Framework/interface/RunForOutput.h"
0027 #include "FWCore/Framework/interface/LuminosityBlockForOutput.h"
0028 #include "FWCore/Framework/interface/EventForOutput.h"
0029 #include "FWCore/ServiceRegistry/interface/Service.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0032 #include "FWCore/MessageLogger/interface/JobReport.h"
0033 #include "FWCore/Utilities/interface/GlobalIdentifier.h"
0034 #include "FWCore/Utilities/interface/Digest.h"
0035 #include "IOPool/Provenance/interface/CommonProvenanceFiller.h"
0036 #include "DataFormats/Provenance/interface/BranchType.h"
0037 #include "DataFormats/Provenance/interface/BranchDescription.h"
0038 #include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h"
0039 #include "DataFormats/NanoAOD/interface/FlatTable.h"
0040 #include "DataFormats/NanoAOD/interface/UniqueString.h"
0041 #include "PhysicsTools/NanoAOD/plugins/TableOutputBranches.h"
0042 #include "PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h"
0043 #include "PhysicsTools/NanoAOD/plugins/TriggerOutputBranches.h"
0044 #include "PhysicsTools/NanoAOD/plugins/EventStringOutputBranches.h"
0045 #include "PhysicsTools/NanoAOD/plugins/SummaryTableOutputBranches.h"
0046 
0047 #include <iostream>
0048 
0049 #include "oneapi/tbb/task_arena.h"
0050 
0051 class NanoAODOutputModule : public edm::one::OutputModule<> {
0052 public:
0053   NanoAODOutputModule(edm::ParameterSet const& pset);
0054   ~NanoAODOutputModule() override;
0055 
0056   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0057 
0058 private:
0059   void write(edm::EventForOutput const& e) override;
0060   void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override;
0061   void writeRun(edm::RunForOutput const&) override;
0062   bool isFileOpen() const override;
0063   void openFile(edm::FileBlock const&) override;
0064   void reallyCloseFile() override;
0065 
0066   std::string m_fileName;
0067   std::string m_logicalFileName;
0068   int m_compressionLevel;
0069   int m_eventsSinceFlush{0};
0070   std::string m_compressionAlgorithm;
0071   bool m_writeProvenance;
0072   bool m_fakeName;  //crab workaround, remove after crab is fixed
0073   int m_autoFlush;
0074   edm::ProcessHistoryRegistry m_processHistoryRegistry;
0075   edm::JobReport::Token m_jrToken;
0076   std::unique_ptr<TFile> m_file;
0077   std::unique_ptr<TTree> m_tree, m_lumiTree, m_runTree, m_metaDataTree, m_parameterSetsTree;
0078 
0079   static constexpr int m_firstFlush{1000};
0080 
0081   class CommonEventBranches {
0082   public:
0083     void branch(TTree& tree) {
0084       tree.Branch("run", &m_run, "run/i");
0085       tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0086       tree.Branch("event", &m_event, "event/l");
0087     }
0088     void fill(const edm::EventID& id) {
0089       m_run = id.run();
0090       m_luminosityBlock = id.luminosityBlock();
0091       m_event = id.event();
0092     }
0093 
0094   private:
0095     UInt_t m_run;
0096     UInt_t m_luminosityBlock;
0097     ULong64_t m_event;
0098   } m_commonBranches;
0099 
0100   class CommonLumiBranches {
0101   public:
0102     void branch(TTree& tree) {
0103       tree.Branch("run", &m_run, "run/i");
0104       tree.Branch("luminosityBlock", &m_luminosityBlock, "luminosityBlock/i");
0105     }
0106     void fill(const edm::LuminosityBlockID& id) {
0107       m_run = id.run();
0108       m_luminosityBlock = id.value();
0109     }
0110 
0111   private:
0112     UInt_t m_run;
0113     UInt_t m_luminosityBlock;
0114   } m_commonLumiBranches;
0115 
0116   class CommonRunBranches {
0117   public:
0118     void branch(TTree& tree) { tree.Branch("run", &m_run, "run/i"); }
0119     void fill(const edm::RunID& id) { m_run = id.run(); }
0120 
0121   private:
0122     UInt_t m_run;
0123   } m_commonRunBranches;
0124 
0125   std::vector<TableOutputBranches> m_tables;
0126   std::vector<TriggerOutputBranches> m_triggers;
0127   bool m_triggers_areSorted = false;
0128   std::vector<EventStringOutputBranches> m_evstrings;
0129 
0130   std::vector<SummaryTableOutputBranches> m_runTables;
0131   std::vector<SummaryTableOutputBranches> m_lumiTables;
0132   std::vector<LumiOutputBranches> m_lumiTables2;
0133   std::vector<TableOutputBranches> m_runFlatTables;
0134 
0135   std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
0136 };
0137 
0138 //
0139 // constants, enums and typedefs
0140 //
0141 
0142 //
0143 // static data member definitions
0144 //
0145 
0146 //
0147 // constructors and destructor
0148 //
0149 NanoAODOutputModule::NanoAODOutputModule(edm::ParameterSet const& pset)
0150     : edm::one::OutputModuleBase::OutputModuleBase(pset),
0151       edm::one::OutputModule<>(pset),
0152       m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
0153       m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
0154       m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
0155       m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
0156       m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
0157       m_fakeName(pset.getUntrackedParameter<bool>("fakeNameForCrab", false)),
0158       m_autoFlush(pset.getUntrackedParameter<int>("autoFlush", -10000000)),
0159       m_processHistoryRegistry() {}
0160 
0161 NanoAODOutputModule::~NanoAODOutputModule() {}
0162 
0163 void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) {
0164   //Get data from 'e' and write it to the file
0165   edm::Service<edm::JobReport> jr;
0166   jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
0167 
0168   if (m_autoFlush) {
0169     int64_t events = m_tree->GetEntriesFast();
0170     if (events == m_firstFlush) {
0171       m_tree->FlushBaskets();
0172       float maxMemory;
0173       if (m_autoFlush > 0) {
0174         // Estimate the memory we'll be using at the first full flush by
0175         // linearly scaling the number of events.
0176         float percentClusterDone = m_firstFlush / static_cast<float>(m_autoFlush);
0177         maxMemory = static_cast<float>(m_tree->GetTotBytes()) / percentClusterDone;
0178       } else if (m_tree->GetZipBytes() == 0) {
0179         maxMemory = 100 * 1024 * 1024;  // Degenerate case of no information in the tree; arbitrary value
0180       } else {
0181         // Estimate the memory we'll be using by scaling the current compression ratio.
0182         float cxnRatio = m_tree->GetTotBytes() / static_cast<float>(m_tree->GetZipBytes());
0183         maxMemory = -m_autoFlush * cxnRatio;
0184         float percentBytesDone = -m_tree->GetZipBytes() / static_cast<float>(m_autoFlush);
0185         m_autoFlush = m_firstFlush / percentBytesDone;
0186       }
0187       //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl;
0188       //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl;
0189       //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl;
0190       //std::cout << "OptimizeBaskets: maxMemory " << static_cast<uint32_t>(maxMemory) << std::endl;
0191       //m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "d");
0192       m_tree->OptimizeBaskets(static_cast<uint32_t>(maxMemory), 1, "");
0193     }
0194     if (m_eventsSinceFlush == m_autoFlush) {
0195       m_tree->FlushBaskets();
0196       m_eventsSinceFlush = 0;
0197     }
0198     m_eventsSinceFlush++;
0199   }
0200 
0201   m_commonBranches.fill(iEvent.id());
0202   // fill all tables, starting from main tables and then doing extension tables
0203   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0204     for (auto& t : m_tables)
0205       t.fill(iEvent, *m_tree, extensions);
0206   }
0207   if (!m_triggers_areSorted) {  // sort triggers/flags in inverse processHistory order, to save without any special label the most recent ones
0208     std::vector<std::string> pnames;
0209     for (auto& p : iEvent.processHistory())
0210       pnames.push_back(p.processName());
0211     std::sort(m_triggers.begin(), m_triggers.end(), [pnames](TriggerOutputBranches& a, TriggerOutputBranches& b) {
0212       return ((std::find(pnames.begin(), pnames.end(), a.processName()) - pnames.begin()) >
0213               (std::find(pnames.begin(), pnames.end(), b.processName()) - pnames.begin()));
0214     });
0215     m_triggers_areSorted = true;
0216   }
0217   // fill triggers
0218   for (auto& t : m_triggers)
0219     t.fill(iEvent, *m_tree);
0220   // fill event branches
0221   for (auto& t : m_evstrings)
0222     t.fill(iEvent, *m_tree);
0223   tbb::this_task_arena::isolate([&] { m_tree->Fill(); });
0224 
0225   m_processHistoryRegistry.registerProcessHistory(iEvent.processHistory());
0226 }
0227 
0228 void NanoAODOutputModule::writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) {
0229   edm::Service<edm::JobReport> jr;
0230   jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
0231 
0232   m_commonLumiBranches.fill(iLumi.id());
0233 
0234   for (auto& t : m_lumiTables)
0235     t.fill(iLumi, *m_lumiTree);
0236 
0237   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0238     for (auto& t : m_lumiTables2)
0239       t.fill(iLumi, *m_lumiTree, extensions);
0240   }
0241 
0242   tbb::this_task_arena::isolate([&] { m_lumiTree->Fill(); });
0243 
0244   m_processHistoryRegistry.registerProcessHistory(iLumi.processHistory());
0245 }
0246 
0247 void NanoAODOutputModule::writeRun(edm::RunForOutput const& iRun) {
0248   edm::Service<edm::JobReport> jr;
0249   jr->reportRunNumber(m_jrToken, iRun.id().run());
0250 
0251   m_commonRunBranches.fill(iRun.id());
0252 
0253   for (auto& t : m_runTables)
0254     t.fill(iRun, *m_runTree);
0255 
0256   for (unsigned int extensions = 0; extensions <= 1; ++extensions) {
0257     for (auto& t : m_runFlatTables)
0258       t.fill(iRun, *m_runTree, extensions);
0259   }
0260 
0261   edm::Handle<nanoaod::UniqueString> hstring;
0262   for (const auto& p : m_nanoMetadata) {
0263     iRun.getByToken(p.second, hstring);
0264     TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
0265     if (tos) {
0266       if (hstring->str() != tos->GetString())
0267         throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
0268     } else {
0269       auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
0270       m_file->WriteTObject(ostr.release(), p.first.c_str());
0271     }
0272   }
0273 
0274   tbb::this_task_arena::isolate([&] { m_runTree->Fill(); });
0275 
0276   m_processHistoryRegistry.registerProcessHistory(iRun.processHistory());
0277 }
0278 
0279 bool NanoAODOutputModule::isFileOpen() const { return nullptr != m_file.get(); }
0280 
0281 void NanoAODOutputModule::openFile(edm::FileBlock const&) {
0282   m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
0283   edm::Service<edm::JobReport> jr;
0284   cms::Digest branchHash;
0285   m_jrToken = jr->outputFileOpened(m_fileName,
0286                                    m_logicalFileName,
0287                                    std::string(),
0288                                    m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
0289                                    description().moduleLabel(),
0290                                    edm::createGlobalIdentifier(),
0291                                    std::string(),
0292                                    branchHash.digest().toString(),
0293                                    std::vector<std::string>());
0294 
0295   if (m_compressionAlgorithm == std::string("ZLIB")) {
0296     m_file->SetCompressionAlgorithm(ROOT::kZLIB);
0297   } else if (m_compressionAlgorithm == std::string("LZMA")) {
0298     m_file->SetCompressionAlgorithm(ROOT::kLZMA);
0299   } else if (m_compressionAlgorithm == std::string("ZSTD")) {
0300     m_file->SetCompressionAlgorithm(ROOT::kZSTD);
0301   } else if (m_compressionAlgorithm == std::string("LZ4")) {
0302     m_file->SetCompressionAlgorithm(ROOT::kLZ4);
0303   } else {
0304     throw cms::Exception("Configuration")
0305         << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
0306         << "Allowed compression algorithms are ZLIB, LZMA, ZSTD, and LZ4\n";
0307   }
0308   /* Setup file structure here */
0309   m_tables.clear();
0310   m_triggers.clear();
0311   m_triggers_areSorted = false;
0312   m_evstrings.clear();
0313   m_runTables.clear();
0314   m_lumiTables.clear();
0315   m_lumiTables2.clear();
0316   m_runFlatTables.clear();
0317   const auto& keeps = keptProducts();
0318   for (const auto& keep : keeps[edm::InEvent]) {
0319     if (keep.first->className() == "nanoaod::FlatTable")
0320       m_tables.emplace_back(keep.first, keep.second);
0321     else if (keep.first->className() == "edm::TriggerResults") {
0322       m_triggers.emplace_back(keep.first, keep.second);
0323     } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
0324                keep.first->productInstanceName() == "genModel") {  // friendlyClassName == "String"
0325       m_evstrings.emplace_back(keep.first, keep.second, true);     // update only at lumiBlock transitions
0326     } else
0327       throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
0328   }
0329 
0330   for (const auto& keep : keeps[edm::InLumi]) {
0331     if (keep.first->className() == "nanoaod::MergeableCounterTable")
0332       m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0333     else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0334       m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0335     else if (keep.first->className() == "nanoaod::FlatTable")
0336       m_lumiTables2.push_back(LumiOutputBranches(keep.first, keep.second));
0337     else
0338       throw cms::Exception(
0339           "Configuration",
0340           "NanoAODOutputModule cannot handle class " + keep.first->className() + " in LuminosityBlock branch");
0341   }
0342 
0343   for (const auto& keep : keeps[edm::InRun]) {
0344     if (keep.first->className() == "nanoaod::MergeableCounterTable")
0345       m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second));
0346     else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata")
0347       m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0348     else if (keep.first->className() == "nanoaod::FlatTable")
0349       m_runFlatTables.emplace_back(keep.first, keep.second);
0350     else
0351       throw cms::Exception("Configuration",
0352                            "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch");
0353   }
0354 
0355   // create the trees
0356   m_tree = std::make_unique<TTree>("Events", "Events");
0357   m_tree->SetAutoSave(0);
0358   m_tree->SetAutoFlush(0);
0359   m_commonBranches.branch(*m_tree);
0360 
0361   m_lumiTree = std::make_unique<TTree>("LuminosityBlocks", "LuminosityBlocks");
0362   m_lumiTree->SetAutoSave(0);
0363   m_commonLumiBranches.branch(*m_lumiTree);
0364 
0365   m_runTree = std::make_unique<TTree>("Runs", "Runs");
0366   m_runTree->SetAutoSave(0);
0367   m_commonRunBranches.branch(*m_runTree);
0368 
0369   if (m_writeProvenance) {
0370     m_metaDataTree = std::make_unique<TTree>(edm::poolNames::metaDataTreeName().c_str(), "Job metadata");
0371     m_metaDataTree->SetAutoSave(0);
0372     m_parameterSetsTree = std::make_unique<TTree>(edm::poolNames::parameterSetsTreeName().c_str(), "Parameter sets");
0373     m_parameterSetsTree->SetAutoSave(0);
0374   }
0375 }
0376 void NanoAODOutputModule::reallyCloseFile() {
0377   if (m_writeProvenance) {
0378     int basketSize = 16384;  // fixme configurable?
0379     edm::fillParameterSetBranch(m_parameterSetsTree.get(), basketSize);
0380     edm::fillProcessHistoryBranch(m_metaDataTree.get(), basketSize, m_processHistoryRegistry);
0381     if (m_metaDataTree->GetNbranches() != 0) {
0382       m_metaDataTree->SetEntries(-1);
0383     }
0384     if (m_parameterSetsTree->GetNbranches() != 0) {
0385       m_parameterSetsTree->SetEntries(-1);
0386     }
0387   }
0388   m_file->Write();
0389   m_file->Close();
0390   m_file.reset();
0391   m_tree.release();               // apparently root has ownership
0392   m_lumiTree.release();           //
0393   m_runTree.release();            //
0394   m_metaDataTree.release();       //
0395   m_parameterSetsTree.release();  //
0396   edm::Service<edm::JobReport> jr;
0397   jr->outputFileClosed(m_jrToken);
0398 }
0399 
0400 void NanoAODOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0401   edm::ParameterSetDescription desc;
0402 
0403   desc.addUntracked<std::string>("fileName");
0404   desc.addUntracked<std::string>("logicalFileName", "");
0405 
0406   desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
0407   desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
0408       ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA");
0409   desc.addUntracked<bool>("saveProvenance", true)
0410       ->setComment("Save process provenance information, e.g. for edmProvDump");
0411   desc.addUntracked<bool>("fakeNameForCrab", false)
0412       ->setComment(
0413           "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran "
0414           "(and publish) till crab is fixed");
0415   desc.addUntracked<int>("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file");
0416 
0417   //replace with whatever you want to get from the EDM by default
0418   const std::vector<std::string> keep = {"drop *",
0419                                          "keep nanoaodFlatTable_*Table_*_*",
0420                                          "keep edmTriggerResults_*_*_*",
0421                                          "keep String_*_genModel_*",
0422                                          "keep nanoaodMergeableCounterTable_*Table_*_*",
0423                                          "keep nanoaodUniqueString_nanoMetadata_*_*"};
0424   edm::one::OutputModule<>::fillDescription(desc, keep);
0425 
0426   //Used by Workflow management for their own meta data
0427   edm::ParameterSetDescription dataSet;
0428   dataSet.setAllowAnything();
0429   desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
0430       ->setComment("PSet is only used by Data Operations and not by this module.");
0431 
0432   edm::ParameterSetDescription branchSet;
0433   branchSet.setAllowAnything();
0434   desc.add<edm::ParameterSetDescription>("branches", branchSet);
0435 
0436   descriptions.addDefault(desc);
0437 }
0438 
0439 DEFINE_FWK_MODULE(NanoAODOutputModule);