Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-10-25 09:58:02

0001 // -*- C++ -*-
0002 //
0003 // Package:     PhysicsTools/NanoAODOutput
0004 // Class  :     NanoAODRNTupleOutputModule
0005 //
0006 // Implementation:
0007 //     [Notes on implementation]
0008 //
0009 // Original Author:  Max Orok
0010 //         Created:  Wed, 13 Jan 2021 14:21:41 GMT
0011 //
0012 
0013 #include <cstdint>
0014 #include <string>
0015 
0016 #include <ROOT/RNTuple.hxx>
0017 #include <ROOT/RNTupleModel.hxx>
0018 #include <ROOT/RNTupleOptions.hxx>
0019 #include <ROOT/RPageStorageFile.hxx>
0020 using ROOT::Experimental::RNTupleModel;
0021 using ROOT::Experimental::RNTupleWriteOptions;
0022 using ROOT::Experimental::RNTupleWriter;
0023 using ROOT::Experimental::Detail::RPageSinkFile;
0024 
0025 #include "TObjString.h"
0026 
0027 #include "FWCore/Framework/interface/one/OutputModule.h"
0028 #include "FWCore/Framework/interface/RunForOutput.h"
0029 #include "FWCore/Framework/interface/LuminosityBlockForOutput.h"
0030 #include "FWCore/Framework/interface/EventForOutput.h"
0031 #include "FWCore/ServiceRegistry/interface/Service.h"
0032 #include "FWCore/Framework/interface/MakerMacros.h"
0033 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0034 #include "FWCore/MessageLogger/interface/JobReport.h"
0035 #include "FWCore/Utilities/interface/Digest.h"
0036 #include "FWCore/Utilities/interface/GlobalIdentifier.h"
0037 #include "DataFormats/NanoAOD/interface/UniqueString.h"
0038 #include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h"
0039 
0040 #include "NanoAODRNTuples.h"
0041 
0042 class NanoAODRNTupleOutputModule : public edm::one::OutputModule<> {
0043 public:
0044   NanoAODRNTupleOutputModule(edm::ParameterSet const& pset);
0045   ~NanoAODRNTupleOutputModule() override;
0046 
0047   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0048 
0049 private:
0050   void openFile(edm::FileBlock const&) override;
0051   bool isFileOpen() const override;
0052   void write(edm::EventForOutput const& e) override;
0053   void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override;
0054   void writeRun(edm::RunForOutput const&) override;
0055   void reallyCloseFile() override;
0056   void writeProvenance();
0057 
0058   void initializeNTuple(edm::EventForOutput const& e);
0059 
0060   std::string m_fileName;
0061   std::string m_logicalFileName;
0062   std::string m_compressionAlgorithm;
0063   int m_compressionLevel;
0064   bool m_writeProvenance;
0065   edm::ProcessHistoryRegistry m_processHistoryRegistry;
0066   edm::JobReport::Token m_jrToken;
0067 
0068   std::unique_ptr<TFile> m_file;
0069   std::unique_ptr<RNTupleWriter> m_ntuple;
0070   TableCollectionSet m_tables;
0071   std::vector<TriggerOutputFields> m_triggers;
0072   EventStringOutputFields m_evstrings;
0073 
0074   class CommonEventFields {
0075   public:
0076     void createFields(RNTupleModel& model) {
0077       model.AddField<UInt_t>("run", &m_run);
0078       model.AddField<UInt_t>("luminosityBlock", &m_luminosityBlock);
0079       model.AddField<std::uint64_t>("event", &m_event);
0080     }
0081     void fill(const edm::EventID& id) {
0082       m_run = id.run();
0083       m_luminosityBlock = id.luminosityBlock();
0084       m_event = id.event();
0085     }
0086 
0087   private:
0088     UInt_t m_run;
0089     UInt_t m_luminosityBlock;
0090     std::uint64_t m_event;
0091   } m_commonFields;
0092 
0093   LumiNTuple m_lumi;
0094   RunNTuple m_run;
0095 
0096   std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
0097 };
0098 
0099 NanoAODRNTupleOutputModule::NanoAODRNTupleOutputModule(edm::ParameterSet const& pset)
0100     : edm::one::OutputModuleBase::OutputModuleBase(pset),
0101       edm::one::OutputModule<>(pset),
0102       m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
0103       m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
0104       m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
0105       m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
0106       m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
0107       m_processHistoryRegistry() {}
0108 
0109 NanoAODRNTupleOutputModule::~NanoAODRNTupleOutputModule() {}
0110 
0111 void NanoAODRNTupleOutputModule::writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) {
0112   edm::Service<edm::JobReport> jr;
0113   jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
0114   m_lumi.fill(iLumi.id(), *m_file);
0115   m_processHistoryRegistry.registerProcessHistory(iLumi.processHistory());
0116 }
0117 
0118 void NanoAODRNTupleOutputModule::writeRun(edm::RunForOutput const& iRun) {
0119   edm::Service<edm::JobReport> jr;
0120   jr->reportRunNumber(m_jrToken, iRun.id().run());
0121 
0122   m_run.fill(iRun, *m_file);
0123 
0124   edm::Handle<nanoaod::UniqueString> hstring;
0125   for (const auto& p : m_nanoMetadata) {
0126     iRun.getByToken(p.second, hstring);
0127     TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
0128     if (tos && hstring->str() != tos->GetString()) {
0129       throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
0130     } else {
0131       auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
0132       m_file->WriteTObject(ostr.release(), p.first.c_str());
0133     }
0134   }
0135   m_processHistoryRegistry.registerProcessHistory(iRun.processHistory());
0136 }
0137 
0138 bool NanoAODRNTupleOutputModule::isFileOpen() const { return nullptr != m_ntuple.get(); }
0139 
0140 void NanoAODRNTupleOutputModule::openFile(edm::FileBlock const&) {
0141   m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
0142   edm::Service<edm::JobReport> jr;
0143   cms::Digest branchHash;
0144   m_jrToken = jr->outputFileOpened(m_fileName,
0145                                    m_logicalFileName,
0146                                    std::string(),
0147                                    // TODO check if needed
0148                                    //m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
0149                                    "NanoAODRNTupleOutputModule",
0150                                    description().moduleLabel(),
0151                                    edm::createGlobalIdentifier(),
0152                                    std::string(),
0153                                    branchHash.digest().toString(),
0154                                    std::vector<std::string>());
0155 
0156   if (m_compressionAlgorithm == "ZLIB") {
0157     m_file->SetCompressionAlgorithm(ROOT::kZLIB);
0158   } else if (m_compressionAlgorithm == "LZMA") {
0159     m_file->SetCompressionAlgorithm(ROOT::kLZMA);
0160   } else {
0161     throw cms::Exception("Configuration")
0162         << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
0163         << "Allowed compression algorithms are ZLIB and LZMA\n";
0164   }
0165 
0166   const auto& keeps = keptProducts();
0167   for (const auto& keep : keeps[edm::InRun]) {
0168     if (keep.first->className() == "nanoaod::MergeableCounterTable") {
0169       m_run.registerToken(keep.second);
0170     } else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata") {
0171       m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0172     } else {
0173       throw cms::Exception(
0174           "Configuration",
0175           "NanoAODRNTupleOutputModule cannot handle class " + keep.first->className() + " in Run branch");
0176     }
0177   }
0178 }
0179 
0180 void NanoAODRNTupleOutputModule::initializeNTuple(edm::EventForOutput const& iEvent) {
0181   // set up RNTuple schema
0182   auto model = RNTupleModel::Create();
0183   m_commonFields.createFields(*model);
0184 
0185   const auto& keeps = keptProducts();
0186   for (const auto& keep : keeps[edm::InEvent]) {
0187     if (keep.first->className() == "nanoaod::FlatTable") {
0188       edm::Handle<nanoaod::FlatTable> handle;
0189       const auto& token = keep.second;
0190       iEvent.getByToken(token, handle);
0191       m_tables.add(token, *handle);
0192     } else if (keep.first->className() == "edm::TriggerResults") {
0193       m_triggers.emplace_back(TriggerOutputFields(keep.first->processName(), keep.second));
0194     } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
0195                keep.first->productInstanceName() == "genModel") {
0196       m_evstrings.registerToken(keep.second);
0197     } else {
0198       throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
0199     }
0200   }
0201   m_tables.createFields(iEvent, *model);
0202   for (auto& trigger : m_triggers) {
0203     trigger.createFields(iEvent, *model);
0204   }
0205   m_evstrings.createFields(*model);
0206   // TODO use Append
0207   RNTupleWriteOptions options;
0208   options.SetCompression(m_file->GetCompressionSettings());
0209   m_ntuple =
0210       std::make_unique<RNTupleWriter>(std::move(model), std::make_unique<RPageSinkFile>("Events", *m_file, options));
0211 }
0212 
0213 void NanoAODRNTupleOutputModule::write(edm::EventForOutput const& iEvent) {
0214   if (!m_ntuple) {
0215     initializeNTuple(iEvent);
0216   }
0217 
0218   edm::Service<edm::JobReport> jr;
0219   jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
0220 
0221   m_commonFields.fill(iEvent.id());
0222   m_tables.fill(iEvent);
0223   for (auto& trigger : m_triggers) {
0224     trigger.fill(iEvent);
0225   }
0226   m_evstrings.fill(iEvent);
0227   m_ntuple->Fill();
0228   m_processHistoryRegistry.registerProcessHistory(iEvent.processHistory());
0229 }
0230 
0231 void NanoAODRNTupleOutputModule::reallyCloseFile() {
0232   if (m_writeProvenance) {
0233     writeProvenance();
0234   }
0235   // write ntuple to disk by calling the RNTupleWriter destructor
0236   m_ntuple.reset();
0237   m_lumi.finalizeWrite();
0238   m_run.finalizeWrite();
0239   m_file->Write();
0240   m_file->Close();
0241 
0242   edm::Service<edm::JobReport> jr;
0243   jr->outputFileClosed(m_jrToken);
0244 }
0245 
0246 void NanoAODRNTupleOutputModule::writeProvenance() {
0247   PSetNTuple pntuple;
0248   pntuple.fill(edm::pset::Registry::instance(), *m_file);
0249   pntuple.finalizeWrite();
0250 
0251   MetadataNTuple mdntuple;
0252   mdntuple.fill(m_processHistoryRegistry, *m_file);
0253   mdntuple.finalizeWrite();
0254 }
0255 
0256 void NanoAODRNTupleOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0257   edm::ParameterSetDescription desc;
0258 
0259   desc.addUntracked<std::string>("fileName");
0260   desc.addUntracked<std::string>("logicalFileName", "");
0261   desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
0262   desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
0263       ->setComment(
0264           "Algorithm used to "
0265           "compress data in the ROOT output file, allowed values are ZLIB and LZMA");
0266   desc.addUntracked<bool>("saveProvenance", true)
0267       ->setComment("Save process provenance information, e.g. for edmProvDump");
0268   const std::vector<std::string> keep = {"drop *",
0269                                          "keep nanoaodFlatTable_*Table_*_*",
0270                                          "keep edmTriggerResults_*_*_*",
0271                                          "keep String_*_genModel_*",
0272                                          "keep nanoaodMergeableCounterTable_*Table_*_*",
0273                                          "keep nanoaodUniqueString_nanoMetadata_*_*"};
0274   edm::one::OutputModule<>::fillDescription(desc, keep);
0275 
0276   //Used by Workflow management for their own meta data
0277   edm::ParameterSetDescription dataSet;
0278   dataSet.setAllowAnything();
0279   desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
0280       ->setComment("PSet is only used by Data Operations and not by this module.");
0281 
0282   edm::ParameterSetDescription branchSet;
0283   branchSet.setAllowAnything();
0284   desc.add<edm::ParameterSetDescription>("branches", branchSet);
0285 
0286   descriptions.addDefault(desc);
0287 }
0288 
0289 DEFINE_FWK_MODULE(NanoAODRNTupleOutputModule);