Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-11 23:31:43

0001 // -*- C++ -*-
0002 //
0003 // Package:     PhysicsTools/NanoAODOutput
0004 // Class  :     NanoAODRNTupleOutputModule
0005 //
0006 // Implementation:
0007 //     [Notes on implementation]
0008 //
0009 // Original Author:  Max Orok
0010 //         Created:  Wed, 13 Jan 2021 14:21:41 GMT
0011 //
0012 
0013 #include <cstdint>
0014 #include <string>
0015 
0016 #include <ROOT/RNTuple.hxx>
0017 #include <ROOT/RNTupleModel.hxx>
0018 #include <ROOT/RPageStorageFile.hxx>
0019 using ROOT::Experimental::RNTupleModel;
0020 #if ROOT_VERSION_CODE < ROOT_VERSION(6, 31, 0)
0021 using ROOT::Experimental::RNTupleWriter;
0022 using ROOT::Experimental::Detail::RPageSinkFile;
0023 #define MakeRNTupleWriter std::make_unique<RNTupleWriter>
0024 #include <ROOT/RNTupleOptions.hxx>
0025 #else
0026 using ROOT::Experimental::Internal::RPageSinkFile;
0027 #define MakeRNTupleWriter ROOT::Experimental::Internal::CreateRNTupleWriter
0028 #include <ROOT/RNTupleWriteOptions.hxx>
0029 #endif
0030 using ROOT::Experimental::RNTupleWriteOptions;
0031 
0032 #include "TObjString.h"
0033 
0034 #include "FWCore/Framework/interface/one/OutputModule.h"
0035 #include "FWCore/Framework/interface/RunForOutput.h"
0036 #include "FWCore/Framework/interface/LuminosityBlockForOutput.h"
0037 #include "FWCore/Framework/interface/EventForOutput.h"
0038 #include "FWCore/ServiceRegistry/interface/Service.h"
0039 #include "FWCore/Framework/interface/MakerMacros.h"
0040 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0041 #include "FWCore/MessageLogger/interface/JobReport.h"
0042 #include "FWCore/Utilities/interface/Digest.h"
0043 #include "FWCore/Utilities/interface/GlobalIdentifier.h"
0044 #include "DataFormats/NanoAOD/interface/UniqueString.h"
0045 #include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h"
0046 
0047 #include "NanoAODRNTuples.h"
0048 
0049 class NanoAODRNTupleOutputModule : public edm::one::OutputModule<> {
0050 public:
0051   NanoAODRNTupleOutputModule(edm::ParameterSet const& pset);
0052   ~NanoAODRNTupleOutputModule() override;
0053 
0054   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0055 
0056 private:
0057   void openFile(edm::FileBlock const&) override;
0058   bool isFileOpen() const override;
0059   void write(edm::EventForOutput const& e) override;
0060   void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override;
0061   void writeRun(edm::RunForOutput const&) override;
0062   void reallyCloseFile() override;
0063   void writeProvenance();
0064 
0065   void initializeNTuple(edm::EventForOutput const& e);
0066 
0067   std::string m_fileName;
0068   std::string m_logicalFileName;
0069   std::string m_compressionAlgorithm;
0070   int m_compressionLevel;
0071   bool m_writeProvenance;
0072   edm::ProcessHistoryRegistry m_processHistoryRegistry;
0073   edm::JobReport::Token m_jrToken;
0074 
0075   std::unique_ptr<TFile> m_file;
0076   std::unique_ptr<RNTupleWriter> m_ntuple;
0077   TableCollectionSet m_tables;
0078   std::vector<TriggerOutputFields> m_triggers;
0079   EventStringOutputFields m_evstrings;
0080 
0081   class CommonEventFields {
0082   public:
0083     void createFields(RNTupleModel& model) {
0084       m_run = model.MakeField<UInt_t>("run");
0085       m_luminosityBlock = model.MakeField<UInt_t>("luminosityBlock");
0086       m_event = model.MakeField<std::uint64_t>("event");
0087     }
0088     void fill(const edm::EventID& id) {
0089       *m_run = id.run();
0090       *m_luminosityBlock = id.luminosityBlock();
0091       *m_event = id.event();
0092     }
0093 
0094   private:
0095     std::shared_ptr<UInt_t> m_run;
0096     std::shared_ptr<UInt_t> m_luminosityBlock;
0097     std::shared_ptr<std::uint64_t> m_event;
0098   } m_commonFields;
0099 
0100   LumiNTuple m_lumi;
0101   RunNTuple m_run;
0102 
0103   std::vector<std::pair<std::string, edm::EDGetToken>> m_nanoMetadata;
0104 };
0105 
0106 NanoAODRNTupleOutputModule::NanoAODRNTupleOutputModule(edm::ParameterSet const& pset)
0107     : edm::one::OutputModuleBase::OutputModuleBase(pset),
0108       edm::one::OutputModule<>(pset),
0109       m_fileName(pset.getUntrackedParameter<std::string>("fileName")),
0110       m_logicalFileName(pset.getUntrackedParameter<std::string>("logicalFileName")),
0111       m_compressionAlgorithm(pset.getUntrackedParameter<std::string>("compressionAlgorithm")),
0112       m_compressionLevel(pset.getUntrackedParameter<int>("compressionLevel")),
0113       m_writeProvenance(pset.getUntrackedParameter<bool>("saveProvenance", true)),
0114       m_processHistoryRegistry() {}
0115 
0116 NanoAODRNTupleOutputModule::~NanoAODRNTupleOutputModule() {}
0117 
0118 void NanoAODRNTupleOutputModule::writeLuminosityBlock(edm::LuminosityBlockForOutput const& iLumi) {
0119   edm::Service<edm::JobReport> jr;
0120   jr->reportLumiSection(m_jrToken, iLumi.id().run(), iLumi.id().value());
0121   m_lumi.fill(iLumi.id(), *m_file);
0122   m_processHistoryRegistry.registerProcessHistory(iLumi.processHistory());
0123 }
0124 
0125 void NanoAODRNTupleOutputModule::writeRun(edm::RunForOutput const& iRun) {
0126   edm::Service<edm::JobReport> jr;
0127   jr->reportRunNumber(m_jrToken, iRun.id().run());
0128 
0129   m_run.fill(iRun, *m_file);
0130 
0131   edm::Handle<nanoaod::UniqueString> hstring;
0132   for (const auto& p : m_nanoMetadata) {
0133     iRun.getByToken(p.second, hstring);
0134     TObjString* tos = dynamic_cast<TObjString*>(m_file->Get(p.first.c_str()));
0135     if (tos && hstring->str() != tos->GetString()) {
0136       throw cms::Exception("LogicError", "Inconsistent nanoMetadata " + p.first + " (" + hstring->str() + ")");
0137     } else {
0138       auto ostr = std::make_unique<TObjString>(hstring->str().c_str());
0139       m_file->WriteTObject(ostr.release(), p.first.c_str());
0140     }
0141   }
0142   m_processHistoryRegistry.registerProcessHistory(iRun.processHistory());
0143 }
0144 
0145 bool NanoAODRNTupleOutputModule::isFileOpen() const { return nullptr != m_ntuple.get(); }
0146 
0147 void NanoAODRNTupleOutputModule::openFile(edm::FileBlock const&) {
0148   m_file = std::make_unique<TFile>(m_fileName.c_str(), "RECREATE", "", m_compressionLevel);
0149   edm::Service<edm::JobReport> jr;
0150   cms::Digest branchHash;
0151   m_jrToken = jr->outputFileOpened(m_fileName,
0152                                    m_logicalFileName,
0153                                    std::string(),
0154                                    // TODO check if needed
0155                                    //m_fakeName ? "PoolOutputModule" : "NanoAODOutputModule",
0156                                    "NanoAODRNTupleOutputModule",
0157                                    description().moduleLabel(),
0158                                    edm::createGlobalIdentifier(),
0159                                    std::string(),
0160                                    branchHash.digest().toString(),
0161                                    std::vector<std::string>());
0162 
0163   if (m_compressionAlgorithm == "ZLIB") {
0164     m_file->SetCompressionAlgorithm(ROOT::RCompressionSetting::EAlgorithm::kZLIB);
0165   } else if (m_compressionAlgorithm == "LZMA") {
0166     m_file->SetCompressionAlgorithm(ROOT::RCompressionSetting::EAlgorithm::kLZMA);
0167   } else {
0168     throw cms::Exception("Configuration")
0169         << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n"
0170         << "Allowed compression algorithms are ZLIB and LZMA\n";
0171   }
0172 
0173   const auto& keeps = keptProducts();
0174   for (const auto& keep : keeps[edm::InRun]) {
0175     if (keep.first->className() == "nanoaod::MergeableCounterTable") {
0176       m_run.registerToken(keep.second);
0177     } else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata") {
0178       m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second);
0179     } else {
0180       throw cms::Exception(
0181           "Configuration",
0182           "NanoAODRNTupleOutputModule cannot handle class " + keep.first->className() + " in Run branch");
0183     }
0184   }
0185 }
0186 
0187 void NanoAODRNTupleOutputModule::initializeNTuple(edm::EventForOutput const& iEvent) {
0188   // set up RNTuple schema
0189   auto model = RNTupleModel::Create();
0190   m_commonFields.createFields(*model);
0191 
0192   const auto& keeps = keptProducts();
0193   for (const auto& keep : keeps[edm::InEvent]) {
0194     if (keep.first->className() == "nanoaod::FlatTable") {
0195       edm::Handle<nanoaod::FlatTable> handle;
0196       const auto& token = keep.second;
0197       iEvent.getByToken(token, handle);
0198       m_tables.add(token, *handle);
0199     } else if (keep.first->className() == "edm::TriggerResults") {
0200       m_triggers.emplace_back(TriggerOutputFields(keep.first->processName(), keep.second));
0201     } else if (keep.first->className() == "std::basic_string<char,std::char_traits<char> >" &&
0202                keep.first->productInstanceName() == "genModel") {
0203       m_evstrings.registerToken(keep.second);
0204     } else {
0205       throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className());
0206     }
0207   }
0208   m_tables.createFields(iEvent, *model);
0209   for (auto& trigger : m_triggers) {
0210     trigger.createFields(iEvent, *model);
0211   }
0212   m_evstrings.createFields(*model);
0213   // TODO use Append
0214   RNTupleWriteOptions options;
0215   options.SetCompression(m_file->GetCompressionSettings());
0216   m_ntuple = MakeRNTupleWriter(std::move(model), std::make_unique<RPageSinkFile>("Events", *m_file, options));
0217 }
0218 
0219 void NanoAODRNTupleOutputModule::write(edm::EventForOutput const& iEvent) {
0220   if (!m_ntuple) {
0221     initializeNTuple(iEvent);
0222   }
0223 
0224   edm::Service<edm::JobReport> jr;
0225   jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event());
0226 
0227   m_commonFields.fill(iEvent.id());
0228   m_tables.fill(iEvent);
0229   for (auto& trigger : m_triggers) {
0230     trigger.fill(iEvent);
0231   }
0232   m_evstrings.fill(iEvent);
0233   m_ntuple->Fill();
0234   m_processHistoryRegistry.registerProcessHistory(iEvent.processHistory());
0235 }
0236 
0237 void NanoAODRNTupleOutputModule::reallyCloseFile() {
0238   if (m_writeProvenance) {
0239     writeProvenance();
0240   }
0241   // write ntuple to disk by calling the RNTupleWriter destructor
0242   m_ntuple.reset();
0243   m_lumi.finalizeWrite();
0244   m_run.finalizeWrite();
0245   m_file->Write();
0246   m_file->Close();
0247 
0248   edm::Service<edm::JobReport> jr;
0249   jr->outputFileClosed(m_jrToken);
0250 }
0251 
0252 void NanoAODRNTupleOutputModule::writeProvenance() {
0253   PSetNTuple pntuple;
0254   pntuple.fill(edm::pset::Registry::instance(), *m_file);
0255   pntuple.finalizeWrite();
0256 
0257   MetadataNTuple mdntuple;
0258   mdntuple.fill(m_processHistoryRegistry, *m_file);
0259   mdntuple.finalizeWrite();
0260 }
0261 
0262 void NanoAODRNTupleOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0263   edm::ParameterSetDescription desc;
0264 
0265   desc.addUntracked<std::string>("fileName");
0266   desc.addUntracked<std::string>("logicalFileName", "");
0267   desc.addUntracked<int>("compressionLevel", 9)->setComment("ROOT compression level of output file.");
0268   desc.addUntracked<std::string>("compressionAlgorithm", "ZLIB")
0269       ->setComment(
0270           "Algorithm used to "
0271           "compress data in the ROOT output file, allowed values are ZLIB and LZMA");
0272   desc.addUntracked<bool>("saveProvenance", true)
0273       ->setComment("Save process provenance information, e.g. for edmProvDump");
0274   const std::vector<std::string> keep = {"drop *",
0275                                          "keep nanoaodFlatTable_*Table_*_*",
0276                                          "keep edmTriggerResults_*_*_*",
0277                                          "keep String_*_genModel_*",
0278                                          "keep nanoaodMergeableCounterTable_*Table_*_*",
0279                                          "keep nanoaodUniqueString_nanoMetadata_*_*"};
0280   edm::one::OutputModule<>::fillDescription(desc, keep);
0281 
0282   //Used by Workflow management for their own meta data
0283   edm::ParameterSetDescription dataSet;
0284   dataSet.setAllowAnything();
0285   desc.addUntracked<edm::ParameterSetDescription>("dataset", dataSet)
0286       ->setComment("PSet is only used by Data Operations and not by this module.");
0287 
0288   edm::ParameterSetDescription branchSet;
0289   branchSet.setAllowAnything();
0290   desc.add<edm::ParameterSetDescription>("branches", branchSet);
0291 
0292   descriptions.addDefault(desc);
0293 }
0294 
0295 DEFINE_FWK_MODULE(NanoAODRNTupleOutputModule);