Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:19:08

0001 // -*- C++ -*-
0002 //
0003 //         Package:  IOPool/Input
0004 //           Class:  IOExerciser
0005 // Original Author:  Brian Bockelman
0006 //         Created:  Mon Jun  4 17:35:30 CDT 2012
0007 //
0008 /*
0009  Description: Read out a fixed subset of an input file
0010 
0011  Implementation:
0012 
0013 Much of the interaction with the framework is from EventContentAnalyzer and AsciiOutputModule
0014 
0015 See also IOPool/Input/doc/IOExerciser-README for a more detailed description of how
0016 to use this plugin.
0017 
0018 */
0019 
0020 // system include files
0021 #include <map>
0022 #include <memory>
0023 
0024 // user include files
0025 #include "DataFormats/Provenance/interface/StableProvenance.h"
0026 #include "FWCore/Framework/interface/Frameworkfwd.h"
0027 #include "FWCore/Framework/interface/one/OutputModule.h"
0028 #include "FWCore/Framework/interface/EventForOutput.h"
0029 #include "FWCore/Framework/interface/MakerMacros.h"
0030 #include "FWCore/Framework/interface/GenericHandle.h"
0031 #include "FWCore/Framework/interface/FileBlock.h"
0032 #include "FWCore/Utilities/interface/Exception.h"
0033 #include "FWCore/Utilities/interface/EDGetToken.h"
0034 #include "FWCore/Utilities/interface/propagate_const.h"
0035 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0036 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0037 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0038 
0039 #include "TBranch.h"
0040 #include "TTree.h"
0041 
0042 #include "ProductInfo.h"
0043 
0044 class IOExerciser : public edm::one::OutputModule<edm::WatchInputFiles> {
0045 public:
0046   explicit IOExerciser(const edm::ParameterSet&);
0047   ~IOExerciser() override;
0048 
0049   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0050 
0051   enum SelectionStrategy { SmallestFirst = 0, LargestFirst, RoundRobin };
0052 
0053 private:
0054   typedef std::map<edm::BranchID, edm::EDGetToken> TokenMap;
0055   // ----------required OutputModule functions-----------------------------
0056   void write(edm::EventForOutput const& e) override;
0057   void writeRun(edm::RunForOutput const&) override {}
0058   void writeLuminosityBlock(edm::LuminosityBlockForOutput const&) override {}
0059 
0060   void respondToOpenInputFile(edm::FileBlock const& fb) override;
0061   void respondToCloseInputFile(edm::FileBlock const& fb) override {}
0062 
0063   // ----------internal implementation functions---------------------------
0064   void computeProducts(edm::EventForOutput const& e, TokenMap const& tokens);
0065   void fillSmallestFirst(ProductInfos const& all_products, Long64_t threshold);
0066   void fillLargestFirst(ProductInfos const& all_products, Long64_t threshold);
0067   void fillRoundRobin(ProductInfos const& all_products, Long64_t threshold);
0068 
0069   // ----------member data ------------------------------------------------
0070   TokenMap m_tokens;
0071   bool m_fetchedProducts;
0072   edm::propagate_const<TTree*> m_eventsTree;
0073   ProductInfos m_products;
0074   ProductInfos m_all_products;
0075   unsigned int m_percentBranches;
0076   SelectionStrategy m_selectionStrategy;
0077   Long64_t m_currentUsage;
0078   const unsigned int m_triggerFactor;
0079   unsigned int m_triggerCount;
0080 };
0081 
0082 //
0083 // constructors and destructor
0084 //
0085 IOExerciser::IOExerciser(const edm::ParameterSet& pset)
0086     : edm::one::OutputModuleBase(pset),
0087       edm::one::OutputModule<edm::WatchInputFiles>(pset),
0088       m_tokens(),
0089       m_fetchedProducts(false),
0090       m_eventsTree(nullptr),
0091       m_percentBranches(pset.getUntrackedParameter<unsigned int>("percentBranches")),
0092       m_currentUsage(0),
0093       m_triggerFactor(pset.getUntrackedParameter<unsigned int>("triggerFactor")),
0094       m_triggerCount(0) {
0095   //now do what ever initialization is needed
0096   std::string const& selectionStrategy = pset.getUntrackedParameter<std::string>("selectionStrategy");
0097   if (selectionStrategy == "smallestFirst") {
0098     m_selectionStrategy = SmallestFirst;
0099   } else if (selectionStrategy == "largestFirst") {
0100     m_selectionStrategy = LargestFirst;
0101   } else if (selectionStrategy == "roundRobin") {
0102     m_selectionStrategy = RoundRobin;
0103   } else {
0104     edm::Exception ex(edm::errors::Configuration);
0105     ex << "Invalid IOExerciser selection strategy: " << selectionStrategy;
0106     throw ex;
0107   }
0108   if ((m_percentBranches < 1) || (m_percentBranches > 100)) {
0109     edm::Exception ex(edm::errors::Configuration);
0110     ex << "Invalid value for percentBranches (" << m_percentBranches << "); must be between 1 and 100, inclusive";
0111     throw ex;
0112   }
0113   for (auto const& product : keptProducts()[edm::InEvent]) {
0114     m_tokens.insert(std::make_pair(product.first->branchID(), product.second));
0115   }
0116 }
0117 
0118 IOExerciser::~IOExerciser() {}
0119 
0120 //
0121 // member functions
0122 //
0123 
0124 // ------------ method called for each event  ------------
0125 void IOExerciser::write(edm::EventForOutput const& e) {
0126   using namespace edm;
0127   if (!m_fetchedProducts) {
0128     computeProducts(e, m_tokens);
0129   }
0130 
0131   m_triggerCount += 1;
0132 
0133   int ctr = 0;
0134 
0135   ProductInfos& products_to_use = (m_triggerCount == m_triggerFactor) ? m_all_products : m_products;
0136   if (m_triggerCount == m_triggerFactor) {
0137     m_triggerCount = 0;
0138   }
0139 
0140   for (auto const& product : products_to_use) {
0141     edm::BasicHandle result = e.getByToken(product.token(), product.type());
0142     ctr++;
0143   }
0144   edm::LogInfo("IOExerciser") << "IOExerciser read out " << ctr << " products.";
0145 }
0146 
0147 // ------------ method called when starting to processes a run  ------------
0148 void IOExerciser::respondToOpenInputFile(edm::FileBlock const& fb) {
0149   TTree* eventsTree = fb.tree();
0150   if (!eventsTree) {
0151     edm::Exception ex(edm::errors::ProductNotFound);
0152     ex << "IOExerciser was run with a TFile missing an events TTree.";
0153     throw ex;
0154   }
0155   m_eventsTree = eventsTree;
0156 
0157   m_fetchedProducts = false;
0158 }
0159 
0160 void IOExerciser::computeProducts(edm::EventForOutput const& e, TokenMap const& tokens) {
0161   using namespace edm;
0162   typedef std::vector<StableProvenance const*> Provenances;
0163 
0164   m_fetchedProducts = true;
0165   Provenances provenances;
0166   e.getAllStableProvenance(provenances);
0167 
0168   if (!m_eventsTree) {
0169     edm::Exception ex(edm::errors::ProductNotFound);
0170     ex << "IOExerciser invoked computeProducts without an events TTree.";
0171     throw ex;
0172   }
0173 
0174   m_all_products.clear();
0175   m_all_products.reserve(provenances.size());
0176   Long64_t totalSize = 0;
0177   for (auto const& provenance : provenances) {
0178     const std::string& branchName = provenance->branchName();
0179 
0180     TBranch* branch = (TBranch*)m_eventsTree->GetBranch(branchName.c_str());
0181     if (!branch) {
0182       LogWarning("IOExerciser") << "Ignoring missing branch " << branchName;
0183       continue;
0184     }
0185     edm::BranchID bid = provenance->branchID();
0186     auto const iter = m_tokens.find(bid);
0187     if (iter == m_tokens.end()) {
0188       // product not kept
0189       continue;
0190     }
0191     ProductInfo pi(*provenance, *branch, iter->second);
0192     totalSize += pi.size();
0193     m_all_products.push_back(pi);
0194   }
0195 
0196   Long64_t threshold = m_percentBranches * totalSize / 100;
0197   LogDebug("IOExerciser") << "Threshold is " << threshold << " of " << totalSize << " bytes.";
0198 
0199   std::sort(m_all_products.begin(), m_all_products.end(), ProductInfo::sort);
0200 
0201   m_products.clear();
0202   m_currentUsage = 0;
0203 
0204   switch (m_selectionStrategy) {
0205     case SmallestFirst:
0206       fillSmallestFirst(m_all_products, threshold);
0207       break;
0208     case LargestFirst:
0209       fillLargestFirst(m_all_products, threshold);
0210       break;
0211     case RoundRobin:
0212       fillRoundRobin(m_all_products, threshold);
0213       break;
0214   }
0215 
0216   LogInfo("IOExerciser") << "Reading " << m_products.size() << " of " << m_all_products.size()
0217                          << " products.  Aggregate branch size is " << m_currentUsage << " of " << totalSize
0218                          << " bytes.";
0219 }
0220 
0221 void IOExerciser::fillSmallestFirst(ProductInfos const& all_products, Long64_t threshold) {
0222   for (ProductInfos::const_iterator it = all_products.begin(), itEnd = all_products.end();
0223        (it != itEnd) && (m_currentUsage < threshold);
0224        ++it) {
0225     m_products.push_back(*it);
0226     m_currentUsage += it->size();
0227     LogDebug("IOExerciser") << "Adding label " << it->tag().label() << ", size " << it->size() << "; current usage is "
0228                             << m_currentUsage << " of " << threshold << " bytes.";
0229   }
0230 }
0231 
0232 void IOExerciser::fillLargestFirst(ProductInfos const& all_products, Long64_t threshold) {
0233   m_currentUsage = 0;
0234   for (ProductInfos::const_iterator it = --all_products.end(), itBegin = all_products.begin();
0235        m_currentUsage < threshold;
0236        --it) {
0237     m_products.push_back(*it);
0238     m_currentUsage += it->size();
0239     LogDebug("IOExerciser") << "Adding label " << it->tag().label() << ", size " << it->size() << "; current usage is "
0240                             << m_currentUsage << " of " << threshold << " bytes.";
0241     if (it == itBegin) {
0242       break;
0243     }
0244   }
0245 }
0246 
0247 void IOExerciser::fillRoundRobin(ProductInfos const& all_products, Long64_t threshold) {
0248   size_t currentSmallest = 0, currentLargest = all_products.size() - 1;
0249   bool useSmallest = true;
0250   while (m_currentUsage < threshold) {
0251     if (useSmallest) {
0252       ProductInfo const& pi = all_products[currentSmallest];
0253       m_currentUsage += pi.size();
0254       m_products.push_back(pi);
0255       currentSmallest++;
0256       useSmallest = false;
0257       LogDebug("IOExerciser") << "Adding label " << pi.tag().label() << ", size " << pi.size() << "; current usage is "
0258                               << m_currentUsage << " of " << threshold << " bytes.";
0259     } else {
0260       ProductInfo const& pi = all_products[currentLargest];
0261       m_currentUsage += pi.size();
0262       m_products.push_back(pi);
0263       currentLargest--;
0264       useSmallest = true;
0265       LogDebug("IOExerciser") << "Adding label " << pi.tag().label() << ", size " << pi.size() << "; current usage is "
0266                               << m_currentUsage << " of " << threshold << " bytes.";
0267     }
0268   }
0269 }
0270 
0271 // ------------ method fills 'descriptions' with the allowed parameters for the module  ------------
0272 void IOExerciser::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0273   //The following says we do not know what parameters are allowed so do no validation
0274   // Please change this to state exactly what you do use, even if it is no parameters
0275   edm::ParameterSetDescription desc;
0276   desc.setComment("Reads a configurable subset of EDM files.");
0277   desc.addUntracked<unsigned int>("percentBranches", 100)
0278       ->setComment(
0279           "Control the percent of branches IOExerciser will read out.\n"
0280           "Branches are weighted by size.  Valid values are between 1 and 100.\n"
0281           "Additional branches will be read out until at least this percent has\n"
0282           "been read; thus, IOExerciser will likely read out more than this amount.");
0283   desc.addUntracked<std::string>("selectionStrategy", "smallestFirst")
0284       ->setComment(
0285           "Control the branch selection strategy:\n"
0286           "'smallestFirst' (default): Read branches in increasing order of size until limit is hit.\n"
0287           "'largestFirst': Read branches in decreasing order of size until limit is hit.\n"
0288           "'roundRobin': Read a small branch, then large branch.  Repeat until size limit is hit.");
0289   desc.addUntracked<unsigned int>("triggerFactor", 0)
0290       ->setComment(
0291           "Controls the trigger rate.  Once every 'triggerFactor' events, IOExerciser\n"
0292           "will read out all event data, not just the selected branches.  Setting to 10\n"
0293           "will cause it to read out one event in 10.  Setting it to zero would mean to\n"
0294           "disable trigger behavior completely.  Defaults to 0.");
0295   edm::one::OutputModule<edm::WatchInputFiles>::fillDescription(desc);
0296   descriptions.add("IOExerciser", desc);
0297 }
0298 
0299 //define this as a plug-in
0300 DEFINE_FWK_MODULE(IOExerciser);