Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:16:51

0001 #ifndef StringBasedNTupler_NTupler_H
0002 #define StringBasedNTupler_NTupler_H
0003 
0004 #include "FWCore/Utilities/interface/InputTag.h"
0005 #include "FWCore/Framework/interface/Frameworkfwd.h"
0006 #include "FWCore/Framework/interface/ProducesCollector.h"
0007 
0008 #include "CommonTools/UtilAlgos/interface/TFileService.h"
0009 #include "TTree.h"
0010 #include "TBranch.h"
0011 #include "TFile.h"
0012 
0013 #include "PhysicsTools/UtilAlgos/interface/NTupler.h"
0014 
0015 #include "FWCore/Framework/interface/Event.h"
0016 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0017 #include "CommonTools/Utils/interface/StringObjectFunction.h"
0018 #include "CommonTools/Utils/interface/StringCutObjectSelector.h"
0019 #include "FWCore/ServiceRegistry/interface/Service.h"
0020 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0021 
0022 #include "CommonTools/UtilAlgos/interface/InputTagDistributor.h"
0023 
0024 #include "DataFormats/PatCandidates/interface/PFParticle.h"
0025 #include "SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h"
0026 
0027 #include <memory>
0028 #include <string>
0029 #include <sstream>
0030 
0031 #include "FWCore/Framework/interface/Frameworkfwd.h"
0032 #include "FWCore/Framework/interface/Event.h"
0033 #include "FWCore/Framework/interface/MakerMacros.h"
0034 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0035 
0036 // LHE Event
0037 #include "SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h"
0038 
0039 class TreeBranch {
0040 public:
0041   TreeBranch() : class_(""), expr_(""), order_(""), selection_(""), maxIndexName_(""), branchAlias_("") {}
0042   TreeBranch(
0043       std::string C, edm::InputTag S, std::string E, std::string O, std::string SE, std::string Mi, std::string Ba)
0044       : class_(C), src_(S), expr_(E), order_(O), selection_(SE), maxIndexName_(Mi), branchAlias_(Ba) {
0045     branchTitle_ = E + " calculated on " + C + " object from " + S.encode();
0046     if (!O.empty())
0047       branchTitle_ += " ordered according to " + O;
0048     if (!SE.empty())
0049       branchTitle_ += " selecting on " + SE;
0050     edm::LogInfo("TreeBranch") << "the branch with alias: " << branchAlias_ << " corresponds to: " << branchTitle_;
0051   }
0052 
0053   const std::string& className() const { return class_; }
0054   const edm::InputTag& src() const { return src_; }
0055   const std::string& expr() const { return expr_; }
0056   const std::string& order() const { return order_; }
0057   const std::string& selection() const { return selection_; }
0058   const std::string& maxIndexName() const { return maxIndexName_; }
0059   const std::string branchName() const {
0060     std::string name(branchAlias_);
0061     std::replace(name.begin(), name.end(), '_', '0');
0062     return std::string(name);
0063   }
0064   const std::string& branchAlias() const { return branchAlias_; }
0065   const std::string& branchTitle() const { return branchTitle_; }
0066   typedef std::unique_ptr<std::vector<float>> value;
0067   value branch(const edm::Event& iEvent);
0068 
0069   std::vector<float>** dataHolderPtrAdress() { return &dataHolderPtr_; }
0070   std::vector<float>* dataHolderPtr() { return dataHolderPtr_; }
0071   void assignDataHolderPtr(std::vector<float>* data) { dataHolderPtr_ = data; }
0072 
0073 private:
0074   std::string class_;
0075   edm::InputTag src_;
0076   std::string expr_;
0077   std::string order_;
0078   std::string selection_;
0079   std::string maxIndexName_;
0080   std::string branchAlias_;
0081   std::string branchTitle_;
0082 
0083   std::vector<float>* dataHolderPtr_;
0084 };
0085 
0086 template <typename Object>
0087 class StringLeaveHelper {
0088 public:
0089   typedef TreeBranch::value value;
0090   value operator()() { return std::move(value_); }
0091 
0092   StringLeaveHelper(const TreeBranch& B, const edm::Event& iEvent) {
0093     const float defaultValue = 0.;
0094     //    grab the object
0095     edm::Handle<Object> oH;
0096     iEvent.getByLabel(B.src(), oH);
0097     //empty vector if product not found
0098     if (oH.failedToGet()) {
0099       edm::LogError("StringBranchHelper") << "cannot open: " << B.src();
0100       value_ = std::make_unique<std::vector<float>>(0);
0101     } else {
0102       //parser for the object expression
0103       StringObjectFunction<Object> expr(B.expr());
0104       //allocate enough memory for the data holder
0105       value_ = std::make_unique<std::vector<float>>(1);
0106       try {
0107         (*value_)[0] = (expr)(*oH);
0108       } catch (...) {
0109         LogDebug("StringLeaveHelper") << "could not evaluate expression: " << B.expr()
0110                                       << " on class: " << B.className();
0111         (*value_)[0] = defaultValue;
0112       }
0113     }
0114   }
0115 
0116 private:
0117   value value_;
0118 };
0119 
0120 template <typename Object, typename Collection = std::vector<Object>>
0121 class StringBranchHelper {
0122 public:
0123   typedef TreeBranch::value value;
0124   value operator()() { return std::move(value_); }
0125 
0126   StringBranchHelper(const TreeBranch& B, const edm::Event& iEvent) {
0127     const float defaultValue = 0.;
0128 
0129     //    grab the collection
0130     edm::Handle<Collection> oH;
0131     iEvent.getByLabel(B.src(), oH);
0132 
0133     //empty vector if product not found
0134     if (oH.failedToGet()) {
0135       if (!(iEvent.isRealData() && B.className() == "reco::GenParticle")) {  //don't output genparticle error in data
0136         edm::LogError("StringBranchHelper") << "cannot open: " << B.src() << "  " << B.className();
0137       }
0138       value_ = std::make_unique<std::vector<float>>();
0139     } else {
0140       //parser for the object expression
0141       StringObjectFunction<Object> expr(B.expr());
0142       //allocate enough memory for the data holder
0143       value_ = std::make_unique<std::vector<float>>();
0144       value_->reserve(oH->size());
0145 
0146       StringCutObjectSelector<Object>* selection = nullptr;
0147       if (!B.selection().empty()) {
0148         //std::cout<<"trying to get to a selection"<<std::endl;
0149         selection = new StringCutObjectSelector<Object>(B.selection());
0150         //std::cout<<"got the objet"<<std::endl;
0151       }
0152       uint i_end = oH->size();
0153       //sort things first if requested
0154       if (!B.order().empty()) {
0155         StringObjectFunction<Object> order(B.order());
0156         // allocate a vector of pointers (we are using view) to be sorted
0157         std::vector<const Object*> copyToSort(oH->size());
0158         for (uint i = 0; i != i_end; ++i)
0159           copyToSort[i] = &(*oH)[i];
0160         std::sort(copyToSort.begin(), copyToSort.end(), sortByStringFunction<Object>(&order));
0161         //then loop and fill
0162         for (uint i = 0; i != i_end; ++i) {
0163           //try and catch is necessary because ...
0164           try {
0165             if (selection && !((*selection)(*(copyToSort)[i])))
0166               continue;
0167             value_->push_back((expr)(*(copyToSort)[i]));
0168           } catch (...) {
0169             LogDebug("StringBranchHelper")
0170                 << "with sorting. could not evaluate expression: " << B.expr() << " on class: " << B.className();
0171             value_->push_back(defaultValue);  //push a default value to not change the indexing
0172           }
0173         }
0174       } else {
0175         //actually fill the vector of values
0176         for (uint i = 0; i != i_end; ++i) {
0177           //try and catch is necessary because ...
0178           try {
0179             if (selection && !((*selection)((*oH)[i])))
0180               continue;
0181             value_->push_back((expr)((*oH)[i]));
0182           } catch (...) {
0183             LogDebug("StringBranchHelper")
0184                 << "could not evaluate expression: " << B.expr() << " on class: " << B.className();
0185             value_->push_back(defaultValue);  //push a default value to not change the indexing
0186           }
0187         }
0188       }
0189       if (selection)
0190         delete selection;
0191     }
0192   }
0193 
0194 private:
0195   value value_;
0196 };
0197 
0198 class StringBasedNTupler : public NTupler {
0199 public:
0200   StringBasedNTupler(const edm::ParameterSet& iConfig) {
0201     edm::ParameterSet branchesPSet = iConfig.getParameter<edm::ParameterSet>("branchesPSet");
0202     std::vector<std::string> branches;
0203     branchesPSet.getParameterSetNames(branches);
0204     const std::string separator = branchesPSet.getUntrackedParameter<std::string>("separator", ":");
0205     for (uint b = 0; b != branches.size(); ++b) {
0206       edm::ParameterSet bPSet = branchesPSet.getParameter<edm::ParameterSet>(branches[b]);
0207       std::string className = "";
0208       if (bPSet.exists("class"))
0209         className = bPSet.getParameter<std::string>("class");
0210       else
0211         className = bPSet.getParameter<std::string>("Class");
0212       edm::InputTag src = edm::Service<InputTagDistributorService>()->retrieve("src", bPSet);
0213       edm::ParameterSet leavesPSet = bPSet.getParameter<edm::ParameterSet>("leaves");
0214       std::string order = "";
0215       if (bPSet.exists("order"))
0216         order = bPSet.getParameter<std::string>("order");
0217       std::string selection = "";
0218       if (bPSet.exists("selection"))
0219         selection = bPSet.getParameter<std::string>("selection");
0220       // do it one by one with configuration [string x = "x"]
0221       std::vector<std::string> leaves = leavesPSet.getParameterNamesForType<std::string>();
0222       std::string maxName = "N" + branches[b];
0223       for (uint l = 0; l != leaves.size(); ++l) {
0224         std::string leave_expr = leavesPSet.getParameter<std::string>(leaves[l]);
0225         std::string branchAlias = branches[b] + "_" + leaves[l];
0226 
0227         //add a branch manager for this expression on this collection
0228         branches_[maxName].push_back(TreeBranch(className, src, leave_expr, order, selection, maxName, branchAlias));
0229       }  //loop the provided leaves
0230 
0231       //do it once with configuration [vstring vars = { "x:x" ,... } ] where ":"=separator
0232       if (leavesPSet.exists("vars")) {
0233         std::vector<std::string> leavesS = leavesPSet.getParameter<std::vector<std::string>>("vars");
0234         for (uint l = 0; l != leavesS.size(); ++l) {
0235           uint sep = leavesS[l].find(separator);
0236           std::string name = leavesS[l].substr(0, sep);
0237           //removes spaces from the variable name
0238           /*uint*/ int space = name.find(' ');
0239           while (space != -1 /*std::string::npos*/) {
0240             std::string first = name.substr(0, space);
0241             std::string second = name.substr(space + 1);
0242             name = first + second;
0243             space = name.find(' ');
0244           }
0245           std::string expr = leavesS[l].substr(sep + 1);
0246           std::string branchAlias = branches[b] + "_" + name;
0247 
0248           //add a branch manager for this expression on this collection
0249           branches_[maxName].push_back(TreeBranch(className, src, expr, order, selection, maxName, branchAlias));
0250         }
0251       }
0252 
0253     }  //loop the provided branches
0254 
0255     ev_ = new uint64_t;
0256     run_ = new uint;
0257     lumiblock_ = new uint;
0258     experimentType_ = new uint;
0259     bunchCrossing_ = new uint;
0260     orbitNumber_ = new uint;
0261     weight_ = new float;
0262     model_params_ = new std::string;
0263 
0264     if (branchesPSet.exists("useTFileService"))
0265       useTFileService_ = branchesPSet.getParameter<bool>("useTFileService");
0266     else
0267       useTFileService_ = iConfig.getParameter<bool>("useTFileService");
0268 
0269     if (useTFileService_) {
0270       if (branchesPSet.exists("treeName")) {
0271         treeName_ = branchesPSet.getParameter<std::string>("treeName");
0272         ownTheTree_ = true;
0273       } else {
0274         treeName_ = iConfig.getParameter<std::string>("treeName");
0275         ownTheTree_ = false;
0276       }
0277     }
0278   }
0279 
0280   uint registerleaves(edm::ProducesCollector producesCollector) override {
0281     uint nLeaves = 0;
0282 
0283     if (useTFileService_) {
0284       edm::Service<TFileService> fs;
0285       if (ownTheTree_) {
0286         ownTheTree_ = true;
0287         tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
0288       } else {
0289         TObject* object = fs->file().Get(treeName_.c_str());
0290         if (!object) {
0291           ownTheTree_ = true;
0292           tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
0293         } else {
0294           tree_ = dynamic_cast<TTree*>(object);
0295           if (!tree_) {
0296             ownTheTree_ = true;
0297             tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
0298           } else
0299             ownTheTree_ = false;
0300         }
0301       }
0302 
0303       //reserve memory for the indexes
0304       indexDataHolder_ = new uint[branches_.size()];
0305       // loop the automated leafer
0306       Branches::iterator iB = branches_.begin();
0307       Branches::iterator iB_end = branches_.end();
0308       uint indexOfIndexInDataHolder = 0;
0309       for (; iB != iB_end; ++iB, ++indexOfIndexInDataHolder) {
0310         //create a branch for the index: an integer
0311         tree_->Branch(iB->first.c_str(), &(indexDataHolder_[indexOfIndexInDataHolder]), (iB->first + "/i").c_str());
0312         //loop on the "leaves"
0313         std::vector<TreeBranch>::iterator iL = iB->second.begin();
0314         std::vector<TreeBranch>::iterator iL_end = iB->second.end();
0315         for (; iL != iL_end; ++iL) {
0316           TreeBranch& b = *iL;
0317           //create a branch for the leaves: vector of floats
0318           TBranch* br = tree_->Branch(b.branchAlias().c_str(), "std::vector<float>", iL->dataHolderPtrAdress());
0319           br->SetTitle(b.branchTitle().c_str());
0320           nLeaves++;
0321         }
0322       }
0323 
0324       //extra leaves for event info.
0325       tree_->Branch("run", run_, "run/i");
0326       tree_->Branch("event", ev_, "event/l");
0327       tree_->Branch("lumiblock", lumiblock_, "lumiblock/i");
0328       tree_->Branch("experimentType", experimentType_, "experimentType/i");
0329       tree_->Branch("bunchCrossing", bunchCrossing_, "bunchCrossing/i");
0330       tree_->Branch("orbitNumber", orbitNumber_, "orbitNumber/i");
0331       tree_->Branch("weight", weight_, "weight/f");
0332       tree_->Branch("model_params", &model_params_);
0333 
0334     } else {
0335       // loop the automated leafer
0336       Branches::iterator iB = branches_.begin();
0337       Branches::iterator iB_end = branches_.end();
0338       for (; iB != iB_end; ++iB) {
0339         //the index. should produce it only once
0340         // a simple uint for the index
0341         producesCollector.produces<uint>(iB->first).setBranchAlias(iB->first);
0342         std::vector<TreeBranch>::iterator iL = iB->second.begin();
0343         std::vector<TreeBranch>::iterator iL_end = iB->second.end();
0344         for (; iL != iL_end; ++iL) {
0345           TreeBranch& b = *iL;
0346           //a vector of float for each leave
0347           producesCollector.produces<std::vector<float>>(b.branchName()).setBranchAlias(b.branchAlias());
0348           nLeaves++;
0349         }
0350       }
0351     }
0352     return nLeaves;
0353   }
0354 
0355   void fill(edm::Event& iEvent) override {
0356     //    if (!edm::Service<UpdaterService>()->checkOnce("StringBasedNTupler::fill")) return;
0357     //well if you do that, you cannot have two ntupler of the same type in the same job...
0358 
0359     if (useTFileService_) {
0360       // loop the automated leafer
0361       Branches::iterator iB = branches_.begin();
0362       Branches::iterator iB_end = branches_.end();
0363       uint indexOfIndexInDataHolder = 0;
0364       for (; iB != iB_end; ++iB, ++indexOfIndexInDataHolder) {
0365         std::vector<TreeBranch>::iterator iL = iB->second.begin();
0366         std::vector<TreeBranch>::iterator iL_end = iB->second.end();
0367         uint maxS = 0;
0368         for (; iL != iL_end; ++iL) {
0369           TreeBranch& b = *iL;
0370           // grab the vector of values from the interpretation of expression for the associated collection
0371           std::unique_ptr<std::vector<float>> branch(b.branch(iEvent));
0372           // calculate the maximum index size.
0373           if (branch->size() > maxS)
0374             maxS = branch->size();
0375           // transfer of (no copy) pointer to the vector of float from the std::unique_ptr to the tree data pointer
0376           b.assignDataHolderPtr(branch.release());
0377           // for memory tracing, object b is holding the data (not std::unique_ptr) and should delete it for each event (that's not completely optimum)
0378         }
0379         //assigne the maximum vector size for this collection
0380         indexDataHolder_[indexOfIndexInDataHolder] = maxS;
0381       }
0382 
0383       //fill event info.
0384       *run_ = iEvent.id().run();
0385       *ev_ = iEvent.id().event();
0386       //      *lumiblock_ = iEvent.id().luminosityBlock();
0387       *lumiblock_ = iEvent.luminosityBlock();
0388       *experimentType_ = iEvent.experimentType();
0389       *bunchCrossing_ = iEvent.bunchCrossing();
0390       *orbitNumber_ = iEvent.orbitNumber();
0391 
0392       *weight_ = 1;
0393       if (!iEvent.isRealData()) {
0394         edm::Handle<GenEventInfoProduct> wgeneventinfo;
0395         iEvent.getByLabel("generator", wgeneventinfo);
0396         *weight_ = wgeneventinfo->weight();
0397       }
0398 
0399       typedef std::vector<std::string>::const_iterator comments_const_iterator;
0400       //      using namespace edm;
0401 
0402       edm::Handle<LHEEventProduct> product;
0403       *model_params_ = "NULL";
0404       if (iEvent.getByLabel("source", product)) {
0405         comments_const_iterator c_begin = product->comments_begin();
0406         comments_const_iterator c_end = product->comments_end();
0407 
0408         for (comments_const_iterator cit = c_begin; cit != c_end; ++cit) {
0409           size_t found = (*cit).find("model");
0410           if (found != std::string::npos) {
0411             //std::cout << *cit << std::endl;
0412             *model_params_ = *cit;
0413           }
0414         }
0415       }
0416 
0417       if (ownTheTree_) {
0418         tree_->Fill();
0419       }
0420     } else {
0421       // loop the automated leafer
0422       Branches::iterator iB = branches_.begin();
0423       Branches::iterator iB_end = branches_.end();
0424       for (; iB != iB_end; ++iB) {
0425         std::vector<TreeBranch>::iterator iL = iB->second.begin();
0426         std::vector<TreeBranch>::iterator iL_end = iB->second.end();
0427         uint maxS = 0;
0428         for (; iL != iL_end; ++iL) {
0429           TreeBranch& b = *iL;
0430           std::unique_ptr<std::vector<float>> branch(b.branch(iEvent));
0431           if (branch->size() > maxS)
0432             maxS = branch->size();
0433           iEvent.put(std::move(branch), b.branchName());
0434         }
0435         //index should be put only once per branch. doe not really mattter for edm root files
0436         iEvent.put(std::make_unique<uint>(maxS), iB->first);
0437       }
0438     }
0439   }
0440 
0441   void callBack() {
0442     if (useTFileService_) {
0443       Branches::iterator iB = branches_.begin();
0444       Branches::iterator iB_end = branches_.end();
0445       //de-allocate memory now: allocated in branch(...) and released to the pointer.
0446       for (; iB != iB_end; ++iB) {
0447         std::vector<TreeBranch>::iterator iL = iB->second.begin();
0448         std::vector<TreeBranch>::iterator iL_end = iB->second.end();
0449         for (; iL != iL_end; ++iL) {
0450           TreeBranch& b = *iL;
0451           delete b.dataHolderPtr();
0452         }
0453       }
0454     }
0455   }
0456 
0457   ~StringBasedNTupler() override {
0458     delete indexDataHolder_;
0459     delete ev_;
0460     delete run_;
0461     delete lumiblock_;
0462     delete experimentType_;
0463     delete bunchCrossing_;
0464     delete orbitNumber_;
0465     delete weight_;
0466     delete model_params_;
0467   }
0468 
0469 protected:
0470   typedef std::map<std::string, std::vector<TreeBranch>> Branches;
0471   Branches branches_;
0472 
0473   bool ownTheTree_;
0474   std::string treeName_;
0475   uint* indexDataHolder_;
0476 
0477   //event info
0478   uint64_t* ev_;
0479   uint* run_;
0480   uint* lumiblock_;
0481   uint* experimentType_;
0482   uint* bunchCrossing_;
0483   uint* orbitNumber_;
0484   float* weight_;
0485   std::string* model_params_;
0486 };
0487 
0488 #endif