Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:11

0001 #ifndef TagProbeFitter_h
0002 #define TagProbeFitter_h
0003 
0004 #include "TFile.h"
0005 #include "TChain.h"
0006 #include "TGraphAsymmErrors.h"
0007 #include "RooWorkspace.h"
0008 #include "RooFitResult.h"
0009 #include "RooDataSet.h"
0010 /// Tag-and-probe efficiency fitter: reads a TChain of input trees and computes efficiencies per bin by fit, side-band subtraction or counting (see the do*Efficiency methods), writing the results to an output file
0011 class TagProbeFitter {
0012 public:
0013   ///construct the fitter from the input file names, the input directory name, the input tree name and the output file name; also specify the number of CPUs, whether to save the workspace with data for each bin, whether to float the shape parameters, and the list of variables to fix
0014   TagProbeFitter(const std::vector<std::string>& inputFileNames,
0015                  std::string inputDirectoryName,
0016                  std::string inputTreeName,
0017                  std::string outputFileName,
0018                  int numCPU = 1,
0019                  bool saveWorkspace_ = false,
0020                  bool floatShapeParameters = true,
0021                  const std::vector<std::string>& fixVars_ = std::vector<std::string>());
0022 
0023   ///destructor closes the files
0024   ~TagProbeFitter();
0025 
0026   ///adds a new real variable (with title, range [low, hi] and units) to the set of variables describing the data in the tree
0027   bool addVariable(std::string variableName, std::string title, double low, double hi, std::string units);
0028 
0029   ///adds a new category variable to the set of variables describing the data in the tree; "expression" is parsed by factory()
0030   bool addCategory(std::string categoryName, std::string title, std::string expression);
0031 
0032   ///adds a new expression named "expressionName", defined by the string "expression" and the list of its "arguments" (presumably recorded in expressionVars -- confirm in the implementation)
0033   bool addExpression(std::string expressionName,
0034                      std::string title,
0035                      std::string expression,
0036                      const std::vector<std::string>& arguments);
0037 
0038   ///adds a new category based on a cut on the variable "varName" at "cutValue"
0039   bool addThresholdCategory(std::string categoryName, std::string title, std::string varName, double cutValue);
0040 
0041   ///add a new PDF to the list of available PDFs; "pdfCommands" are parsed by factory().
0042   /// the user needs to define efficiency[0.9,0,1] for the initial value, "signal" PDF, "backgroundPass" PDF and "backgroundFail" PDF
0043   void addPdf(std::string pdfName, std::vector<std::string>& pdfCommands);
0044 
0045   ///set a list of variables to fix during the first fit iteration. If the list is empty, do one iteration. NOTE(review): the method name is misspelled ("Variavles"); renaming it would break existing callers.
0046   void addFixedVariavles(const std::vector<std::string>&);
0047 
0048   ///calculate the efficiency for a particular binning of the data; it saves everything in the directory "dirName" and uses the previously defined PDF with name "pdfName". Single category/state convenience overload.
0049   std::string calculateEfficiency(std::string dirName,
0050                                   std::string efficiencyCategory,
0051                                   std::string efficiencyState,
0052                                   std::vector<std::string>& unbinnedVariables,
0053                                   std::map<std::string, std::vector<double> >& binnedReals,
0054                                   std::map<std::string, std::vector<std::string> >& binnedCategories,
0055                                   std::vector<std::string>& binToPDFmap) {
0056     std::vector<std::string> efficiencyCategories(1, efficiencyCategory);  // wrap the single category in a one-element vector
0057     std::vector<std::string> efficiencyStates(1, efficiencyState);  // wrap the single state in a one-element vector
0058     return calculateEfficiency(  // forward to the overload taking vectors of categories and states
0059         dirName, efficiencyCategories, efficiencyStates, unbinnedVariables, binnedReals, binnedCategories, binToPDFmap);
0060   }
0061   ///same as above, but taking vectors of efficiency categories and efficiency states
0062   std::string calculateEfficiency(std::string dirName,
0063                                   const std::vector<std::string>& efficiencyCategories,
0064                                   const std::vector<std::string>& efficiencyStates,
0065                                   std::vector<std::string>& unbinnedVariables,
0066                                   std::map<std::string, std::vector<double> >& binnedReals,
0067                                   std::map<std::string, std::vector<std::string> >& binnedCategories,
0068                                   std::vector<std::string>& binToPDFmap);
0069 
0070   /// set whether to do a binned fit; "bins" is stored as the number of mass bins (massBins)
0071   void setBinnedFit(bool binned, int bins = 0) {
0072     binnedFit = binned;  // bool converted to int: only 0 or 1 is stored through this setter
0073     massBins = bins;
0074   }
0075 
0076   /// set number of bins to use when making the plots; 0 = automatic
0077   void setBinsForMassPlots(int bins);
0078 
0079   /// turn on or off the saving of distribution plots
0080   void setSaveDistributionsPlot(bool saveDistributionsPlot_) { doSaveDistributionsPlot = saveDistributionsPlot_; }
0081 
0082   /// set a variable to be used as weight for a dataset. empty string means no weights.
0083   void setWeightVar(const std::string& weight);
0084 
0085   /// suppress most of the output from RooFit and Minuit
0086   void setQuiet(bool quiet_ = true);
0087 
0088   /// split mode - use it for very large input files (slower than non-split mode, which is the default)
0089   ///    0 - import input TTree as a whole (non-split mode)
0090   ///    non-zero value - use split reading mode and read specified number of events for each iteration
0091   void setSplitMode(unsigned int nevents);
0092 
0093 protected:
0094   ///pointer to the input TTree Chain of data
0095   TChain* inputTree;
0096 
0097   ///pointer to the output file
0098   TFile* outputFile;
0099 
0100   ///pointer to the TDirectory in the output file that is the root directory for this fitter
0101   TDirectory* outputDirectory;
0102 
0103   ///number of CPUs to use for the fit
0104   int numCPU;
0105 
0106   ///save distribution plots
0107   bool doSaveDistributionsPlot;
0108 
0109   ///the default option whether to save the workspace for each bin
0110   bool saveWorkspace;
0111 
0112   ///do binned fit; 0 = automatic, 1 = yes, -1 = no. NOTE(review): setBinnedFit() takes a bool, so that setter can only store 0 or 1.
0113   int binnedFit;
0114 
0115   ///number of bins to use in mass shape plots; 0 = automatic
0116   int massBins;
0117 
0118   ///the map of pdf names to the vector of commands to build the pdf
0119   std::map<std::string, std::vector<std::string> > pdfs;
0120 
0121   ///the set of variables describing the data in the input TTree
0122   RooArgSet variables;
0123 
0124   ///weight variable (or empty string for no weights)
0125   std::string weightVar;
0126 
0127   ///expressions computed almost on the fly; presumably ((name, title), (expression, arguments)) as passed to addExpression() -- confirm in the implementation
0128   //RooArgSet expressionVars;
0129   std::vector<std::pair<std::pair<std::string, std::string>, std::pair<std::string, std::vector<std::string> > > >
0130       expressionVars;
0131 
0132   // Threshold categories have to be created at the last minute
0133   // so we store just the info about them
0134   std::vector<std::pair<std::pair<std::string, std::string>, std::pair<std::string, double> > > thresholdCategories;
0135 
0136   ///list of variables to fix (see floatShapeParameters below)
0137   std::vector<std::string> fixVars;
0138   std::vector<double> fixVarValues;  ///< presumably the values stored by varSaver() and re-applied by varRestorer() -- confirm in the implementation
0139 
0140   ///release some variables before the fit in each bin
0141   ///if set to "false" will fit all dataset to get values of specified variables and then fit all bins having them fixed
0142   ///if set to "true" (default) will not fit all dataset, just each bin with fixed and then released variables
0143   bool floatShapeParameters;
0144 
0145   ///a RooWorkspace object to parse input parameters with ".factory()"
0146   RooWorkspace parameterParser;
0147 
0148   /// suppress most printout
0149   bool quiet;
0150 
0151   /// split mode - use it for very large input files (slower than non-split mode, which is the default)
0152   ///    0 - import input TTree as a whole (non-split mode)
0153   ///    non-zero value - use split reading mode and read specified number of events for each iteration
0154   unsigned int split_mode;
0155 
0156   ///fix or release variables selected by user
0157   void varFixer(RooWorkspace* w, bool fix);
0158   ///store values in the vector
0159   void varSaver(RooWorkspace* w);
0160   ///restore the variables' values for the fit starting point
0161   void varRestorer(RooWorkspace* w);
0162 
0163   ///calculate the efficiency with a simultaneous maximum likelihood fit in the dataset found in the workspace with PDF pdfName
0164   void doFitEfficiency(RooWorkspace* w, std::string pdfName, RooRealVar& efficiency);
0165 
0166   ///calculate the efficiency with side-band subtraction in the dataset found in the workspace
0167   void doSBSEfficiency(RooWorkspace* w, RooRealVar& efficiency);
0168 
0169   ///calculate the efficiency by counting in the dataset found in the workspace
0170   void doCntEfficiency(RooWorkspace* w, RooRealVar& efficiency);
0171 
0172   ///creates the simultaneous PDF in the workspace according to the "pdfCommands"
0173   void createPdf(RooWorkspace* w, std::vector<std::string>& pdfCommands);
0174 
0175   ///sets initial values of the PDF parameters based on the data available in the workspace
0176   void setInitialValues(RooWorkspace* w);
0177 
0178   ///saves the fit canvas
0179   void saveFitPlot(RooWorkspace* w);
0180 
0181   ///saves the distributions canvas
0182   void saveDistributionsPlot(RooWorkspace* w);
0183 
0184   ///saves the efficiency plots
0185   void saveEfficiencyPlots(RooDataSet& eff,
0186                            const TString& effName,
0187                            RooArgSet& binnedVariables,
0188                            RooArgSet& mappedCategories);
0189 
0190   ///makes the 1D plot of the efficiency versus the variable "v"; "catName"/"catIndex" are optional (defaults nullptr / -1)
0191   void makeEfficiencyPlot1D(RooDataSet& eff,
0192                             RooRealVar& v,
0193                             const TString& plotName,
0194                             const TString& plotTitle,
0195                             const TString& effName,
0196                             const char* catName = nullptr,
0197                             int catIndex = -1);
0198 
0199   ///makes the 2D plot of the efficiency versus the variables "v1" and "v2"; "catName"/"catIndex" are optional (defaults nullptr / -1)
0200   void makeEfficiencyPlot2D(RooDataSet& eff,
0201                             RooRealVar& v1,
0202                             RooRealVar& v2,
0203                             const TString& plotName,
0204                             const TString& plotTitle,
0205                             const TString& effName,
0206                             const char* catName = nullptr,
0207                             int catIndex = -1);
0208 };
0209 
0210 #endif  //TagProbeFitter_h