RecoParticleFlow/test/compare.py

0001 #!/usr/bin/env python3
0002 import sys
0003 import os
0004 import ROOT
0005 import argparse
0006
0007 # This is an example of plotting the standard tracking validation
0008 # plots from an explicit set of DQM root files.
0009
0010 from Validation.RecoTrack.plotting.validation import SimpleValidation, SimpleSample
0011
0012 from Validation.RecoTrack.plotting.plotting import Subtract, FakeDuplicate, CutEfficiency, Transform, AggregateBins, ROC, Plot, PlotEmpty, PlotGroup, PlotOnSideGroup, PlotFolder, Plotter
0013 from Validation.RecoTrack.plotting.html import PlotPurpose
0014
0015 from Validation.RecoParticleFlow.defaults_cfi import ptbins, etabins, response_distribution_name, muLowOffset, muHighOffset, npvLowOffset, npvHighOffset, candidateType, offset_name
0016 from offsetStack import offsetStack
0017
0018 def parse_sample_string(ss):
0019     spl = ss.split(":")
0020     if not (len(spl) >= 3):
0021         raise Exception("Sample must be in the format name:DQMfile1.root:DQMfile2.root:...")
0022
0023     name = spl[0]
0024     files = spl[1:]
0025
0026     #check that all supplied files are actually ROOT files
0027     for fi in files:
0028         print("Trying to open DQM file {0} for sample {1}".format(fi, name))
0029         if not os.path.isfile(fi):
0030             raise Exception("Could not read DQM file {0}, it does not exist".format(fi))
0031         tf = ROOT.TFile(fi)
0032         if not tf:
0033             raise Exception("Could not read DQM file {0}, it's not a ROOT file".format(fi))
0034         #KH d = tf.Get("DQMData/Run 1/Physics/Run summary")
0035         d = tf.Get("DQMData/Run 1/ParticleFlow/Run summary")
0036         if not d:
0037             raise Exception("Could not read DQM file {0}, it's does not seem to be a harvested DQM file".format(fi))
0038     return name, files
0039
0040 def parse_plot_string(ss):
0041     spl = ss.split(":")
0042     if not (len(spl) >= 3):
0043         raise Exception("Plot must be in the format folder:name:hist1:hist2:...")
0044
0045     folder = spl[0]
0046     name = spl[1]
0047     histograms = spl[2:]
0048
0049     return folder, name, histograms
0050
0051 def parse_args():
0052     parser = argparse.ArgumentParser()
0053     parser.add_argument("-s", "--sample", type=str, action='append',
0054         required=True,
0055         help="DQM files to compare for a single sample, in the format 'name:file1.root:file2.root:...:fileN.root'",
0056         #default=[
0057         #    "QCD:DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root:DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root"
0058         #]
0059     )
0060     parser.add_argument("-p", "--plots",
0061         type=str, action='append',
0062         required=False,
0063         help="Plots to put on a single canvas, in the format 'folder:name:plot1:plot2:...:plotN'",
0064         default = [],
0065         #default=[
0066         #    "JetResponse:reso_dist_10_24:reso_dist_10_24_eta05:reso_dist_10_24_eta13"
0067         #]
0068     )
0069     parser.add_argument("--doResponsePlots",
0070         action='store_true',
0071         required=False,
0072         help="If enabled, do all jet response plots"
0073     )
0074     parser.add_argument("--doOffsetPlots",
0075         action='store_true',
0076         required=False,
0077         help="If enabled, do all offset plots"
0078     )
0079     parser.add_argument("--doMETPlots",
0080         action='store_true',
0081         required=False,
0082         help="If enabled, do all JetMET plots"
0083     )
0084     parser.add_argument("--doPFCandPlots",
0085         action='store_true',
0086         required=False,
0087         help="If enabled, do all PFCandidate plots"
0088     )
0089
0090     parser.add_argument( "--offsetVar", type=str,   action='store', default="npv", help="variable to bin offset eT" )
0091     parser.add_argument( "--offsetDR",  type=float, action='store', default=0.4,   help="offset deltaR value" )
0092     args = parser.parse_args()
0093
0094     #collect all the SimpleSample objects
0095     samples = []
0096     plots = []
0097
0098     sample_strings = args.sample
0099     for ss in sample_strings:
0100         name, files = parse_sample_string(ss)
0101         samp = SimpleSample(name, name, [(fn, fn.split('/')[-2]) for fn in files])
0102         samples += [samp]
0103
0104     for ss in args.plots:
0105         folder, name, histograms = parse_plot_string(ss)
0106         plots += [(folder, name, histograms)]
0107
0108
0109     # This needs to be also changed whenever changing binning
0110     if args.doResponsePlots:
0111         # Needs to add extra folders here if the DQM files have other folders of histograms
0112         JetFolderDirs = ["JetResponse/slimmedJets/JEC", "JetResponse/slimmedJets/noJEC", "JetResponse/slimmedJetsPuppi/JEC", "JetResponse/slimmedJetsPuppi/noJEC"]
0113
0114         for JetFolderDir in JetFolderDirs:
0115             plots += [(JetFolderDir, "efficiency_pt", ["efficiency_eta05", "efficiency_eta13",
0116                                                   "efficiency_eta21","efficiency_eta25","efficiency_eta30","efficiency_eta50"])]
0117             plots += [(JetFolderDir, "purity_pt", ["purity_eta05", "purity_eta13",
0118                                                   "purity_eta21","purity_eta25","purity_eta30","purity_eta50"])]
0119             plots += [(JetFolderDir, "ratePUJet_pt", ["ratePUJet_eta05", "ratePUJet_eta13",
0120                                                   "ratePUJet_eta21","ratePUJet_eta25","ratePUJet_eta30","ratePUJet_eta50"])]
0121             plots += [(JetFolderDir, "reso_pt", ["preso_eta05", "preso_eta13",
0122                                                   "preso_eta21","preso_eta25","preso_eta30","preso_eta50"])]
0123             plots += [(JetFolderDir, "reso_pt_rms", ["preso_eta05_rms",
0124                                                       "preso_eta13_rms","preso_eta21_rms","preso_eta25_rms","preso_eta30_rms",
0125                                                       "preso_eta50_rms"])]
0126             plots += [(JetFolderDir, "response_pt", ["presponse_eta05",
0127                                                       "presponse_eta13", "presponse_eta21", "presponse_eta25", "presponse_eta30",
0128                                                       "presponse_eta50"])]
0129             plots += [(JetFolderDir, "response_pt_mean", ["presponse_eta05_mean",
0130                                                           "presponse_eta13_mean", "presponse_eta21_mean",
0131                                                           "presponse_eta25_mean", "presponse_eta30_mean",
0132                                                           "presponse_eta50_mean"])]
0133             plots += [(JetFolderDir, "response_pt_median", ["presponse_eta05_median",
0134                                                             "presponse_eta13_median", "presponse_eta21_median", "presponse_eta25_median",
0135                                                             "presponse_eta30_median", "presponse_eta50_median"])]
0136             for iptbin in range(len(ptbins)-1):
0137                 pthistograms = []
0138                 for ietabin in range(len(etabins)-1):
0139                     pthistograms += [response_distribution_name(iptbin, ietabin)]
0140                 plots += [(JetFolderDir, "response_{0:.0f}_{1:.0f}".format(ptbins[iptbin], ptbins[iptbin+1]), pthistograms)]
0141
0142     if args.doOffsetPlots:
0143         if args.offsetVar == "npv" :
0144             varHigh, varLow = npvHighOffset, npvLowOffset
0145         else :
0146             varHigh, varLow = muHighOffset, muLowOffset
0147         for ivar in range( varLow, varHigh ) :
0148             offsetHists = []
0149             for itype in candidateType :
0150                 offsetHists += [ offset_name( args.offsetVar, ivar, itype ) ]
0151             plots += [("Offset/{0}Plots/{0}{1}".format(args.offsetVar, ivar), "{0}{1}".format(args.offsetVar, ivar), offsetHists)]
0152
0153     if args.doMETPlots:
0154         doMETPlots(files, plots)
0155
0156
0157     if args.doPFCandPlots:
0158         doPFCandPlots(files, plots)
0159
0160     return samples, plots, args.doOffsetPlots, args.offsetVar, args.offsetDR, args.doPFCandPlots
0161
0162 # function that does METValidation from JetMET
0163 def doMETPlots(files, plots):
0164     #get the names of the histograms
0165     #MetValidation
0166     METHistograms = []
0167     f = ROOT.TFile(files[0])
0168     d = f.Get("DQMData/Run 1/JetMET/Run summary/METValidation/slimmedMETsPuppi")
0169     for i in d.GetListOfKeys():
0170         METHistograms.append([i.GetName()])
0171     # append plots
0172     METFolderDirs = ["METValidation/slimmedMETs","METValidation/slimmedMETsPuppi"]
0173     for METFolderDir in METFolderDirs:
0174         for  METHistogram in  METHistograms:
0175             plots += [(METFolderDir, "", METHistogram)]
0176
0177     #JetValidation
0178     JetHistograms = []
0179     d = f.Get("DQMData/Run 1/JetMET/Run summary/JetValidation/slimmedJets")
0180     for i in d.GetListOfKeys():
0181         JetHistograms.append([i.GetName()])
0182     JetValFolderDirs = ["JetValidation/slimmedJets", "JetValidation/slimmedJetsAK8", "JetValidation/slimmedJetsPuppi"]
0183     for JetValFolderDir in JetValFolderDirs:
0184         for JetHistogram in JetHistograms:
0185             plots += [(JetValFolderDir, "", JetHistogram)]
0186
0187 # does PFCandidate Plots
0188 def doPFCandPlots(files, plots):
0189     #we are going to hard code the end part of the histogram names because there's only 4
0190     hist_list = ["Charge", "Eta", "Phi", "Log10Pt", "PtLow","PtMid", "PtHigh"]
0191     f = ROOT.TFile(files[0])
0192     d = f.Get("DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates")
0193     #get the name of the folders, which can use to complete plot name as well probably
0194     PFFolderNames = []
0195
0196     for i in d.GetListOfKeys():
0197         PFFolderNames.append(i.GetName())
0198
0199     for PFFolderName in PFFolderNames:
0200         for hist in hist_list:
0201             plots += [(PFFolderName, "", [PFFolderName + hist])]
0202
0203
0204 def addPlots(plotter, folder, name, section, histograms, opts, Offset=False):
0205     folders = [folder]
0206     #plots = [PlotGroup(name, [Plot(h, **opts) for h in histograms])]
0207     #KH print plots
0208     if Offset :
0209         plots = [PlotGroup(name, [Plot(h, **opts) for h in histograms])]
0210         plotter.append("Offset", folders, PlotFolder(*plots, loopSubFolders=False, page="offset", section=section))
0211     elif "JetResponse" in folder :
0212         plots = [PlotGroup(name, [Plot(h, **opts) for h in histograms])]
0213         plotter.append("ParticleFlow/" + section, folders, PlotFolder(*plots, loopSubFolders=False, page="pf", section=section))
0214         for plot in plots:
0215             plot.setProperties(ncols=3)
0216             plot.setProperties(legendDw=-0.68)
0217             plot.setProperties(legendDh=0.005)
0218             plot.setProperties(legendDy=0.24)
0219             plot.setProperties(legendDx=0.05)
0220     elif "JetMET" in folder:
0221         for h in histograms:
0222             plots = [PlotGroup(h, [Plot(h, **opts)])]
0223         for plot in plots:
0224             plot.setProperties(ncols=1)
0225             plot.setProperties(legendDw=-0.5)
0226             plot.setProperties(legendDh=0.01)
0227             plot.setProperties(legendDy=0.24)
0228             plot.setProperties(legendDx=0.05)
0229         plotter.append("JetMET" + section, folders, PlotFolder(*plots, loopSubFolders=False, page="JetMET", section=section))
0230     if "PackedCandidates" in folder:
0231         for h in histograms:
0232             if ("PtMid" in h or "PtHigh" in h):
0233                 plots = [PlotGroup(h, [Plot(h, ymin = pow(10,-1), ylog = True)])]
0234             else:
0235                 plots = [PlotGroup(h, [Plot(h, **opts)])]
0236         for plot in plots:
0237             plot.setProperties(legendDw=-0.5)
0238             plot.setProperties(legendDh=0.01)
0239             plot.setProperties(legendDy=0.24)
0240             plot.setProperties(legendDx=0.05)
0241         plotter.append("ParticleFlow/PackedCandidates/" + section, folders, PlotFolder(*plots, loopSubFolders=False, page="PackedCandidates", section= section))
0242
0243
0244 def main():
0245
0246     # plot-dependent style options
0247     # style options can be found from Validation/RecoTrack/python/plotting/plotting.py
0248     styledict_resolution = {"xlog": True, "xgrid":False, "ygrid":False,
0249         "xtitle":"GenJet pT (GeV)", "ytitle":"Jet pT resolution",
0250         "xtitleoffset":7.7,"ytitleoffset":3.8,"adjustMarginLeft":0.00}
0251
0252     styledict_response = {"xlog": True, "xgrid":False, "ygrid":False,
0253         "xtitle":"GenJet pT (GeV)", "ytitle":"Jet response",
0254         "xtitleoffset":7.7,"ytitleoffset":3.8,"adjustMarginLeft":0.00}
0255
0256     styledict_rate = {"xlog": True, "xgrid":False, "ygrid":False,
0257         "xtitle":"RecoJet pT (GeV)", "ytitle":"PU Jet rate (#PUJets/event)",
0258         "xtitleoffset":7.7,"ytitleoffset":3.8,"adjustMarginLeft":0.00}
0259
0260     styledict_efficiency = {"xlog": True, "xgrid":False, "ygrid":False,
0261         "xtitle":"GenJet pT (GeV)", "ytitle":"Efficiency",
0262         "xtitleoffset":7.7,"ytitleoffset":3.8,"adjustMarginLeft":0.00}
0263
0264     styledict_purity = {"xlog": True, "xgrid":False, "ygrid":False,
0265         "xtitle":"RecoJet pT (GeV)", "ytitle":"Purity",
0266         "xtitleoffset":7.7,"ytitleoffset":3.8,"adjustMarginLeft":0.00}
0267
0268     plot_opts = {
0269         "efficiency_pt": styledict_efficiency,
0270         "purity_pt": styledict_purity,
0271         "ratePUJet_pt": styledict_rate,
0272         "reso_pt": styledict_resolution,
0273         "reso_pt_rms": styledict_resolution,
0274         "response_pt": styledict_response,
0275         "response_pt_mean": styledict_response,
0276         "response_pt_median": styledict_response,
0277     }
0278     for iptbin in range(len(ptbins)-1):
0279         plot_opts["response_{0:.0f}_{1:.0f}".format(ptbins[iptbin], ptbins[iptbin+1])] = {"stat": True}
0280
0281     samples, plots, doOffsetPlots, offsetVar, offsetDR, doPFCandPlots = parse_args()
0282
0283     plotter = Plotter()
0284
0285
0286     for folder, name, histograms in plots:
0287         opts = plot_opts.get(name, {})
0288
0289         #fullfolder =  "DQMData/Run 1/Physics/Run summary/{0}".format(folder)
0290         #fullfolder =  "DQMData/Run 1/ParticleFlow/Run summary/{0}".format(folder)
0291         fullJetFolder = "DQMData/Run 1/ParticleFlow/Run summary/{0}".format(folder)
0292         fullMETFolder = "DQMData/Run 1/JetMET/Run summary/{0}".format(folder)
0293         fullPFCandFolder = "DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/{0}".format(folder)
0294         print("Booking histogram group {0}={1} from folder {2}".format(name, histograms, folder))
0295         if "Offset/" in folder:
0296             opts = {'xtitle':'Default', 'ytitle':'Default'}
0297             addPlots(plotter, fullJetFolder, name, folder, histograms, opts, True)
0298         if "JetResponse" in folder:
0299             addPlots(plotter, fullJetFolder, name, folder, histograms, opts)
0300         if "METValidation" in folder or "JetValidation" in folder:
0301             addPlots(plotter, fullMETFolder, name, folder, histograms, opts)
0302         if doPFCandPlots:
0303             addPlots(plotter, fullPFCandFolder, name, folder, histograms, opts)
0304
0305     outputDir = "plots" # Plot output directory
0306     description = "Simple ParticleFlow comparison"
0307
0308     plotterDrawArgs = dict(
0309         separate=False, # Set to true if you want each plot in it's own canvas
0310     #    ratio=False,   # Uncomment to disable ratio pad
0311         saveFormat=".png",
0312     )
0313
0314
0315     val = SimpleValidation(samples, outputDir)
0316     report = val.createHtmlReport(validationName=description)
0317     val.doPlots([plotter], plotterDrawArgs=plotterDrawArgs)
0318
0319     report.write()
0320
0321     #add tdr-style stack plots to offset html file
0322     if doOffsetPlots :
0323         offsetDir = "OffsetStacks"
0324         fullOffsetDir = os.path.join( outputDir, offsetDir )
0325         os.makedirs( fullOffsetDir )
0326
0327         for s in samples :
0328             offFile = open( outputDir + "/" + s.label() + "_offset.html", "r")
0329             lines = offFile.readlines()
0330             offFile.close()
0331             for f in s.files() :
0332                 fname = f.split('/')[-2]
0333                 outName = offsetStack( [(fname,f)], offsetVar, offsetDR, fullOffsetDir )
0334                 outName = outName.replace("plots/", "") #KH: This "plots" look redundant and causes trouble for .html. Stripping it off.
0335                 addLine( outName, lines )
0336
0337                 for f2 in s.files() :
0338                     if f == f2 : continue
0339                     fname2 = f2.split('/')[-2]
0340                     outName = offsetStack( [(fname,f), (fname2,f2)], offsetVar, offsetDR, fullOffsetDir )
0341                     outName = outName.replace("plots/", "") #KH: This "plots" look redundant and causes trouble for .html. Stripping it off.
0342                     addLine( outName, lines )
0343
0344             offFile = open( outputDir + "/" + s.label() + "_offset.html", "w")
0345             lines = "".join(lines)
0346             offFile.write(lines)
0347             offFile.close()
0348
0349 def addLine(name, oldLines) :
0350     newLines = [
0351         '   <td><a href="{0}">{0}</a></td>\n'.format(name),
0352         '  <br/>\n',
0353         '  <br/>\n'
0354     ]
0355     oldLines[8:len(newLines)] = newLines
0356
0357 if __name__ == "__main__":
0358     main()