Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:31:49

0001 #! /usr/bin/env python3
0002 ################################################################################
0003 # RelMon: a tool for automatic Release Comparison                              
0004 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
0005 #
0006 #
0007 #                                                                              
0008 # Danilo Piparo CERN - danilo.piparo@cern.ch                                   
0009 #                                                                              
0010 ################################################################################
0011 
0012 from __future__ import print_function
0013 from builtins import range
0014 from optparse import OptionParser
0015 
0016 import os
0017 import pickle
0018 import glob
0019 from re import search
0020 from subprocess import call,PIPE
0021 from multiprocessing import Pool
0022 from sys import exit
0023 
0024 import sys
0025 argv=sys.argv
0026 sys.argv=[]
0027 if "RELMON_SA" in os.environ:
0028   import definitions as definitions
0029   from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
0030   from dirstructure import Directory
0031   from directories2html import directory2html,make_summary_table
0032   from utils import ask_ok, unpickler, make_files_pairs
0033 else:
0034   import Utilities.RelMon.definitions as definitions
0035   from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
0036   from Utilities.RelMon.dirstructure import Directory
0037   from Utilities.RelMon.directories2html import directory2html,make_summary_table
0038   from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
0039 sys.argv=argv
0040 
0041 #-------------------------------------------------------------------------------
0042 
def name2sample(filename):
  """Return the sample field of a file name.

  The directory part of `filename` is discarded and the remaining base name
  is split on double underscores; the second field is returned.
  Raises IndexError when the base name has fewer than two such fields.
  """
  fields=os.path.basename(filename).split("__")
  return fields[1]
0046 
def name2version(filename):
  """Return the version field of a file name.

  Only the base name of `filename` is considered; it is split on double
  underscores and the third field is returned.
  Raises IndexError when the base name has fewer than three such fields.
  """
  _, tail=os.path.split(filename)
  pieces=tail.split("__")
  return pieces[2]
0050   
def name2run(filename):
  """Return the run token of a file name.

  The base name is split on double underscores; the first field is then
  split on single underscores and its third token is returned.
  Raises IndexError when that first field has fewer than three tokens.
  """
  leading_field=os.path.basename(filename).split("__")[0]
  tokens=leading_field.split("_")
  return tokens[2]
0054 
def name2runskim(filename):
  """Return "<run>_<skim>" built from a file name.

  The run is the third underscore-separated token of the first
  double-underscore field of the base name. The skim is the last
  underscore-separated token of the third double-underscore field, with
  everything from its last "-v" onwards (the skim version) removed.
  """
  fields=os.path.basename(filename).split("__")
  run=fields[0].split("_")[2]
  skim=fields[2].split("_")[-1]
  # remove skim version
  version_pos=skim.rfind('-v')
  if version_pos!=-1:
    skim=skim[:version_pos]
  return "%s_%s"%(run,skim)
0062 
def name2globaltag(filename):
  """Return the global tag (GT) encoded in a file's base name.

  The third double-underscore field of the base name is split on "-" and
  its second piece is returned.
  Raises IndexError when either split yields too few pieces.
  """
  version_field=os.path.basename(filename).split("__")[2]
  dash_pieces=version_field.split("-")
  return dash_pieces[1]
0066 
0067 #-------------------------------------------------------------------------------  
0068 
def guess_params(ref_filenames,test_filenames):
  """Derive sample names and release versions from paired file names.

  The two lists are walked in lockstep (extra entries of the longer list are
  ignored). For every pair the sample and version are extracted from the
  base names with name2sample/name2version.

  Returns a tuple (samples, version1, version2):
    * samples  - one entry per pair, taken from the reference file; when the
                 reference version matches "20[01]" the last "_"-separated
                 token of that version is appended to the sample name.
    * version1 - version of the first reference file.
    * version2 - version of the first test file.
  When either input list is empty, ([], "", "") is returned.
  """
  if len(ref_filenames)*len(test_filenames)==0:
    print("Empty reference and test filenames lists!")
    return [],"",""

  samples=[]
  ref_versions=[]
  test_versions=[]

  for ref, test in zip(map(os.path.basename,ref_filenames),map(os.path.basename,test_filenames)):

    ref_sample=name2sample(ref)
    ref_version=name2version(ref)
    test_sample=name2sample(test)
    test_version=name2version(test)

    print("  ## sample 1: %s vs sample 2: %s"%(ref_sample, test_sample))

    # A mismatch is only reported, it does not abort the comparison.
    if ref_sample!=test_sample:
      print("Files %s and %s do not seem to be relative to the same sample." %(ref, test))

    # Slightly modify for data
    if search("20[01]",ref_version) is not None:
      ref_sample+=ref_version.split("_")[-1]
    samples.append(ref_sample)

    # append the versions
    ref_versions.append(ref_version)
    test_versions.append(test_version)

  # Use the versions of the first pair. Going through a set (as was done
  # previously) picked an arbitrary element whenever several versions were
  # present, so the result could change from one run to the next.
  cmssw_version1=ref_versions[0]
  cmssw_version2=test_versions[0]

  return samples,cmssw_version1,cmssw_version2
0114   
0115 
0116 #-------------------------------------------------------------------------------
0117 
def check_root_files(names_list):
  """Tell whether every name in `names_list` ends with ".root".

  On the first offending name a message naming that file is printed and
  False is returned; True is returned otherwise (including for an empty
  list).
  """
  for name in names_list:
    if not name.endswith(".root"):
      # The file name is now actually interpolated into the message.
      print("File %s does not seem to be a rootfile. Please check." %name)
      return False
  return True
0124 
0125 #-------------------------------------------------------------------------------
0126 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
  """Append `blist_piece` to a comma separated blacklist if a rule fires.

  blacklist   -- current blacklist string (may be empty).
  pattern     -- regular expression searched for in `target`; surrounding
                 whitespace is ignored. A leading "!" negates the rule, i.e.
                 the piece is added when the pattern is NOT found.
  target      -- string the pattern is searched in.
  blist_piece -- text appended when the rule fires.

  Returns the (possibly extended) blacklist string. An empty pattern follows
  plain regular expression semantics and therefore always matches.
  """
  int_pattern=pattern.strip()
  # startswith() is safe for an empty pattern, unlike indexing with [0].
  flip_condition=int_pattern.startswith('!')
  if flip_condition:
    int_pattern=int_pattern[1:]

  condition = search(int_pattern,target) is not None
  if flip_condition:
    condition = not condition

  if condition:
    if blacklist!="": # if not the first, add a comma
      blacklist+=","
    blacklist+=blist_piece
  return blacklist
0147 
0148 #-------------------------------------------------------------------------------
0149 
def guess_blacklists(samples,ver1,ver2,hlt):
  """Build a blacklist for each sample according to a set of rules.

  samples -- iterable of sample names; each becomes a key of the result.
  ver1    -- reference version string; a year pattern found in it selects
             the "data" rules, otherwise the "MC" rules are used.
  ver2    -- accepted for interface symmetry; not used here.
  hlt     -- when true the HLT rules are applied instead of the RECO ones.

  Returns a dict mapping every sample to a comma separated blacklist string.
  The pattern/blacklist pairs come from the `definitions` module and are
  applied through add_to_blacklist.
  """
  common_entries="FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"
  result={}
  for sample in samples:
    entries=common_entries

    if hlt:
      # HLT
      entries+=",AlCaEcalPi0@2"
      if search("2010+|2011+|2012+|2015+",ver1):
        # at the moment nothing more is added for data: hlt is ran already
        print("We are treating Data files for the HLT")
      else:
        print("We are treating MC files for the HLT")
        for pattern,blist in definitions.hlt_mc_pattern_blist_pairs:
          entries=add_to_blacklist(entries,pattern,sample,blist)

    elif search("2010+|2011+|2012+",ver1):
      # RECO, data: the rules are matched against the version string
      print("We are treating Data files:")
      entries+=",By__Lumi__Section@-1,AlCaReco@1"
      for pattern,blist in definitions.data_pattern_blist_pairs:
        entries=add_to_blacklist(entries,pattern,ver1,blist)

    else:
      # RECO, Monte Carlo: the rules are matched against the sample name
      print("We are treating MC files")
      for pattern,blist in definitions.mc_pattern_blist_pairs:
        entries=add_to_blacklist(entries,pattern,sample,blist)

    result[sample]=entries

  return result
0188 
0189 #-------------------------------------------------------------------------------  
0190 
def get_roofiles_in_dir(directory):
  """Return the paths of the ".root" files found directly in `directory`.

  The directory name is printed first. Each returned entry is `directory`
  joined with the file name. A real list is returned (not a lazy `map`
  object) so that callers can take its length and iterate it repeatedly.
  """
  print(directory)
  return [os.path.join(directory,s) for s in os.listdir(directory) if s.endswith(".root")]
0197   
0198 #-------------------------------------------------------------------------------  
0199 
def get_filenames_from_pool(all_samples):
  """Guess reference/test file pairs from the root files of a directory.

  all_samples -- directory that is scanned with get_roofiles_in_dir.

  The files found are handed to make_files_pairs; in the sequence it returns,
  entries at even positions are taken as reference files and entries at odd
  positions as test files. The guess is printed for the user.

  Returns (ref_filenames, test_filenames); both are empty lists when the
  directory holds no root file.
  """
  # Materialise the result: it is measured with len() and iterated more than
  # once below, which does not work on a one-shot iterator.
  files_list=list(get_roofiles_in_dir(all_samples))

  if len(files_list)==0:
    print("Zero files found in directory %s!" %all_samples)
    return [],[]

  # Are they an even number?
  for name in files_list:
    print("* ",name)
  if len(files_list)%2!=0:
    print("The numbuer of file is not even... Trying to recover a catastrophe.")

  files_list=make_files_pairs(files_list)

  # Try to couple them according to their position: even -> ref, odd -> test
  ref_filenames=[]
  test_filenames=[]
  for iname,filename in enumerate(files_list):
    if iname%2==0:
      ref_filenames.append(filename)
    else:
      test_filenames.append(filename)

  print("The guess would be the following:")
  for ref,test in zip(ref_filenames,test_filenames):
    refbasedir=os.path.dirname(ref)
    testbasedir=os.path.dirname(test)
    dir_to_print=refbasedir
    if refbasedir!=testbasedir:
      dir_to_print="%s and %s" %(refbasedir,testbasedir)
    print("* Directory: %s " %dir_to_print)
    print("  o %s" %os.path.basename(ref))
    print("  o %s" %os.path.basename(test))

  return ref_filenames,test_filenames
0250   
0251 
0252 #-------------------------------------------------------------------------------
0253 
def get_clean_fileanames(ref_samples,test_samples):
  """Turn two comma separated strings into lists of file names.

  Every entry is stripped of surrounding whitespace. The process exits with
  status 2 when the two lists differ in length or when check_root_files
  rejects one of them.

  Returns (ref_filenames, test_filenames).
  """
  # Process the samples starting from the names
  ref_filenames=[entry.strip() for entry in ref_samples.split(",")]
  test_filenames=[entry.strip() for entry in test_samples.split(",")]

  same_size = len(ref_filenames)==len(test_filenames)
  if not same_size:
    print("The numebr of reference and test files does not seem to be the same. Please check.")
    exit(2)

  if not check_root_files(ref_filenames) or not check_root_files(test_filenames):
    exit(2)
  return ref_filenames,test_filenames
0266 
0267 #-------------------------------------------------------------------------------
0268 
def count_alive_processes(p_list):
  """Count the entries of `p_list` whose `returncode` attribute equals None."""
  alive=0
  for proc in p_list:
    if proc.returncode==None:
      alive+=1
  return alive
0271 
0272 #-------------------------------------------------------------------------------
0273 
def call_compare_using_files(args):
  """Run compare_using_files.py on one reference/test pair.

  args -- sequence (sample, ref_filename, test_filename, options); a single
          argument is used so that the function can be mapped over a pool.

  The output directory passed with -o is "<sample>_<global tag>", the global
  tag being extracted from the reference file name. When the statistical
  test is "Bin2Bin" or "BinToBin", options.test_threshold is overwritten
  with 0.9999 before it is passed on.

  Returns the exit status of the called script.
  """
  sample, ref_filename, test_filename, options = args
  gt = name2globaltag(ref_filename)
  blacklists=guess_blacklists([sample],name2version(ref_filename),name2version(test_filename),options.hlt)

  if options.stat_test in ["Bin2Bin", "BinToBin"]:
    options.test_threshold = 0.9999

  # The argument vector is assembled directly instead of splitting a command
  # string on blanks, so that paths containing whitespace stay intact.
  command = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
  if options.do_pngs:
    command.append("-p")
  command += ["-o", "%s_%s" %(sample, gt)]
  command.append("--specify_run")
  command += ["-t", "%s" %options.test_threshold]
  command += ["-s", "%s" %options.stat_test]

  # Inspect the HLT directories
  if options.hlt:
    command += ["-d", "HLT"]

  if options.hash_name:
    command.append("--hash_name")

  if options.blacklist_file:
    command.append("--use_black_file")

  if options.standalone:
    command.append("--standalone")

  if len(blacklists[sample]) >0:
    command += ["-B", blacklists[sample]]

  print("\nExecuting --  %s" %" ".join(command))

  return call(command)
0311   
0312 
0313 #--------------------------------------------------------------------------------
0314 
def do_comparisons_threaded(options):
  """Compare all reference/test pairs in parallel worker processes.

  options -- parsed command line options. Read here: n_processes,
             all_samples, ref_samples, test_samples, out_dir and input_dir;
             the whole object is also forwarded to every worker.

  The file pairs come either from the directory options.all_samples or from
  the explicit ref/test lists. The comparisons run inside the output
  directory, "<version1>VS<version2>" when options.out_dir is empty.
  options.input_dir is set to that directory when it is empty. Once the
  comparisons are done the pickles found one level below the output
  directory are moved into it, and the original working directory is
  restored.

  Returns 0 when no sample could be determined, None otherwise.
  """
  n_processes= int(options.n_processes)

  if len(options.all_samples)>0:
    ref_filenames,test_filenames=get_filenames_from_pool(options.all_samples)
  else:
    ref_filenames,test_filenames=get_clean_fileanames(options.ref_samples,options.test_samples)

  # make the paths absolute: the working directory changes below
  ref_filenames=list(map(os.path.abspath,ref_filenames))
  test_filenames=list(map(os.path.abspath,test_filenames))

  samples,cmssw_version1,cmssw_version2=guess_params(ref_filenames,test_filenames)

  if len(samples)==0:
    print("No Samples found... Quitting")
    return 0

  original_dir=os.getcwd()

  outdir=options.out_dir
  if len(outdir)==0:
    print("Creating automatic outdir:", end=' ')
    outdir="%sVS%s" %(cmssw_version1,cmssw_version2)
    print(outdir)
  if len(options.input_dir)==0:
    print("Creating automatic indir:", end=' ')
    options.input_dir=outdir
    print(options.input_dir)

  # makedirs also copes with an output directory nested in missing parents
  if not os.path.exists(outdir):
    os.makedirs(outdir)
  os.chdir(outdir)

  try:
    # adjust the number of processes
    n_comparisons=len(ref_filenames)
    if n_comparisons < n_processes:
      n_processes=n_comparisons
      print("Less comparisons than possible processes: reducing n processes to", n_processes)

    ## Compare all pairs of root files
    args_iterable = [list(args) + [options] for args in zip(samples, ref_filenames, test_filenames)]
    pool = Pool(n_processes)
    try:
      pool.map(call_compare_using_files, args_iterable)
    finally:
      # release the worker processes whatever happened
      pool.close()
      pool.join()

    # move the pickles on the top, hack
    os.system("mv */*pkl .")
  finally:
    # Go back to where we started; ".." is only right for an output
    # directory that is a direct child of the starting directory.
    os.chdir(original_dir)
0392 #-------------------------------------------------------------------------------
def do_reports(indir):
  """Run compare_using_files.py on every pickle found in `indir`.

  indir -- directory scanned for files ending in ".pkl".

  NOTE: the module-level `options` object created in the __main__ section is
  read here (n_processes, do_pngs); it is not passed as an argument.

  The report jobs are started in batches of at most options.n_processes and
  each batch is waited for before the next one starts. The working directory
  is changed to `indir` for the duration of the call and restored afterwards.
  """
  # Popen is needed to start jobs without blocking; call() waits for the job
  # and returns a plain integer, which has no wait() method.
  from subprocess import Popen

  n_processes=int(options.n_processes)
  original_dir=os.getcwd()
  os.chdir(indir)
  try:
    # endswith() matches the "[:-4]" below, which strips exactly ".pkl"
    pkl_list=[x for x in os.listdir("./") if x.endswith(".pkl")]
    running_subprocesses=[]
    for pklfilename in pkl_list:
      command = ["compare_using_files.py", "-R"]
      if options.do_pngs:
        command.append("-p")
      command += ["-P", pklfilename]
      command += ["-o", pklfilename[:-4]]
      print("Executing %s" %" ".join(command))
      running_subprocesses.append(Popen(command))
      # batch complete: wait for all its jobs and start a new one
      if len(running_subprocesses)>=n_processes:
        for p in running_subprocesses:
          p.wait()
        running_subprocesses=[]

    # wait for the jobs of the last, incomplete batch
    for p in running_subprocesses:
      p.wait()
  finally:
    os.chdir(original_dir)
0419   
0420 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag, standalone):
  """Write the summary page RelMonSummary.html.

  options      -- parsed command line options; reports, input_dir and hlt
                  are read here.
  hashing_flag -- forwarded unchanged to make_summary_table.
  standalone   -- forwarded unchanged to make_summary_table.

  When options.reports is set, the reports of the single pickles are produced
  first with do_reports. The aggregation rules ('HLT' or 'reco') are taken
  from the `definitions` module. The page is written to the current working
  directory.
  """
  if options.reports:
    print("Preparing reports for the single files...")
    do_reports(options.input_dir)

  # check which aggregation rules are to be used
  if options.hlt:
    print("Aggregating directories according to HLT rules")
    rules_key='HLT'
  else:
    rules_key='reco'
  aggregation_rules=definitions.aggr_pairs_dict[rules_key]
  aggregation_rules_twiki=definitions.aggr_pairs_twiki_dict[rules_key]

  # Do the summary page
  table_html = make_summary_table(options.input_dir,aggregation_rules,aggregation_rules_twiki, hashing_flag, standalone)

  # create summary html file; "with" closes it even if the write fails
  with open("RelMonSummary.html","w") as ofile:
    ofile.write(table_html)
0443 
0444 #-------------------------------------------------------------------------------
0445 
if __name__ == "__main__":
  # Command line entry point: parse the options, run the comparisons when
  # samples are given and build the html summary when an input directory is
  # known.
  # NOTE: `options` must remain a module-level name, do_reports() reads it
  # without receiving it as an argument.

  #-----------------------------------------------------------------------------
  # Default values of the options
  ref_samples=""
  test_samples=""
  all_samples=""
  n_processes=1
  out_dir=""
  in_dir=""
  n_threads=1 # do not change this
  run=-1
  stat_test="Chi2"
  test_threshold=0.00001
  hlt=False
  #-----------------------------------------------------------------------------


  parser = OptionParser(usage="usage: %prog [options]")

  # The long option carried a trailing blank and was only reachable through
  # optparse's abbreviation matching; "--ref_samples" keeps working as before.
  parser.add_option("-R","--ref_samples",
                    action="store",
                    dest="ref_samples",
                    default=ref_samples,
                    help="The samples that act as reference (comma separated list)")

  parser.add_option("-T","--test_samples",
                    action="store",
                    dest="test_samples",
                    default=test_samples,
                    help="The samples to be tested (comma separated list)")

  parser.add_option("-a","--all_samples",
                    action="store",
                    dest="all_samples",
                    default=all_samples,
                    help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

  parser.add_option("-o","--out_dir",
                    action="store",
                    dest="out_dir",
                    default=out_dir,
                    help="The outdir other than <Version1>VS<Version2>")

  parser.add_option("-p","--do_pngs",
                    action="store_true",
                    dest="do_pngs",
                    default=False,
                    help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

  # Same trailing blank issue as for --ref_samples.
  parser.add_option("-r","--run",
                    action="store",
                    dest="run",
                    default=run,
                    help="The run to be checked \n(default is %s)" %run)

  parser.add_option("-t","--test_threshold",
                    action="store",
                    dest="test_threshold",
                    default=test_threshold,
                    help="Threshold for the statistical test \n(default is %s)" %test_threshold)

  parser.add_option("-s","--stat_test",
                    action="store",
                    dest="stat_test",
                    default=stat_test,
                    help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

  parser.add_option("-N","--numberOfProcesses",
                    action="store",
                    dest="n_processes",
                    default=n_processes,
                    help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

  parser.add_option("--HLT",
                    action="store_true",
                    dest="hlt",
                    default=False,
                    help="Analyse HLT histograms\n(default is %s)" %hlt)

  parser.add_option("-i","--input_dir",
                    action="store",
                    dest="input_dir",
                    default=in_dir,
                    help="Input directory for html creation \n(default is %s)" %in_dir)

  # The help text used to interpolate in_dir (an empty string) instead of the
  # actual default of this flag.
  parser.add_option("--reports",
                    action="store_true",
                    dest="reports",
                    default=False,
                    help="Do the reports for the pickles \n(default is %s)" %False)
##---HASHING---##
  parser.add_option("--hash_name",
                    action="store_true",
                    dest="hash_name",
                    default=False,
                    help="Set if you want to minimize & hash the output HTML files.")
##--Blacklist File --##
  parser.add_option("--use_black_file",
                    action="store_true",
                    dest="blacklist_file",
                    default=False,
                    help="Use a black list file of histograms located @ /RelMon/data")
##-- USE CSS files in web access, for stand-alone usage --##
  parser.add_option("--standalone",
                    action="store_true",
                    dest="standalone",
                    default=False,
                    help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

  (options, args) = parser.parse_args()

  # Nothing to do without samples (ref AND test, or a pool) or an input dir.
  if len(options.test_samples)*len(options.ref_samples)+len(options.all_samples)==0 and len(options.input_dir)==0:
    print("No samples given as input.")
    parser.print_help()
    exit(2)

  if len(options.all_samples)>0 or (len(options.ref_samples)*len(options.test_samples)>0):
    do_comparisons_threaded(options)
  # do_comparisons_threaded fills options.input_dir when it was left empty
  if len(options.input_dir)>0:
    do_html(options, options.hash_name, options.standalone)
0566 
0567 
0568 
0569 
0570 
0571 
0572 
0573 
0574 
0575 
0576 
0577