Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-26 02:34:35

0001 #! /usr/bin/env python3
0002 ################################################################################
0003 # RelMon: a tool for automatic Release Comparison                              
0004 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
0005 #
0006 #
0007 #                                                                              
0008 # Danilo Piparo CERN - danilo.piparo@cern.ch                                   
0009 #                                                                              
0010 ################################################################################
0011 
0012 from builtins import range
0013 from optparse import OptionParser
0014 
0015 import os
0016 import pickle
0017 import glob
0018 from re import search
0019 from subprocess import call,PIPE
0020 from multiprocessing import Pool
0021 from sys import exit
0022 
0023 import sys
0024 argv=sys.argv
0025 sys.argv=[]
0026 if "RELMON_SA" in os.environ:
0027   import definitions as definitions
0028   from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
0029   from dirstructure import Directory
0030   from directories2html import directory2html,make_summary_table
0031   from utils import ask_ok, unpickler, make_files_pairs
0032 else:
0033   import Utilities.RelMon.definitions as definitions
0034   from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
0035   from Utilities.RelMon.dirstructure import Directory
0036   from Utilities.RelMon.directories2html import directory2html,make_summary_table
0037   from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
0038 sys.argv=argv
0039 
0040 #-------------------------------------------------------------------------------
0041 
def name2sample(filename):
  """Return the sample field of a DQM file name.

  The directory part of `filename` is dropped, the remaining basename is
  split on double underscores and the second field is returned, e.g.
  "DQM_V0001_R000000001__RelValTTbar__CMSSW_X-GT-v1__DQM.root" gives
  "RelValTTbar".  Raises IndexError when the basename has no "__".
  """
  fields=os.path.basename(filename).split("__")
  return fields[1]
0045 
def name2version(filename):
  """Return the version field of a DQM file name.

  This is the third "__"-separated field of the basename of `filename`.
  Raises IndexError when the basename has fewer than three such fields.
  """
  fields=os.path.basename(filename).split("__")
  return fields[2]
0049   
def name2run(filename):
  """Return the run token of a DQM file name.

  The part of the basename before the first "__" is split on single
  underscores and its third piece is returned, e.g. "DQM_V0001_R000000001"
  gives "R000000001".  Raises IndexError when there are fewer than three
  pieces.
  """
  prefix=os.path.basename(filename).partition("__")[0]
  pieces=prefix.split("_")
  return pieces[2]
0053 
def name2runskim(filename):
  """Return "<run>_<skim>" for a DQM file name.

  The run comes from name2run(); the skim is the last "_"-separated piece
  of the version returned by name2version(), with anything from the last
  "-v" onwards (the skim version) removed.
  """
  run=name2run(filename)
  skim=name2version(filename).rsplit("_",1)[-1]
  if "-v" in skim:
    # drop the trailing skim version, keeping what precedes the last "-v"
    skim=skim.rpartition("-v")[0]
  return "%s_%s"%(run,skim)
0061 
def name2globaltag(filename):
  """Return the global tag encoded in a DQM file name.

  The third "__"-separated field of the basename is split on "-" and its
  second piece is returned, e.g. "CMSSW_5_3_0-START53_V4-v1" gives
  "START53_V4".  Raises IndexError when either split is too short.
  """
  version_field=os.path.basename(filename).split("__")[2]
  dash_pieces=version_field.split("-")
  return dash_pieces[1]
0065 
0066 #-------------------------------------------------------------------------------  
0067 
def guess_params(ref_filenames,test_filenames):
  """Derive sample names and versions from paired reference/test files.

  The two lists are walked in lockstep (extra entries of the longer list
  are ignored).  For every pair the sample and version are extracted from
  the basenames with name2sample() and name2version(); a mismatch between
  the two sample names is reported on stdout but is not fatal.

  Returns a tuple (samples, version1, version2):
    samples  -- one entry per pair, taken from the reference file; when the
                reference version matches the regex "20[01]" (treated as
                data) the last "_"-separated piece of that version is
                appended to the sample name.
    version1 -- version of the first reference file.
    version2 -- version of the first test file.
  When either input list is empty, ([], "", "") is returned.
  """
  if not ref_filenames or not test_filenames:
    print("Empty reference and test filenames lists!")
    return [],"",""

  samples=[]
  ref_versions=[]
  test_versions=[]

  ref_basenames=map(os.path.basename,ref_filenames)
  test_basenames=map(os.path.basename,test_filenames)
  for ref, test in zip(ref_basenames,test_basenames):

    ref_sample=name2sample(ref)
    ref_version=name2version(ref)
    test_sample=name2sample(test)
    test_version=name2version(test)

    print("  ## sample 1: %s vs sample 2: %s"%(ref_sample, test_sample))

    if ref_sample!=test_sample:
      print("Files %s and %s do not seem to be relative to the same sample." %(ref, test))

    # Slightly modify for data
    if search("20[01]",ref_version) is not None:
      ref_sample+=ref_version.split("_")[-1]
    samples.append(ref_sample)

    ref_versions.append(ref_version)
    test_versions.append(test_version)

  # The versions are not required to be identical across pairs.  Report the
  # ones of the first pair: picking element 0 of list(set(...)) would yield
  # an arbitrary version that can change from run to run.
  cmssw_version1=ref_versions[0]
  cmssw_version2=test_versions[0]

  return samples,cmssw_version1,cmssw_version2
0113   
0114 
0115 #-------------------------------------------------------------------------------
0116 
def check_root_files(names_list):
  """Tell whether every name in `names_list` ends with ".root".

  On the first name that does not, a message naming the offending file is
  printed and False is returned.  An empty list yields True.
  """
  for name in names_list:
    if not name.endswith(".root"):
      # the file name has to be interpolated, otherwise a literal "%s" is shown
      print("File %s does not seem to be a rootfile. Please check." %name)
      return False
  return True
0123 
0124 #-------------------------------------------------------------------------------
0125 
def add_to_blacklist(blacklist, pattern, target, blist_piece):
  """Append `blist_piece` to `blacklist` when `pattern` applies to `target`.

  blacklist   -- comma separated blacklist string built so far (may be "").
  pattern     -- regular expression searched in `target`; surrounding
                 whitespace is ignored and a leading "!" negates the
                 outcome of the search.
  target      -- string the pattern is searched in.
  blist_piece -- text appended (after a comma if `blacklist` is not empty)
                 when the possibly negated search succeeds.

  Returns the resulting blacklist string; `blacklist` itself is returned
  unchanged when the condition does not hold.  An empty pattern is an
  empty regex and therefore matches every target.
  """
  int_pattern=pattern.strip()
  # startswith() instead of [0] so that an empty pattern does not raise
  flip_condition=int_pattern.startswith('!')
  if flip_condition:
    int_pattern=int_pattern[1:]

  condition=search(int_pattern,target) is not None
  if flip_condition:
    condition=not condition

  if condition:
    if blacklist!="": # if not the first, add a comma
      blacklist+=","
    blacklist+=blist_piece
  return blacklist
0146 
0147 #-------------------------------------------------------------------------------
0148 
def guess_blacklists(samples,ver1,ver2,hlt):
  """Build a blacklist string for each sample according to a set of rules.

  samples -- iterable of sample names; each becomes a key of the result.
  ver1    -- reference version string; a regex match on it decides whether
             the files are handled as data or as Monte Carlo.
  ver2    -- accepted but not used.
  hlt     -- when true the HLT rules are applied instead of the RECO ones.

  Every blacklist starts from a fixed base and is extended through
  add_to_blacklist() with the pattern/piece pairs taken from the
  `definitions` module.  Returns a dict mapping sample -> blacklist string.
  """
  per_sample={}
  for sample in samples:
    entry="FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"

    if hlt:
      entry+=",AlCaEcalPi0@2"
      if search("2010+|2011+|2012+|2015+",ver1):
        # nothing else to add for data: the HLT has already been run
        print("We are treating Data files for the HLT")
      else:
        print("We are treating MC files for the HLT")
        for pattern,blist in definitions.hlt_mc_pattern_blist_pairs:
          entry=add_to_blacklist(entry,pattern,sample,blist)

    elif search("2010+|2011+|2012+",ver1):
      # RECO, data: the patterns are tested against the version string
      print("We are treating Data files:")
      entry+=",By__Lumi__Section@-1,AlCaReco@1"
      for pattern,blist in definitions.data_pattern_blist_pairs:
        entry=add_to_blacklist(entry,pattern,ver1,blist)

    else:
      # RECO, Monte Carlo: the patterns are tested against the sample name
      print("We are treating MC files")
      for pattern,blist in definitions.mc_pattern_blist_pairs:
        entry=add_to_blacklist(entry,pattern,sample,blist)

    per_sample[sample]=entry

  return per_sample
0187 
0188 #-------------------------------------------------------------------------------  
0189 
def get_roofiles_in_dir(directory):
  """Return the paths of the ".root" files found directly in `directory`.

  The directory is echoed on stdout first.  Each returned entry is
  `directory` joined with a file name, in the order given by os.listdir().
  A real list is returned (not a lazy map object) because callers take
  its len() and iterate it more than once.
  """
  print(directory)
  return [os.path.join(directory,entry)
          for entry in os.listdir(directory)
          if entry.endswith(".root")]
0196   
0197 #-------------------------------------------------------------------------------  
0198 
def get_filenames_from_pool(all_samples):
  """Split the root files of a directory into reference and test lists.

  all_samples -- directory scanned with get_roofiles_in_dir().

  The files found are listed on stdout, handed to make_files_pairs() and
  the sequence it returns is split by position: even positions become
  reference files, odd positions test files.  The proposed pairing is
  printed as well.

  Returns (ref_filenames, test_filenames); both are empty lists when the
  directory holds no root file.
  """
  # materialise the result: len() below needs a real sequence
  files_list=list(get_roofiles_in_dir(all_samples))

  if len(files_list)==0:
    print("Zero files found in directory %s!" %all_samples)
    return [],[]

  # Are they an even number?
  for name in files_list:
    print("* ",name)
  if len(files_list)%2!=0:
    print("The numbuer of file is not even... Trying to recover a catastrophe.")

  files_list=make_files_pairs(files_list)

  # Couple them by position: even -> reference, odd -> test
  ref_filenames=[]
  test_filenames=[]
  for position,filename in enumerate(files_list):
    if position%2==0:
      ref_filenames.append(filename)
    else:
      test_filenames.append(filename)

  print("The guess would be the following:")
  for ref,test in zip(ref_filenames,test_filenames):
    refbasedir=os.path.dirname(ref)
    testbasedir=os.path.dirname(test)
    dir_to_print=refbasedir
    if refbasedir!=testbasedir:
      dir_to_print="%s and %s" %(refbasedir,testbasedir)
    print("* Directory: %s " %dir_to_print)
    print("  o %s" %os.path.basename(ref))
    print("  o %s" %os.path.basename(test))

  return ref_filenames,test_filenames
0249   
0250 
0251 #-------------------------------------------------------------------------------
0252 
def get_clean_fileanames(ref_samples,test_samples):
  """Turn two comma separated strings into lists of file names.

  Each string is split on "," and every piece is stripped of surrounding
  whitespace.  The process exits with status 2 when the two lists differ
  in length or when check_root_files() rejects either of them.

  Returns (ref_filenames, test_filenames).
  """
  ref_filenames=[piece.strip() for piece in ref_samples.split(",")]
  test_filenames=[piece.strip() for piece in test_samples.split(",")]

  if len(ref_filenames)!=len(test_filenames):
    print("The numebr of reference and test files does not seem to be the same. Please check.")
    exit(2)

  # the test list is only inspected when the reference list is fine
  all_root=check_root_files(ref_filenames) and check_root_files(test_filenames)
  if not all_root:
    exit(2)
  return ref_filenames,test_filenames
0265 
0266 #-------------------------------------------------------------------------------
0267 
def count_alive_processes(p_list):
  """Count the entries of `p_list` whose `returncode` attribute is None."""
  return sum(1 for p in p_list if p.returncode is None)
0270 
0271 #-------------------------------------------------------------------------------
0272 
def call_compare_using_files(args):
  """Run compare_using_files.py on one reference/test pair.

  args -- sequence (sample, ref_filename, test_filename, options); a single
          argument is used so that the function can be fed to Pool.map().

  The output name is "<sample>_<global tag>", the global tag being taken
  from the reference file name.  The blacklist passed with -B is computed
  by guess_blacklists() for this sample.  When the statistical test is
  "Bin2Bin" or "BinToBin", options.test_threshold is overwritten with
  0.9999 before being used.

  Returns the value returned by subprocess.call() for the command.
  """
  sample, ref_filename, test_filename, options = args
  gt = name2globaltag(ref_filename)
  blacklists=guess_blacklists([sample],name2version(ref_filename),name2version(test_filename),options.hlt)

  if options.stat_test in ["Bin2Bin", "BinToBin"]:
    options.test_threshold = 0.9999

  # The argument vector is assembled directly rather than by splitting a
  # command string on blanks, so that paths containing spaces stay intact.
  command=["compare_using_files.py",ref_filename,test_filename,"-C","-R"]
  if options.do_pngs:
    command.append("-p")
  command+=["-o","%s_%s" %(sample, gt)]
  command.append("--specify_run")
  command+=["-t",str(options.test_threshold)]
  command+=["-s",str(options.stat_test)]

  # Inspect the HLT directories
  if options.hlt:
    command+=["-d","HLT"]

  if options.hash_name:
    command.append("--hash_name")

  if options.blacklist_file:
    command.append("--use_black_file")

  if options.standalone:
    command.append("--standalone")

  if len(blacklists[sample]) >0:
    command+=["-B",blacklists[sample]]
  print("\nExecuting --  %s" %" ".join(command))

  return call(command)
0310   
0311 
0312 #--------------------------------------------------------------------------------
0313 
def do_comparisons_threaded(options):
  """Compare every reference/test pair in parallel worker processes.

  options -- parsed command line options.  Read here: n_processes,
             all_samples, ref_samples, test_samples, out_dir, input_dir.
             options.input_dir is set to the output directory when empty.

  The file pairs come from get_filenames_from_pool() when
  options.all_samples is given, otherwise from get_clean_fileanames().
  The comparisons run from inside the output directory (created when
  missing, named "<version1>VS<version2>" when not specified) through a
  multiprocessing Pool; afterwards the pickles written in subdirectories
  are moved to the top of the output directory.  The working directory in
  use at call time is restored before returning.

  Returns 0 when no sample could be determined, None otherwise.
  """
  n_processes=int(options.n_processes)

  if len(options.all_samples)>0:
    ref_filenames,test_filenames=get_filenames_from_pool(options.all_samples)
  else:
    ref_filenames,test_filenames=get_clean_fileanames(options.ref_samples,options.test_samples)

  # make the paths absolute: the comparisons run from another directory
  ref_filenames=list(map(os.path.abspath,ref_filenames))
  test_filenames=list(map(os.path.abspath,test_filenames))

  samples,cmssw_version1,cmssw_version2=guess_params(ref_filenames,test_filenames)

  if len(samples)==0:
    print("No Samples found... Quitting")
    return 0

  original_dir=os.getcwd()

  outdir=options.out_dir
  if len(outdir)==0:
    print("Creating automatic outdir:", end=' ')
    outdir="%sVS%s" %(cmssw_version1,cmssw_version2)
    print(outdir)
  if len(options.input_dir)==0:
    print("Creating automatic indir:", end=' ')
    options.input_dir=outdir
    print(options.input_dir)

  if not os.path.exists(outdir):
    os.mkdir(outdir)
  os.chdir(outdir)

  try:
    # adjust the number of processes
    n_comparisons=len(ref_filenames)
    if n_comparisons < n_processes:
      print("Less comparisons than possible processes: reducing n processes to", end=' ')
      n_processes=n_comparisons
      print(n_processes)

    ## Compare all pairs of root files
    args_iterable = [list(args) + [options] for args in zip(samples, ref_filenames, test_filenames)]
    pool = Pool(n_processes)
    try:
      pool.map(call_compare_using_files, args_iterable)
    finally:
      # release the worker processes even if a comparison raised
      pool.close()
      pool.join()

    # move the pickles on the top, hack
    os.system("mv */*pkl .")
  finally:
    # go back where we started; ".." is wrong for a nested or absolute outdir
    os.chdir(original_dir)
0391 #-------------------------------------------------------------------------------
def do_reports(indir):
  """Run compare_using_files.py in report mode on every pickle in `indir`.

  indir -- directory holding the ".pkl" files; the commands are started
           from inside it and the previous working directory is restored
           afterwards.

  NOTE: `options` is not a parameter.  It is read as a module-level name
  (n_processes, do_pngs), which is only defined when the file is executed
  as a script.

  The commands are started in batches of at most options.n_processes
  concurrent processes; each batch is waited for before the next starts.
  """
  # Popen is needed to get process objects that can be waited for:
  # subprocess.call() blocks and returns an integer exit status.
  from subprocess import Popen

  n_processes=int(options.n_processes)
  original_dir=os.getcwd()
  os.chdir(indir)
  try:
    # endswith() so that the [:-4] below really strips the ".pkl" suffix
    pkl_list=[x for x in os.listdir("./") if x.endswith(".pkl")]
    running_subprocesses=[]
    for pklfilename in pkl_list:
      command=["compare_using_files.py","-R"]
      if options.do_pngs:
        command.append("-p")
      command+=["-P",pklfilename]
      command+=["-o",pklfilename[:-4]]
      print("Executing %s" %" ".join(command))
      running_subprocesses.append(Popen(command))
      if len(running_subprocesses)>=n_processes:
        # batch is full: wait for it to finish before launching more
        for p in running_subprocesses:
          p.wait()
        running_subprocesses=[]
    # wait for the last, possibly incomplete, batch
    for p in running_subprocesses:
      p.wait()
  finally:
    os.chdir(original_dir)
0418   
0419 #-------------------------------------------------------------------------------
def do_html(options, hashing_flag, standalone):
  """Write the summary page "RelMonSummary.html" in the current directory.

  options      -- parsed command line options.  Read here: reports,
                  input_dir, hlt.
  hashing_flag -- forwarded unchanged to make_summary_table().
  standalone   -- forwarded unchanged to make_summary_table().

  When options.reports is set, do_reports() is first run on
  options.input_dir.  The aggregation rules are the 'HLT' or the 'reco'
  entries of the dictionaries in the `definitions` module, depending on
  options.hlt.
  """
  if options.reports:
    print("Preparing reports for the single files...")
    do_reports(options.input_dir)

  # check which aggregation rules are to be used
  if options.hlt:
    print("Aggregating directories according to HLT rules")
    rules_key='HLT'
  else:
    rules_key='reco'
  aggregation_rules=definitions.aggr_pairs_dict[rules_key]
  aggregation_rules_twiki=definitions.aggr_pairs_twiki_dict[rules_key]

  table_html = make_summary_table(options.input_dir,aggregation_rules,aggregation_rules_twiki, hashing_flag, standalone)

  # create summary html file; the context manager closes it even on error
  with open("RelMonSummary.html","w") as ofile:
    ofile.write(table_html)
0442 
0443 #-------------------------------------------------------------------------------
0444 
if __name__ == "__main__":
  # Command line entry point: parse the options, run the comparisons when
  # samples are given and build the html summary when an input directory is
  # known.  `options` is deliberately left as a module-level name because
  # do_reports() reads it as a global.

  #-----------------------------------------------------------------------------
  # Defaults of the command line options
  ref_samples=""
  test_samples=""
  all_samples=""
  n_processes=1
  out_dir=""
  in_dir=""
  n_threads=1 # do not change this
  run=-1
  stat_test="Chi2"
  test_threshold=0.00001
  hlt=False
  #-----------------------------------------------------------------------------


  parser = OptionParser(usage="usage: %prog [options]")

  # The long option names must not carry trailing blanks: with them
  # "--ref_samples" and "--run" were only recognised through optparse's
  # abbreviation matching.
  parser.add_option("-R","--ref_samples",
                    action="store",
                    dest="ref_samples",
                    default=ref_samples,
                    help="The samples that act as reference (comma separated list)")

  parser.add_option("-T","--test_samples",
                    action="store",
                    dest="test_samples",
                    default=test_samples,
                    help="The samples to be tested (comma separated list)")

  parser.add_option("-a","--all_samples",
                    action="store",
                    dest="all_samples",
                    default=all_samples,
                    help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

  parser.add_option("-o","--out_dir",
                    action="store",
                    dest="out_dir",
                    default=out_dir,
                    help="The outdir other than <Version1>VS<Version2>")

  parser.add_option("-p","--do_pngs",
                    action="store_true",
                    dest="do_pngs",
                    default=False,
                    help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

  parser.add_option("-r","--run",
                    action="store",
                    dest="run",
                    default=run,
                    help="The run to be checked \n(default is %s)" %run)

  parser.add_option("-t","--test_threshold",
                    action="store",
                    dest="test_threshold",
                    default=test_threshold,
                    help="Threshold for the statistical test \n(default is %s)" %test_threshold)

  parser.add_option("-s","--stat_test",
                    action="store",
                    dest="stat_test",
                    default=stat_test,
                    help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

  parser.add_option("-N","--numberOfProcesses",
                    action="store",
                    dest="n_processes",
                    default=n_processes,
                    help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

  parser.add_option("--HLT",
                    action="store_true",
                    dest="hlt",
                    default=False,
                    help="Analyse HLT histograms\n(default is %s)" %hlt)

  parser.add_option("-i","--input_dir",
                    action="store",
                    dest="input_dir",
                    default=in_dir,
                    help="Input directory for html creation \n(default is %s)" %in_dir)

  # the default shown in the help is the one of this flag, not in_dir
  parser.add_option("--reports",
                    action="store_true",
                    dest="reports",
                    default=False,
                    help="Do the reports for the pickles \n(default is %s)" %False)
##---HASHING---##
  parser.add_option("--hash_name",
                    action="store_true",
                    dest="hash_name",
                    default=False,
                    help="Set if you want to minimize & hash the output HTML files.")
##--Blacklist File --##
  parser.add_option("--use_black_file",
                    action="store_true",
                    dest="blacklist_file",
                    default=False,
                    help="Use a black list file of histograms located @ /RelMon/data")
##-- USE CSS files in web access, for stand-alone usage --##
  parser.add_option("--standalone",
                  action="store_true",
                  dest="standalone",
                  default=False,
                  help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

  (options, args) = parser.parse_args()

  # Nothing to do without either samples to compare or a directory to summarise
  if len(options.test_samples)*len(options.ref_samples)+len(options.all_samples)==0 and len(options.input_dir)==0:
    print("No samples given as input.")
    parser.print_help()
    exit(2)

  if len(options.all_samples)>0 or (len(options.ref_samples)*len(options.test_samples)>0):
    do_comparisons_threaded(options)
  # do_comparisons_threaded() fills options.input_dir when it was empty
  if len(options.input_dir)>0:
    do_html(options, options.hash_name, options.standalone)
0565 
0566 
0567 
0568 
0569 
0570 
0571 
0572 
0573 
0574 
0575 
0576