File indexing completed on 2024-11-26 02:34:35
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 from builtins import range
0013 from optparse import OptionParser
0014
0015 import os
0016 import pickle
0017 import glob
0018 from re import search
0019 from subprocess import call,PIPE
0020 from multiprocessing import Pool
0021 from sys import exit
0022
0023 import sys
0024 argv=sys.argv
0025 sys.argv=[]
0026 if "RELMON_SA" in os.environ:
0027 import definitions as definitions
0028 from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
0029 from dirstructure import Directory
0030 from directories2html import directory2html,make_summary_table
0031 from utils import ask_ok, unpickler, make_files_pairs
0032 else:
0033 import Utilities.RelMon.definitions as definitions
0034 from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
0035 from Utilities.RelMon.dirstructure import Directory
0036 from Utilities.RelMon.directories2html import directory2html,make_summary_table
0037 from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
0038 sys.argv=argv
0039
0040
0041
def name2sample(filename):
    """Return the sample field encoded in a root file name.

    The directory part of *filename* is discarded, the base name is split on
    double underscores and the second field is returned.  An ``IndexError``
    is raised if the base name holds no ``"__"`` separator.
    """
    fields = os.path.basename(filename).split("__", 2)
    return fields[1]
0045
def name2version(filename):
    """Return the version field encoded in a root file name.

    The directory part of *filename* is discarded, the base name is split on
    double underscores and the third field is returned.  An ``IndexError``
    is raised if the base name holds fewer than two ``"__"`` separators.
    """
    fields = os.path.basename(filename).split("__", 3)
    return fields[2]
0049
def name2run(filename):
    """Return the run field encoded in a root file name.

    Only the part of the base name preceding the first double underscore is
    considered; it is split on single underscores and the third piece is
    returned.  An ``IndexError`` is raised if that prefix has fewer than
    three pieces.
    """
    prefix = os.path.basename(filename).partition("__")[0]
    pieces = prefix.split("_")
    return pieces[2]
0053
def name2runskim(filename):
    """Return ``"<run>_<skim>"`` for a root file name.

    The run comes from ``name2run``.  The skim is the last
    underscore-separated piece of the version returned by ``name2version``,
    truncated at its last ``"-v"`` when one is present.
    """
    run = name2run(filename)
    skim = name2version(filename).rsplit("_", 1)[-1]

    # Cut the trailing "-v..." suffix off the skim, if there is one.
    suffix_start = skim.rfind("-v")
    if suffix_start != -1:
        skim = skim[:suffix_start]
    return "_".join((run, skim))
0061
def name2globaltag(filename):
    """Return the global-tag field encoded in a root file name.

    The third double-underscore-separated field of the base name is split on
    dashes and its second piece is returned.  An ``IndexError`` is raised if
    either split yields too few pieces.
    """
    version_field = os.path.basename(filename).split("__")[2]
    dash_pieces = version_field.split("-")
    return dash_pieces[1]
0065
0066
0067
def guess_params(ref_filenames, test_filenames):
    """Guess the sample names and the two versions from the file names.

    Reference and test files are paired positionally (``zip``), so surplus
    entries in the longer list are ignored.

    Args:
        ref_filenames: list of reference root file paths.
        test_filenames: list of test root file paths.

    Returns:
        A tuple ``(samples, ref_version, test_version)``.  ``samples`` holds
        one entry per file pair, derived from the reference file.  The two
        versions are those of the first pair, so the result is stable from
        run to run even when the lists mix several versions.  If either
        input list is empty, ``([], "", "")`` is returned.
    """
    if not ref_filenames or not test_filenames:
        print("Empty reference and test filenames lists!")
        return [], "", ""

    samples = []
    ref_versions = []
    test_versions = []

    ref_names = map(os.path.basename, ref_filenames)
    test_names = map(os.path.basename, test_filenames)
    for ref, test in zip(ref_names, test_names):
        ref_sample = name2sample(ref)
        ref_version = name2version(ref)
        test_sample = name2sample(test)
        test_version = name2version(test)

        print(" ## sample 1: %s vs sample 2: %s" % (ref_sample, test_sample))

        # A mismatch is only reported; the pair is still kept.
        if ref_sample != test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." % (ref, test))

        # Versions containing "200" or "201" (presumably a data-taking year,
        # TODO confirm) get their last "_" piece appended to the sample name.
        if search("20[01]", ref_version) is not None:
            ref_sample += ref_version.split("_")[-1]
        samples.append(ref_sample)

        ref_versions.append(ref_version)
        test_versions.append(test_version)

    # Use the first pair's versions: picking element 0 of a set() of strings
    # is arbitrary under hash randomisation when several versions are present.
    cmssw_version1 = ref_versions[0]
    cmssw_version2 = test_versions[0]

    return samples, cmssw_version1, cmssw_version2
0113
0114
0115
0116
def check_root_files(names_list):
    """Check that every name in *names_list* ends with ``.root``.

    Args:
        names_list: iterable of file names.

    Returns:
        ``True`` when all names end with ``.root`` (also for an empty
        iterable).  ``False`` at the first name that does not; that name is
        reported on stdout.
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The file name has to be interpolated, otherwise a literal
            # "%s" is printed and the user cannot tell which file is wrong.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
0123
0124
0125
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append *blist_piece* to *blacklist* when *pattern* applies to *target*.

    Args:
        blacklist: current comma-separated blacklist string (may be empty).
        pattern: regular expression searched for in *target*.  Surrounding
            whitespace is stripped; a leading ``!`` inverts the condition,
            i.e. the piece is added when the expression does NOT match.
        target: string the expression is searched in.
        blist_piece: text appended when the condition holds.

    Returns:
        The possibly extended blacklist string.  A comma is inserted before
        the piece unless the blacklist was empty.
    """
    int_pattern = pattern.strip()

    # startswith() instead of [0]: an empty or blank pattern must not raise
    # IndexError (it is then an empty regex, which matches any target).
    flip_condition = int_pattern.startswith("!")
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":
            blacklist += ","
        blacklist += blist_piece

    return blacklist
0146
0147
0148
def guess_blacklists(samples, ver1, ver2, hlt):
    """Build the blacklist string of every sample from a fixed set of rules.

    Each sample starts from a common base list.  Depending on *hlt* and on
    whether *ver1* matches the year expression, one of the pattern/piece
    tables in ``definitions`` is applied through ``add_to_blacklist``.
    *ver2* is accepted but not used.

    Returns:
        dict mapping each sample name to its comma-separated blacklist.
    """
    per_sample = {}
    for sample in samples:
        entry = "FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"

        if hlt:
            entry += ",AlCaEcalPi0@2"
            if search("2010+|2011+|2012+|2015+", ver1):
                # Nothing beyond the common HLT entry is added in this case.
                print("We are treating Data files for the HLT")
            else:
                print("We are treating MC files for the HLT")
                for pattern, blist in definitions.hlt_mc_pattern_blist_pairs:
                    entry = add_to_blacklist(entry, pattern, sample, blist)
        elif search("2010+|2011+|2012+", ver1):
            print("We are treating Data files:")
            entry += ",By__Lumi__Section@-1,AlCaReco@1"
            # These patterns are matched against the version, not the sample.
            for pattern, blist in definitions.data_pattern_blist_pairs:
                entry = add_to_blacklist(entry, pattern, ver1, blist)
        else:
            print("We are treating MC files")
            for pattern, blist in definitions.mc_pattern_blist_pairs:
                entry = add_to_blacklist(entry, pattern, sample, blist)

        per_sample[sample] = entry

    return per_sample
0187
0188
0189
def get_roofiles_in_dir(directory):
    """List the ``.root`` files found directly inside *directory*.

    The directory is echoed on stdout first.

    Args:
        directory: path of the directory to scan (not recursive).

    Returns:
        A list of paths, each being *directory* joined with a file name
        ending in ``.root``, in ``os.listdir`` order.  A real list is
        returned (not a lazy ``map`` object) so that callers can take its
        length and iterate over it more than once.
    """
    print(directory)
    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.endswith(".root")]
0196
0197
0198
def get_filenames_from_pool(all_samples):
    """Split the root files found in a directory into reference/test lists.

    The files returned by ``get_roofiles_in_dir`` are handed to
    ``make_files_pairs``; the entries of its result are then assigned
    alternately to the reference list (even positions) and to the test list
    (odd positions).  The guessed pairing is printed on stdout.

    Args:
        all_samples: directory holding the root files.

    Returns:
        ``(ref_filenames, test_filenames)``; two empty lists when the
        directory holds no root file.
    """
    # Materialise the result: it is measured with len() below, which a lazy
    # iterator would not support.
    files_list = list(get_roofiles_in_dir(all_samples))

    if not files_list:
        print("Zero files found in directory %s!" % all_samples)
        return [], []

    for name in files_list:
        print("* ", name)
    # An odd count is only reported; the pairing is attempted anyway.
    if len(files_list) % 2 != 0:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    files_list = make_files_pairs(files_list)

    ref_filenames = []
    test_filenames = []
    for position, filename in enumerate(files_list):
        if position % 2 == 0:
            ref_filenames.append(filename)
        else:
            test_filenames.append(filename)

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        refbasedir = os.path.dirname(ref)
        testbasedir = os.path.dirname(test)
        dir_to_print = refbasedir
        if refbasedir != testbasedir:
            dir_to_print = "%s and %s" % (refbasedir, testbasedir)
        print("* Directory: %s " % dir_to_print)
        print(" o %s" % os.path.basename(ref))
        print(" o %s" % os.path.basename(test))

    return ref_filenames, test_filenames
0249
0250
0251
0252
def get_clean_fileanames(ref_samples, test_samples):
    """Turn two comma-separated strings into validated file name lists.

    Every item is stripped of surrounding whitespace.  The process exits
    with status 2 when the two lists differ in length or when
    ``check_root_files`` rejects one of them.

    Returns:
        ``(ref_filenames, test_filenames)`` as two lists of strings.
    """
    ref_filenames = [item.strip() for item in ref_samples.split(",")]
    test_filenames = [item.strip() for item in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    # The test list is only inspected when the reference list passed.
    all_root = check_root_files(ref_filenames) and check_root_files(test_filenames)
    if not all_root:
        exit(2)
    return ref_filenames, test_filenames
0265
0266
0267
def count_alive_processes(p_list):
    """Count the entries of *p_list* whose ``returncode`` is still ``None``."""
    return sum(1 for proc in p_list if proc.returncode is None)
0270
0271
0272
def call_compare_using_files(args):
    """Run ``compare_using_files.py`` on one reference/test file pair.

    Args:
        args: sequence ``(sample, ref_filename, test_filename, options)``.
            ``options`` is the parsed command-line options object; the
            attributes read here are ``hlt``, ``do_pngs``, ``stat_test``,
            ``test_threshold``, ``hash_name``, ``blacklist_file`` and
            ``standalone``.

    Returns:
        The value returned by ``subprocess.call`` for the comparison.

    Side effects:
        For the ``Bin2Bin``/``BinToBin`` tests ``options.test_threshold`` is
        overwritten with ``0.9999`` on the object that was passed in.
    """
    sample, ref_filename, test_filename, options = args
    gt = name2globaltag(ref_filename)
    blacklists = guess_blacklists([sample],
                                  name2version(ref_filename),
                                  name2version(test_filename),
                                  options.hlt)

    if options.stat_test in ("Bin2Bin", "BinToBin"):
        options.test_threshold = 0.9999

    # The argument vector is assembled directly rather than by splitting a
    # command string on spaces, so paths containing whitespace stay intact.
    argv = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
    if options.do_pngs:
        argv.append("-p")
    argv += ["-o", "%s_%s" % (sample, gt)]
    argv.append("--specify_run")
    argv += ["-t", "%s" % options.test_threshold]
    argv += ["-s", "%s" % options.stat_test]

    if options.hlt:
        argv += ["-d", "HLT"]
    if options.hash_name:
        argv.append("--hash_name")
    if options.blacklist_file:
        argv.append("--use_black_file")
    if options.standalone:
        argv.append("--standalone")
    if len(blacklists[sample]) > 0:
        argv += ["-B", blacklists[sample]]

    print("\nExecuting -- %s" % " ".join(argv))

    return call(argv)
0310
0311
0312
0313
def do_comparisons_threaded(options):
    """Run all reference/test comparisons in a pool of worker processes.

    The file pairs come either from the directory ``options.all_samples``
    or from the comma-separated ``options.ref_samples`` and
    ``options.test_samples``.  The comparisons are executed from inside the
    output directory, the pickles produced in its sub-directories are moved
    to its top level, and the original working directory is restored.

    Args:
        options: parsed command-line options.  ``options.input_dir`` is set
            to the output directory when it was empty.

    Returns:
        ``0`` when no sample could be determined, ``None`` otherwise.
    """
    n_processes = int(options.n_processes)

    if len(options.all_samples) > 0:
        ref_filenames, test_filenames = get_filenames_from_pool(options.all_samples)
    else:
        ref_filenames, test_filenames = get_clean_fileanames(options.ref_samples,
                                                             options.test_samples)

    # Absolute paths are needed because the working directory changes below.
    ref_filenames = list(map(os.path.abspath, ref_filenames))
    test_filenames = list(map(os.path.abspath, test_filenames))

    samples, cmssw_version1, cmssw_version2 = guess_params(ref_filenames, test_filenames)

    if len(samples) == 0:
        print("No Samples found... Quitting")
        return 0

    original_dir = os.getcwd()

    outdir = options.out_dir
    if len(outdir) == 0:
        print("Creating automatic outdir:", end=' ')
        outdir = "%sVS%s" % (cmssw_version1, cmssw_version2)
        print(outdir)
    if len(options.input_dir) == 0:
        print("Creating automatic indir:", end=' ')
        options.input_dir = outdir
        print(options.input_dir)

    # makedirs: a nested output path must not fail on a missing parent.
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Never start more workers than there are comparisons to run.
    n_comparisons = len(ref_filenames)
    if n_comparisons < n_processes:
        n_processes = n_comparisons
        print("Less comparisons than possible processes: reducing n processes to", n_processes)
    # Pool() rejects a worker count below one.
    n_processes = max(1, n_processes)

    args_iterable = [list(args) + [options]
                     for args in zip(samples, ref_filenames, test_filenames)]

    os.chdir(outdir)
    try:
        pool = Pool(n_processes)
        try:
            pool.map(call_compare_using_files, args_iterable)
        finally:
            # Release the worker processes even when a comparison raised.
            pool.close()
            pool.join()

        # Move the pickles written in the sub-directories to the top level.
        os.system("mv */*pkl .")
    finally:
        # Go back to the starting directory itself: ".." is only correct
        # for a relative, single-level outdir.
        os.chdir(original_dir)
0391
def do_reports(indir):
    """Run ``compare_using_files.py`` on every pickle found in *indir*.

    The reports are produced one after the other: ``subprocess.call`` blocks
    until the command has finished and returns its exit status, so there is
    no process handle to wait on afterwards.

    NOTE: this function reads the module-level ``options`` object (its
    ``do_pngs`` attribute), which is only defined when the file is executed
    as a script.

    Args:
        indir: directory holding the ``.pkl`` files.  It is the working
            directory while the reports run; the previous working directory
            is restored afterwards, also on error.
    """
    original_dir = os.getcwd()
    os.chdir(indir)
    try:
        # endswith(): the output name below strips the last four characters,
        # which is only meaningful for names ending in ".pkl".
        pkl_list = [x for x in os.listdir("./") if x.endswith(".pkl")]
        for pklfilename in pkl_list:
            argv = ["compare_using_files.py", "-R"]
            if options.do_pngs:
                argv.append("-p")
            argv += ["-P", pklfilename]
            argv += ["-o", pklfilename[:-4]]
            print("Executing %s" % " ".join(argv))
            call(argv)
    finally:
        os.chdir(original_dir)
0418
0419
def do_html(options, hashing_flag, standalone):
    """Write the ``RelMonSummary.html`` summary page.

    When ``options.reports`` is set, the per-pickle reports are produced
    first with ``do_reports``.  The summary table is then built by
    ``make_summary_table`` from ``options.input_dir`` with the aggregation
    rules selected by ``options.hlt``, and written to ``RelMonSummary.html``
    in the current working directory.

    Args:
        options: parsed command-line options (``reports``, ``hlt`` and
            ``input_dir`` are read).
        hashing_flag: forwarded unchanged to ``make_summary_table``.
        standalone: forwarded unchanged to ``make_summary_table``.
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    # Select the aggregation rules matching the kind of histograms analysed.
    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    table_html = make_summary_table(options.input_dir,
                                    aggregation_rules,
                                    aggregation_rules_twiki,
                                    hashing_flag,
                                    standalone)

    # "with" closes the file even when the write fails.
    with open("RelMonSummary.html", "w") as ofile:
        ofile.write(table_html)
0442
0443
0444
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the comparisons when
    # samples were given, then build the html summary when an input
    # directory is known.

    # Default values of the options; several are echoed in the help texts.
    ref_samples = ""
    test_samples = ""
    all_samples = ""
    n_processes = 1
    out_dir = ""
    in_dir = ""
    run = -1
    stat_test = "Chi2"
    test_threshold = 0.00001
    hlt = False

    parser = OptionParser(usage="usage: %prog [options]")

    # The long option carries no trailing blank, so it is registered and
    # shown in the help under its exact name.
    parser.add_option("-R", "--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T", "--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a", "--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

    parser.add_option("-o", "--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p", "--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" % False)

    # Same remark as for --ref_samples: no trailing blank in the name.
    parser.add_option("-r", "--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" % run)

    parser.add_option("-t", "--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" % test_threshold)

    parser.add_option("-s", "--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" % stat_test)

    parser.add_option("-N", "--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" % n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" % hlt)

    parser.add_option("-i", "--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" % in_dir)

    # The help shows the option's own default (False), not the default of
    # the input directory.
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" % False)

    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")

    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")

    parser.add_option("--standalone",
                      action="store_true",
                      dest="standalone",
                      default=False,
                      help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

    (options, args) = parser.parse_args()

    # Samples are usable when a pool directory was given, or when both the
    # reference and the test lists are non-empty.
    have_pair = len(options.ref_samples) * len(options.test_samples) > 0
    have_samples = len(options.all_samples) > 0 or have_pair

    if not have_samples and len(options.input_dir) == 0:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if have_samples:
        do_comparisons_threaded(options)
    # Evaluated after the comparisons, which may have filled in input_dir.
    if len(options.input_dir) > 0:
        do_html(options, options.hash_name, options.standalone)
0565
0566
0567
0568
0569
0570
0571
0572
0573
0574
0575
0576