File indexing completed on 2024-04-06 12:31:49
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 from __future__ import print_function
0013 from builtins import range
0014 from optparse import OptionParser
0015
0016 import os
0017 import pickle
0018 import glob
0019 from re import search
0020 from subprocess import call,PIPE
0021 from multiprocessing import Pool
0022 from sys import exit
0023
0024 import sys
0025 argv=sys.argv
0026 sys.argv=[]
0027 if "RELMON_SA" in os.environ:
0028 import definitions as definitions
0029 from dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
0030 from dirstructure import Directory
0031 from directories2html import directory2html,make_summary_table
0032 from utils import ask_ok, unpickler, make_files_pairs
0033 else:
0034 import Utilities.RelMon.definitions as definitions
0035 from Utilities.RelMon.dqm_interfaces import DirWalkerFile,string2blacklist,DirWalkerFile_thread_wrapper
0036 from Utilities.RelMon.dirstructure import Directory
0037 from Utilities.RelMon.directories2html import directory2html,make_summary_table
0038 from Utilities.RelMon.utils import ask_ok, unpickler, make_files_pairs
0039 sys.argv=argv
0040
0041
0042
def name2sample(filename):
    """Return the second "__"-separated field of the base name of *filename*.

    Any directory part of *filename* is ignored.  Raises IndexError when the
    base name contains no "__" separator.
    """
    _, leaf = os.path.split(filename)
    fields = leaf.split("__")
    return fields[1]
0046
def name2version(filename):
    """Return the third "__"-separated field of the base name of *filename*.

    Any directory part of *filename* is ignored.  Raises IndexError when the
    base name has fewer than three "__"-separated fields.
    """
    _, leaf = os.path.split(filename)
    fields = leaf.split("__")
    return fields[2]
0050
def name2run(filename):
    """Return the run token encoded in the base name of *filename*.

    The token is the third "_"-separated piece of the text that precedes the
    first "__" in the base name.  Raises IndexError when that prefix has
    fewer than three "_"-separated pieces.
    """
    _, leaf = os.path.split(filename)
    prefix = leaf.split("__")[0]
    pieces = prefix.split("_")
    return pieces[2]
0054
def name2runskim(filename):
    """Return "<run>_<skim>" for *filename*.

    <run> comes from name2run().  <skim> is the last "_"-separated token of
    the string returned by name2version(), truncated at its last "-v" when
    that marker is present.
    """
    run_token = name2run(filename)
    last_token = name2version(filename).rsplit("_", 1)[-1]

    # Drop everything from the last "-v" onwards, if there is one.
    head, marker, _ = last_token.rpartition("-v")
    skim_label = head if marker else last_token

    return "%s_%s" % (run_token, skim_label)
0062
def name2globaltag(filename):
    """Return the second "-"-separated piece of the version field of *filename*.

    The version field is the third "__"-separated field of the base name.
    Raises IndexError when either split yields too few pieces.
    """
    _, leaf = os.path.split(filename)
    version_field = leaf.split("__")[2]
    dash_pieces = version_field.split("-")
    return dash_pieces[1]
0066
0067
0068
def guess_params(ref_filenames,test_filenames):
    """Derive the sample names and the two version strings from file names.

    The reference and test lists are walked pairwise (base names only).

    Returns a tuple ``(samples, version1, version2)``:

    * ``samples`` holds one entry per pair: the reference sample name, with
      the last "_"-separated token of the reference version appended when
      that version matches the regular expression "20[01]".
    * ``version1`` / ``version2`` are the version strings of the FIRST
      reference / test file.  They were previously taken from
      ``list(set(...))[0]``, which picks an arbitrary element when several
      distinct versions are present; using the first pair makes the result
      reproducible and is identical whenever all versions agree.

    Returns ``([], "", "")`` when either input list is empty.
    """
    if not ref_filenames or not test_filenames:
        print("Empty reference and test filenames lists!")
        return [],"",""

    samples = []
    ref_versions = []
    test_versions = []

    ref_names = map(os.path.basename, ref_filenames)
    test_names = map(os.path.basename, test_filenames)
    for ref, test in zip(ref_names, test_names):
        ref_sample = name2sample(ref)
        ref_version = name2version(ref)
        test_sample = name2sample(test)
        test_version = name2version(test)

        print(" ## sample 1: %s vs sample 2: %s"%(ref_sample, test_sample))

        # A mismatch is only reported; the pair is still processed.
        if ref_sample != test_sample:
            print("Files %s and %s do not seem to be relative to the same sample." %(ref, test))

        # Versions matching "20[01]" get their last token appended to the
        # sample name (presumably to tell data skims apart - confirm).
        if search("20[01]", ref_version) is not None:
            ref_sample += ref_version.split("_")[-1]
        samples.append(ref_sample)

        ref_versions.append(ref_version)
        test_versions.append(test_version)

    # Both lists are non-empty here, so the loop ran at least once.
    cmssw_version1 = ref_versions[0]
    cmssw_version2 = test_versions[0]

    return samples,cmssw_version1,cmssw_version2
0114
0115
0116
0117
def check_root_files(names_list):
    """Return True when every name in *names_list* ends with ".root".

    On the first name that does not, a message naming that file is printed
    and False is returned.  An empty list yields True.
    """
    for name in names_list:
        if not name.endswith(".root"):
            # The file name was previously never interpolated into the
            # message, so a literal "%s" was printed.
            print("File %s does not seem to be a rootfile. Please check." % name)
            return False
    return True
0124
0125
0126
def add_to_blacklist(blacklist, pattern, target, blist_piece):
    """Append *blist_piece* to *blacklist* when *pattern* applies to *target*.

    *pattern* is stripped of surrounding whitespace and used as a regular
    expression searched in *target*.  A leading "!" inverts the test, i.e.
    the piece is appended when the rest of the pattern is NOT found.

    Returns the (possibly extended) comma-separated blacklist string; a
    comma is inserted only when *blacklist* is not empty.

    An empty or whitespace-only pattern used to raise IndexError; it is now
    treated as a pattern that matches every target.
    """
    int_pattern = pattern.strip()

    # startswith() is safe on the empty string, unlike indexing [0].
    flip_condition = int_pattern.startswith('!')
    if flip_condition:
        int_pattern = int_pattern[1:]

    condition = search(int_pattern, target) is not None
    if flip_condition:
        condition = not condition

    if condition:
        if blacklist != "":
            blacklist += ","
        blacklist += blist_piece

    return blacklist
0147
0148
0149
def guess_blacklists(samples,ver1,ver2,hlt):
    """Build a blacklist string for each sample according to a set of rules.

    Every sample starts from a fixed base blacklist.  Depending on *hlt*
    and on whether *ver1* matches the year regular expressions below, extra
    pieces are appended through add_to_blacklist() using the pattern/piece
    pairs from the ``definitions`` module.

    *ver2* is accepted for interface compatibility but is not used.

    Returns a dict mapping each sample to its comma-separated blacklist.
    """
    per_sample = {}
    for sample in samples:
        entry = "FED@1,AlcaBeamMonitor@1,HLT@1,AlCaReco@1"

        if hlt:
            entry += ",AlCaEcalPi0@2"
            if search("2010+|2011+|2012+|2015+", ver1):
                # Nothing further is added for this case.
                print("We are treating Data files for the HLT")
            else:
                print("We are treating MC files for the HLT")
                for pattern, piece in definitions.hlt_mc_pattern_blist_pairs:
                    entry = add_to_blacklist(entry, pattern, sample, piece)
        elif search("2010+|2011+|2012+", ver1):
            print("We are treating Data files:")
            entry += ",By__Lumi__Section@-1,AlCaReco@1"
            # Here the patterns are matched against the version, not the sample.
            for pattern, piece in definitions.data_pattern_blist_pairs:
                entry = add_to_blacklist(entry, pattern, ver1, piece)
        else:
            print("We are treating MC files")
            for pattern, piece in definitions.mc_pattern_blist_pairs:
                entry = add_to_blacklist(entry, pattern, sample, piece)

        per_sample[sample] = entry

    return per_sample
0188
0189
0190
def get_roofiles_in_dir(directory):
    """Return the paths of the entries of *directory* whose name ends in ".root".

    The directory name is printed first.  Each returned path is *directory*
    joined with the entry name, in os.listdir() order.

    A real list is returned: on Python 3 ``map`` yields a lazy iterator,
    on which callers cannot use ``len()`` or indexing.
    """
    print(directory)
    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.endswith(".root")]
0197
0198
0199
def get_filenames_from_pool(all_samples):
    """Split the root files found in directory *all_samples* into ref/test lists.

    The files are listed with get_roofiles_in_dir(), arranged by
    make_files_pairs(), and then split alternately: even positions become
    reference files, odd positions become test files.  The proposed pairing
    is printed.

    Returns ``(ref_filenames, test_filenames)``, or ``([], [])`` when the
    directory contains no root file.
    """
    # Materialise the listing: the helper may hand back a lazy iterator
    # (``map`` on Python 3), which supports neither len() nor re-iteration.
    files_list = list(get_roofiles_in_dir(all_samples))

    if not files_list:
        print("Zero files found in directory %s!" %all_samples)
        return [],[]

    for name in files_list:
        print("* ",name)
    if len(files_list) % 2 != 0:
        print("The numbuer of file is not even... Trying to recover a catastrophe.")

    files_list = list(make_files_pairs(files_list))

    # Even positions are references, odd positions are the matching tests.
    ref_filenames = files_list[0::2]
    test_filenames = files_list[1::2]

    print("The guess would be the following:")
    for ref, test in zip(ref_filenames, test_filenames):
        refbasedir = os.path.dirname(ref)
        testbasedir = os.path.dirname(test)
        dir_to_print = refbasedir
        if refbasedir != testbasedir:
            dir_to_print = "%s and %s" %(refbasedir,testbasedir)
        print("* Directory: %s " %dir_to_print)
        print(" o %s" %os.path.basename(ref))
        print(" o %s" %os.path.basename(test))

    return ref_filenames,test_filenames
0250
0251
0252
0253
def get_clean_fileanames(ref_samples,test_samples):
    """Turn two comma-separated strings into lists of stripped file names.

    The process exits with status 2 when the two lists differ in length or
    when check_root_files() rejects either of them.

    Returns ``(ref_filenames, test_filenames)``.
    """
    ref_filenames = [piece.strip() for piece in ref_samples.split(",")]
    test_filenames = [piece.strip() for piece in test_samples.split(",")]

    if len(ref_filenames) != len(test_filenames):
        print("The numebr of reference and test files does not seem to be the same. Please check.")
        exit(2)

    # The test list is only checked when the reference list passed.
    all_root = check_root_files(ref_filenames) and check_root_files(test_filenames)
    if not all_root:
        exit(2)

    return ref_filenames,test_filenames
0266
0267
0268
def count_alive_processes(p_list):
    """Return how many objects in *p_list* have ``returncode`` set to None.

    None is tested by identity, so an object with a custom ``__eq__`` cannot
    be miscounted, and no intermediate list is built.
    """
    return sum(1 for p in p_list if p.returncode is None)
0271
0272
0273
def call_compare_using_files(args):
    """Run compare_using_files.py on one reference/test pair.

    *args* is a 4-item sequence ``(sample, ref_filename, test_filename,
    options)``; a single argument is used so the function can be handed to
    ``Pool.map``.

    The argument vector is built directly as a list instead of being
    assembled as one string and split on spaces, so file names containing
    spaces reach the child process intact.

    Side effect: when ``options.stat_test`` is "Bin2Bin" or "BinToBin",
    ``options.test_threshold`` is overwritten with 0.9999.

    Returns the value returned by ``subprocess.call`` (the exit status).
    """
    sample, ref_filename, test_filename, options = args
    gt = name2globaltag(ref_filename)
    blacklists = guess_blacklists([sample],
                                  name2version(ref_filename),
                                  name2version(test_filename),
                                  options.hlt)

    if options.stat_test in ["Bin2Bin", "BinToBin"]:
        options.test_threshold = 0.9999

    argv = ["compare_using_files.py", ref_filename, test_filename, "-C", "-R"]
    if options.do_pngs:
        argv.append("-p")
    argv += ["-o", "%s_%s" % (sample, gt)]
    argv.append("--specify_run")
    argv += ["-t", "%s" % options.test_threshold]
    argv += ["-s", "%s" % options.stat_test]

    if options.hlt:
        argv += ["-d", "HLT"]
    if options.hash_name:
        argv.append("--hash_name")
    if options.blacklist_file:
        argv.append("--use_black_file")
    if options.standalone:
        argv.append("--standalone")
    if len(blacklists[sample]) > 0:
        argv += ["-B", blacklists[sample]]

    print("\nExecuting -- %s" % " ".join(argv))

    return call(argv)
0311
0312
0313
0314
def do_comparisons_threaded(options):
    """Run all reference/test comparisons in a pool of worker processes.

    The file lists come either from the directory ``options.all_samples``
    or from the comma-separated ``options.ref_samples`` /
    ``options.test_samples``.  The comparisons run inside the output
    directory (``options.out_dir``, or "<version1>VS<version2>" when that is
    empty), which is created if missing.  When ``options.input_dir`` is
    empty it is set to the output directory.

    Returns 0 when no sample could be determined, None otherwise.

    Changes with respect to the previous implementation:

    * the reduced number of processes is actually printed (the message used
      to end without the number);
    * the pool is closed and joined once the work is done;
    * the working directory saved on entry is restored, also on error,
      instead of a blind ``chdir("..")``;
    * locals that were assigned but never read were dropped.
    """
    n_processes = int(options.n_processes)

    if len(options.all_samples) > 0:
        ref_filenames, test_filenames = get_filenames_from_pool(options.all_samples)
    else:
        ref_filenames, test_filenames = get_clean_fileanames(options.ref_samples, options.test_samples)

    # Absolute paths are needed because the work happens after a chdir.
    ref_filenames = list(map(os.path.abspath, ref_filenames))
    test_filenames = list(map(os.path.abspath, test_filenames))

    samples, cmssw_version1, cmssw_version2 = guess_params(ref_filenames, test_filenames)

    if len(samples) == 0:
        print("No Samples found... Quitting")
        return 0

    original_dir = os.getcwd()

    outdir = options.out_dir
    if len(outdir) == 0:
        print("Creating automatic outdir:", end=' ')
        outdir = "%sVS%s" %(cmssw_version1,cmssw_version2)
        print(outdir)
    if len(options.input_dir) == 0:
        print("Creating automatic indir:", end=' ')
        options.input_dir = outdir
        print(options.input_dir)

    if not os.path.exists(outdir):
        os.mkdir(outdir)
    os.chdir(outdir)

    try:
        # No point in starting more workers than there are comparisons.
        n_comparisons = len(ref_filenames)
        if n_comparisons < n_processes:
            print("Less comparisons than possible processes: reducing n processes to", end=' ')
            n_processes = n_comparisons
            print(n_processes)

        args_iterable = [list(args) + [options]
                         for args in zip(samples, ref_filenames, test_filenames)]

        pool = Pool(n_processes)
        try:
            pool.map(call_compare_using_files, args_iterable)
        finally:
            pool.close()
            pool.join()

        # Move the pickles one level up, into the output directory itself.
        os.system("mv */*pkl .")
    finally:
        os.chdir(original_dir)
0392
def do_reports(indir):
    """Run ``compare_using_files.py -R`` on every pickle file in *indir*.

    Reads ``n_processes`` and ``do_pngs`` from the module-level ``options``
    object, which only exists when the file is run as a script.

    At most ``n_processes`` children run at the same time; the function
    returns only after all of them have finished.

    Changes with respect to the previous implementation:

    * children are started with ``Popen``; the code used to call ``wait()``
      on the integer returned by ``subprocess.call``, which raises
      AttributeError;
    * only names ending in ".pkl" are processed, since the output name is
      obtained by cutting the last four characters;
    * the working directory saved on entry is restored, also on error,
      instead of a blind ``chdir("..")``.
    """
    # Local import: only ``call`` and ``PIPE`` are imported at module level.
    from subprocess import Popen

    n_processes = max(1, int(options.n_processes))
    original_dir = os.getcwd()
    os.chdir(indir)
    try:
        running_subprocesses = []
        for pklfilename in os.listdir("./"):
            if not pklfilename.endswith(".pkl"):
                continue

            argv = ["compare_using_files.py", "-R"]
            if options.do_pngs:
                argv.append("-p")
            argv += ["-P", pklfilename]
            argv += ["-o", pklfilename[:-4]]
            print("Executing %s" % " ".join(argv))

            running_subprocesses.append(Popen(argv))

            # Batch is full: wait for all of it before starting more.
            if len(running_subprocesses) >= n_processes:
                for p in running_subprocesses:
                    p.wait()
                running_subprocesses = []

        # Wait for the last, possibly incomplete, batch.
        for p in running_subprocesses:
            p.wait()
    finally:
        os.chdir(original_dir)
0419
0420
def do_html(options, hashing_flag, standalone):
    """Write the summary page "RelMonSummary.html" in the current directory.

    When ``options.reports`` is set, do_reports() is first run on
    ``options.input_dir``.  The aggregation rules are taken from the
    ``definitions`` module under the key 'HLT' when ``options.hlt`` is set
    and 'reco' otherwise.  *hashing_flag* and *standalone* are passed
    through to make_summary_table().

    The output file is written inside a ``with`` block so that it is closed
    even when the write fails.
    """
    if options.reports:
        print("Preparing reports for the single files...")
        do_reports(options.input_dir)

    if options.hlt:
        print("Aggregating directories according to HLT rules")
        rules_key = 'HLT'
    else:
        rules_key = 'reco'
    aggregation_rules = definitions.aggr_pairs_dict[rules_key]
    aggregation_rules_twiki = definitions.aggr_pairs_twiki_dict[rules_key]

    table_html = make_summary_table(options.input_dir,
                                    aggregation_rules,
                                    aggregation_rules_twiki,
                                    hashing_flag,
                                    standalone)

    with open("RelMonSummary.html", "w") as ofile:
        ofile.write(table_html)
0443
0444
0445
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the comparisons when
    # samples were given and build the HTML summary when an input directory
    # is known.  ``options`` is deliberately left at module level because
    # do_reports() reads it as a global.

    # Defaults of the command-line options.
    ref_samples=""
    test_samples=""
    all_samples=""
    n_processes=1
    out_dir=""
    in_dir=""
    n_threads=1
    run=-1
    stat_test="Chi2"
    test_threshold=0.00001
    hlt=False

    parser = OptionParser(usage="usage: %prog [options]")

    # The long option used to be registered as "--ref_samples " (trailing
    # space) and was only reachable through optparse abbreviation matching.
    parser.add_option("-R","--ref_samples",
                      action="store",
                      dest="ref_samples",
                      default=ref_samples,
                      help="The samples that act as reference (comma separated list)")

    parser.add_option("-T","--test_samples",
                      action="store",
                      dest="test_samples",
                      default=test_samples,
                      help="The samples to be tested (comma separated list)")

    parser.add_option("-a","--all_samples",
                      action="store",
                      dest="all_samples",
                      default=all_samples,
                      help="EXPERIMENTAL: Try to sort all samples selected (wildacrds) and organise a comparison")

    parser.add_option("-o","--out_dir",
                      action="store",
                      dest="out_dir",
                      default=out_dir,
                      help="The outdir other than <Version1>VS<Version2>")

    parser.add_option("-p","--do_pngs",
                      action="store_true",
                      dest="do_pngs",
                      default=False,
                      help="EXPERIMENTAL!!! Do the pngs of the comparison (takes 50%% of the total running time) \n(default is %s)" %False)

    # Same trailing-space problem as "--ref_samples " above.
    parser.add_option("-r","--run",
                      action="store",
                      dest="run",
                      default=run,
                      help="The run to be checked \n(default is %s)" %run)

    parser.add_option("-t","--test_threshold",
                      action="store",
                      dest="test_threshold",
                      default=test_threshold,
                      help="Threshold for the statistical test \n(default is %s)" %test_threshold)

    parser.add_option("-s","--stat_test",
                      action="store",
                      dest="stat_test",
                      default=stat_test,
                      help="Statistical test (KS or Chi2) \n(default is %s)" %stat_test)

    parser.add_option("-N","--numberOfProcesses",
                      action="store",
                      dest="n_processes",
                      default=n_processes,
                      help="Number of parallel processes to be run. Be Polite! \n(default is %s)" %n_processes)

    parser.add_option("--HLT",
                      action="store_true",
                      dest="hlt",
                      default=False,
                      help="Analyse HLT histograms\n(default is %s)" %hlt)

    parser.add_option("-i","--input_dir",
                      action="store",
                      dest="input_dir",
                      default=in_dir,
                      help="Input directory for html creation \n(default is %s)" %in_dir)

    # The help text used to interpolate ``in_dir`` (an empty string) as the
    # default of this boolean flag.
    parser.add_option("--reports",
                      action="store_true",
                      dest="reports",
                      default=False,
                      help="Do the reports for the pickles \n(default is %s)" %False)

    parser.add_option("--hash_name",
                      action="store_true",
                      dest="hash_name",
                      default=False,
                      help="Set if you want to minimize & hash the output HTML files.")

    parser.add_option("--use_black_file",
                      action="store_true",
                      dest="blacklist_file",
                      default=False,
                      help="Use a black list file of histograms located @ /RelMon/data")

    parser.add_option("--standalone",
                      action="store_true",
                      dest="standalone",
                      default=False,
                      help="Define that using RelMon in standalone method. Makes CSS files accessible over HTTP")

    (options, args) = parser.parse_args()

    # Samples are given either as a ref/test pair of lists or as a directory.
    have_ref_and_test = len(options.ref_samples)*len(options.test_samples) > 0
    have_samples = len(options.all_samples) > 0 or have_ref_and_test

    if not have_samples and len(options.input_dir)==0:
        print("No samples given as input.")
        parser.print_help()
        exit(2)

    if have_samples:
        do_comparisons_threaded(options)
    # do_comparisons_threaded() fills input_dir when it was left empty.
    if len(options.input_dir)>0:
        do_html(options, options.hash_name, options.standalone)
0566
0567
0568
0569
0570
0571
0572
0573
0574
0575
0576
0577