from __future__ import print_function
from builtins import range
import os, sys, DLFCN
import optparse


from pluginCondDBPyInterface import *
from CondCore.Utilities import iovInspector as inspect
from CondCore.Utilities.timeUnitHelper import *


import xmlrpclib


try:
    import json
except:
    try:
        import simplejson as json
    except:
        print("Please use lxplus or set up an environment (for example CRAB) with the json library available")
        sys.exit(1)


print("### command line:")
copyargs = sys.argv[:]
for i in range(len(copyargs)):
    if copyargs[i] == "":
        copyargs[i] = "\"\""
    if copyargs[i].find(" ") != -1:
        copyargs[i] = "\"%s\"" % copyargs[i]
commandline = " ".join(copyargs)

print(commandline)
infotofile = ["### %s\n" % commandline]


usage='%prog [options]\n\n'+\
      'Creates a Python configuration file with filenames for runs in the specified run range that satisfy minimum B field and data quality requirements.'

parser=optparse.OptionParser(usage)

parser.add_option("-d", "--alcaDataset",
                  help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
                  type="string",
                  default='',
                  dest="alcaDataset")

parser.add_option("-m", "--isMC",
                  help="Whether sample is MC (true) or real data (false).",
                  type="string",
                  default="false",
                  dest="isMC")

parser.add_option("-s", "--startRun",
                  help="First run number in range.",
                  type="int",
                  default=0,
                  dest="startRun")

parser.add_option("-e", "--endRun",
                  help="Last run number in range.",
                  type="int",
                  default=999999999,
                  dest="endRun")

parser.add_option("-b", "--minB",
                  help="Lower limit on minimal B field for a run.",
                  type="float",
                  default=0.,
                  dest="minB")

parser.add_option("--maxB",
                  help="Upper limit on B field for a run.",
                  type="float",
                  default=999.,
                  dest="maxB")

parser.add_option("-r","--runRegistry",
                  help="If present, use the RunRegistry API for the B field and data quality query",
                  action="store_true",
                  default=False,
                  dest="runRegistry")

parser.add_option("-j","--json",
                  help="If present with a JSON file as argument, use the JSON file for the good runs and ignore the B field and --runRegistry options. "+\
                       "The latest JSON file is available at /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
                  type="string",
                  default="",
                  dest="json")

parser.add_option("-t", "--dbTag",
                  help="RunInfo DB tag to use.",
                  type="string",
                  default="runinfo_31X_hlt",
                  dest="dbTag")

parser.add_option("--printTags",
                  help="If present, the script will only print the list of tags in the DB",
                  action="store_true",
                  default=False,
                  dest="printTags")

parser.add_option("--dbName",
                  help="RunInfo DB name to use. The default one is "+\
                       "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
                  type="string",
                  default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
                  dest="dbName")

parser.add_option("--dqDataset",
                  help="Dataset name to query for good data quality runs. "+\
                       "If this option is not used, dqDataset=alcaDataset is automatically set. "+\
                       "If alcaDataset does not have DQ information, use /Cosmics/Commissioning08-v1/RAW for CRAFT08 "+\
                       "and /Cosmics/CRAFT09-v1/RAW for CRAFT09.",
                  type="string",
                  default="",
                  dest="dqDataset")

parser.add_option("-c", "--dqCriteria",
                  help="Set of DQ criteria to use with the -dq flag of dbs.\n"+\
                       "An example of a really strict condition:\n"+\
                       "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD'\n"+\
                       "NOTE: if --runRegistry is used, the DQ criteria syntax should follow the Advanced query syntax of the RunRegistry, e.g.:\n"+\
                       "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
                  type="string",
                  default="",
                  dest="dqCriteria")

parser.add_option("-o", "--outputFile",
                  help="Name for output file (please include the .py suffix)",
                  type="string",
                  default="filelist.py",
                  dest="outputFile")

parser.add_option("-v", "--verbose",
                  help="Degree of debug info verbosity",
                  type="int",
                  default=0,
                  dest="verbose")

options,args=parser.parse_args()


if options.alcaDataset=='' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    sys.exit()

if options.dqDataset=='':
    options.dqDataset = options.alcaDataset

if not (options.isMC=='true' or options.isMC=='false'):
    print("--isMC option accepts only 'true' or 'false' as argument")
    sys.exit()

v = options.verbose

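# The RunInfo DB stores the solenoid current rather than the B field, so the
# requested field limits (in Tesla) are converted to current limits (in Ampere),
# assuming 18160 A corresponds to the nominal 3.8 T field.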
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

rr = ''
if options.runRegistry: rr = ' --runRegistry'

jj = ''
if options.json!='': jj = ' --json '+options.json

allOptions = '### ' + copyargs[0] + ' --alcaDataset ' + options.alcaDataset + ' --isMC ' + options.isMC + \
             ' --startRun ' + str(options.startRun) + ' --endRun '+ str(options.endRun) + \
             ' --minB ' + str(options.minB) + ' --maxB ' + str(options.maxB) + rr + jj + \
             ' --dbTag ' + options.dbTag + ' --dqDataset ' + options.dqDataset + ' --dqCriteria "' + options.dqCriteria + '"' + \
             ' --outputFile ' + options.outputFile

print("### all options, including default:")
print(allOptions)


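# Select runs with the magnet at the requested field: read the per-run solenoid
# current trend from the RunInfo conditions DB (via the CondCore iov inspector)
# and keep runs whose minimum and maximum currents lie within [minI, maxI].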
def getGoodBRuns():

    runs_b_on = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    a = FWIncantation()

    rdbms = RDBMS("/afs/cern.ch/cms/DB/conddb")

    db = rdbms.getDB(options.dbName)
    tags = db.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(tags)
        print("\n")
        sys.exit()

    tag = options.dbTag

    try :
        iov = inspect.Iov(db,tag)

        if v>1 :
            print("######## summaries ########")
            for x in iov.summaries():
                print(x[0], x[1], x[2], x[3])

        what={}

        if v>1 :
            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v>0:
            print("######## trends ########")
        for x in iov.trendinrange(what,options.startRun-1,options.endRun+1):
            if v>0 or x[0]==67647 or x[0]==66893 or x[0]==67264:
                print(x[0], x[1], x[2], x[2][4], x[2][3])
            if x[2][4] >= minI and x[2][3] <= maxI:
                runs_b_on.append(int(x[0]))

    except Exception as er :
        print(er)

    print("### runs with good B field ###")
    print(runs_b_on)

    return runs_b_on


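# Select runs with good data quality: build a DBS "find run" query with the
# requested DQ criteria and collect the run numbers from the output of the DBS
# command-line client (written to a temporary file).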
def getGoodQRuns():

    runs_good_dq = []

    dbs_query = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_query+'" | sort')

    os.system('python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'+dbs_query+'" | sort > /tmp/runs_full_of_pink_bunnies')

    ff = open('/tmp/runs_full_of_pink_bunnies', "r")
    line = ff.readline()
    while line and line!='':
        runs_good_dq.append(int(line))
        line = ff.readline()
    ff.close()

    os.system('rm /tmp/runs_full_of_pink_bunnies')

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq


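# Select good runs via the RunRegistry: query its XML-RPC interface for runs in
# the requested run range and B-field window that also satisfy the (optional)
# DQ criteria given in RunRegistry "advanced query" syntax.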
def getRunRegistryGoodRuns():

    server = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')

    rr_query = "{runNumber}>="+str(options.startRun)+" and {runNumber}<="+str(options.endRun)+\
               " and {bfield}>="+str(options.minB)+" and {bfield}<="+str(options.maxB)
    if options.dqCriteria != "": rr_query += " and "+options.dqCriteria

    rrstr = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_query)
    rrstr = rrstr.replace("bfield","'bfield'")
    rrstr = rrstr.replace("events","'events'")
    rrdata = eval(rrstr)

    runs_good = []
    for rr in rrdata['events']: runs_good.append(rr[0])

    return runs_good


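# Select good runs from a JSON certification file: only the run numbers (the
# dictionary keys) are used; the lumi-section ranges in the values are ignored.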
def getJSONGoodRuns():

    jsonfile = open(options.json,'r')
    jsondict = json.load(jsonfile)
    jsonfile.close()

    runs_good = []
    for run in jsondict.keys(): runs_good.append(int(run))
    runs_good.sort()

    return runs_good


runs_b_on = []

if options.isMC=='false' and not options.runRegistry and options.json=='':
    runs_b_on = getGoodBRuns()

    infotofile.append("### runs with good B field ###\n")
    infotofile.append("### %s\n" % str(runs_b_on))


runs_good_dq = []
runs_good = []

if options.isMC=='false' and not options.runRegistry and options.json=='':
    runs_good_dq = getGoodQRuns()

    infotofile.append("### runs with good quality ###\n")
    infotofile.append("### %s\n" % str(runs_good_dq))

    runs_good = [val for val in runs_b_on if val in runs_good_dq]

    print("### runs with good B field and quality ###")
    print(runs_good)

    infotofile.append("### runs with good B field and quality ###\n")
    infotofile.append("### %s\n" % str(runs_good))


if options.isMC=='false' and options.runRegistry and options.json=='':
    runs_good = getRunRegistryGoodRuns()
    print("### runs with good B field and quality ###")
    print(runs_good)


if options.isMC=='false' and options.json!='':
    runs_good = getJSONGoodRuns()
    print("### good runs from JSON file ###")
    print(runs_good)


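# Query DBS for (run number, number of events, file name) triplets of the AlCa
# dataset in the requested run range; for real data, files are then filtered
# against the good run list built above.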
dbs_query = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

os.system('python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'+dbs_query+'" | sort > /tmp/runs_and_files_full_of_pink_bunnies')

list_of_files = []
list_of_runs = []
list_of_numevents = []
total_numevents = 0

ff = open('/tmp/runs_and_files_full_of_pink_bunnies','r')
for line in ff:
    (run, numevents, fname) = line.split(' ')
    if options.isMC=='false' and (int(run) not in runs_good):
        continue
    fname = fname.rstrip('\n')
    list_of_files.append(fname)
    list_of_runs.append(int(run))
    list_of_numevents.append(numevents)
    total_numevents += int(numevents)
ff.close()

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

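# Drop duplicate (file, number of events) pairs, if any, and recompute the
# total event count from the remaining unique entries.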
files_events = list(zip(list_of_files, list_of_numevents))
unique_files_events = list(set(files_events))
list_of_files, list_of_numevents = map(list, list(zip(*unique_files_events)))
total_numevents = sum( map(int, list_of_numevents) )

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))


size = len(list_of_files)

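# Write the output Python fragment: the collected header comments followed by a
# fileNames list, one file per line with its number of events as a trailing comment.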
ff = open(options.outputFile,'w')
ff.write("".join(infotofile))
ff.write("\nfileNames = [\n")
comma = ","
for i in range(0,size):
    if i==size-1:
        comma=""

    ff.write(" '"+ list_of_files[i] +"'"+comma+" # "+ list_of_numevents[i] + "\n")
ff.write(']\n')
ff.close()
0459