File indexing completed on 2024-11-25 02:29:04
0001
0002
0003
0004
0005
0006
0007
0008
0009 from builtins import range
0010 import os,sys, DLFCN
0011 import optparse
0012
0013
0014 from pluginCondDBPyInterface import *
0015 from CondCore.Utilities import iovInspector as inspect
0016 from CondCore.Utilities.timeUnitHelper import *
0017
0018
0019 import xmlrpclib
0020
0021
# Prefer the standard-library json module; fall back to simplejson when the
# interpreter does not ship one. Only ImportError is handled so that
# KeyboardInterrupt / SystemExit are never swallowed here.
try:
    import json
except ImportError:
    try:
        import simplejson as json
    except ImportError:
        # No JSON implementation is importable: tell the user and abort.
        print("Please use lxplus or set an environment (for example crab) with json lib available")
        sys.exit(1)
0030
0031
def _quote_argument(argument):
    """Return *argument* in double quotes when it is empty or contains a space."""
    if argument == "":
        return "\"\""
    if " " in argument:
        return "\"%s\"" % argument
    return argument


# Echo the invocation and remember it as the first header line of the output file.
print("### command line:")
copyargs = [_quote_argument(argument) for argument in sys.argv]
commandline = " ".join(copyargs)

print(commandline)
infotofile = ["### %s\n" % commandline]
0043
0044
0045
0046
# Command-line interface definition. Option names, types, defaults and
# destinations are what the rest of the script reads from `options`.
usage = ('%prog [options]\n\n'
         'Creates a Python configuration file with filenames for runs in specified run range, '
         'with certain min B field and data quality requirements.')

parser = optparse.OptionParser(usage)

parser.add_option("-d", "--alcaDataset",
                  help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
                  type="string",
                  default='',
                  dest="alcaDataset")

# Kept as a 'true'/'false' string (not a flag); validated after parsing.
parser.add_option("-m", "--isMC",
                  help="Whether sample is MC (true) or real data (false).",
                  type="string",
                  default="false",
                  dest="isMC")

parser.add_option("-s", "--startRun",
                  help="First run number in range.",
                  type="int",
                  default=0,
                  dest="startRun")

parser.add_option("-e", "--endRun",
                  help="Last run number in range.",
                  type="int",
                  default=999999999,
                  dest="endRun")

parser.add_option("-b", "--minB",
                  help="Lower limit on minimal B field for a run.",
                  type="float",
                  default=0.,
                  dest="minB")

parser.add_option("--maxB",
                  help="Upper limit on B field for a run.",
                  type="float",
                  default=999.,
                  dest="maxB")

parser.add_option("-r", "--runRegistry",
                  help="If present, use RunRegistry API for B field and data quality query",
                  action="store_true",
                  default=False,
                  dest="runRegistry")

parser.add_option("-j", "--json",
                  help="If present with JSON file as argument, use JSON file for the good runs "
                       "and ignore B field and --runRegistry options. "
                       "The latest JSON file is available at "
                       "/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
                  type="string",
                  default="",
                  dest="json")

parser.add_option("-t", "--dbTag",
                  help="Runinfo DB tag to use.",
                  type="string",
                  default="runinfo_31X_hlt",
                  dest="dbTag")

parser.add_option("--printTags",
                  help="If present, the only thing script will do is printing list of tags in the DB",
                  action="store_true",
                  default=False,
                  dest="printTags")

parser.add_option("--dbName",
                  help="RunInfo DB name to use. The default one is "
                       "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
                  type="string",
                  default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
                  dest="dbName")

parser.add_option("--dqDataset",
                  help="Dataset name to query for good data quality runs. "
                       "If this option is not used, dqDataset=alcaDataset is automatically set. "
                       "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 "
                       "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09",
                  type="string",
                  default="",
                  dest="dqDataset")

# The help fragments are joined with explicit separators so the example
# condition and the following NOTE do not run into each other.
parser.add_option("-c", "--dqCriteria",
                  help="Set of DQ criteria to use with -dq flag of dbs.\n"
                       "An example of a really strict condition:\n"
                       "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD' "
                       "NOTE: if --runRegistry is used, DQ criteria syntax should be as Advanced query syntax for RR. E.g.: "
                       "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
                  type="string",
                  default="",
                  dest="dqCriteria")

parser.add_option("-o", "--outputFile",
                  help="Name for output file (please include the .py suffix)",
                  type="string",
                  default="filelist.py",
                  dest="outputFile")

parser.add_option("-v", "--verbose",
                  help="Degree of debug info verbosity",
                  type="int",
                  default=0,
                  dest="verbose")
0158
options, args = parser.parse_args()

# --alcaDataset is mandatory unless the script is only asked to print DB tags.
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    sys.exit(1)  # non-zero status so that callers can detect the usage error

# The data-quality query defaults to the AlCa dataset itself.
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

v = options.verbose

# Thresholds compared against the min/max current values read from the RunInfo
# DB in getGoodBRuns(). NOTE(review): the 18160/3.8 factor presumably converts
# a field in Tesla to a magnet current in Ampere -- confirm.
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

# Optional fragments of the "all options" summary line.
rr = ' --runRegistry' if options.runRegistry else ''
jj = (' --json ' + options.json) if options.json != '' else ''

# Full option summary, defaults included (every piece is joined explicitly).
allOptions = ''.join([
    '### ', copyargs[0],
    ' --alcaDataset ', options.alcaDataset,
    ' --isMC ', options.isMC,
    ' --startRun ', str(options.startRun),
    ' --endRun ', str(options.endRun),
    ' --minB ', str(options.minB),
    ' --maxB ', str(options.maxB),
    rr,
    jj,
    ' --dbTag ', options.dbTag,
    ' --dqDataset ', options.dqDataset,
    ' --dqCriteria "', options.dqCriteria, '"',
    ' --outputFile ', options.outputFile,
])

print("### all options, including default:")
print(allOptions)
0198
0199
0200
0201
0202
0203
0204
0205
def getGoodBRuns():
    """Collect the runs whose magnet current passes the minI/maxI cuts.

    Opens the RunInfo DB named by ``options.dbName`` and walks the trend
    entries of tag ``options.dbTag`` between ``options.startRun - 1`` and
    ``options.endRun + 1``. A run is kept when element 4 of its value tuple
    is >= ``minI`` and element 3 is <= ``maxI`` (according to the header
    printed at verbosity > 1 these are min_current and max_current).

    With ``options.printTags`` set, the function only prints the list of
    tags and terminates the script.

    Any exception raised while reading the DB is printed and the runs
    collected so far are returned.

    Returns:
        list of int run numbers.
    """
    selected_runs = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    # The incantation object is not used directly but is kept bound for the
    # whole call, as in the original code.
    framework = FWIncantation()

    conddb = RDBMS("/afs/cern.ch/cms/DB/conddb")
    database = conddb.getDB(options.dbName)
    available_tags = database.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(available_tags)
        print("\n")
        sys.exit()

    tag_name = options.dbTag

    # Run numbers that are always printed, whatever the verbosity.
    always_shown = (67647, 66893, 67264)

    try:
        iov = inspect.Iov(database, tag_name)

        if v > 1:
            print("######## summries ########")
            for summary in iov.summaries():
                print(summary[0], summary[1], summary[2], summary[3])

        what = {}

        if v > 1:
            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")

        for entry in iov.trendinrange(what, options.startRun-1, options.endRun+1):
            if v > 0 or entry[0] in always_shown:
                print(entry[0], entry[1], entry[2], entry[2][4], entry[2][3])

            values = entry[2]
            if values[4] >= minI and values[3] <= maxI:
                selected_runs.append(int(entry[0]))

    except Exception as er:
        # Best effort: report the problem and return what was gathered.
        print(er)

    print("### runs with good B field ###")
    print(selected_runs)

    return selected_runs
0269
0270
0271
0272
0273
def getGoodQRuns():
    """Return the runs of ``options.dqDataset`` that satisfy ``options.dqCriteria``.

    Runs the DBS command-line search through the shell (so that
    ``$DBSCMD_HOME`` is expanded and the result is piped through ``sort``)
    and reads its standard output directly. No temporary file is involved,
    so concurrent invocations cannot clobber each other's results.

    Returns:
        list of int run numbers, in the order produced by ``sort``.

    Raises:
        ValueError: if a non-blank output line is not an integer.
    """
    import subprocess  # local import: the file's import block is left untouched

    dbs_quiery = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_quiery+'" | sort')

    # NOTE(review): the query is interpolated into a shell command line. The
    # values come from this script's own command-line options, but they are
    # not escaped.
    command = 'python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'+dbs_quiery+'" | sort'
    process = subprocess.Popen(command, shell=True,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    output = process.communicate()[0]

    # Blank lines (e.g. an empty result) are skipped instead of crashing int().
    runs_good_dq = [int(line) for line in output.splitlines() if line.strip()]

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
0298
0299
0300
0301
0302
0303
def getRunRegistryGoodRuns():
    """Ask the RunRegistry XML-RPC service for the runs passing all cuts.

    The query combines the run range (``options.startRun``/``endRun``), the
    B-field window (``options.minB``/``maxB``) and, when non-empty,
    ``options.dqCriteria``.

    Returns:
        list holding the first element of every row under the 'events' key
        of the exported table.
    """
    server = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')

    conditions = [
        "{runNumber}>=" + str(options.startRun),
        "{runNumber}<=" + str(options.endRun),
        "{bfield}>=" + str(options.minB),
        "{bfield}<=" + str(options.maxB),
    ]
    if options.dqCriteria != "":
        conditions.append(options.dqCriteria)
    rr_quiery = " and ".join(conditions)

    exported = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_quiery)

    # Put quotes around the two bare words so the text evaluates as a dict.
    for bare_key in ("bfield", "events"):
        exported = exported.replace(bare_key, "'" + bare_key + "'")

    # NOTE(review): eval() executes text received from a remote server; if the
    # reply is a plain literal, ast.literal_eval would be the safer choice --
    # confirm the reply format before switching.
    table = eval(exported)

    return [row[0] for row in table['events']]
0321
0322
0323
0324
def getJSONGoodRuns(json_path=None):
    """Return the sorted run numbers that appear as keys of a JSON file.

    Args:
        json_path: path of the JSON file to read. When omitted, the file
            given by the ``--json`` option (``options.json``) is used.

    Returns:
        Ascending list of int run numbers, one per top-level key.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the file is not valid JSON or a key is not an integer.
    """
    if json_path is None:
        json_path = options.json

    # open() works on both Python 2 and 3 (file() is Python-2 only) and the
    # with-block guarantees the handle is closed.
    with open(json_path, 'r') as jsonfile:
        jsondict = json.load(jsonfile)

    return sorted(int(run) for run in jsondict)
0342
0343
0344
0345
# Select the good runs. For real data exactly one of three sources is used:
# a JSON file, the RunRegistry, or the RunInfo DB combined with the DBS
# data-quality query. For MC all three lists stay empty.
runs_b_on = []
runs_good_dq = []
runs_good = []

if options.isMC == 'false':
    if options.json != '':
        runs_good = getJSONGoodRuns()
        print("### good runs from JSON file ###")
        print(runs_good)

    elif options.runRegistry:
        runs_good = getRunRegistryGoodRuns()
        print("### runs with good B field and quality ###")
        print(runs_good)

    else:
        runs_b_on = getGoodBRuns()

        infotofile.append("### runs with good B field ###\n")
        infotofile.append("### %s\n" % str(runs_b_on))

        runs_good_dq = getGoodQRuns()

        infotofile.append("### runs with good quality ###\n")
        infotofile.append("### %s\n" % str(runs_good_dq))

        # Runs passing both selections, in the order of runs_b_on.
        good_dq_lookup = set(runs_good_dq)
        runs_good = [run for run in runs_b_on if run in good_dq_lookup]

        print("### runs with good B field and quality ###")
        print(runs_good)

        infotofile.append("### runs with good B field and quality ###\n")
        infotofile.append("### %s\n" % str(runs_good))
0393
0394
0395
0396
# Query DBS for (run, number of events, file name) of every non-empty file of
# the AlCa dataset in the requested run range, keep the files of the selected
# runs and summarise them.
import subprocess

dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

# The output is read through a pipe: no fixed-name file is left behind in /tmp.
# NOTE(review): the query is interpolated into a shell command line without
# escaping; the values come from this script's own command-line options.
dbs_command = 'python $DBSCMD_HOME/dbsCommandLine.py -c search --noheader --query="'+dbs_quiery+'" | sort'
dbs_output = subprocess.Popen(dbs_command, shell=True,
                              stdout=subprocess.PIPE,
                              universal_newlines=True).communicate()[0]

list_of_files = []
list_of_runs = []
list_of_numevents = []
total_numevents = 0

filter_by_run = options.isMC == 'false'   # MC samples keep every run
good_run_lookup = set(runs_good)          # O(1) membership test per line
seen_files = set()                        # (file, numevents) pairs already kept

for line in dbs_output.splitlines():
    if not line.strip():
        continue  # e.g. empty query result
    # A line that does not hold exactly three fields still raises ValueError.
    (run, numevents, fname) = line.split()
    run = int(run)
    if filter_by_run and run not in good_run_lookup:
        continue
    list_of_runs.append(run)
    # Keep each (file, numevents) pair once, in first-seen (sorted) order, so
    # the generated file list is deterministic and events are not double-counted.
    if (fname, numevents) in seen_files:
        continue
    seen_files.add((fname, numevents))
    list_of_files.append(fname)
    list_of_numevents.append(numevents)
    total_numevents += int(numevents)

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))
0437
0438
0439
0440
0441
# Write the header comments followed by the Python list of selected files;
# every entry is annotated with its number of events.
size = len(list_of_files)

with open(options.outputFile, 'w') as ff:
    ff.write("".join(infotofile))
    ff.write("\nfileNames = [\n")
    for position, (fname, nevents) in enumerate(zip(list_of_files, list_of_numevents)):
        # No separator after the last entry.
        separator = "" if position == size-1 else ","
        ff.write(" '" + fname + "'" + separator + " # " + nevents + "\n")
    ff.write(']\n')
0458