MuonAlignmentAlgorithms/scripts/findQualityFiles.py

0001 #! /usr/bin/env python3
0002
0003 ######################################################
0004 ### See documentation at
0005 ### https://twiki.cern.ch/twiki/bin/view/CMS/FindQualityFilesPy
0006 ### also run it with -h option
0007 ######################################################
0008
0009 from builtins import range
0010 import os,sys, DLFCN
0011 import optparse
0012
0013 # for RunInfo API
0014 from pluginCondDBPyInterface import *
0015 from CondCore.Utilities import iovInspector as inspect
0016 from CondCore.Utilities.timeUnitHelper import *
0017
0018 # for RunRegistry API
0019 import xmlrpclib
0020
0021 # for json support
0022 try: # FUTURE: Python 2.6, prior to 2.6 requires simplejson
0023     import json
0024 except:
0025     try:
0026         import simplejson as json
0027     except:
0028         print("Please use lxplus or set an environment (for example crab) with json lib available")
0029         sys.exit(1)
0030
0031 ######################################################
def quoteForDisplay(arg):
    """Return *arg* as it should appear in the echoed command line.

    An empty argument is shown as a pair of double quotes and an argument
    containing a space is wrapped in double quotes.  Embedded quotes are not
    escaped: the result is meant for logging, not for re-execution.
    """
    if arg == "":
        arg = "\"\""
    if " " in arg:
        arg = "\"%s\"" % arg
    return arg


print("### command line:")
# Display-quoted copy of the arguments; copyargs[0] is reused later when the
# full option summary is assembled.
copyargs = [quoteForDisplay(arg) for arg in sys.argv]
commandline = " ".join(copyargs)

print(commandline)
# Header lines that are written at the top of the output file.
infotofile = ["### %s\n" % commandline]
0043
0044 ######################################################
0045 # To parse commandline args
0046
# Command-line interface.  Only the parser is built here; the arguments are
# parsed by the statements that follow this section.
usage = ('%prog [options]\n\n'
         'Creates a Python configuration file with filenames for runs in specified run range, '
         'with certain min B field and data quality requirements.')

parser = optparse.OptionParser(usage)

# Example values:
#   /Cosmics/Commissioning08-2213_Tosca090322_2pi_scaled_ReReco_FromTrackerPointing-v1/RAW-RECO
#   /Cosmics/Commissioning08_CRAFT_ALL_V11_StreamALCARECOMuAlGlobalCosmics_227_Tosca090216_ReReco_FromTrackerPointing_v5/ALCARECO
parser.add_option("-d", "--alcaDataset",
                  help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
                  type="string",
                  default='',
                  dest="alcaDataset")

# Kept as a 'true'/'false' string (not a flag) because the rest of the script
# compares against these literal values.
parser.add_option("-m", "--isMC",
                  help="Whether sample is MC (true) or real data (false).",
                  type="string",
                  default="false",
                  dest="isMC")

parser.add_option("-s", "--startRun",
                  help="First run number in range.",
                  type="int",
                  default=0,
                  dest="startRun")

parser.add_option("-e", "--endRun",
                  help="Last run number in range.",
                  type="int",
                  default=999999999,
                  dest="endRun")

# A previously used value was 3.77.
parser.add_option("-b", "--minB",
                  help="Lower limit on minimal B field for a run.",
                  type="float",
                  default=0.,
                  dest="minB")

parser.add_option("--maxB",
                  help="Upper limit on B field for a run.",
                  type="float",
                  default=999.,
                  dest="maxB")

parser.add_option("-r", "--runRegistry",
                  help="If present, use RunRegistry API for B field and data quality query",
                  action="store_true",
                  default=False,
                  dest="runRegistry")

parser.add_option("-j", "--json",
                  help="If present with JSON file as argument, use JSON file for the good runs "
                       "and ignore B field and --runRegistry options. "
                       "The latest JSON file is available at "
                       "/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
                  type="string",
                  default="",
                  dest="json")

parser.add_option("-t", "--dbTag",
                  help="Runinfo DB tag to use.",
                  type="string",
                  default="runinfo_31X_hlt",
                  dest="dbTag")

parser.add_option("--printTags",
                  help="If present, the only thing script will do is printing list of tags in the DB",
                  action="store_true",
                  default=False,
                  dest="printTags")

parser.add_option("--dbName",
                  help="RunInfo DB name to use. The default one is "
                       "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
                  type="string",
                  default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
                  dest="dbName")

# The help text used to recommend the CRAFT09 dataset "for CRAFT08".
parser.add_option("--dqDataset",
                  help="Dataset name to query for good data quality runs. "
                       "If this option is not used, dqDataset=alcaDataset is automatically set. "
                       "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 "
                       "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09",
                  type="string",
                  default="",
                  dest="dqDataset")

# Example values:
#   DT_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD
#   DT_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD
#   DT_Shift_Offline=GOOD
# optparse re-wraps help text, so the pieces are separated with spaces rather
# than newlines.
parser.add_option("-c", "--dqCriteria",
                  help="Set of DQ criteria to use with -dq flag of dbs. "
                       "An example of a really strict condition: "
                       "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD'. "
                       "NOTE: if --runRegistry is used, DQ criteria syntax should be as Advanced query syntax for RR. E.g.: "
                       "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
                  type="string",
                  default="",
                  dest="dqCriteria")

parser.add_option("-o", "--outputFile",
                  help="Name for output file (please include the .py suffix)",
                  type="string",
                  default="filelist.py",
                  dest="outputFile")

parser.add_option("-v", "--verbose",
                  help="Degree of debug info verbosity",
                  type="int",
                  default=0,
                  dest="verbose")
0158
options, args = parser.parse_args()

# Argument validation.  Failures exit with a non-zero status so that calling
# shells and scripts can detect them.
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    sys.exit(1)

# Data-quality queries fall back to the AlCa dataset itself.
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

v = options.verbose

# B-field limits rescaled by 18160/3.8 for comparison with the values read
# from the RunInfo DB in getGoodBRuns().
# NOTE(review): presumably 18160 A is the magnet current at 3.8 T - confirm.
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

# Optional fragments of the echoed option summary.
rr = ''
if options.runRegistry:
    rr = ' --runRegistry'

jj = ''
if options.json != '':
    jj = ' --json ' + options.json

allOptions = "".join([
    '### ', copyargs[0],
    ' --alcaDataset ', options.alcaDataset,
    ' --isMC ', options.isMC,
    ' --startRun ', str(options.startRun),
    ' --endRun ', str(options.endRun),
    ' --minB ', str(options.minB),
    ' --maxB ', str(options.maxB),
    rr, jj,
    ' --dbTag ', options.dbTag,
    ' --dqDataset ', options.dqDataset,
    ' --dqCriteria "', options.dqCriteria, '"',
    ' --outputFile ', options.outputFile,
])

print("### all options, including default:")
print(allOptions)
0198
0199
0200 ######################################################
0201 # functions definitions
0202
0203
0204 #########################
0205 # get good B field runs from RunInfo DB
def getGoodBRuns():
    """Return the runs whose RunInfo DB values pass the B-field selection.

    Reads the tag ``options.dbTag`` from the database ``options.dbName`` and
    scans the entries between ``options.startRun - 1`` and
    ``options.endRun + 1``.  A run is kept when element 4 of its value tuple
    is at least ``minI`` and element 3 is at most ``maxI``; according to the
    column header printed in verbose mode these are the minimum and maximum
    current.

    With ``--printTags`` the function prints the tags found in the database
    and terminates the script.

    Any exception raised while reading the tag is printed and the runs
    collected so far are returned (best effort).
    """
    selected_runs = []

    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    # The object is not used directly, but the reference is held for the
    # duration of the DB access, as in the original setup sequence.
    framework = FWIncantation()
    conditions = RDBMS("/afs/cern.ch/cms/DB/conddb")

    database = conditions.getDB(options.dbName)
    available_tags = database.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(available_tags)
        print("\n")
        sys.exit()

    # Runs that are always printed, whatever the verbosity.
    # NOTE(review): looks like a debugging leftover - confirm before removing.
    always_shown = (67647, 66893, 67264)

    try:
        iov = inspect.Iov(database, options.dbTag)
        what = {}

        if v > 1:
            print("######## summries ########")
            for summary in iov.summaries():
                print(summary[0], summary[1], summary[2], summary[3])

            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")

        for entry in iov.trendinrange(what, options.startRun-1, options.endRun+1):
            run = entry[0]
            values = entry[2]
            if v > 0 or run in always_shown:
                print(run, entry[1], values, values[4], values[3])
            if not values[4] >= minI:
                continue
            if values[3] <= maxI:
                selected_runs.append(int(run))

    except Exception as er:
        # Deliberately best effort: report the problem and carry on with
        # whatever was collected.
        print(er)

    print("### runs with good B field ###")
    print(selected_runs)

    return selected_runs
0269
0270
0271 #########################
0272 # obtaining list of good quality runs
0273
def getGoodQRuns():
    """Return the runs of ``options.dqDataset`` that satisfy ``options.dqCriteria``.

    The DBS command-line client is queried for the runs of the dataset that
    match the data-quality criteria; its sorted output is expected to hold
    one run number per line.

    The output is read straight from the child process.  Earlier the result
    went through a fixed file in /tmp, which collided between concurrent
    invocations and was left behind whenever parsing failed.

    Raises:
        ValueError: if a non-blank output line is not an integer.
    """
    import subprocess

    dbs_quiery = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_quiery+'" | sort')

    # The shell is still needed for $DBSCMD_HOME expansion and the sort pipe.
    # NOTE(review): the query is interpolated into a shell command; the values
    # come from this script's own command line.
    command = 'python $DBSCMD_HOME/dbsCommandLine.py -c  search --noheader --query="'+dbs_quiery+'" | sort'
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
    # As before, the exit status of the pipeline is not inspected.
    output = process.communicate()[0]

    # Blank lines are skipped instead of crashing int().
    runs_good_dq = [int(line) for line in output.splitlines() if line.strip()]

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
0298
0299 #########################
0300 # obtaining list of good B and quality runs from Run Registry
0301 # https://twiki.cern.ch/twiki/bin/view/CMS/DqmRrApi
0302 # https://twiki.cern.ch/twiki/bin/viewauth/CMS/DQMRunRegistry
0303
def getRunRegistryGoodRuns():
    """Return the runs selected by the Run Registry for the current options.

    The selection combines the run range, the B-field window and, when given,
    ``options.dqCriteria`` (which must then use the Run Registry query
    syntax).  The first element of every row under the ``events`` key of the
    exported table is returned.
    """
    selection = ("{runNumber}>=%s and {runNumber}<=%s and {bfield}>=%s and {bfield}<=%s"
                 % (options.startRun, options.endRun, options.minB, options.maxB))
    if options.dqCriteria != "":
        selection = selection + " and " + options.dqCriteria

    proxy = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')
    exported = proxy.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', selection)

    # The export uses bare keys; quote them so the text is a Python literal.
    for bare_key in ("bfield", "events"):
        exported = exported.replace(bare_key, "'%s'" % bare_key)

    # NOTE(review): eval() runs text received over plain HTTP.  If the payload
    # is always a pure literal, ast.literal_eval would be the safe choice -
    # confirm the response format before switching.
    table = eval(exported)

    return [row[0] for row in table['events']]
0321
0322 #########################
0323 # obtain a list of good runs from JSON file
0324
def getJSONGoodRuns(jsonFileName=None):
    """Return the sorted run numbers found in a JSON file of good runs.

    Args:
        jsonFileName: path of the JSON file.  When omitted, the file given
            with the --json option (``options.json``) is used.

    Returns:
        The top-level keys of the JSON object, converted to int and sorted in
        ascending order.  The values stored under the keys are ignored.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the content is not valid JSON or a key is not an integer.
    """
    if jsonFileName is None:
        jsonFileName = options.json

    # open() works on both Python 2 and 3 (file() is Python 2 only); the with
    # block closes the handle even when parsing fails.
    with open(jsonFileName, 'r') as jsonfile:
        jsondict = json.load(jsonfile)

    return sorted(int(run) for run in jsondict)
0342
0343 ######################################################
0344 # get good B field runs from RunInfo DB
0345
# Selection of good runs.  Nothing is selected for MC samples; for real data
# exactly one source is used, in this order of precedence:
#   1. a JSON file of good runs (--json),
#   2. the Run Registry (--runRegistry),
#   3. RunInfo DB (B field) intersected with DBS (data quality).
runs_b_on = []
runs_good_dq = []
runs_good = []

is_real_data = (options.isMC == 'false')

if is_real_data and options.json != '':
    # Use the JSON file if specified.
    runs_good = getJSONGoodRuns()
    print("### good runs from JSON file ###")
    print(runs_good)

elif is_real_data and options.runRegistry:
    # Use the Run Registry API if specified.
    runs_good = getRunRegistryGoodRuns()
    print("### runs with good B field and quality ###")
    print(runs_good)

elif is_real_data:
    # Get the good B field runs from the RunInfo DB ...
    runs_b_on = getGoodBRuns()
    infotofile.extend(["### runs with good B field ###\n",
                       "### %s\n" % str(runs_b_on)])

    # ... add the requirement of good data quality ...
    runs_good_dq = getGoodQRuns()
    infotofile.extend(["### runs with good quality ###\n",
                       "### %s\n" % str(runs_good_dq)])

    # ... and keep the runs present in both lists, in B-field list order.
    runs_good = [run for run in runs_b_on if run in runs_good_dq]

    print("### runs with good B field and quality ###")
    print(runs_good)

    infotofile.extend(["### runs with good B field and quality ###\n",
                       "### %s\n" % str(runs_good)])
0393
0394 ######################################################
0395 # Find files for good runs
0396
# Ask DBS for (run, number of events, file name) of every non-empty file of
# the AlCa dataset in the requested run range.
dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

# NOTE(review): the listing goes through a fixed file in /tmp which is left in
# place afterwards (its removal was already disabled); concurrent invocations
# overwrite each other's listing.
os.system('python $DBSCMD_HOME/dbsCommandLine.py -c  search --noheader --query="'+dbs_quiery+'" | sort > /tmp/runs_and_files_full_of_pink_bunnies')

list_of_files = []
list_of_runs = []
list_of_numevents = []

# Set for O(1) membership tests while scanning the listing.
good_run_lookup = set(runs_good)
# A file can hold events from several runs and is then listed once per run;
# only its first occurrence is kept, which also keeps the listing order.
seen_files = set()

with open('/tmp/runs_and_files_full_of_pink_bunnies', 'r') as ff:
    for line in ff:
        if not line.strip():
            continue
        # Fields are separated by three spaces.
        (run, numevents, fname) = line.split('   ')
        run = int(run)
        if options.isMC == 'false' and run not in good_run_lookup:
            continue
        fname = fname.rstrip('\n')
        list_of_runs.append(run)
        if (fname, numevents) in seen_files:
            continue
        seen_files.add((fname, numevents))
        list_of_files.append(fname)
        # Kept as text: it is written verbatim to the output file.
        list_of_numevents.append(numevents)

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

# Counted after de-duplication so that a file shared by several runs
# contributes once; an empty selection gives 0 instead of an unpacking error.
total_numevents = sum(int(numevents) for numevents in list_of_numevents)

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))
0437
0438 ######################################################
0439 # Write out results
0440
# Write the output: the collected header comments followed by a Python list
# named fileNames, one file per line, annotated with its number of events.
size = len(list_of_files)

# The with block closes the file even if one of the writes fails.
with open(options.outputFile, 'w') as ff:
    ff.write("".join(infotofile))
    ff.write("\nfileNames = [\n")
    for i, (fname, numevents) in enumerate(zip(list_of_files, list_of_numevents)):
        # No separator after the last entry.
        comma = "" if i == size-1 else ","
        ff.write("    '" + fname + "'" + comma + " # " + numevents + "\n")
    ff.write(']\n')
0458