Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 11:56:47

0001 #! /usr/bin/env python3
0002 
0003 ######################################################
0004 ### See documentation at
0005 ### https://twiki.cern.ch/twiki/bin/view/CMS/FindQualityFilesPy
0006 ### also run it with -h option
0007 ######################################################
0008 
0009 from __future__ import print_function
0010 from builtins import range
0011 import os,sys, DLFCN
0012 import optparse
0013 
0014 # for RunInfo API
0015 from pluginCondDBPyInterface import *
0016 from CondCore.Utilities import iovInspector as inspect
0017 from CondCore.Utilities.timeUnitHelper import *
0018 
0019 # for RunRegistry API
0020 import xmlrpclib
0021 
0022 # for json support
# json ships with Python from 2.6 on; older interpreters need the external
# simplejson package.  Only ImportError is caught so that Ctrl-C or an
# unrelated failure is not mistaken for a missing module.
try:
    import json
except ImportError:
    try:
        import simplejson as json
    except ImportError:
        print("Please use lxplus or set an environment (for example crab) with json lib available")
        sys.exit(1)
0031 
0032 ######################################################
def quoteArg(arg):
    """Return *arg* quoted the way it is echoed in the command-line banner.

    An empty argument becomes a pair of double quotes, an argument containing
    a space is wrapped in double quotes, anything else is returned unchanged.
    """
    if arg == "":
        return "\"\""
    if " " in arg:
        return "\"%s\"" % arg
    return arg


print("### command line:")
# Work on a quoted copy so that sys.argv itself stays untouched.
copyargs = [quoteArg(arg) for arg in sys.argv]
commandline = " ".join(copyargs)

print(commandline)
# Header lines that are later written at the top of the output file.
infotofile = ["### %s\n" % commandline]
0044 
0045 ######################################################
0046 # To parse commandline args
0047 
# Command-line interface.  Every option keeps its historical flag names,
# destination and default; only the help texts were corrected.
usage = ('%prog [options]\n\n'
         'Creates a Python configuration file with filenames for runs in specified run range, '
         'with certain min B field and data quality requirements.')

parser = optparse.OptionParser(usage)

# e.g. /Cosmics/Commissioning08-2213_Tosca090322_2pi_scaled_ReReco_FromTrackerPointing-v1/RAW-RECO
parser.add_option("-d", "--alcaDataset",
                  help="[REQUIRED] Name of the input AlCa dataset to get filenames from.",
                  type="string",
                  default='',
                  dest="alcaDataset")

# Kept as a string ('true'/'false') because the rest of the script compares
# against those literals.
parser.add_option("-m", "--isMC",
                  help="Whether sample is MC (true) or real data (false).",
                  type="string",
                  default="false",
                  dest="isMC")

parser.add_option("-s", "--startRun",
                  help="First run number in range.",
                  type="int",
                  default=0,
                  dest="startRun")

parser.add_option("-e", "--endRun",
                  help="Last run number in range.",
                  type="int",
                  default=999999999,
                  dest="endRun")

parser.add_option("-b", "--minB",
                  help="Lower limit on minimal B field for a run.",
                  type="float",
                  default=0.,
                  dest="minB")

parser.add_option("--maxB",
                  help="Upper limit on B field for a run.",
                  type="float",
                  default=999.,
                  dest="maxB")

parser.add_option("-r", "--runRegistry",
                  help="If present, use RunRegistry API for B field and data quality query",
                  action="store_true",
                  default=False,
                  dest="runRegistry")

parser.add_option("-j", "--json",
                  help="If present with JSON file as argument, use JSON file for the good runs "
                       "and ignore B field and --runRegistry options. "
                       "The latest JSON file is available at "
                       "/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions10/7TeV/StreamExpress/",
                  type="string",
                  default="",
                  dest="json")

parser.add_option("-t", "--dbTag",
                  help="Runinfo DB tag to use.",
                  type="string",
                  default="runinfo_31X_hlt",
                  dest="dbTag")

parser.add_option("--printTags",
                  help="If present, the only thing script will do is printing list of tags in the DB",
                  action="store_true",
                  default=False,
                  dest="printTags")

parser.add_option("--dbName",
                  help="RunInfo DB name to use. The default one is "
                       "'oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO'",
                  type="string",
                  default="oracle://cms_orcoff_prod/CMS_COND_31X_RUN_INFO",
                  dest="dbName")

parser.add_option("--dqDataset",
                  help="Dataset name to query for good data quality runs. "
                       "If this option is not used, dqDataset=alcaDataset is automatically set. "
                       "If alcaDataset does not have DQ information use /Cosmics/Commissioning08-v1/RAW for CRAFT08 "
                       "and use /Cosmics/CRAFT09-v1/RAW for CRAFT09",
                  type="string",
                  default="",
                  dest="dqDataset")

# optparse re-wraps help text, so the pieces are separated with plain spaces
# rather than newlines.
parser.add_option("-c", "--dqCriteria",
                  help="Set of DQ criteria to use with -dq flag of dbs. "
                       "An example of a really strict condition: "
                       "'DT_Shift_Offline=GOOD&CSC_Shift_Offline=GOOD&SiStrip_Shift_Offline=GOOD&Pixel_Shift_Offline=GOOD'. "
                       "NOTE: if --runRegistry is used, DQ criteria syntax should be as Advanced query syntax for RR. E.g.: "
                       "\"{cmpDt}='GOOD' and {cmpCsc}='GOOD' and {cmpStrip}='GOOD' and {cmpPix}='GOOD'\"",
                  type="string",
                  default="",
                  dest="dqCriteria")

parser.add_option("-o", "--outputFile",
                  help="Name for output file (please include the .py suffix)",
                  type="string",
                  default="filelist.py",
                  dest="outputFile")

parser.add_option("-v", "--verbose",
                  help="Degree of debug info verbosity",
                  type="int",
                  default=0,
                  dest="verbose")
0159 
options, args = parser.parse_args()

# A dataset is mandatory unless the user only wants the list of DB tags.
if options.alcaDataset == '' and not options.printTags:
    print("--alcaDataset /your/dataset/name is required!")
    sys.exit(1)  # non-zero status so that calling scripts notice the failure

# Data-quality information is looked up on the AlCa dataset by default.
if options.dqDataset == '':
    options.dqDataset = options.alcaDataset

if options.isMC not in ('true', 'false'):
    print("--isMC option can have only 'true' or 'false' arguments")
    sys.exit(1)

v = options.verbose

# Convert the B-field limits into the units compared against the RunInfo
# current values.
# NOTE(review): presumably 18160 is the magnet current (A) at 3.8 T - confirm.
minI = options.minB*18160/3.8
maxI = options.maxB*18160/3.8

rr = ' --runRegistry' if options.runRegistry else ''
jj = ' --json ' + options.json if options.json != '' else ''

# Fully expanded equivalent of the command line, defaults included.
allOptions = '### ' + copyargs[0] + ' --alcaDataset ' + options.alcaDataset + ' --isMC ' + options.isMC + \
             ' --startRun ' + str(options.startRun) + ' --endRun ' + str(options.endRun) + \
             ' --minB ' + str(options.minB) + ' --maxB ' + str(options.maxB) + rr + jj + \
             ' --dbTag ' + options.dbTag + ' --dqDataset ' + options.dqDataset + \
             ' --dqCriteria "' + options.dqCriteria + '"' + \
             ' --outputFile ' + options.outputFile

print("### all options, including default:")
print(allOptions)
0199 
0200 
0201 ######################################################
0202 # functions definitions
0203 
0204 
0205 #########################
0206 # get good B field runs from RunInfo DB
def getGoodBRuns():
    """Return the runs whose magnet current lies within [minI, maxI].

    Opens the RunInfo DB named by options.dbName, reads the trend of the tag
    options.dbTag between options.startRun-1 and options.endRun+1 and keeps
    every run whose min_current (index 4 of the trend payload) is >= minI and
    whose max_current (index 3) is <= maxI.

    With options.printTags set, the list of DB tags is printed and the
    script exits instead.
    """
    # NOTE(review): DLFCN is a Python 2 module; under Python 3 the same
    # constants are exposed as os.RTLD_GLOBAL / os.RTLD_LAZY.
    sys.setdlopenflags(DLFCN.RTLD_GLOBAL+DLFCN.RTLD_LAZY)

    # Kept bound to a local for the whole call, as in the original code.
    a = FWIncantation()
    rdbms = RDBMS("/afs/cern.ch/cms/DB/conddb")
    db = rdbms.getDB(options.dbName)
    tags = db.allTags()

    if options.printTags:
        print("\nOverview of all tags in "+options.dbName+" :\n")
        print(tags)
        print("\n")
        sys.exit()

    selected = []
    tag = options.dbTag
    # Runs that are always printed, whatever the verbosity.
    debug_runs = (67647, 66893, 67264)

    try:
        iov = inspect.Iov(db, tag)
        what = {}

        if v > 1:
            print("######## summries ########")
            for row in iov.summaries():
                print(row[0], row[1], row[2], row[3])
            print("###(start_current,stop_current,avg_current,max_current,min_current,run_interval_micros) vs runnumber###")
            print(iov.trend(what))

        if v > 0:
            print("######## trends ########")
        for entry in iov.trendinrange(what, options.startRun-1, options.endRun+1):
            run, currents = entry[0], entry[2]
            if v > 0 or run in debug_runs:
                print(run, entry[1], currents, currents[4], currents[3])
            if currents[4] >= minI and currents[3] <= maxI:
                selected.append(int(run))

    except Exception as er:
        # Best effort: a DB problem is reported and whatever was collected
        # so far is still returned.
        print(er)

    print("### runs with good B field ###")
    print(selected)

    return selected
0270 
0271 
0272 #########################
0273 # obtaining list of good quality runs
0274 
def getGoodQRuns():
    """Return the runs of options.dqDataset that satisfy options.dqCriteria.

    Runs the DBS command-line client and parses one run number per output
    line.  The output is read straight from the pipe, so no scratch file with
    a fixed name in /tmp is needed, and blank lines are skipped.

    Raises ValueError if a non-blank output line is not an integer.
    """
    import subprocess  # local import: only this query needs it

    dbs_quiery = "find run where dataset="+options.dqDataset+" and dq="+options.dqCriteria
    print('dbs search --noheader --query="'+dbs_quiery+'" | sort')

    # shell=True is required for the $DBSCMD_HOME expansion and the pipe to
    # sort; the query is built from the user's own command-line options.
    command = 'python $DBSCMD_HOME/dbsCommandLine.py -c  search --noheader --query="'+dbs_quiery+'" | sort'
    proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output = proc.communicate()[0]

    runs_good_dq = [int(line) for line in output.splitlines() if line.strip()]

    print("### runs with good quality ###")
    print(runs_good_dq)

    return runs_good_dq
0299 
0300 #########################
0301 # obtaining list of good B and quality runs from Run Registry
0302 # https://twiki.cern.ch/twiki/bin/view/CMS/DqmRrApi
0303 # https://twiki.cern.ch/twiki/bin/viewauth/CMS/DQMRunRegistry
0304 
def getRunRegistryGoodRuns():
    """Return the run numbers the Run Registry reports for the selection.

    The query combines the run range, the B-field range and, when given,
    options.dqCriteria.  The first element of every entry under 'events' in
    the exported table is returned.
    """
    # NOTE(review): xmlrpclib is the Python 2 name of xmlrpc.client.
    server = xmlrpclib.ServerProxy('http://pccmsdqm04.cern.ch/runregistry/xmlrpc')

    conditions = [
        "{runNumber}>=" + str(options.startRun),
        "{runNumber}<=" + str(options.endRun),
        "{bfield}>=" + str(options.minB),
        "{bfield}<=" + str(options.maxB),
    ]
    if options.dqCriteria != "":
        conditions.append(options.dqCriteria)
    rr_quiery = " and ".join(conditions)

    exported = server.RunDatasetTable.export('GLOBAL', 'chart_runs_cum_evs_vs_bfield', rr_quiery)
    # Quote the two bare keys so the export can be evaluated as a Python literal.
    exported = exported.replace("bfield", "'bfield'").replace("events", "'events'")
    # SECURITY NOTE(review): eval() executes text received from a remote
    # server; consider ast.literal_eval if the export is a plain literal.
    rrdata = eval(exported)

    return [entry[0] for entry in rrdata['events']]
0322 
0323 #########################
0324 # obtain a list of good runs from JSON file
0325 
def getJSONGoodRuns():
    """Return the sorted run numbers listed in the JSON file options.json.

    The file is expected to hold a JSON object whose keys are run numbers;
    the values are ignored here.  open() replaces the Python-2-only file()
    builtin and the handle is closed even if parsing fails.
    """
    with open(options.json, 'r') as jsonfile:
        jsondict = json.load(jsonfile)

    # Keys are strings in JSON, so convert before sorting numerically.
    return sorted(int(run) for run in jsondict)
0343 
0344 ######################################################
0345 # get good B field runs from RunInfo DB
0346 
# Select the good runs for real data.  Exactly one source is used:
#   --json         -> run list from the JSON file (takes precedence)
#   --runRegistry  -> B field and quality from the Run Registry
#   otherwise      -> B field from the RunInfo DB combined with quality from DBS
# For MC every list stays empty and no run filtering is applied later on.
runs_b_on = []
runs_good_dq = []
runs_good = []

if options.isMC == 'false':
    if options.json != '':
        runs_good = getJSONGoodRuns()
        print("### good runs from JSON file ###")
        print(runs_good)

    elif options.runRegistry:
        runs_good = getRunRegistryGoodRuns()
        print("### runs with good B field and quality ###")
        print(runs_good)

    else:
        runs_b_on = getGoodBRuns()
        infotofile.append("### runs with good B field ###\n")
        infotofile.append("### %s\n" % str(runs_b_on))

        runs_good_dq = getGoodQRuns()
        infotofile.append("### runs with good quality ###\n")
        infotofile.append("### %s\n" % str(runs_good_dq))

        # Intersection of the two lists, keeping the order of runs_b_on;
        # the set makes each membership test constant time.
        good_dq_set = set(runs_good_dq)
        runs_good = [val for val in runs_b_on if val in good_dq_set]

        print("### runs with good B field and quality ###")
        print(runs_good)

        infotofile.append("### runs with good B field and quality ###\n")
        infotofile.append("### %s\n" % str(runs_good))
0394 
0395 ######################################################
0396 # Find files for good runs
0397 
# Ask DBS for (run, number of events, file name) of every non-empty file of
# the AlCa dataset in the run range.  The result is left in /tmp after the
# script finishes, as before.
dbs_quiery = "find run, file.numevents, file where dataset="+options.alcaDataset+" and run>="+str(options.startRun)+" and run<="+str(options.endRun)+" and file.numevents>0"

os.system('python $DBSCMD_HOME/dbsCommandLine.py -c  search --noheader --query="'+dbs_quiery+'" | sort > /tmp/runs_and_files_full_of_pink_bunnies')

list_of_files = []
list_of_runs = []
list_of_numevents = []

good_run_set = set(runs_good)          # constant-time membership test
filter_runs = options.isMC == 'false'  # MC samples are not filtered by run
seen_files = set()

with open('/tmp/runs_and_files_full_of_pink_bunnies', 'r') as ff:
    for line in ff:
        if not line.strip():
            continue
        # Fields are whitespace separated; a malformed line still raises
        # ValueError here.
        run, numevents, fname = line.split()
        run_number = int(run)
        if filter_runs and run_number not in good_run_set:
            continue
        list_of_runs.append(run_number)
        # A file can hold events from several runs: record it only once,
        # keeping the sorted order in which it was first seen.
        if (fname, numevents) not in seen_files:
            seen_files.add((fname, numevents))
            list_of_files.append(fname)
            list_of_numevents.append(numevents)

uniq_list_of_runs = sorted(set(list_of_runs))

print("### list of runs with good B field and quality in the dataset: ###")
print(uniq_list_of_runs)
infotofile.append("### list of runs with good B field and quality in the dataset: ###\n")
infotofile.append("### %s\n" % str(uniq_list_of_runs))

# Summing over the de-duplicated files also works when nothing was selected.
total_numevents = sum(int(n) for n in list_of_numevents)

print("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))

infotofile.append("### total number of events in those "+str(len(uniq_list_of_runs))+" runs = "+str(total_numevents))
0438 
0439 ######################################################
0440 # Write out results
0441 
# Write the header collected in infotofile followed by a Python list of the
# selected file names, each annotated with its number of events.
size = len(list_of_files)

with open(options.outputFile, 'w') as ff:
    ff.write("".join(infotofile))
    ff.write("\nfileNames = [\n")
    for i, (fname, numevents) in enumerate(zip(list_of_files, list_of_numevents)):
        # No trailing comma after the last entry.
        comma = "" if i == size - 1 else ","
        ff.write("    '" + fname + "'" + comma + " # " + numevents + "\n")
    ff.write(']\n')
0459