BeamSpotProducer/scripts/BeamSpotWorkflow.py

0001 #!/usr/bin/env python3
0002 #____________________________________________________________
0003 #
0004 #  BeamSpotWorkflow
0005 #
0006 # A very complicated way to automate the beam spot workflow
0007 #
0008 # Francisco Yumiceva, Lorenzo Uplegger
0009 # yumiceva@fnal.gov, uplegger@fnal.gov
0010 #
0011 # Fermilab, 2010
0012 #
0013 #____________________________________________________________
0014
0015 """
0016    BeamSpotWorkflow.py
0017
0018    A very complicate script to upload the results into the DB
0019
0020    usage: %prog -d <data file/directory> -t <tag name>
0021    -c, --cfg = CFGFILE : Use a different configuration file than the default
0022    -l, --lock = LOCK   : Create a lock file to have just one script running
0023    -o, --overwrite     : Overwrite results files when copying.
0024    -T, --Test          : Upload files to Test dropbox for data validation.
0025    -u, --upload        : Upload files to offline drop box via scp.
0026    -z, --zlarge        : Enlarge sigmaZ to 10 +/- 0.005 cm.
0027
0028    Francisco Yumiceva (yumiceva@fnal.gov)
0029    Lorenzo Uplegger   (send an email to Francisco)
0030    Fermilab 2010
0031
0032 """
0033
0034
from builtins import range
import configparser as ConfigParser
import datetime
import os
import re
import subprocess
import sys
import time
try: # Python 3 renamed the xmlrpclib module to xmlrpc.client
    import xmlrpc.client as xmlrpclib
except ImportError:
    import xmlrpclib

from BeamSpotObj import BeamSpot
from IOVObj import IOV
from CommonMethods import *
0044
# json is part of the standard library since Python 2.6; simplejson is only
# tried as a fallback for older interpreters.
try:
    import json
except ImportError:
    try:
        import simplejson as json
    except ImportError:
        error = "Please set a crab environment in order to get the proper JSON lib"
        exit(error)
0053
0054 #####################################################################################
0055 # General functions
0056 #####################################################################################
def getLastUploadedIOV(tagName,destDB="oracle://cms_orcoff_prod/CMS_COND_31X_BEAMSPOT"):
    """Return the last IOV stored in `destDB` for the tag `tagName`.

    The IOV list is obtained by running the external `cmscond_list_iov`
    command; the last line containing "DB=" provides the value.

    Returns:
        1 if the command fails because the tag does not exist yet (a new tag
        is going to be created), otherwise the last IOV as an int.

    Exits the script if the command fails for any other reason, or if the tag
    exists but no IOV value can be extracted from the command output.
    """
    listIOVCommand = "cmscond_list_iov -c " + destDB + " -P /afs/cern.ch/cms/DB/conddb -t " + tagName
    status, listing = subprocess.getstatusoutput( listIOVCommand )
    if status != 0:
        if "metadata entry \"" + tagName + "\" does not exist" in listing:
            print("Creating a new tag because I got the following error contacting the DB")
            print(listing)
            return 1
        exit("ERROR: Can\'t connect to db because:\n" + listing)

    aCommand = listIOVCommand + " | grep DB= | tail -1 | awk \'{print $1}\'"
    lastIOV = subprocess.getstatusoutput( aCommand )[1]

    if lastIOV == '':
        exit("ERROR: The tag " + tagName + " exists but I can't get the value of the last IOV")

    # Python 3 has no `long`; int is unbounded, so it also holds packed
    # lumi-based IOVs.
    return int(lastIOV)
0079
0080 ########################################################################
def getListOfFilesToProcess(dataSet,lastRun=-1):
    """Return the lines of the DBS file query for `dataSet` that match ".root".

    Args:
        dataSet: dataset name inserted in the `dbs --search` query.
        lastRun: when different from -1, only files with run > lastRun are
            requested.

    Returns:
        A list with one entry per non-empty output line; an empty list when
        the query produced no output. The exit status of the command is not
        checked.
    """
    queryCommand = "dbs --search --query \"find file where dataset=" + dataSet
    if lastRun != -1:
        queryCommand = queryCommand + " and run > " + str(lastRun)
    queryCommand = queryCommand + "\" | grep .root"
    output = subprocess.getstatusoutput( queryCommand )
    # ''.split('\n') is [''], which callers would treat as one (bogus) file:
    # drop empty lines so that "no output" really means "no files".
    return [line for line in output[1].split('\n') if line]
0089
0090 ########################################################################
def getNumberOfFilesToProcessForRun(dataSet,run):
    """Count the ".root" lines that the DBS file query returns for one run.

    Returns 0 when the query pipeline exits with a non-zero status (which is
    also what grep reports when nothing matches), otherwise the number of
    output lines.
    """
    queryCommand = "dbs --search --query \"find file where dataset=" + dataSet + " and run = " + str(run) + "\" | grep .root"
    status, listing = subprocess.getstatusoutput( queryCommand )
    return len(listing.split('\n')) if status == 0 else 0
0099
0100 ########################################################################
def getListOfRunsAndLumiFromDBS(dataSet,lastRun=-1):
    """Query DBS for the (run, lumi) pairs of one or more datasets.

    Args:
        dataSet: comma separated list of dataset names; each one is queried
            separately and the results are merged.
        lastRun: when different from -1, only runs > lastRun are requested.

    Returns:
        A dict mapping run number (int) to the list of lumi sections (int)
        found for that run, in the order they appear in the query output.

    Exits the script if the last query attempt for a dataset ends with a
    non-zero exit status.
    """
    outputList = []
    for data in dataSet.split(','):
        queryCommand = "dbs --search --query \"find run,lumi where dataset=" + data
        if lastRun != -1:
            queryCommand = queryCommand + " and run > " + str(lastRun)
        queryCommand += "\""
        print(" >> " + queryCommand)
        output = []
        # Up to three attempts; stop at the first one that succeeds without
        # an error message in its output.
        for _ in range(0,3):
            output = subprocess.getstatusoutput( queryCommand )
            if output[0] == 0 and "ERROR" not in output[1] and "Error" not in output[1]:
                break
        if output[0] != 0:
            exit("ERROR: I can't contact DBS for the following reason:\n" + output[1])
        outputList.extend(output[1].split('\n'))

    # A data line contains "<run> <lumi>"; every other line is ignored.
    runLumiPattern = re.compile(r'(\d+)\s+(\d+)')
    runsAndLumis = {}
    for out in outputList:
        regExp = runLumiPattern.search(out)
        if regExp:
            # Python 3 has no `long`; int is unbounded.
            run  = int(regExp.group(1))
            lumi = int(regExp.group(2))
            runsAndLumis.setdefault(run, []).append(lumi)

    return runsAndLumis
0134
0135 #####################################################################################
def getListOfRunsAndLumiFromFile(firstRun=-1,fileName=""):
    """Load a run -> lumi information mapping from a JSON file.

    Args:
        firstRun: accepted for symmetry with the other getListOfRunsAndLumi*
            functions; it is not used to filter the content of the file.
        fileName: path of a JSON file whose top level object is keyed by run
            number.

    Returns:
        A dict with the same values as the JSON object and the keys converted
        to int.
    """
    # The context manager closes the file even if reading or parsing fails.
    with open(fileName) as jsonFile:
        jsonList = json.load(jsonFile)

    # Python 3 has no `long`; int is unbounded.
    return {int(run): lumis for run, lumis in jsonList.items()}
0146
0147 ########################################################################
def _exportFromRunRegistry(server,workspace,template,selection,retryMessage,maxAttempts=3):
    """Call server.DataExporter.export(workspace, 'GLOBAL', template, selection).

    The call is attempted up to `maxAttempts` times, printing `retryMessage`
    and sleeping 2 seconds after each failure. Returns the exported data, or
    None if every attempt failed.
    """
    for tries in range(maxAttempts):
        try:
            return server.DataExporter.export(workspace, 'GLOBAL', template, selection)
        # Exception instead of a bare except, so Ctrl-C still stops the retries.
        except Exception:
            print(retryMessage, tries, "/", maxAttempts)
            time.sleep(2)
    return None

def getListOfRunsAndLumiFromRR(firstRun=-1):
    """Get the runs after `firstRun` and their good lumi ranges from the Run Registry.

    The list of runs of the "Collisions10" group is read first; the lumi
    sections with all the listed DCS flags set to 1 are then requested for
    that run interval.

    Returns:
        A dict mapping run number (int) to the value returned by the Run
        Registry for that run, or to [[]] for a run without any selected lumi
        section. An empty dict is returned if the Run Registry cannot be
        reached or if it lists no run.
    """
    RunReg  ="http://pccmsdqm04.cern.ch/runregistry"
    Group   = "Collisions10"

    # get handler to RR XML-RPC server
    FULLADDRESS=RunReg + "/xmlrpc"
    server = xmlrpclib.ServerProxy(FULLADDRESS)
    sel_runtable="{groupName} ='" + Group + "' and {runNumber} > " + str(firstRun)

    run_data = _exportFromRunRegistry(server,'RUN','csv_runs',sel_runtable,"Something wrong in accessing runregistry, retrying in 2s....")
    if run_data is None:
        return {}

    # The first comma separated field of each data line is the run number;
    # lines that do not start with digits (e.g. a header) are skipped.
    listOfRuns=[]
    for line in run_data.split("\n"):
        run=line.split(',')[0]
        if run.isdigit():
            listOfRuns.append(run)

    # Without runs there is no interval to query (indexing the empty list
    # would raise IndexError), and the result is empty anyway.
    if not listOfRuns:
        return {}

    # The interval bounds are taken from the two ends of the list, last entry
    # as lower bound and first entry as upper bound.
    firstRun = listOfRuns[-1]
    lastRun  = listOfRuns[0]
    sel_dcstable="{groupName} ='" + Group + "' and {runNumber} >= " + str(firstRun) + " and {runNumber} <= " + str(lastRun) + " and {parDcsBpix} = 1 and {parDcsFpix} = 1 and {parDcsTibtid} = 1 and {parDcsTecM} = 1 and {parDcsTecP} = 1 and {parDcsTob} = 1 and {parDcsEbminus} = 1 and {parDcsEbplus} = 1 and {parDcsEeMinus} = 1 and {parDcsEePlus} = 1 and {parDcsEsMinus} = 1 and {parDcsEsPlus} = 1 and {parDcsHbheA} = 1 and {parDcsHbheB} = 1 and {parDcsHbheC} = 1 and {parDcsH0} = 1 and {parDcsHf} = 1"

    dcs_data = _exportFromRunRegistry(server,'RUNLUMISECTION','json',sel_dcstable,"I was able to get the list of runs and now I am trying to access the detector status, retrying in 2s....")
    if dcs_data is None:
        return {}

    selected_dcs={}
    jsonList=json.loads(dcs_data)

    for element in listOfRuns:
        # Python 3 has no `long`; int is unbounded.
        if element in jsonList:
            selected_dcs[int(element)]=jsonList[element]
        else:
            print("WARNING: Run " + element + " is a collision10 run with 0 lumis in Run Registry!")
            selected_dcs[int(element)]= [[]]
    return selected_dcs
0216
0217 ########################################################################
def getLastClosedRun(DBSListOfFiles):
    """Return the second highest run number found in a list of DBS file names.

    The highest run is considered possibly still open, so the one before it
    is the last closed run. Returns -1 when fewer than two distinct run
    numbers are found.
    """
    # A set removes the duplicates in one pass; sorted() orders the runs.
    runs = sorted({getRunNumberFromDBSName(fileName) for fileName in DBSListOfFiles})

    if len(runs) <= 1: #No closed run
        return -1
    # Python 3 has no `long`; int is unbounded.
    return int(runs[-2])
0230
0231 ########################################################################
def getRunNumberFromFileName(fileName):
    """Extract the run number from a result file name.

    The name must contain "<non digits>_<digits>_<digits>_"; the second group
    of digits is the run number. Returns -1 if the name does not match.
    """
    regExp = re.search(r'(\D+)_(\d+)_(\d+)_',fileName)
    if not regExp:
        return -1
    # Python 3 has no `long`; int is unbounded.
    return int(regExp.group(3))
0238
0239 ########################################################################
def getRunNumberFromDBSName(fileName):
    """Extract the run number from a DBS file path.

    The path must contain ".../<digits>/<digits>/<digits>/<non digits>"; the
    run number is the concatenation of the second and third group of digits
    (e.g. ".../000/136/035/file.root" gives 136035). Returns -1 if the path
    does not match.
    """
    regExp = re.search(r'(\D+)/(\d+)/(\d+)/(\d+)/(\D+)',fileName)
    if not regExp:
        return -1
    # Python 3 has no `long`; int is unbounded.
    return int(regExp.group(3)+regExp.group(4))
0245
0246 ########################################################################
def getNewRunList(fromDir,lastUploadedIOV):
    """Return the ".txt" files listed by ls() for `fromDir` whose run number,
    as extracted from the file name, is greater than `lastUploadedIOV`.
    """
    return [candidate for candidate in ls(fromDir,".txt")
            if getRunNumberFromFileName(candidate) > lastUploadedIOV]
0256
0257 ########################################################################
def _readProcessedRunsAndLumis(newRunList,runListDir):
    """Parse the result files and return (runsAndLumisProcessed, runsAndFiles).

    runsAndLumisProcessed maps run -> list of the lumis found on "LumiRange"
    lines; runsAndFiles maps run -> list of the files attributed to that run.
    Exits the script if a "LumiRange" line spans more than one lumi section.
    """
    runsAndLumisProcessed = {}
    runsAndFiles = {}
    for fileName in newRunList:
        with open(runListDir+fileName) as resultFile:
            for line in resultFile:
                if line.find("Runnumber") != -1:
                    # Python 3 has no `long`; int is unbounded.
                    run = int(line.replace('\n','').split(' ')[1])
                elif line.find("LumiRange") != -1:
                    lumiLine = line.replace('\n','').split(' ')
                    begLumi = int(lumiLine[1])
                    endLumi = int(lumiLine[3])
                    if begLumi != endLumi:
                        error = "The lumi range is greater than 1 for run " + str(run) + " " + line + " in file: " + runListDir + fileName
                        exit(error)
                    processedLumis = runsAndLumisProcessed.setdefault(run, [])
                    if begLumi in processedLumis:
                        print("Lumi " + str(begLumi) + " in event " + str(run) + " already exist. This MUST not happen but right now I will ignore this lumi!")
                    else:
                        processedLumis.append(begLumi)
        # The file is attributed to the last run number that has been read.
        # NOTE(review): `run` is only assigned on "Runnumber" lines, so a file
        # without such a line inherits the run of the previous file.
        runsAndFiles.setdefault(run, []).append(fileName)
    return runsAndLumisProcessed, runsAndFiles

def _expandLumiRanges(lumiRanges):
    """Expand a list of [first, last] lumi ranges into the flat list of lumis; empty ranges are skipped."""
    lumis = []
    for lumiRange in lumiRanges:
        if lumiRange != []:
            lumis.extend(int(l) for l in range(lumiRange[0],lumiRange[1]+1))
    return lumis

def selectFilesToProcess(listOfRunsAndLumiFromDBS,listOfRunsAndLumiFromRR,newRunList,runListDir,dataSet,mailList,dbsTolerance,dbsTolerancePercent,rrTolerance,missingFilesTolerance,missingLumisTimeout):
    """Select the result files of the runs that are complete enough to be uploaded.

    The runs of the run registry are visited in increasing order. A run is
    accepted when it has been processed, it is older than the last run known
    to DBS, (almost) all its DBS files have a result file and the processed
    lumis cover the DBS or run registry ones within the tolerances. The
    function returns at the first run that cannot be accepted, so only the
    files of the runs before it are selected.

    Args:
        listOfRunsAndLumiFromDBS: dict run -> list of lumis known to DBS.
        listOfRunsAndLumiFromRR: dict run -> list of [first, last] lumi ranges
            (possibly empty) from the run registry.
        newRunList: names of the result files to consider.
        runListDir: directory prefix prepended to each name in newRunList.
        dataSet: comma separated dataset names used to count the DBS files.
        mailList: passed to sendEmail() when an error is reported.
        dbsTolerance: number of unprocessed lumis that is tolerated.
        dbsTolerancePercent: percentage of unprocessed lumis that is tolerated.
        rrTolerance: an unprocessed run with at most this many lumis in the
            run registry is ignored once its timeout has expired.
        missingFilesTolerance: number of missing files handled with the
            DBS_MISMATCH timeout instead of the DBS_VERY_BIG_MISMATCH one.
        missingLumisTimeout: duration passed to timeoutManager().

    Returns:
        The list of the selected file names.

    The code relies on timeoutManager() returning 1 for an expired timeout and
    -1 for a timeout that has just been set; calling it with the name only
    resets the timeout.
    """
    runsAndLumisProcessed, runsAndFiles = _readProcessedRunsAndLumis(newRunList,runListDir)

    rrKeys = sorted(listOfRunsAndLumiFromRR.keys())
    # dict.keys() is a view in Python 3 and has no sort(): build sorted lists.
    dbsKeys = sorted(listOfRunsAndLumiFromDBS.keys())
    #I remove the last entry from DBS since I am not sure it is an already closed run!
    lastUnclosedRun = dbsKeys.pop()
    procKeys = sorted(runsAndLumisProcessed.keys())

    filesToProcess = []
    for run in rrKeys:
        RRList = _expandLumiRanges(listOfRunsAndLumiFromRR[run])
        if run in procKeys and run < lastUnclosedRun:
            if not run in dbsKeys and run != lastUnclosedRun:
                error = "Impossible but run " + str(run) + " has been processed and it is also in the run registry but it is not in DBS!"
                exit(error)
            print("Working on run " + str(run))
            # The first dataset that has files for this run gives the expected count.
            nFiles = 0
            for data in dataSet.split(','):
                nFiles = getNumberOfFilesToProcessForRun(data,run)
                if nFiles != 0:
                    break
            if len(runsAndFiles[run]) < nFiles:
                print("I haven't processed all files yet : " + str(len(runsAndFiles[run])) + " out of " + str(nFiles) + " for run: " + str(run))
                if nFiles - len(runsAndFiles[run]) <= missingFilesTolerance:
                    timeoutManager("DBS_VERY_BIG_MISMATCH_Run"+str(run)) # resetting this timeout
                    timeoutType = timeoutManager("DBS_MISMATCH_Run"+str(run),missingLumisTimeout)
                    if timeoutType == 1:
                        print("WARNING: I previously set a timeout that expired...I'll continue with the script even if I didn't process all the lumis!")
                    else:
                        if timeoutType == -1:
                            print("WARNING: Setting the DBS_MISMATCH_Run" + str(run) + " timeout because I haven't processed all files!")
                        else:
                            print("WARNING: Timeout DBS_MISMATCH_Run" + str(run) + " is in progress.")
                        return filesToProcess
                else:
                    timeoutType = timeoutManager("DBS_VERY_BIG_MISMATCH_Run"+str(run),missingLumisTimeout)
                    if timeoutType == 1:
                        error = "ERROR: I previously set a timeout that expired...I can't continue with the script because there are too many (" + str(nFiles - len(runsAndFiles[run])) + " files missing) and for too long " + str(missingLumisTimeout/3600) + " hours! I will process anyway the runs before this one (" + str(run) + ")"
                        sendEmail(mailList,error)
                        return filesToProcess
                    else:
                        if timeoutType == -1:
                            print("WARNING: Setting the DBS_VERY_BIG_MISMATCH_Run" + str(run) + " timeout because I haven't processed all files!")
                        else:
                            print("WARNING: Timeout DBS_VERY_BIG_MISMATCH_Run" + str(run) + " is in progress.")
                        return filesToProcess

            else:
                # Everything has been processed: reset both timeouts.
                timeoutManager("DBS_VERY_BIG_MISMATCH_Run"+str(run))
                timeoutManager("DBS_MISMATCH_Run"+str(run))
                print("I have processed " + str(len(runsAndFiles[run])) + " out of " + str(nFiles) + " files that are in DBS. So I should have all the lumis!")
            errors          = []
            badProcessed    = []
            #It is important for runsAndLumisProcessed[run] to be the first because the comparision is not ==
            badDBSProcessed,badDBS = compareLumiLists(runsAndLumisProcessed[run],listOfRunsAndLumiFromDBS[run],errors)
            for i in range(0,len(errors)):
                errors[i] = errors[i].replace("listA","the processed lumis")
                errors[i] = errors[i].replace("listB","DBS")
            if len(badDBS) != 0:
                print("This is weird because I processed more lumis than the ones that are in DBS!")
            if len(badDBSProcessed) != 0 and run in rrKeys:
                # Some DBS lumis have not been processed: they only matter if
                # they are also requested by the run registry.
                #It is important for runsAndLumisProcessed[run] to be the first because the comparision is not ==
                badRRProcessed,badRR = compareLumiLists(runsAndLumisProcessed[run],RRList,errors)
                for i in range(0,len(errors)):
                    errors[i] = errors[i].replace("listA","the processed lumis")
                    errors[i] = errors[i].replace("listB","Run Registry")

                if len(badRRProcessed) != 0:
                    print("I have not processed some of the lumis that are in the run registry for run: " + str(run))
                    badProcessed = [lumi for lumi in badDBSProcessed if lumi in badRRProcessed]
                    lenA = len(badProcessed)
                    lenB = len(RRList)
                    if 100.*lenA/lenB <= dbsTolerancePercent:
                        print("WARNING: I didn't process " + str(100.*lenA/lenB) + "% of the lumis but I am within the " + str(dbsTolerancePercent) + "% set in the configuration. Which corrispond to " + str(lenA) + " out of " + str(lenB) + " lumis")
                        badProcessed = []
                    elif lenA <= dbsTolerance:
                        print("WARNING: I didn't process " + str(lenA) + " lumis but I am within the " + str(dbsTolerance) + " lumis set in the configuration. Which corrispond to " + str(lenA) + " out of " + str(lenB) + " lumis")
                        badProcessed = []
                    else:
                        error = "ERROR: For run " + str(run) + " I didn't process " + str(100.*lenA/lenB) + "% of the lumis and I am not within the " + str(dbsTolerancePercent) + "% set in the configuration. The number of lumis that I didn't process (" + str(lenA) + " out of " + str(lenB) + ") is greater also than the " + str(dbsTolerance) + " lumis that I can tolerate. I can't process runs >= " + str(run) + " but I'll process the runs before!"
                        sendEmail(mailList,error)
                        print(error)
                        return filesToProcess
                elif len(errors) != 0:
                    print("The number of lumi sections processed didn't match the one in DBS but they cover all the ones in the Run Registry, so it is ok!")

            #If I get here it means that I passed or the DBS or the RR test
            if len(badProcessed) == 0:
                filesToProcess.extend(runsAndFiles[run])
            else:
                print("This should never happen because if I have errors I return or exit! Run: " + str(run))
        else:
            error = "Run " + str(run) + " is in the run registry but it has not been processed yet!"
            print(error)
            timeoutType = timeoutManager("MISSING_RUNREGRUN_Run"+str(run),missingLumisTimeout)
            if timeoutType == 1:
                if len(RRList) <= rrTolerance:
                    error = "WARNING: I previously set the MISSING_RUNREGRUN_Run" + str(run) + " timeout that expired...I am missing run " + str(run) + " but it only had " + str(len(RRList)) + " <= " + str(rrTolerance) + " lumis. So I will continue and ignore it... "
                    print(error)
                else:
                    error = "ERROR: I previously set the MISSING_RUNREGRUN_Run" + str(run) + " timeout that expired...I am missing run " + str(run) + " which has " + str(len(RRList)) + " > " + str(rrTolerance) + " lumis. I can't continue but I'll process the runs before this one"
                    sendEmail(mailList,error)
                    return filesToProcess
            else:
                if timeoutType == -1:
                    print("WARNING: Setting the MISSING_RUNREGRUN_Run" + str(run) + " timeout because I haven't processed a run!")
                else:
                    print("WARNING: Timeout MISSING_RUNREGRUN_Run" + str(run) + " is in progress.")
                return filesToProcess

    return filesToProcess
0434 ########################################################################
def compareLumiLists(listA,listB,errors=None,tolerance=0):
    """Compare two lists of lumi sections.

    Both input lists are sorted in place.

    Args:
        listA, listB: the lists of lumis to compare.
        errors: optional list to which a message is appended for every
            difference that is found; a new list is used when it is omitted.
        tolerance: percentage of listB's length by which listA may be shorter
            before the "number of lumi sections is different" message is
            added.

    Returns:
        (badA, badB): the lumis of listB missing from listA and the lumis of
        listA missing from listB.
    """
    # A list default would be shared (and keep growing) across calls.
    if errors is None:
        errors = []
    lenA = len(listA)
    lenB = len(listB)
    if lenA < lenB-(lenB*float(tolerance)/100):
        errors.append("ERROR: The number of lumi sections is different: listA(" + str(lenA) + ")!=(" + str(lenB) + ")listB")
    listA.sort()
    listB.sort()
    # Sets make each membership test O(1) instead of a scan of the other list.
    lumisInA = set(listA)
    lumisInB = set(listB)
    badA = []
    badB = []
    for lumi in listA:
        if lumi not in lumisInB:
            errors.append("Lumi (" + str(lumi) + ") is in listA but not in listB")
            badB.append(lumi)
    for lumi in listB:
        if lumi not in lumisInA:
            errors.append("Lumi (" + str(lumi) + ") is in listB but not in listA")
            badA.append(lumi)

    return badA,badB
0472
0473 ########################################################################
def removeUncompleteRuns(newRunList,dataSet):
    """Report, for every run found in `newRunList`, whether the number of
    result files reaches the number of files that DBS has for that run.

    Nothing is removed or returned: the outcome is only printed.
    """
    filesPerRun = {}
    for resultFile in newRunList:
        runNumber = getRunNumberFromFileName(resultFile)
        filesPerRun[runNumber] = filesPerRun.get(runNumber, 0) + 1

    for runNumber, nProcessed in filesPerRun.items():
        nExpected = getNumberOfFilesToProcessForRun(dataSet,runNumber)
        if nProcessed >= nExpected:
            print("All files have been processed for run: " + str(runNumber) + " (" + str(nProcessed) + " out of " + str(nExpected) + ")")
        else:
            print("I haven't processed all files yet : " + str(nProcessed) + " out of " + str(nExpected) + " for run: " + str(runNumber))
0488
0489 ########################################################################
def aselectFilesToProcess(listOfFilesToProcess,newRunList):
    """Select the result files of the closed runs that are fully processed.

    Args:
        listOfFilesToProcess: DBS file names, used to count the files per run
            and to find the last closed run.
        newRunList: names of the result files, counted per run.

    Returns:
        The entries of newRunList belonging to runs <= the last closed run,
        for which the number of result files equals the number of DBS files.

    Exits the script if a processed run is unknown to DBS or if the two file
    counts differ.
    """
    selectedFiles = []
    runsToProcess = {}
    processedRuns = {}
    for dbsFile in listOfFilesToProcess:
        run = getRunNumberFromDBSName(dbsFile)
        runsToProcess[run] = runsToProcess.get(run, 0) + 1

    for resultFile in newRunList:
        run = getRunNumberFromFileName(resultFile)
        processedRuns[run] = processedRuns.get(run, 0) + 1

    #WARNING: getLastClosedRun MUST also have a timeout otherwise the last run will not be considered
    lastClosedRun = getLastClosedRun(listOfFilesToProcess)

    for run in sorted(processedRuns.keys()):
        if run <= lastClosedRun :
            # Check first: the message below indexes runsToProcess[run] and
            # would raise KeyError instead of reporting the actual problem.
            if not run in runsToProcess:
                exit("ERROR: I have a result file for run " + str(run) + " but it doesn't exist in DBS. Impossible but it happened!")
            print("For run " + str(run) + " I have processed " + str(processedRuns[run]) + " files and in DBS there are " + str(runsToProcess[run]) + " files!")
            # NOTE(review): the result is not used here and getDBSLumiListForRun
            # is defined outside this section - confirm the call is still needed.
            lumiList = getDBSLumiListForRun(run)
            if processedRuns[run] == runsToProcess[run]:
                for resultFile in newRunList:
                    if run == getRunNumberFromFileName(resultFile):
                        selectedFiles.append(resultFile)
            else:
                exit("ERROR: For run " + str(run) + " I have processed " + str(processedRuns[run]) + " files but in DBS there are " + str(runsToProcess[run]) + " files!")
    return selectedFiles
0529
0530 ########################################################################
0531 def main():
0532     ######### COMMAND LINE OPTIONS ##############
0533     option,args = parse(__doc__)
0534
0535     ######### Check if there is already a megascript running ########
0536     if option.lock:
0537         setLockName('.' + option.lock)
0538         if checkLock():
0539             print("There is already a megascript runnning...exiting")
0540             return
0541         else:
0542             lock()
0543
0544
0545     destDB = 'oracle://cms_orcon_prod/CMS_COND_31X_BEAMSPOT'
0546     if option.Test:
0547         destDB = 'oracle://cms_orcoff_prep/CMS_COND_BEAMSPOT'
0548
0549     ######### CONFIGURATION FILE ################
0550     cfgFile = "BeamSpotWorkflow.cfg"
0551     if option.cfg:
0552         cfgFile = option.cfg
0553     configurationFile = os.getenv("CMSSW_BASE") + "/src/RecoVertex/BeamSpotProducer/scripts/" + cfgFile
0554     configuration     = ConfigParser.ConfigParser()
0555     print('Reading configuration from ', configurationFile)
0556     configuration.read(configurationFile)
0557
0558     sourceDir             = configuration.get('Common','SOURCE_DIR')
0559     archiveDir            = configuration.get('Common','ARCHIVE_DIR')
0560     workingDir            = configuration.get('Common','WORKING_DIR')
0561     databaseTag           = configuration.get('Common','DBTAG')
0562     dataSet               = configuration.get('Common','DATASET')
0563     fileIOVBase           = configuration.get('Common','FILE_IOV_BASE')
0564     dbIOVBase             = configuration.get('Common','DB_IOV_BASE')
0565     dbsTolerance          = float(configuration.get('Common','DBS_TOLERANCE'))
0566     dbsTolerancePercent   = float(configuration.get('Common','DBS_TOLERANCE_PERCENT'))
0567     rrTolerance           = float(configuration.get('Common','RR_TOLERANCE'))
0568     missingFilesTolerance = float(configuration.get('Common','MISSING_FILES_TOLERANCE'))
0569     missingLumisTimeout   = float(configuration.get('Common','MISSING_LUMIS_TIMEOUT'))
0570     jsonFileName          = configuration.get('Common','JSON_FILE')
0571     mailList              = configuration.get('Common','EMAIL')
0572
0573     ######### DIRECTORIES SETUP #################
0574     if sourceDir[len(sourceDir)-1] != '/':
0575         sourceDir = sourceDir + '/'
0576     if not dirExists(sourceDir):
0577         error = "ERROR: The source directory " + sourceDir + " doesn't exist!"
0578         sendEmail(mailList,error)
0579         exit(error)
0580
0581     if archiveDir[len(archiveDir)-1] != '/':
0582         archiveDir = archiveDir + '/'
0583     if not os.path.isdir(archiveDir):
0584         os.mkdir(archiveDir)
0585
0586     if workingDir[len(workingDir)-1] != '/':
0587         workingDir = workingDir + '/'
0588     if not os.path.isdir(workingDir):
0589         os.mkdir(workingDir)
0590     else:
0591         os.system("rm -f "+ workingDir + "*")
0592
0593
0594     print("Getting last IOV for tag: " + databaseTag)
0595     lastUploadedIOV = 1
0596     if destDB == "oracle://cms_orcon_prod/CMS_COND_31X_BEAMSPOT":
0597         lastUploadedIOV = getLastUploadedIOV(databaseTag)
0598     else:
0599         lastUploadedIOV = getLastUploadedIOV(databaseTag,destDB)
0600
0601     #lastUploadedIOV = 133885
0602     #lastUploadedIOV = 575216380019329
0603     if dbIOVBase == "lumiid":
0604         lastUploadedIOV = unpackLumiid(lastUploadedIOV)["run"]
0605
0606     ######### Get list of files processed after the last IOV
0607     print("Getting list of files processed after IOV " + str(lastUploadedIOV))
0608     newProcessedRunList      = getNewRunList(sourceDir,lastUploadedIOV)
0609     if len(newProcessedRunList) == 0:
0610         exit("There are no new runs after " + str(lastUploadedIOV))
0611
0612     ######### Copy files to archive directory
0613     print("Copying files to archive directory")
0614     copiedFiles = []
0615     for i in range(3):
0616         copiedFiles = cp(sourceDir,archiveDir,newProcessedRunList)
0617         if len(copiedFiles) == len(newProcessedRunList):
0618             break;
0619     if len(copiedFiles) != len(newProcessedRunList):
0620         error = "ERROR: I can't copy more than " + str(len(copiedFiles)) + " files out of " + str(len(newProcessedRunList))
0621         sendEmail(mailList,error)
0622         exit(error)
0623
0624
0625     ######### Get from DBS the list of files after last IOV
0626     #listOfFilesToProcess = getListOfFilesToProcess(dataSet,lastUploadedIOV)
0627     print("Getting list of files from DBS")
0628     listOfRunsAndLumiFromDBS = getListOfRunsAndLumiFromDBS(dataSet,lastUploadedIOV)
0629     if len(listOfRunsAndLumiFromDBS) == 0:
0630         exit("There are no files in DBS to process")
0631     print("Getting list of files from RR")
0632     listOfRunsAndLumiFromRR  = getListOfRunsAndLumiFromRR(lastUploadedIOV)
0633     if(not listOfRunsAndLumiFromRR):
0634         print("Looks like I can't get anything from the run registry so I'll get the data from the json file " + jsonFileName)
0635         listOfRunsAndLumiFromRR  = getListOfRunsAndLumiFromFile(lastUploadedIOV,jsonFileName)
0636     ######### Get list of files to process for DB
0637     #selectedFilesToProcess = selectFilesToProcess(listOfFilesToProcess,copiedFiles)
0638     #completeProcessedRuns = removeUncompleteRuns(copiedFiles,dataSet)
0639     #print copiedFiles
0640     #print completeProcessedRuns
0641     #exit("complete")
0642     print("Getting list of files to process")
0643     selectedFilesToProcess = selectFilesToProcess(listOfRunsAndLumiFromDBS,listOfRunsAndLumiFromRR,copiedFiles,archiveDir,dataSet,mailList,dbsTolerance,dbsTolerancePercent,rrTolerance,missingFilesTolerance,missingLumisTimeout)
0644     if len(selectedFilesToProcess) == 0:
0645         exit("There are no files to process")
0646
0647     #print selectedFilesToProcess
0648     ######### Copy files to working directory
0649     print("Copying files from archive to working directory")
0650     copiedFiles = []
0651     for i in range(3):
0652         copiedFiles = cp(archiveDir,workingDir,selectedFilesToProcess)
0653         if len(copiedFiles) == len(selectedFilesToProcess):
0654             break;
0655         else:
0656             subprocess.getstatusoutput("rm -rf " + workingDir)
0657     if len(copiedFiles) != len(selectedFilesToProcess):
0658         error = "ERROR: I can't copy more than " + str(len(copiedFiles)) + " files out of " + str(len(selectedFilesToProcess)) + " from " + archiveDir + " to " + workingDir
0659         sendEmail(mailList,error)
0660         exit(error)
0661
0662     print("Sorting and cleaning beamlist")
0663     beamSpotObjList = []
0664     for fileName in copiedFiles:
0665         readBeamSpotFile(workingDir+fileName,beamSpotObjList,fileIOVBase)
0666
0667     sortAndCleanBeamList(beamSpotObjList,fileIOVBase)
0668
0669     if len(beamSpotObjList) == 0:
0670         error = "WARNING: None of the processed and copied payloads has a valid fit so there are no results. This shouldn't happen since we are filtering using the run register, so there should be at least one good run."
0671         exit(error)
0672
0673     payloadFileName = "PayloadFile.txt"
0674
0675     runBased = False
0676     if dbIOVBase == "runnumber":
0677         runBased = True
0678
0679     payloadList = createWeightedPayloads(workingDir+payloadFileName,beamSpotObjList,runBased)
0680     if len(payloadList) == 0:
0681         error = "WARNING: I wasn't able to create any payload even if I have some BeamSpot objects."
0682         exit(error)
0683
0684
0685     tmpPayloadFileName = workingDir + "SingleTmpPayloadFile.txt"
0686     tmpSqliteFileName  = workingDir + "SingleTmpSqliteFile.db"
0687
0688     writeDBTemplate = os.getenv("CMSSW_BASE") + "/src/RecoVertex/BeamSpotProducer/test/write2DB_template.py"
0689     readDBTemplate  = os.getenv("CMSSW_BASE") + "/src/RecoVertex/BeamSpotProducer/test/readDB_template.py"
0690     payloadNumber = -1
0691     iovSinceFirst = '0';
0692     iovTillLast   = '0';
0693
0694     #Creating the final name for the combined sqlite file
0695     uuid = subprocess.getstatusoutput('uuidgen -t')[1]
0696     final_sqlite_file_name = databaseTag + '@' + uuid
0697     sqlite_file     = workingDir + final_sqlite_file_name + ".db"
0698     metadata_file   = workingDir + final_sqlite_file_name + ".txt"
0699
0700     for payload in payloadList:
0701         payloadNumber += 1
0702         if option.zlarge:
0703             payload.sigmaZ = 10
0704             payload.sigmaZerr = 2.5e-05
0705         tmpFile = file(tmpPayloadFileName,'w')
0706         dumpValues(payload,tmpFile)
0707         tmpFile.close()
0708         if not writeSqliteFile(tmpSqliteFileName,databaseTag,dbIOVBase,tmpPayloadFileName,writeDBTemplate,workingDir):
0709             error = "An error occurred while writing the sqlite file: " + tmpSqliteFileName
0710             exit(error)
0711         readSqliteFile(tmpSqliteFileName,databaseTag,readDBTemplate,workingDir)
0712
0713         ##############################################################
0714         #WARNING I am not sure if I am packing the right values
0715         if dbIOVBase == "runnumber":
0716             iov_since = str(payload.Run)
0717             iov_till  = iov_since
0718         elif dbIOVBase == "lumiid":
0719             iov_since = str( pack(int(payload.Run), int(payload.IOVfirst)) )
0720             iov_till  = str( pack(int(payload.Run), int(payload.IOVlast)) )
0721         elif dbIOVBase == "timestamp":
0722             error = "ERROR: IOV " + dbIOVBase + " still not implemented."
0723             exit(error)
0724         else:
0725             error = "ERROR: IOV " + dbIOVBase + " unrecognized!"
0726             exit(error)
0727
0728         if payloadNumber == 0:
0729             iovSinceFirst = iov_since
0730         if payloadNumber == len(payloadList)-1:
0731             iovTillLast   = iov_till
0732
0733         appendSqliteFile(final_sqlite_file_name + ".db", tmpSqliteFileName, databaseTag, iov_since, iov_till ,workingDir)
0734         os.system("rm -f " + tmpPayloadFileName + " " + tmpSqliteFileName)
0735
0736
0737     #### CREATE payload for merged output
0738
0739     print(" create MERGED payload card for dropbox ...")
0740
0741     dfile = open(metadata_file,'w')
0742
0743     dfile.write('destDB '  + destDB        +'\n')
0744     dfile.write('tag '     + databaseTag   +'\n')
0745     dfile.write('inputtag'                 +'\n')
0746     dfile.write('since '   + iovSinceFirst +'\n')
0747     #dfile.write('till '    + iov_till      +'\n')
0748     dfile.write('Timetype '+ dbIOVBase     +'\n')
0749
0750     ###################################################
0751     # WARNING tagType forced to offline
0752     print("WARNING TAG TYPE forced to be just offline")
0753     tagType = "offline"
0754     checkType = tagType
0755     if tagType == "express":
0756         checkType = "hlt"
0757     dfile.write('IOVCheck ' + checkType + '\n')
0758     dfile.write('usertext Beam spot position\n')
0759
0760     dfile.close()
0761
0762
0763
0764     if option.upload:
0765         print(" scp files to offline Drop Box")
0766         dropbox = "/DropBox"
0767         if option.Test:
0768             dropbox = "/DropBox_test"
0769         print("UPLOADING TO TEST DB")
0770         uploadSqliteFile(workingDir, final_sqlite_file_name, dropbox)
0771
0772     archive_sqlite_file_name = "Payloads_" + iovSinceFirst + "_" + iovTillLast + "_" + final_sqlite_file_name
0773     archive_results_file_name = "Payloads_" + iovSinceFirst + "_" + iovTillLast + "_" + databaseTag + ".txt"
0774     if not os.path.isdir(archiveDir + 'payloads'):
0775         os.mkdir(archiveDir + 'payloads')
0776     subprocess.getstatusoutput('mv ' + sqlite_file   + ' ' + archiveDir + 'payloads/' + archive_sqlite_file_name + '.db')
0777     subprocess.getstatusoutput('mv ' + metadata_file + ' ' + archiveDir + 'payloads/' + archive_sqlite_file_name + '.txt')
0778     subprocess.getstatusoutput('cp ' + workingDir + payloadFileName + ' ' + archiveDir + 'payloads/' + archive_results_file_name)
0779
0780     print(archiveDir + "payloads/" + archive_sqlite_file_name + '.db')
0781     print(archiveDir + "payloads/" + archive_sqlite_file_name + '.txt')
0782
0783     rmLock()
0784
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
0785 if __name__ == '__main__':
0786     main()