python/mpslib/Mpslibclass.py

0001 # This jobdatabase class interacts with the mps.db file.
0002 # Its member variables are often accessed in the mps_... scripts.
0003 #
0004 # Meaning of the database variables: (still need to work on these)
0005 #
0006 # (1) Header
0007 #       header          - version information
0008 #       batchScript     - base script for serial job
0009 #       cfgTemplate     - template for cfg file
0010 #       infiList        - list of input files to be serialized
0011 #       classInf        - batch class information (might contain two ':'-separated)
0012 #       addFiles        - job name for submission
0013 #       driver          - specifies whether merge job is foreseen
0014 #       nJobs           - number of serial jobs (not including merge job)
0015 #       mergeScript     - base script for merge job
0016 #       mssDir          - directory for mass storage (e.g. Castor)
0017 #       updateTime      - time of last update (seconds since 1970)
0018 #       updateTimeHuman - time of last update (human readable)
0019 #       elapsedTime     - seconds since last update
0020 #       mssDirPool      - pool for $mssDir (e.g. cmscaf/cmscafuser)
0021 #       pedeMem         - Memory allocated for pede
0022 #       spare1
0023 #       spare2
0024 #       spare3
0025
0026 # (2) Job-level variables/lists
0027 #       JOBNUMBER   - ADDED, self-explanatory
0028 #       JOBDIR      - name of job directory (not full path)
0029 #       JOBSTATUS   - status of job
0030 #       JOBRUNTIME  - present CPU time of job
0031 #       JOBNEVT     - number of events processed by job
0032 #       JOBHOST     - presently used to store remark
0033 #       JOBINCR     - CPU increment since last check
0034 #       JOBREMARK   - comment
0035 #       JOBSP1      - spare
0036 #       JOBSP2      - possible weight for pede
0037 #       JOBSP3      - possible name as given to mps_setup.pl -N <name> ...
0038 #       JOBID       - ID of the LSF/HTCondor job
0039
0040 from builtins import range
0041 import datetime
0042 import time
0043 import os
0044 import sys
0045 import re
0046 import math
0047 import fileinput
0048
0049 #-------------------------------------------------------------------------------
class jobdatabase:
    """In-memory view of an ``mps.db`` file.

    The header fields and the per-job columns are plain attributes that
    callers read and modify directly. ``read_db`` fills them from a database
    file and ``write_db`` writes them back to ``mps.db`` in the current
    working directory.
    """

    # Class-level defaults, kept so that attribute access on the class itself
    # (or on an instance created without __init__) keeps working. __init__ and
    # read_db give every instance its own lists: appending to these shared
    # class-level lists would leak job rows from one instance into another.
    JOBNUMBER, JOBDIR, JOBID, JOBSTATUS, JOBNTRY, JOBRUNTIME, JOBNEVT, JOBHOST, JOBINCR, \
    JOBREMARK, JOBSP1, JOBSP2, JOBSP3 = ([] for i in range(13))

    header, batchScript, cfgTemplate, infiList, classInf, addFiles, driver, mergeScript, \
    mssDir, updateTimeHuman, mssDirPool, spare1, spare2, spare3 = ('' for i in range(14))

    updateTime, elapsedTime, pedeMem , nJobs = -1, -1, -1, -1

    # Names of the per-job list attributes, in the order of the ':'-separated
    # fields of a job line in the database file.
    _JOB_COLUMNS = ("JOBNUMBER", "JOBDIR", "JOBID", "JOBSTATUS", "JOBNTRY",
                    "JOBRUNTIME", "JOBNEVT", "JOBHOST", "JOBINCR", "JOBREMARK",
                    "JOBSP1", "JOBSP2", "JOBSP3")

    def __init__(self):
        """Create a database object with empty, instance-private job lists."""
        self._reset_job_lists()

    def _reset_job_lists(self):
        """Bind a fresh empty list to every per-job column of this instance."""
        for column in self._JOB_COLUMNS:
            setattr(self, column, [])

    #-------------------------------------------------------------------------------
    # parses the mps.db file into the member variables and arrays
    __default_db_file_name = "mps.db"
    def read_db(self, db_file_name = __default_db_file_name):
        """Parse a database file into the header attributes and job lists.

        The first 17 lines are the header fields, in the same order in which
        ``write_db`` writes them. Every following non-empty line is one job:
        13 ':'-separated fields in the order of ``_JOB_COLUMNS``. Job lists
        are replaced, not extended, so calling this method repeatedly does not
        duplicate jobs. ``nJobs`` is set to the number of jobs whose directory
        name does not start with ``"jobm"``.

        Args:
            db_file_name: path of the database file (default ``"mps.db"``).

        Raises:
            SystemExit: with code 1, after printing a message, if the file
                does not exist.
            IOError: for any other failure to open the file.
            ValueError, IndexError: if a line cannot be parsed.
        """
        import errno    # function-scope: not among the module-level imports

        try:
            db_file = open(db_file_name, 'r')
        except IOError as e:
            # Compare the error number, not the (locale-dependent) message.
            if e.errno != errno.ENOENT:
                raise
            if db_file_name == jobdatabase.__default_db_file_name:
                msg = ("No 'mps.db' found. Make sure you are in a campaign "
                       "directory and that the campaign is set up.")
            else:
                msg = "Database file '"+db_file_name+"' not found. Exiting."
            print(msg)
            sys.exit(1)

        with db_file:   # closes the file even if parsing raises
            #read infolines at the top, used rstrip to delete the '\n'
            self.header          = db_file.readline().strip()
            self.batchScript     = db_file.readline().rstrip('\n')
            self.cfgTemplate     = db_file.readline().rstrip('\n')
            self.infiList        = db_file.readline().rstrip('\n')
            self.classInf        = db_file.readline().rstrip('\n')   #formerly named 'class' ->conflict
            self.addFiles        = db_file.readline().rstrip('\n')
            self.driver          = db_file.readline().rstrip('\n')
            self.mergeScript     = db_file.readline().rstrip('\n')
            self.mssDir          = db_file.readline().rstrip('\n')
            self.updateTime      = int(db_file.readline())
            self.updateTimeHuman = db_file.readline().rstrip('\n')
            self.elapsedTime     = int(db_file.readline())
            self.mssDirPool      = db_file.readline().rstrip('\n')
            self.pedeMem         = int(db_file.readline())
            self.spare1          = db_file.readline().rstrip('\n')
            self.spare2          = db_file.readline().rstrip('\n')
            self.spare3          = db_file.readline().rstrip('\n')

            #read actual jobinfo into arrays (starting from empty ones)
            self._reset_job_lists()
            mille_jobs = 0
            for line in db_file:
                if line.strip() == "":
                    continue                        # ignore empty lines
                parts = line.rstrip('\n').split(":")
                self.JOBNUMBER.append(int(parts[0]))
                self.JOBDIR.append(parts[1].strip())
                self.JOBID.append(parts[2])
                self.JOBSTATUS.append(parts[3].strip())
                self.JOBNTRY.append(int(parts[4]))
                self.JOBRUNTIME.append(int(parts[5]))   #int float?
                self.JOBNEVT.append(int(parts[6]))
                self.JOBHOST.append(parts[7].strip())
                self.JOBINCR.append(int(parts[8]))
                self.JOBREMARK.append(parts[9].strip())
                self.JOBSP1.append(parts[10].strip())
                self.JOBSP2.append(parts[11].strip())
                self.JOBSP3.append(parts[12].strip())

                #count the jobs that are not "jobm..." directories
                if not self.JOBDIR[-1].startswith("jobm"):
                    mille_jobs += 1

        self.nJobs = mille_jobs

    #-------------------------------------------------------------------------------
    # formats one line of the job table printed by print_memdb
    def _format_job_row(self, fmt, label, i, remark_statuses):
        """Return the table row for job index ``i``.

        ``label`` fills the first column. The remark column shows JOBHOST,
        or JOBREMARK when the job status is one of ``remark_statuses``.
        """
        remark_field = self.JOBHOST[i]
        if self.JOBSTATUS[i] in remark_statuses:
            remark_field = self.JOBREMARK[i]
        return fmt % (
            label,
            self.JOBDIR[i],
            self.JOBID[i],
            self.JOBSTATUS[i][:6],
            self.JOBNTRY[i],
            self.JOBRUNTIME[i],
            self.JOBNEVT[i],
            remark_field[:12],
            self.JOBSP2[i],
            self.JOBSP3[i])

    #-------------------------------------------------------------------------------
    # prints the member variables and arrays to the terminal
    def print_memdb(self):
        """Print the header fields, the job table and event/CPU totals.

        The first ``nJobs`` entries are printed with their job number. If
        ``driver`` is ``'merge'`` the remaining entries are printed with the
        label ``MMM``. Totals are summed over the first ``nJobs`` entries.
        """
        #print metainfo
        print("\n=== mps database printout ===\n")
        print(self.header)
        print('Script:\t\t',    self.batchScript)
        print('cfg:\t\t',       self.cfgTemplate)
        print('files:\t\t',     self.infiList)
        print('class:\t\t',     self.classInf)
        print('name:\t\t',      self.addFiles)
        print('driver:\t\t',    self.driver)
        print('mergeScript:\t', self.mergeScript)
        print('mssDir:\t\t',    self.mssDir)
        print('updateTime:\t',  self.updateTimeHuman)
        print('elapsed:\t',     self.elapsedTime)
        print('mssDirPool:\t',  self.mssDirPool)
        print('pedeMem:\t',             self.pedeMem, '\n')

        #print interesting Job-level lists ---- to add: t/evt, fix remarks
        # Column widths grow with the number of digits of the job count.
        headFmt = '###     dir      jobid    stat  try  rtime      nevt  remark   weight  name'
        jobFmt = '%03d  %6s  %10s%6s  %3d  %5d  %6d  %12s  %5s  %s'
        mrgFmt = '%s  %6s  %10s%6s  %3d  %5d  %6d  %12s  %5s  %s'
        if self.nJobs>999:
            headFmt = '####     dir       jobid  stat  try  rtime      nevt    remark   weight  name'
            jobFmt = '%04d  %7s  %10s%6s  %3d  %5d  %6d  %12s  %5s  %s'
            mrgFmt = '%s   %7s  %10s%6s  %3d  %5d  %6d  %12s  %5s  %s'
        if self.nJobs>9999:
            jobFmt = '%d  %s  %10s%6s  %3d  %5d  %6d  %12s  %5s  %s'
            mrgFmt = '%s    %8s  %10s%6s  %3d  %5d  %6d  %12s  %5s  %s'
        print(headFmt)
        print("------------------------------------------------------------------------------")
        for i in range(self.nJobs):
            print(self._format_job_row(jobFmt, self.JOBNUMBER[i], i, ("FAIL",)))

        #print merge Jobs if merge mode
        if self.driver == 'merge':
            for i in range(self.nJobs,len(self.JOBDIR)):
                print(self._format_job_row(mrgFmt, 'MMM', i, ("FAIL", "WARN")))

        #print summed info
        totalEvents = sum(self.JOBNEVT[:self.nJobs])
        totalCpu    = sum(self.JOBRUNTIME[:self.nJobs])
        meanCpuPerEvent = 0.
        if totalEvents > 0:
            meanCpuPerEvent = float(totalCpu)/totalEvents
        print("------------------------------------------------------------------------------")
        print("\t\t\t\t\tEvent total:\t",       totalEvents)
        print("\t\t\t\t\tCPU total:\t",         totalCpu,               's')
        print("\t\t\t\t\tMean CPU/event:\t",meanCpuPerEvent,'s')

    #-------------------------------------------------------------------------------
    # writes a new mps.db file from the members. Replaces the old mps.db
    def write_db(self):
        """Write the current state to ``mps.db`` in the working directory.

        Before writing, the header, time stamps and spare fields are
        refreshed: ``elapsedTime`` becomes the time since the previous
        ``updateTime`` (unless that is 0), and ``updateTime`` becomes now.
        An existing ``mps.db`` is first copied to ``mps.db~``. Jobs are
        numbered 1..N by their position in the lists; the stored JOBNUMBER
        values are not written.

        Raises:
            IndexError: if a job list is shorter than JOBID. This is raised
                before ``mps.db`` is touched.
        """
        import shutil   # function-scope: not among the module-level imports

        self.header = "mps database schema 4.0"
        self.currentTime = int(time.time())
        self.elapsedTime = 0
        if self.updateTime != 0:
            self.elapsedTime = self.currentTime - self.updateTime
        self.updateTime = self.currentTime
        self.updateTimeHuman = str(datetime.datetime.today())   #no timezone :(
        self.spare1 = "-- unused --"
        self.spare2 = "-- unused --"
        self.spare3 = "-- unused --"

        headData = [ self.header, self.batchScript, self.cfgTemplate, self.infiList,
                     self.classInf, self.addFiles, self.driver, self.mergeScript,
                     self.mssDir, self.updateTime, self.updateTimeHuman,
                     self.elapsedTime, self.mssDirPool, self.pedeMem,
                     self.spare1, self.spare2, self.spare3 ]

        # Build every line up front so that inconsistent list lengths fail
        # here, before the existing file is truncated.
        lines = ["%s\n" % item for item in headData]
        for i in range(len(self.JOBID)):
            lines.append('%d:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s\n' %
                         (i+1,
                          self.JOBDIR[i],
                          self.JOBID[i],
                          self.JOBSTATUS[i],
                          self.JOBNTRY[i],
                          self.JOBRUNTIME[i],
                          self.JOBNEVT[i],
                          self.JOBHOST[i],
                          self.JOBINCR[i],
                          self.JOBREMARK[i],
                          self.JOBSP1[i],
                          self.JOBSP2[i],
                          self.JOBSP3[i]))

        #if mps.db already exists, backup as mps.db~ (in case of interrupt during write)
        # Done in Python instead of a shell '[[ ... ]]' test, which only
        # works when the system shell is bash-compatible.
        if os.path.isfile("mps.db"):
            try:
                shutil.copy2("mps.db", "mps.db~")
            except (IOError, OSError) as e:
                # A failed backup was never fatal; report it and carry on.
                sys.stderr.write("Warning: could not back up mps.db: %s\n" % e)

        with open("mps.db", "w") as db_file:
            db_file.writelines(lines)

    #-------------------------------------------------------------------------------
    # returns job class as stored in db
    # one and only argument may be "mille" or "pede" for mille or pede jobs
    def get_class(self, argument=''):
        """Return the batch class stored in ``classInf``.

        ``classInf`` is either ``'class'`` or ``'classMille:classPede'``.

        Args:
            argument: ``'mille'`` selects the first entry; ``'pede'`` selects
                the second entry, or the only one if there is just one.

        Raises:
            SystemExit: with code 1, after printing a message, if
                ``classInf`` has more than two ':'-separated entries or
                ``argument`` is neither ``'mille'`` nor ``'pede'``.
        """
        classes = self.classInf.split(':')
        # str.split always yields at least one element, so only the upper
        # bound needs checking.
        if len(classes) > 2:
            print('\nget_class():\n  class must be of the form \'class\' or \'classMille:classPede\', but is \'%s\'!\n\n' % self.classInf)
            sys.exit(1)
        if argument == 'mille':
            return classes[0]
        if argument == 'pede':
            return classes[-1]
        print('\nget_class():\n  Know class only for \'mille\' or \'pede\', not %s!\n\n' %argument)
        sys.exit(1)

    #-------------------------------------------------------------------------------
    #  Take card file, blank all INFI directives and insert the INFI directives
    #  from the modifier file instead
    def mps_splice(self,inCfg='',modCfg='',outCfg='the.py',isn=0,skip_events=0,max_events=0):
        """Create ``outCfg`` from the template ``inCfg`` and the file list ``modCfg``.

        Every occurrence of the text ``ISN`` in the template is replaced by
        ``isn``. After each line that starts with
        ``readFiles = cms.untracked.vstring``, ``readFiles.extend([...])``
        blocks listing the files from ``modCfg`` are inserted, with at most
        255 entries per block.

        Args:
            inCfg: path of the template configuration.
            modCfg: path of the file list, one name per line. A first line
                containing ``CastorPool=`` is dropped.
            outCfg: path of the configuration to write.
            isn: job number to substitute for ``ISN`` (converted with ``str``).
            skip_events: if non-zero, a ``process.source.skipEvents`` line
                is appended. Must be an integer.
            max_events: if non-zero, a ``process.maxEvents`` line is
                appended. Must be an integer.
        """
        with open(inCfg, 'r') as infile:
            body = infile.read()

        # read modifier file
        with open(modCfg, 'r') as modfile:
            fileNames = modfile.read().strip().split('\n')

        # prepare the new fileNames directive. Delete first line if necessary.
        if 'CastorPool=' in fileNames[0]:
            del fileNames[0]

        # replace ISN number; str() so that integers (such as the default 0)
        # are accepted as well as pre-formatted strings like '001'
        body = body.replace('ISN', str(isn))

        # One readFiles.extend block per 255 file names.
        blocks = []
        for start in range(0, len(fileNames), 255):
            entries = ["'" + name.strip() + "'" for name in fileNames[start:start+255]]
            blocks.append("readFiles.extend([\n    " + ",\n    ".join(entries) + "])\n")
        # The blocks go into the file last chunk first. This is the order the
        # former implementation produced by inserting each successive block
        # directly behind the marker line.
        insertion = "".join(reversed(blocks))

        if insertion:
            marker = re.compile(r'readFiles\s*=\s*cms\.untracked\.vstring')
            pieces = []
            # Split into lines, keeping the '\n' terminators; the last line
            # may be unterminated.
            for line in re.findall(r'[^\n]*\n|[^\n]+', body):
                pieces.append(line)
                if marker.match(line):
                    pieces.append(insertion)
            body = "".join(pieces)

        if skip_events != 0:
            body += ("process.source.skipEvents = cms.untracked.uint32({0:d})\n"
                     .format(skip_events))

        if max_events != 0:
            body += ("process.maxEvents = cms.untracked.PSet(input = "
                     "cms.untracked.int32({0:d}))\n".format(max_events))

        # print to outCfg
        with open(outCfg, 'w') as outfile:
            outfile.write(body)
0331