Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:04:37

0001 #!/usr/bin/env python3
0002 
0003 # This script sets up parallel jobs for the build, integrate and run
0004 # step when using Herwig with the CMSSW framework.
0005 # It takes a cmsRun file, adjusts the parameters in it accordingly to
0006 # the options and saves them to temporary cmsRun files. For each step
0007 # a different cmsRun file is created. The original file remains
0008 # unaltered.
0009 
0010 # Possible options:
0011 # -b/--build : sets the number of build jobs and starts the build step.
0012 # -i/--integrate : sets the maximal number of integration jobs
0013 #     This option already has to be set when the build step is invoked.
0014 #     The integration step will be performed if this option is set,
0015 #     unless --nointegration is chosen.
0016 #     The actual number of integration jobs may be smaller. It is
0017 #     determined by the number of files in Herwig-scratch/Build.
0018 # -r/--run : sets the number of run jobs and starts the run step.
0019 # --nointegration : use this option to set up several integration jobs
0020 #     without actually performing them
0021 # --stoprun: use this option if you want to create the cmsRun files
0022 #     without calling cmsRun
0023 # --resumerun: no new cmsRun files for the run step will be created
0024 #     For this option to work 'temporary' cmsRun files complying to the
0025 #     naming scheme have to be available. Only files up to the number
0026 #     of jobs defined by --run will be considered.
0027 # --keepfiles : don't remove the created temporary cmsRun files
0028 # -l/--log: write the output of each shell command called in a
0029 #     separate log file
0030 
0031 # Comments in the cmsRun file in the process.generator part may confuse
0032 # this script. Check the temporary cmsRun files if errors occur.
0033 
0034 # A parallelized run step is achieved by calling cmsRun an according
0035 # number of times with different seeds for Herwig. The built in feature
0036 # of Herwig won't be used.
0037 
0038 # Author: Dominik Beutel
0039 
0040 
0041 from __future__ import print_function
0042 import argparse
0043 import sys
0044 import os
0045 import subprocess
0046 import re
0047 
0048 
0049 
def uint(string):
    """Convert *string* to a non-negative integer (argparse type).

    Returns the parsed integer. Raises argparse.ArgumentTypeError if
    the parsed value is below zero; a string that int() cannot parse
    propagates the ValueError raised by int().
    """
    number = int(string)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError('{0} is negative'.format(string))
0057 
0058 
0059 
def adjust_pset(cmsrunfilename, savefilename, par_list):
    """Write a copy of a cmsRun file with adjusted generator parameters.

    The bracketed argument list that follows the first occurrence of
    'process.generator' is located by bracket matching. Inside it every
    runModeList, jobs, integrationList, maxJobs and seed parameter that
    is preceded by a comma is removed. The entries of par_list are then
    appended to the argument list and the result is written to
    savefilename. The file cmsrunfilename itself is only read.

    Args:
        cmsrunfilename: path of the cmsRun configuration to read.
        savefilename: path the adjusted configuration is written to.
        par_list: parameter assignments (strings) to append to the
            argument list, in the given order.

    Raises:
        ValueError: if 'process.generator' or the bracketed argument
            list following it cannot be found, or if its brackets are
            unbalanced. savefilename is not written in that case.
    """
    with open(cmsrunfilename, 'r') as readfile:
        parsestring = readfile.read()

    # get first opening bracket after process.generator
    generator_pos = parsestring.find('process.generator')
    if generator_pos < 0:
        raise ValueError(
            "no 'process.generator' found in '{0}'".format(cmsrunfilename))
    begin_gen_step = parsestring.find('(', generator_pos)
    if begin_gen_step < 0:
        raise ValueError(
            "no '(' after 'process.generator' in '{0}'".format(cmsrunfilename))

    # find matching bracket
    end_gen_step = -1
    bracket_counter = 1
    for position in range(begin_gen_step + 1, len(parsestring)):
        character = parsestring[position]
        if character == '(':
            bracket_counter += 1
        elif character == ')':
            bracket_counter -= 1
            if not bracket_counter:
                end_gen_step = position
                break
    if end_gen_step < 0:
        raise ValueError(
            "unbalanced brackets after 'process.generator' in '{0}'".format(
                cmsrunfilename))

    # get string between brackets
    gen_string = parsestring[begin_gen_step + 1:end_gen_step]

    # remove all parameters that would interfere; the patterns only
    # match a parameter that follows a comma
    interfering_patterns = (
        r',\s*runModeList\s*=\s*cms.untracked.string\((.*?)\)',
        r',\s*jobs\s*=\s*cms.untracked.int32\((.*?)\)',
        r',\s*integrationList\s*=\s*cms.untracked.string\((.*?)\)',
        r',\s*maxJobs\s*=\s*cms.untracked.uint32\((.*?)\)',
        r',\s*seed\s*=\s*cms.untracked.int32\((.*?)\)',
    )
    for pattern in interfering_patterns:
        gen_string = re.sub(pattern, '', gen_string)

    # write the savefile with all parameters given in par_list
    with open(savefilename, 'w') as savefile:
        savefile.write(parsestring[:begin_gen_step + 1])
        savefile.write(gen_string)
        # A separating comma is only valid after an existing argument
        # that is not already followed by a trailing comma; otherwise
        # the generated file would contain '(,' or ',,'.
        remaining = gen_string.rstrip()
        need_comma = bool(remaining) and not remaining.endswith(',')
        for item in par_list:
            savefile.write(',\n' if need_comma else '\n')
            savefile.write(item)
            need_comma = True
        savefile.write(parsestring[end_gen_step:])
0104 
0105 
0106 
def cleanupandexit(filelist):
    """Remove every file named in filelist, then exit with status 0.

    The files are removed in the given order. An error raised by
    os.remove (for example for a missing file) is not caught here.
    """
    for path in filelist:
        os.remove(path)
    raise SystemExit(0)
0112 
0113 
0114 
0115 
0116 ##################################################
0117 # Get command line arguments
0118 ##################################################
0119 
parser = argparse.ArgumentParser()

parser.add_argument('cmsRunfile', help='filename of the cmsRun configuration')
parser.add_argument('-b', '--build', help='set the number of build jobs',
                    type=int, choices=range(0, 11), default=0)
parser.add_argument('-i', '--integrate',
                    help='set the maximal number of integration jobs',
                    type=uint, default=0)
parser.add_argument('-r', '--run', help='set the number of run jobs',
                    type=int, choices=range(0, 11), default=0)
parser.add_argument('--nointegration',
                    help='build -i integration jobs without actually integrating',
                    action='store_true')
parser.add_argument('--keepfiles', help='don\'t delete temporary files',
                    action='store_true')
parser.add_argument('--stoprun',
                    help='stop after creating the cmsRun files for the run step',
                    action='store_true')
parser.add_argument('--resumerun',
                    help='use existing \'temporary\' files for the run step',
                    action='store_true')
parser.add_argument('-l', '--log',
                    help='write the output of each process in a separate log file',
                    action='store_true')

args = parser.parse_args()

# List of files needed for clean-up
cleanupfiles = []

# Create a template name for all created files.
# Only dots in the file name itself are replaced by underscores. The
# directory part is kept as given, so that a path such as './cfg.py'
# yields './cfg_py' instead of the non-existent '_/cfg_py'.
config_dir, config_base = os.path.split(args.cmsRunfile)
template_name = os.path.join(config_dir, config_base.replace('.', '_'))
0139 
0140 
0141 
0142 ##################################################
0143 # Execute the different run modes
0144 ##################################################
0145 
0146 ## Build ##
0147 
0148 # jobs defines number of build jobs in the cmsRun file
0149 # maxJobs tells Herwig to prepare the according number
0150 #     of integrations
0151 
if args.build != 0:
    # Set up parameters: the run mode, the number of build jobs and,
    # if requested, the maximal number of integration jobs to prepare
    parameters = [
        'runModeList = cms.untracked.string(\'build\')',
        'jobs = cms.untracked.int32(' + str(args.build) + ')',
    ]
    if args.integrate != 0:
        parameters.append('maxJobs = cms.untracked.uint32(' + str(args.integrate) + ')')

    # Write the temporary cmsRun file for the build step
    build_name = template_name + '_build.py'
    adjust_pset(args.cmsRunfile, build_name, parameters)

    cleanupfiles.append(build_name)

    # Start build job
    print('Setting up {0} build jobs.'.format(str(args.build)))
    print('Setting up a maximum of {0} integration jobs.'.format(str(args.integrate)))
    print('Calling\t\'cmsRun ' + build_name + '\'')

    # The command is passed as an argument list without a shell, like
    # in the integration and run steps, so that a file name containing
    # spaces or shell metacharacters is handed to cmsRun unchanged.
    build_command = ['cmsRun', build_name]
    if args.log:
        print('Writing ouput to log file: ' + build_name[:-2] + 'log')
        with open(build_name[:-2] + 'log', 'w') as build_log:
            process = subprocess.Popen(build_command, stdout=build_log, stderr=subprocess.STDOUT)
    else:
        process = subprocess.Popen(build_command)
    # Block until the build has finished before going on
    process.wait()

    print('--------------------')
    print('Build step finished.')
    print('--------------------')
0180 
0181 
0182 
0183 ## Integrate ##
0184 
0185 # Stop in case no integration is desired
if args.nointegration:
    print('--nointegration: Run will be stopped here.')
    # Honour --keepfiles on this early exit as well
    cleanupandexit([] if args.keepfiles else cleanupfiles)

if args.integrate != 0:
    # Determine number of integration jobs from the entries in
    # Herwig-scratch/Build whose names start with integrationJob<number>
    actual_int_jobs = len([string for string in os.listdir('Herwig-scratch/Build') if re.match(r'integrationJob[0-9]+', string)])

    # Stop if this number exceeds the given parameter
    if actual_int_jobs > args.integrate:
        print('Actual number of integration jobs {0} exceeds \'--integrate {1}\'.'.format(actual_int_jobs, args.integrate))
        print('Integration will not be performed.')
        # Honour --keepfiles on this early exit as well
        cleanupandexit([] if args.keepfiles else cleanupfiles)

    # Start the integration jobs
    print('Found {0} integration jobs, a maxiumum of {1} was given.'.format(actual_int_jobs, args.integrate))
    print('Starting all jobs.')
    if not args.log:
        print('--- Output may be cluttered. (Try the option -l/--log) ---')
    processes = []
    for i in range(actual_int_jobs):
        # Set up parameters: one cmsRun file per integration job index
        parameters = [
            'runModeList = cms.untracked.string(\'integrate\')',
            'integrationList = cms.untracked.string(\'' + str(i) + '\')',
        ]

        integration_name = template_name + '_integrate_' + str(i) + '.py'
        adjust_pset(args.cmsRunfile, integration_name, parameters)

        cleanupfiles.append(integration_name)

        # Start the job without waiting, so that all jobs run in parallel
        print('Calling\t\'cmsRun ' + integration_name + '\'')
        if args.log:
            print('Writing ouput to log file: ' + integration_name[:-2] + 'log')
            with open(integration_name[:-2] + 'log', 'w') as integration_log:
                processes.append(subprocess.Popen(['cmsRun', integration_name], stdout=integration_log, stderr=subprocess.STDOUT))
        else:
            processes.append(subprocess.Popen(['cmsRun', integration_name]))

    # Wait for all processes to finish
    for process in processes:
        process.wait()
    print('--------------------------')
    print('Integration step finished.')
    print('--------------------------')
0231 
0232 
0233 
0234 ## Run mode ##
0235 
0236 ## This part uses the parallelization of the run step provided by
0237 ## Herwig. At the moment it is not usable.
0238 
0239 ##if args.run != 0:
0240 ##    parameters = ['runModeList = cms.untracked.string(\'run\')']
0241 ##    parameters.append('jobs = cms.untracked.int32(' + str(args.run) + ')')
0242 ##
0243 ##    run_name = template_name + '_run.py'
0244 ##    adjust_pset(args.cmsRunfile, run_name, parameters)
0245 ##    cleanupfiles.append(run_name)
0246 ##
0247 ##    print 'Setting up {0} run jobs.'.format(str(args.run))
0248 ##    print 'Calling\n\t\'cmsRun ' + run_name + '\'\nfor the Herwig run step.'.format(str(args.run))
0249 ##    process = subprocess.Popen(['cmsRun ' + run_name], shell=True)
0250 ##    process.wait()
0251 ##    print '------------------'
0252 ##    print 'Run step finished.'
0253 ##    print '------------------'
0254 
0255 ## This is the alternative for a parallelized run step. cmsRun is called
0256 ## as often as given with the option -r/--run. So the total number of
0257 ## generated events is a corresponding multiple of the number of events
0258 ## given in the cmsRun file.
0259 
0260 
if args.stoprun and args.resumerun:
    print('--stoprun AND --resumerun are chosen: run step will be omitted.')
    # Honour --keepfiles on this early exit as well
    cleanupandexit([] if args.keepfiles else cleanupfiles)

if args.run != 0:
    # Start the run jobs
    print('Setting up {0} runs.'.format(args.run))
    if not args.log:
        print('--- Output may be cluttered. (Try the option -l/--log) ---')
    processes = []
    for i in range(args.run):
        run_name = template_name + '_run_' + str(i) + '.py'

        # Only create new files if this isn't a resumed run
        if not args.resumerun:
            parameters = [
                'runModeList = cms.untracked.string(\'run\')',
                # Set different seeds: the job index is used as seed
                'seed = cms.untracked.int32(' + str(i) + ')',
            ]
            adjust_pset(args.cmsRunfile, run_name, parameters)

        # With --stoprun the files are only created, never executed
        if args.stoprun:
            continue

        # Don't mark the files for cleanup if this is a resumed run
        if not args.resumerun:
            cleanupfiles.append(run_name)

        # A resumed run may lack some of the expected files
        if not os.path.isfile(run_name):
            print('\'' + run_name + '\' not found. It will be skipped.')
            continue

        # Start the job without waiting, so that all jobs run in parallel
        print('Calling\t\'cmsRun ' + run_name + '\'')
        if args.log:
            print('Writing ouput to log file: ' + run_name[:-2] + 'log')
            with open(run_name[:-2] + 'log', 'w') as run_log:
                processes.append(subprocess.Popen(['cmsRun', run_name], stdout=run_log, stderr=subprocess.STDOUT))
        else:
            processes.append(subprocess.Popen(['cmsRun', run_name]))

    # Wait for all processes to finish
    for process in processes:
        process.wait()
    if args.stoprun:
        print('--stoprun: kept run files and stopped before calling cmsRun')
    print('------------------')
    print('Run step finished.')
    print('------------------')


# Remove the temporary cmsRun files unless they were asked to be kept
if not args.keepfiles:
    cleanupandexit(cleanupfiles)