Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-12-01 23:40:22

0001 #!/usr/bin/env python3
0002 
0003 # This script sets up parallel jobs for the build, integrate and run
0004 # step when using Herwig with the CMSSW framework.
0005 # It takes a cmsRun file, adjusts the parameters in it accordingly to
0006 # the options and saves them to temporary cmsRun files. For each step
0007 # a different cmsRun file is created. The original file remains
0008 # unaltered.
0009 
0010 # Possible options:
0011 # -b/--build : sets the number of build jobs and starts the build step.
0012 # -i/--integrate : sets the maximal number of integration jobs
0013 #     This option already has to be set when the build step is invoked.
0014 #     The integration step will be performed if this option is set,
0015 #     unless --nointegration is chosen.
0016 #     The actual number of integration jobs may be smaller. It is
0017 #     determined by the number of files in Herwig-scratch/Build.
0018 # -r/--run : sets the number of run jobs and starts the run step.
0019 # --nointegration : use this option to set up several integration jobs
0020 #     without actually performing them
0021 # --stoprun: use this option if you want to create the cmsRun files
0022 #     without calling cmsRun
0023 # --resumerun: no new cmsRun files for the run step will be created
0024 #     For this option to work 'temporary' cmsRun files complying to the
0025 #     naming scheme have to be available. Only files up to the number
0026 #     of jobs defined by --run will be considered.
0027 # --keepfiles : don't remove the created temporary cmsRun files
0028 # -l/--log: write the output of each shell command called in a
0029 #     separate log file
0030 
0031 # Comments in the cmsRun file in the process.generator part may confuse
0032 # this script. Check the temporary cmsRun files if errors occur.
0033 
0034 # A parallelized run step is achieved by calling cmsRun the corresponding
0035 # number of times with different seeds for Herwig. The built-in feature
0036 # of Herwig won't be used.
0037 
0038 # Author: Dominik Beutel
0039 
0040 
0041 import argparse
0042 import sys
0043 import os
0044 import subprocess
0045 import re
0046 
0047 
0048 
def uint(string):
    """Convert a command line string to a non-negative integer.

    Meant to be used as an argparse ``type`` callable.

    Raises:
        ValueError: propagated from int() if ``string`` is not an integer.
        argparse.ArgumentTypeError: if the parsed number is below zero.
    """
    number = int(string)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError('{0} is negative'.format(string))
0056 
0057 
0058 
def adjust_pset(cmsrunfilename, savefilename, par_list):
    """Write a copy of a cmsRun file with replaced Herwig steering parameters.

    The argument list of ``process.generator`` is located by bracket
    matching. Every runModeList, jobs, integrationList, maxJobs and seed
    parameter found in it is removed, the entries of par_list are appended
    to the argument list and the result is saved as savefilename. The
    original file is only read.

    The bracket matching is purely textual, so brackets inside comments or
    string literals of the generator part can mislead it. A parameter is
    only removed if it is preceded by a comma.

    Args:
        cmsrunfilename: path of the cmsRun configuration to read.
        savefilename: path of the adjusted configuration to write.
        par_list: iterable of strings, each one a complete parameter
            assignment such as "jobs = cms.untracked.int32(2)".

    Raises:
        ValueError: if 'process.generator' or its balanced pair of
            brackets cannot be found. Nothing is written in that case.
    """
    with open(cmsrunfilename, 'r') as readfile:
        parsestring = readfile.read()

    # get first opening bracket after process.generator
    generator_position = parsestring.find('process.generator')
    if generator_position < 0:
        raise ValueError(
            "'process.generator' not found in '{0}'".format(cmsrunfilename))
    begin_gen_step = parsestring.find('(', generator_position)
    if begin_gen_step < 0:
        raise ValueError(
            "no opening bracket after 'process.generator' in '{0}'".format(
                cmsrunfilename))

    # find matching bracket
    bracket_counter = 1
    for position in range(begin_gen_step + 1, len(parsestring)):
        if parsestring[position] == '(':
            bracket_counter += 1
        elif parsestring[position] == ')':
            bracket_counter -= 1
        if not bracket_counter:
            end_gen_step = position
            break
    else:
        # Without this check an empty generator part would be written and
        # the rest of the file duplicated.
        raise ValueError(
            "unbalanced brackets after 'process.generator' in '{0}'".format(
                cmsrunfilename))

    # get string between brackets
    gen_string = parsestring[begin_gen_step + 1:end_gen_step]

    # remove all parameters that would interfere
    interfering_parameters = (
        ('runModeList', 'string'),
        ('jobs', 'int32'),
        ('integrationList', 'string'),
        ('maxJobs', 'uint32'),
        ('seed', 'int32'),
    )
    for name, cms_type in interfering_parameters:
        pattern = (r',\s*' + name + r'\s*=\s*cms\.untracked\.' + cms_type
                   + r'\((.*?)\)')
        gen_string = re.sub(pattern, '', gen_string)

    # write the savefile with all parameters given in par_list
    with open(savefilename, 'w') as savefile:
        savefile.write(parsestring[:begin_gen_step + 1])
        savefile.write(gen_string)
        for item in par_list:
            savefile.write(',\n')
            savefile.write(item)
        savefile.write(parsestring[end_gen_step:])
0103 
0104 
0105 
def cleanupandexit(filelist):
    """Delete the files in filelist and exit with status 0.

    Files that do not exist (any more) are skipped, so the remaining
    entries are still removed and the script terminates without a
    traceback.

    Args:
        filelist: iterable of file names to remove.

    Raises:
        SystemExit: always, with exit code 0.
    """
    for filename in filelist:
        try:
            os.remove(filename)
        except FileNotFoundError:
            # Already gone: nothing left to clean up for this entry.
            pass
    sys.exit(0)
0111 
0112 
0113 
0114 
0115 ##################################################
0116 # Get command line arguments
0117 ##################################################
0118 
# Command line interface of the script.
parser = argparse.ArgumentParser()

parser.add_argument(
    'cmsRunfile',
    help='filename of the cmsRun configuration',
)

# Job counts; 0 (the default) leaves the corresponding step out.
parser.add_argument(
    '-b', '--build',
    type=int,
    choices=range(0, 11),
    default=0,
    help='set the number of build jobs',
)
parser.add_argument(
    '-i', '--integrate',
    type=uint,
    default=0,
    help='set the maximal number of integration jobs',
)
parser.add_argument(
    '-r', '--run',
    type=int,
    choices=range(0, 11),
    default=0,
    help='set the number of run jobs',
)

# Boolean switches, all off unless given on the command line.
switches = (
    (('--nointegration',), 'build -i integration jobs without actually integrating'),
    (('--keepfiles',), "don't delete temporary files"),
    (('--stoprun',), 'stop after creating the cmsRun files for the run step'),
    (('--resumerun',), "use existing 'temporary' files for the run step"),
    (('-l', '--log'), 'write the output of each process in a separate log file'),
)
for option_strings, description in switches:
    parser.add_argument(*option_strings, help=description, action='store_true')

args = parser.parse_args()
0132 
# List of files needed for clean-up
cleanupfiles = []

# Create a template name for all created files.
# Only the dots of the file name itself are replaced. A directory part
# such as './' or '../' has to stay intact, otherwise the temporary files
# would be placed in a directory that does not exist.
config_dir, config_file = os.path.split(args.cmsRunfile)
template_name = os.path.join(config_dir, config_file.replace('.', '_'))
0138 
0139 
0140 
0141 ##################################################
0142 # Execute the different run modes
0143 ##################################################
0144 
0145 ## Build ##
0146 
0147 # jobs defines number of build jobs in the cmsRun file
0148 # maxJobs tells Herwig to prepare the according number
0149 #     of integrations
0150 
if args.build != 0:
    # Set up parameters
    parameters = [
        'runModeList = cms.untracked.string(\'build\')',
        'jobs = cms.untracked.int32(' + str(args.build) + ')',
    ]
    if args.integrate != 0:
        parameters.append('maxJobs = cms.untracked.uint32(' + str(args.integrate) + ')')

    # Write the temporary cmsRun file for the build step and remember it
    # for the clean-up.
    build_name = template_name + '_build.py'
    adjust_pset(args.cmsRunfile, build_name, parameters)

    cleanupfiles.append(build_name)

    # Start build job
    print('Setting up {0} build jobs.'.format(str(args.build)))
    print('Setting up a maximum of {0} integration jobs.'.format(str(args.integrate)))
    print('Calling\t\'cmsRun ' + build_name + '\'')

    if args.log:
        print('Writing ouput to log file: ' + build_name[:-2] + 'log')
        with open(build_name[:-2] + 'log', 'w') as build_log:
            process = subprocess.Popen(['cmsRun', build_name], stdout=build_log, stderr=subprocess.STDOUT)
    else:
        # Pass an argument list without a shell, like the integrate and
        # run steps do. A file name containing spaces or shell
        # metacharacters is then handed to cmsRun unchanged.
        process = subprocess.Popen(['cmsRun', build_name])
    # The build has to be complete before the following steps start.
    process.wait()

    print('--------------------')
    print('Build step finished.')
    print('--------------------')
0179 
0180 
0181 
0182 ## Integrate ##
0183 
0184 # Stop in case no integration is desired
if args.nointegration:
    print('--nointegration: Run will be stopped here.')
    # Honour --keepfiles on this early exit too: with that option the
    # temporary cmsRun files must survive, so nothing is handed over for
    # deletion.
    cleanupandexit([] if args.keepfiles else cleanupfiles)
0188 
if args.integrate != 0:
    # Determine number of integration jobs: count the entries of
    # Herwig-scratch/Build whose name starts with integrationJob<number>.
    job_pattern = re.compile(r'integrationJob[0-9]+')
    actual_int_jobs = sum(
        1 for entry in os.listdir('Herwig-scratch/Build') if job_pattern.match(entry)
    )

    # Stop if this number exceeds the given parameter
    if actual_int_jobs > args.integrate:
        print('Actual number of integration jobs {0} exceeds \'--integrate {1}\'.'.format(actual_int_jobs, args.integrate))
        print('Integration will not be performed.')
        # Honour --keepfiles on this early exit too.
        cleanupandexit([] if args.keepfiles else cleanupfiles)

    # Start the integration jobs
    print('Found {0} integration jobs, a maxiumum of {1} was given.'.format(actual_int_jobs, args.integrate))
    print('Starting all jobs.')
    if not args.log:
        print('--- Output may be cluttered. (Try the option -l/--log) ---')
    processes = []
    for i in range(actual_int_jobs):
        # Set up parameters: one temporary cmsRun file per integration job
        parameters = [
            'runModeList = cms.untracked.string(\'integrate\')',
            'integrationList = cms.untracked.string(\'' + str(i) + '\')',
        ]

        integration_name = template_name + '_integrate_' + str(i) + '.py'
        adjust_pset(args.cmsRunfile, integration_name, parameters)

        cleanupfiles.append(integration_name)

        # Launch without waiting so that all jobs run side by side.
        print('Calling\t\'cmsRun ' + integration_name + '\'')
        if args.log:
            print('Writing ouput to log file: ' + integration_name[:-2] + 'log')
            with open(integration_name[:-2] + 'log', 'w') as integration_log:
                processes.append(subprocess.Popen(['cmsRun', integration_name], stdout=integration_log, stderr=subprocess.STDOUT))
        else:
            processes.append(subprocess.Popen(['cmsRun', integration_name]))

    # Wait for all processes to finish
    for process in processes:
        process.wait()
    print('--------------------------')
    print('Integration step finished.')
    print('--------------------------')
0230 
0231 
0232 
0233 ## Run mode ##
0234 
0235 ## This part uses the parallelization of the run step provided by
0236 ## Herwig. At the moment it is not usable.
0237 
0238 ##if args.run != 0:
0239 ##    parameters = ['runModeList = cms.untracked.string(\'run\')']
0240 ##    parameters.append('jobs = cms.untracked.int32(' + str(args.run) + ')')
0241 ##
0242 ##    run_name = template_name + '_run.py'
0243 ##    adjust_pset(args.cmsRunfile, run_name, parameters)
0244 ##    cleanupfiles.append(run_name)
0245 ##
0246 ##    print 'Setting up {0} run jobs.'.format(str(args.run))
0247 ##    print 'Calling\n\t\'cmsRun ' + run_name + '\'\nfor the Herwig run step.'.format(str(args.run))
0248 ##    process = subprocess.Popen(['cmsRun ' + run_name], shell=True)
0249 ##    process.wait()
0250 ##    print '------------------'
0251 ##    print 'Run step finished.'
0252 ##    print '------------------'
0253 
0254 ## This is the alternative for a parallelized run step. cmsRun is called
0255 ## as often as given with the option -r/--run. So the total number of
0256 ## generated events is a corresponding multiple of the number of events
0257 ## given in the cmsRun file.
0258 
0259 
# --stoprun forbids calling cmsRun and --resumerun forbids creating new
# files, so together they leave nothing to do for the run step.
if args.stoprun and args.resumerun:
    print('--stoprun AND --resumerun are chosen: run step will be omitted.')
    # Honour --keepfiles on this early exit too.
    cleanupandexit([] if args.keepfiles else cleanupfiles)
0263 
if args.run != 0:
    # Start the run jobs
    print('Setting up {0} runs.'.format(args.run))
    if not args.log:
        print('--- Output may be cluttered. (Try the option -l/--log) ---')

    processes = []
    for job_index in range(args.run):
        run_name = '{0}_run_{1}.py'.format(template_name, job_index)
        create_files = not args.resumerun

        # A resumed run relies on already existing files; otherwise write
        # a fresh cmsRun file with its own seed for this job.
        if create_files:
            adjust_pset(args.cmsRunfile, run_name, [
                'runModeList = cms.untracked.string(\'run\')',
                'seed = cms.untracked.int32({0})'.format(job_index),
            ])

        # With --stoprun only the files are prepared, nothing is executed
        # and nothing is marked for clean-up.
        if args.stoprun:
            continue

        # Only files created by this invocation are marked for clean-up.
        if create_files:
            cleanupfiles.append(run_name)

        if not os.path.isfile(run_name):
            print('\'' + run_name + '\' not found. It will be skipped.')
            continue

        command = ['cmsRun', run_name]
        print('Calling\t\'cmsRun ' + run_name + '\'')
        if not args.log:
            processes.append(subprocess.Popen(command))
            continue

        log_name = run_name[:-2] + 'log'
        print('Writing ouput to log file: ' + log_name)
        with open(log_name, 'w') as log_file:
            processes.append(subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT))

    # Wait for all processes to finish
    for process in processes:
        process.wait()
    if args.stoprun:
        print('--stoprun: kept run files and stopped before calling cmsRun')
    print('------------------')
    print('Run step finished.')
    print('------------------')



# Final clean-up of the temporary cmsRun files, skipped with --keepfiles.
if not args.keepfiles:
    cleanupandexit(cleanupfiles)