Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
#!/usr/bin/env python3

# This script sets up parallel jobs for the build, integrate and run
# steps when using Herwig with the CMSSW framework.
# It takes a cmsRun file, adjusts the parameters in it according to
# the options and saves them to temporary cmsRun files. For each step
# a different cmsRun file is created. The original file remains
# unaltered.

# Possible options:
# -b/--build : sets the number of build jobs and starts the build step.
# -i/--integrate : sets the maximal number of integration jobs
#     This option already has to be set when the build step is invoked.
#     The integration step will be performed if this option is set,
#     unless --nointegration is chosen.
#     The actual number of integration jobs may be smaller. It is
#     determined by the number of files in Herwig-scratch/Build.
# -r/--run : sets the number of run jobs and starts the run step.
# --nointegration : use this option to set up several integration jobs
#     without actually performing them
# --stoprun: use this option if you want to create the cmsRun files
#     without calling cmsRun
# --resumerun: no new cmsRun files for the run step will be created
#     For this option to work 'temporary' cmsRun files complying with the
#     naming scheme have to be available. Only files up to the number
#     of jobs defined by --run will be considered.
# --keepfiles : don't remove the created temporary cmsRun files
# -l/--log: write the output of each shell command called in a
#     separate log file

# Comments in the cmsRun file in the process.generator part may confuse
# this script. Check the temporary cmsRun files if errors occur.

# A parallelized run step is achieved by calling cmsRun a corresponding
# number of times with different seeds for Herwig. The built-in feature
# of Herwig won't be used.

# Author: Dominik Beutel


import argparse
import sys
import os
import subprocess
import re



def uint(string):
    """Convert a command line token to a non-negative integer.

    Meant to be used as an argparse ``type=`` callable: the token is
    parsed with int() and an ArgumentTypeError is raised if the result
    is below zero.
    """
    number = int(string)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError('{0} is negative'.format(string))



def adjust_pset(cmsrunfilename, savefilename, par_list):
    """Write a copy of a cmsRun file with adjusted generator parameters.

    The argument list of the first call following 'process.generator'
    is located by bracket matching. Inside it all occurrences of the
    runModeList, jobs, integrationList, maxJobs and seed parameters are
    removed. Then the parameters in par_list are appended and the
    result is saved to savefilename. The file cmsrunfilename is only
    read.

    Only parameters preceded by a comma are removed, so a parameter
    that is the very first argument stays in place.

    Arguments:
        cmsrunfilename: path of the cmsRun configuration to read
        savefilename: path of the adjusted configuration to write
        par_list: iterable of strings, each written as one additional
            argument (e.g. "jobs = cms.untracked.int32(2)")

    Raises:
        ValueError: if no 'process.generator' part followed by a
            balanced pair of brackets is found. Nothing is written in
            that case.
    """

    with open(cmsrunfilename, 'r') as readfile:
        parsestring = readfile.read()

    # get first opening bracket after process.generator
    gen_position = parsestring.find('process.generator')
    if gen_position < 0:
        raise ValueError(
            "no 'process.generator' found in '{0}'".format(cmsrunfilename))
    begin_gen_step = parsestring.find('(', gen_position)
    if begin_gen_step < 0:
        raise ValueError(
            "no opening bracket after 'process.generator' in '{0}'".format(cmsrunfilename))

    # find matching bracket
    end_gen_step = None
    bracket_counter = 1
    for position in range(begin_gen_step + 1, len(parsestring)):
        character = parsestring[position]
        if character == '(':
            bracket_counter += 1
        elif character == ')':
            bracket_counter -= 1
            if bracket_counter == 0:
                end_gen_step = position
                break
    if end_gen_step is None:
        raise ValueError(
            "unbalanced brackets after 'process.generator' in '{0}'".format(cmsrunfilename))

    # get string between brackets
    gen_string = parsestring[begin_gen_step + 1:end_gen_step]

    # remove all parameters that would interfere
    # (parameter name, cms.untracked type), removed in this order
    interfering = (
        ('runModeList', 'string'),
        ('jobs', 'int32'),
        ('integrationList', 'string'),
        ('maxJobs', 'uint32'),
        ('seed', 'int32'),
    )
    for par_name, par_type in interfering:
        pattern = (r',\s*' + par_name + r'\s*=\s*cms\.untracked\.'
                   + par_type + r'\((.*?)\)')
        gen_string = re.sub(pattern, '', gen_string)

    # write the savefile with all parameters given in par_list
    with open(savefilename, 'w') as savefile:
        savefile.write(parsestring[:begin_gen_step + 1])
        savefile.write(gen_string)
        for item in par_list:
            savefile.write(',\n')
            savefile.write(item)
        # the remainder starts with the closing bracket itself
        savefile.write(parsestring[end_gen_step:])



def cleanupandexit(filelist):
    """Delete the files in filelist and exit with status 0.

    A file that does not exist any more is skipped, so the remaining
    files are still removed and the script still ends with sys.exit(0).
    """
    for filename in filelist:
        try:
            os.remove(filename)
        except FileNotFoundError:
            # Already gone; there is nothing left to clean up for it
            pass
    sys.exit(0)




##################################################
# Get command line arguments
##################################################

parser = argparse.ArgumentParser()

parser.add_argument('cmsRunfile', help='filename of the cmsRun configuration')
# The number of build and run jobs is restricted to 0..10; 0 skips the step
parser.add_argument('-b', '--build', help='set the number of build jobs',
                    type=int, choices=range(0, 11), default=0)
parser.add_argument('-i', '--integrate', help='set the maximal number of integration jobs',
                    type=uint, default=0)
parser.add_argument('-r', '--run', help='set the number of run jobs',
                    type=int, choices=range(0, 11), default=0)
parser.add_argument('--nointegration', help='build -i integration jobs without actually integrating',
                    action='store_true')
parser.add_argument('--keepfiles', help='don\'t delete temporary files',
                    action='store_true')
parser.add_argument('--stoprun', help='stop after creating the cmsRun files for the run step',
                    action='store_true')
parser.add_argument('--resumerun', help='use existing \'temporary\' files for the run step',
                    action='store_true')
parser.add_argument('-l', '--log', help='write the output of each process in a separate log file',
                    action='store_true')

args = parser.parse_args()

# List of files needed for clean-up
cleanupfiles = []

# Create a template name for all created files.
# Only the dots in the file name itself are replaced. A directory part
# is kept as it is, so that e.g. './config.py' gives './config_py'
# instead of '_/config_py'.
template_dir, template_base = os.path.split(args.cmsRunfile)
template_name = os.path.join(template_dir, template_base.replace('.', '_'))



##################################################
# Execute the different run modes
##################################################

## Build ##

# jobs defines number of build jobs in the cmsRun file
# maxJobs tells Herwig to prepare the according number
#     of integrations

if args.build != 0:
    # Set up the parameters written to the temporary cmsRun file
    parameters = ['runModeList = cms.untracked.string(\'build\')']
    parameters.append('jobs = cms.untracked.int32(' + str(args.build) + ')')
    if args.integrate != 0:
        parameters.append('maxJobs = cms.untracked.uint32(' + str(args.integrate) + ')')

    build_name = template_name + '_build.py'
    adjust_pset(args.cmsRunfile, build_name, parameters)

    # Only mark the file for clean-up once it has been written
    cleanupfiles.append(build_name)

    # Start build job
    print('Setting up {0} build jobs.'.format(str(args.build)))
    print('Setting up a maximum of {0} integration jobs.'.format(str(args.integrate)))
    print('Calling\t\'cmsRun ' + build_name + '\'')

    # cmsRun is started directly with an argument list (no shell), the
    # same way as in the integrate and run steps. The file name is
    # therefore passed as a single argument, whatever characters it
    # contains.
    if args.log:
        # build_name ends in 'py'; replacing that gives the '.log' name
        print('Writing ouput to log file: ' + build_name[:-2] + 'log')
        with open(build_name[:-2] + 'log', 'w') as build_log:
            process = subprocess.Popen(['cmsRun', build_name], stdout=build_log, stderr=subprocess.STDOUT)
    else:
        process = subprocess.Popen(['cmsRun', build_name])
    # Block until cmsRun has finished; its exit status is not evaluated
    process.wait()

    print('--------------------')
    print('Build step finished.')
    print('--------------------')



## Integrate ##

# Stop in case no integration is desired
if args.nointegration:
    print('--nointegration: Run will be stopped here.')
    # --keepfiles also applies to this early exit
    cleanupandexit([] if args.keepfiles else cleanupfiles)

if args.integrate != 0:
    # Determine number of integration jobs: count the entries of
    # Herwig-scratch/Build whose names start with 'integrationJob'
    # followed by at least one digit
    actual_int_jobs = len([entry for entry in os.listdir('Herwig-scratch/Build')
                           if re.match(r'integrationJob[0-9]+', entry)])

    # Stop if this number exceeds the given parameter
    if actual_int_jobs > args.integrate:
        print('Actual number of integration jobs {0} exceeds \'--integrate {1}\'.'.format(actual_int_jobs, args.integrate))
        print('Integration will not be performed.')
        # --keepfiles also applies to this early exit
        cleanupandexit([] if args.keepfiles else cleanupfiles)

    # Start the integration jobs
    print('Found {0} integration jobs, a maxiumum of {1} was given.'.format(actual_int_jobs, args.integrate))
    print('Starting all jobs.')
    if not args.log:
        print('--- Output may be cluttered. (Try the option -l/--log) ---')
    processes = []
    for i in range(actual_int_jobs):
        # Set up parameters; every job gets its own integrationList entry
        parameters = ['runModeList = cms.untracked.string(\'integrate\')']
        parameters.append('integrationList = cms.untracked.string(\'' + str(i) + '\')')

        integration_name = template_name + '_integrate_' + str(i) + '.py'
        adjust_pset(args.cmsRunfile, integration_name, parameters)

        cleanupfiles.append(integration_name)

        print('Calling\t\'cmsRun ' + integration_name + '\'')
        if args.log:
            # integration_name ends in 'py'; replacing that gives the '.log' name
            print('Writing ouput to log file: ' + integration_name[:-2] + 'log')
            with open(integration_name[:-2] + 'log', 'w') as integration_log:
                processes.append(subprocess.Popen(['cmsRun', integration_name], stdout=integration_log, stderr=subprocess.STDOUT))
        else:
            processes.append(subprocess.Popen(['cmsRun', integration_name]))

    # All jobs were started without waiting, so they run side by side.
    # Wait for all processes to finish; exit statuses are not evaluated.
    for process in processes:
        process.wait()
    print('--------------------------')
    print('Integration step finished.')
    print('--------------------------')



## Run mode ##

## This part uses the parallelization of the run step provided by
## Herwig. At the moment it is not usable.

##if args.run != 0:
##    parameters = ['runModeList = cms.untracked.string(\'run\')']
##    parameters.append('jobs = cms.untracked.int32(' + str(args.run) + ')')
##
##    run_name = template_name + '_run.py'
##    adjust_pset(args.cmsRunfile, run_name, parameters)
##    cleanupfiles.append(run_name)
##
##    print 'Setting up {0} run jobs.'.format(str(args.run))
##    print 'Calling\n\t\'cmsRun ' + run_name + '\'\nfor the Herwig run step.'.format(str(args.run))
##    process = subprocess.Popen(['cmsRun ' + run_name], shell=True)
##    process.wait()
##    print '------------------'
##    print 'Run step finished.'
##    print '------------------'

## This is the alternative for a parallelized run step. cmsRun is called
## as often as given with the option -r/--run. So the total number of
## generated events is a corresponding multiple of the number of events
## given in the cmsRun file.


# With --resumerun no run files are created and with --stoprun none are
# executed, so the combination leaves nothing to do.
if args.stoprun and args.resumerun:
    print('--stoprun AND --resumerun are chosen: run step will be omitted.')
    # --keepfiles also applies to this early exit
    cleanupandexit([] if args.keepfiles else cleanupfiles)

if args.run != 0:
    # Start the run jobs
    print('Setting up {0} runs.'.format(args.run))
    if not args.log:
        print('--- Output may be cluttered. (Try the option -l/--log) ---')
    processes = []
    for i in range(args.run):
        run_name = template_name + '_run_' + str(i) + '.py'

        # Only create new files if this isn't a resumed run
        if not args.resumerun:
            parameters = ['runModeList = cms.untracked.string(\'run\')']
            # Set different seeds: the job index is used as seed
            parameters.append('seed = cms.untracked.int32(' + str(i) + ')')
            adjust_pset(args.cmsRunfile, run_name, parameters)

        # With --stoprun the files are only created: they are neither
        # executed nor marked for clean-up
        if args.stoprun:
            continue

        # Don't mark the files for cleanup if this is a resumed run
        if not args.resumerun:
            cleanupfiles.append(run_name)

        # Can only happen for a resumed run, where the file is expected
        # to exist already
        if not os.path.isfile(run_name):
            print('\'' + run_name + '\' not found. It will be skipped.')
            continue

        print('Calling\t\'cmsRun ' + run_name + '\'')
        if args.log:
            # run_name ends in 'py'; replacing that gives the '.log' name
            print('Writing ouput to log file: ' + run_name[:-2] + 'log')
            with open(run_name[:-2] + 'log', 'w') as run_log:
                processes.append(subprocess.Popen(['cmsRun', run_name], stdout=run_log, stderr=subprocess.STDOUT))
        else:
            processes.append(subprocess.Popen(['cmsRun', run_name]))

    # Wait for all processes to finish; exit statuses are not evaluated
    for process in processes:
        process.wait()
    if args.stoprun:
        print('--stoprun: kept run files and stopped before calling cmsRun')
    print('------------------')
    print('Run step finished.')
    print('------------------')



# Remove all temporary files collected so far unless they should be kept
if not args.keepfiles:
    cleanupandexit(cleanupfiles)