Warning, /HLTrigger/Tools/scripts/convertToRaw is written in an unsupported language. File is not indexed.
0001 #! /usr/bin/env python3
0002
0003 import argparse
0004 import glob
0005 import json
0006 import os
0007 import re
0008 import shutil
0009 import socket
0010 import subprocess
0011 import sys
0012
def cmsRun(config: str, verbose: bool, **args):
    """Run ``cmsRun`` on a configuration file, and exit the script if the job fails.

    The full command line is echoed to standard output before the job starts.

    Args:
        config: path of the configuration file passed to ``cmsRun``.
        verbose: if true, the job writes directly to the stdout and stderr of
            this process; otherwise its output is captured, and the captured
            stderr is written out only if the job fails.
        **args: additional options, each one appended to the command line in
            the form ``name=value``.

    Returns:
        None if the job succeeds. If the job fails, the script is terminated
        through ``sys.exit()`` using the return code of the job.
    """
    command = ['cmsRun', config]
    command.extend(name + '=' + str(value) for name, value in args.items())

    # echo the command, one argument per line
    sys.stdout.write(' \\\n '.join(command))
    sys.stdout.write('\n\n')

    # capture the output of the job only when not running in verbose mode
    capture = {} if verbose else {'capture_output': True, 'text': True}
    job = subprocess.run(command, **capture)

    code = job.returncode
    if code == 0:
        return

    # handle error conditions; a negative return code means that the job was killed by a signal
    if code < 0:
        sys.stderr.write('error: cmsRun was killed by signal %d\n' % -code)
    else:
        sys.stderr.write('error: cmsRun exited with error code %d\n' % code)
    if not verbose:
        # in verbose mode the job has already written its messages to the terminal
        sys.stderr.write('\n')
        sys.stderr.write(job.stderr)
    sys.exit(code)
0035
0036
class LuminosityBlockRange:
    """Inclusive range of (run, lumisection) pairs, parsed from a string.

    The accepted syntax is ``[RUN:]LUMI`` for a single value, or
    ``[RUN:]LUMI-[RUN:]LUMI`` for a range. Each RUN or LUMI can be a number,
    an empty string, ``min`` or ``max``. A bound that is empty, ``min``, ``max``
    or 0 is stored as 0 and means "no constraint". An empty string or ``all``
    leaves all four bounds at 0, so that every (run, lumisection) is in range.
    """

    def __init__(self, value: str = '') -> None:
        """Build the range described by `value`.

        Raises:
            ValueError: if `value` does not follow the expected syntax.
        """
        self.min_run = 0
        self.max_run = 0
        self.min_lumi = 0
        self.max_lumi = 0
        if value and value != 'all':
            ((self.min_run, self.min_lumi), (self.max_run, self.max_lumi)) = LuminosityBlockRange.parse_range(value)

    @staticmethod
    def parse_value(value: str) -> int:
        """Convert a single bound to an integer; '', 'min' and 'max' map to 0 (unconstrained)."""
        return 0 if value in ('', 'min', 'max') else int(value)

    @staticmethod
    def parse_value_pair(value: str) -> tuple[int, int]:
        """Parse ``[RUN:]LUMI`` into (run, lumi); a value without ':' is taken as the lumisection."""
        if value.count(':') > 1:
            raise ValueError('invalid syntax')
        (first, second) = value.split(':') if ':' in value else ('', value)
        return LuminosityBlockRange.parse_value(first), LuminosityBlockRange.parse_value(second)

    @staticmethod
    def parse_range(value: str) -> tuple[tuple[int, int], tuple[int, int]]:
        """Parse ``MIN-MAX`` into ((min_run, min_lumi), (max_run, max_lumi)).

        A value without '-' is used as both the lower and the upper bound.
        """
        if value.count('-') > 1:
            raise ValueError('invalid syntax')
        (first, second) = value.split('-') if '-' in value else (value, value)
        return LuminosityBlockRange.parse_value_pair(first), LuminosityBlockRange.parse_value_pair(second)

    def is_in_range(self, run: int, lumi: int) -> bool:
        """Return True if the given run and lumisection fall inside the range (bounds included)."""
        # lower bound: either the run is not constrained or is the first run, and the
        # lumisection is not below the minimum; or the run is past the first run
        above_min = (
            (self.min_run == 0 or self.min_run == run) and (self.min_lumi == 0 or self.min_lumi <= lumi) or
            (self.min_run != 0 and self.min_run < run)
        )
        # upper bound: mirror of the lower bound; the second clause must test max_run,
        # otherwise a range with an open start rejects every run before the last one
        below_max = (
            (self.max_run == 0 or self.max_run == run) and (self.max_lumi == 0 or self.max_lumi >= lumi) or
            (self.max_run != 0 and self.max_run > run)
        )
        return above_min and below_max
0072
0073
# default values
events_per_file = 100
events_per_lumi = 11655
output_directory = os.getcwd()

# describe the command line interface
parser = argparse.ArgumentParser(
    description='Convert RAW data from .root format to .raw format.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    'files', type=str, metavar='FILES', nargs='+',
    help='input files in .root format',
)
parser.add_argument(
    '-s', '--source', type=str, dest='raw_data_collection', metavar='TAG', default='rawDataCollector',
    help='name of the FEDRawDataCollection to be repacked into RAW format',
)
parser.add_argument(
    '-o', '--output', type=str, dest='output_directory', metavar='PATH', default=output_directory,
    help='base path to store the output files; subdirectories based on the run number are automatically created',
)
parser.add_argument(
    '-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file,
    help='split the output into files with at most EVENTS events',
)
parser.add_argument(
    '-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi,
    help='process at most EVENTS events in each lumisection',
)
parser.add_argument(
    '-r', '--range', type=LuminosityBlockRange, dest='range', metavar='[RUN:LUMI-RUN:LUMI]', default='all',
    help='process only the runs and lumisections in the given range',
)
parser.add_argument(
    '-v', '--verbose', dest='verbose', action='store_true', default=False,
    help='print additional information while processing the input files',
)
parser.add_argument(
    '-1', '--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False,
    help='assume that lumisections are not split across files (and disable --events_per_lumi)',
)

# parse the command line arguments and options, dropping one trailing '/' from the output path
args = parser.parse_args()
if args.output_directory:
    args.output_directory = args.output_directory.removesuffix('/')

# read the list of input files from the command line arguments;
# a plain path to an existing local file gets the 'file:' prefix, anything else is used as given
files = [
    'file:' + name if (':' not in name and not name.startswith('/store/') and os.path.exists(name)) else name
    for name in args.files
]
0096
# extract the list of runs and lumisections in the input files
class FileInfo:
    """Event count and set of input files recorded for one lumisection."""

    def __init__(self) -> None:
        # input files in which the lumisection was found
        self.files: set = set()
        # number of events in the lumisection, summed over the input files
        self.events: int = 0
0102
# patterns that mark the beginning and the end of the table of lumisections printed by edmFileUtil
header = re.compile(r'^ +Run +Lumi +# Events$')
empty = re.compile(r'^ *$')

# number of events and input files for each lumisection, indexed as content[run][lumi]
content = {}

for f in files:

    # run edmFileUtil --eventsInLumis ...
    print(f'preprocessing input file {f}')
    output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True)
    if args.verbose:
        print(output.stdout)

    # handle error conditions; a negative return code means that the job was killed by a signal
    if output.returncode != 0:
        if output.returncode < 0:
            sys.stderr.write('error: edmFileUtil was killed by signal %d\n' % -output.returncode)
        else:
            sys.stderr.write('error: edmFileUtil exited with error code %d\n' % output.returncode)
        # the output of edmFileUtil is always captured, so its stderr has to be
        # reported here also in verbose mode, otherwise the diagnostics are lost
        sys.stderr.write('\n')
        sys.stderr.write(output.stderr)
        sys.exit(output.returncode)

    # parse the output of edmFileUtil
    parsing = False
    for line in output.stdout.splitlines():
        if not parsing:
            # skip everything up to the header of the table
            if header.match(line):
                parsing = True
            continue

        if empty.match(line):
            # an empty line marks the end of the table
            parsing = False
            continue

        # each row of the table holds the run, the lumisection and its number of events
        run, lumi, events = tuple(map(int, line.split()))
        if not args.range.is_in_range(run, lumi):
            print(f' run {run}, lumisection {lumi} is outside of the given range and will be skipped')
            continue
        if events == 0:
            print(f' run {run}, lumisection {lumi} is empty and will be skipped')
            continue
        print(f' run {run}, lumisection {lumi} with {events} events will be processed')
        if run not in content:
            content[run] = {}
        if lumi not in content[run]:
            content[run][lumi] = FileInfo()
        # a lumisection can be found in more than one file: sum the events and record each file
        content[run][lumi].events += events
        content[run][lumi].files.add(f)
    print()
0158
# drop empty lumisections
# note: this may no longer be needed, but is left as a cross check
for run, lumis in content.items():
    # collect the keys first, as a dictionary cannot be modified while iterating over it
    empty_lumis = [lumi for lumi, info in lumis.items() if info.events == 0]
    for lumi in empty_lumis:
        lumis.pop(lumi)

# drop empty runs
empty_runs = [run for run, lumis in content.items() if not lumis]
for run in empty_runs:
    content.pop(run)
0170
# locate the CMSSW configuration file
config_name = 'HLTrigger/Tools/python/convertToRaw.py'
current_area = os.environ.get('CMSSW_BASE')
release_area = os.environ.get('CMSSW_RELEASE_BASE')

# fail with a clear message, instead of a KeyError traceback, if neither variable is set
if current_area is None and release_area is None:
    sys.stderr.write('error: CMSSW_BASE and CMSSW_RELEASE_BASE are not set, cannot locate the configuration file %s\n' % config_name)
    sys.exit(1)

# look for the configuration file in the current area first, then in the release area
for area in (current_area, release_area):
    if area is None:
        continue
    config_py = area + '/src/' + config_name
    if os.path.exists(config_py):
        break
else:
    # the loop ended without finding the file in any of the areas
    sys.stderr.write('error: cannot find the configuration file %s\n' % config_name)
    sys.exit(1)
0182
# convert the input data to FED RAW data format
converted_files = []

# process each run
for run in sorted(content):

    # .raw files produced for this run; the list is reset for every run because the
    # consistency check and the cff file written below are specific to a single run
    converted_files = []

    # create the output directory structure, removing any previous output for this run
    run_path = args.output_directory + f'/run{run:06d}'
    shutil.rmtree(run_path, ignore_errors=True)
    os.makedirs(run_path)

    if args.one_file_per_lumi:
        # process the whole run
        lumis = sorted(content[run])
        print('found run %d, lumis %d-%d, with %d events' % (run, min(lumis), max(lumis), sum(content[run][lumi].events for lumi in lumis)))
        cmsRun(config_py, args.verbose, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, rawDataCollection = args.raw_data_collection, outputPath = args.output_directory)
        # collect the .raw files of all the lumisections of the run; the directory part
        # is escaped so that special characters in the output path are matched literally
        converted_files = sorted(glob.glob(glob.escape(run_path) + f'/run{run:06d}_ls*_*.raw'))

    else:
        # process lumisections individually, then merge the output
        summary = {
            'data': [0, 0, 0, 0],  # [ 'events', 'files', 'lumisections', 'last lumisection' ]
            'definition': run_path + '/jsd/EoR.jsd',
            'source': socket.getfqdn() + '_' + str(os.getpid())
        }

        for lumi in sorted(content[run]):

            # process individual lumisections, each one in its own intermediate directory
            print('found run %d, lumi %d, with %d events' % (run, lumi, content[run][lumi].events))
            lumi_base_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}'
            shutil.rmtree(lumi_base_path, ignore_errors=True)
            os.makedirs(lumi_base_path)
            # the input files are kept in a set: sort them to make the job reproducible
            cmsRun(config_py, args.verbose, inputFiles = ','.join(sorted(content[run][lumi].files)), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, rawDataCollection = args.raw_data_collection, outputPath = lumi_base_path)

            # merge all lumisections data

            # number of events expected to be processed; a negative limit means no limit
            if args.events_per_lumi < 0:
                expected_events = content[run][lumi].events
            else:
                expected_events = min(args.events_per_lumi, content[run][lumi].events)

            # number of files expected to be created (integer division, rounded up)
            expected_files = (expected_events + args.events_per_file - 1) // args.events_per_file

            # find the files produced by the conversion job and move them to the per-run path
            lumi_path = lumi_base_path + f'/run{run:06d}'

            # jsd files: keep the copy from the first lumisection, discard the others
            jsd_path = lumi_path + '/jsd'
            if not os.path.exists(run_path + '/jsd'):
                shutil.move(jsd_path, run_path)
            else:
                shutil.rmtree(jsd_path)

            # lumisection data and EoLS files
            lumi_files = sorted(glob.glob(glob.escape(lumi_path) + f'/run{run:06d}_ls{lumi:04d}_*'))
            for f in lumi_files:
                target = run_path + f.removeprefix(lumi_path)
                shutil.move(f, target)
                if f.endswith('.raw'):
                    converted_files.append(target)

            # read the partial EoR file
            eor_file = lumi_path + f'/run{run:06d}_ls0000_EoR.jsn'
            with open(eor_file) as f:
                eor = json.load(f)
            produced_events = int(eor['data'][0])
            produced_files = int(eor['data'][1])
            produced_lumis = int(eor['data'][2])
            produced_last_lumi = int(eor['data'][3])

            # cross check the output of the job against the expectations, then update the run summary
            assert produced_events == expected_events
            assert produced_files == expected_files
            assert produced_lumis == 1
            assert produced_last_lumi == lumi
            summary['data'][0] += expected_events
            summary['data'][1] += expected_files
            summary['data'][2] += 1
            summary['data'][3] = lumi
            os.remove(eor_file)

            # remove the intermediate directory
            shutil.rmtree(lumi_base_path, ignore_errors=True)

        # write the final EoR file; this is done only in this mode, where the summary is built
        # implemented by hand instead of using json.dump() to match the style used by the DAQ tools
        assert len(converted_files) == summary['data'][1]
        eor_file = run_path + f'/run{run:06d}_ls0000_EoR.jsn'
        with open(eor_file, 'w') as file:
            file.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source']))

    # mark the .raw files as not executable
    for f in converted_files:
        os.chmod(f, 0o644)

    # write a cff file for processing the converted files
    cff_file = args.output_directory + f'/run{run:06d}_cff.py'
    with open(cff_file, 'w') as file:
        file.write("""import FWCore.ParameterSet.Config as cms

from EventFilter.Utilities.FedRawDataInputSource_cfi import source as _source
source = _source.clone(
eventChunkSize = 200, # MB
eventChunkBlock = 200, # MB
numBuffers = 4,
maxBufferedFiles = 4,
fileListMode = True,
fileNames = (
%s
)
)

from EventFilter.Utilities.EvFDaqDirector_cfi import EvFDaqDirector as _EvFDaqDirector
EvFDaqDirector = _EvFDaqDirector.clone(
buBaseDir = '%s',
runNumber = %d
)

from EventFilter.Utilities.FastMonitoringService_cfi import FastMonitoringService as _FastMonitoringService
FastMonitoringService = _FastMonitoringService.clone()
""" % ('\n'.join(" '" + f + "'," for f in converted_files), args.output_directory, run))

# all done
0309 # all done