Project CMSSW displayed by LXR

0001 #! /usr/bin/env python3
0002
0003 import argparse
0004 import glob
0005 import json
0006 import os
0007 import re
0008 import shutil
0009 import socket
0010 import subprocess
0011 import sys
0012
def cmsRun(config: str, verbose: bool, **args):
    """Run ``cmsRun`` on a configuration file and exit the script if it fails.

    The full command line is echoed to stdout (one argument per line) before
    the job is started.

    Args:
        config: configuration file, passed to ``cmsRun`` as its first argument.
        verbose: if true, the output of the job is not captured; otherwise
            stdout and stderr are captured, and stderr is written out only if
            the job fails.
        **args: each keyword is appended to the command line as ``name=value``.

    Exits through ``sys.exit()`` with the job's return code when it is not zero.
    """
    command = ['cmsRun', config]
    command.extend(f'{name}={value!s}' for name, value in args.items())

    # echo the command, one argument per line, joined by shell-style continuations
    sys.stdout.write(' \\\n  '.join(command))
    sys.stdout.write('\n\n')

    if verbose:
        result = subprocess.run(command, stdout=None, stderr=None)
    else:
        result = subprocess.run(command, capture_output=True, text=True)

    code = result.returncode
    if code == 0:
        return

    # a negative return code means that the job was terminated by a signal
    if code < 0:
        sys.stderr.write('error: cmsRun was killed by signal %d\n' % -code)
    else:
        sys.stderr.write('error: cmsRun exited with error code %d\n' % code)

    # in verbose mode stderr was not captured, so there is nothing to replay
    if not verbose:
        sys.stderr.write('\n')
        sys.stderr.write(result.stderr)
    sys.exit(code)
0035
0036
class LuminosityBlockRange:
    """Inclusive range of (run, lumisection) pairs parsed from a string.

    The accepted syntax is ``[RUN:]LUMI[-[RUN:]LUMI]``; an empty string or
    ``all`` selects everything. A bound equal to 0 means "not set": an empty
    field, ``min`` and ``max`` are all parsed as 0. A value without ``:`` is
    interpreted as a lumisection number, with the run left unset.
    """

    def __init__(self, value: str = '') -> None:
        """Build the range from its string representation.

        Raises:
            ValueError: if ``value`` does not follow the expected syntax.
        """
        self.min_run = 0
        self.max_run = 0
        self.min_lumi = 0
        self.max_lumi = 0
        if value and value != 'all':
            ((self.min_run, self.min_lumi), (self.max_run, self.max_lumi)) = LuminosityBlockRange.parse_range(value)

    @staticmethod
    def parse_value(value: str) -> int:
        """Convert a single field to an integer; '', 'min' and 'max' map to 0 (unset)."""
        return 0 if value in ('', 'min', 'max') else int(value)

    @staticmethod
    def parse_value_pair(value: str) -> tuple[int, int]:
        """Parse ``[RUN:]LUMI`` into a ``(run, lumi)`` pair.

        Raises:
            ValueError: if there is more than one ':' or a field is not a number.
        """
        if value.count(':') > 1:
            raise ValueError('invalid syntax')
        # without a ':' the whole value is the lumisection, and the run is unset
        (first, second) = value.split(':') if ':' in value else ('', value)
        return LuminosityBlockRange.parse_value(first), LuminosityBlockRange.parse_value(second)

    @staticmethod
    def parse_range(value: str) -> tuple[tuple[int, int], tuple[int, int]]:
        """Parse ``MIN-MAX`` into ``((min_run, min_lumi), (max_run, max_lumi))``.

        A value without '-' is used for both the lower and the upper bound.

        Raises:
            ValueError: if there is more than one '-' or a bound is malformed.
        """
        if value.count('-') > 1:
            raise ValueError('invalid syntax')
        (first, second) = value.split('-') if '-' in value else (value, value)
        return LuminosityBlockRange.parse_value_pair(first), LuminosityBlockRange.parse_value_pair(second)

    def is_in_range(self, run: int, lumi: int) -> bool:
        """Return whether the given run and lumisection fall inside the range."""
        # lower bound: either in the (possibly unset) first run at or after the
        # first lumisection, or in any run after the first one
        above_min = (
            ((self.min_run == 0 or self.min_run == run) and (self.min_lumi == 0 or self.min_lumi <= lumi))
            or (self.min_run != 0 and self.min_run < run)
        )
        # upper bound: either in the (possibly unset) last run at or before the
        # last lumisection, or in any run before the last one; this must test
        # max_run (not min_run), otherwise a range with only an upper run bound
        # rejects all the earlier runs
        below_max = (
            ((self.max_run == 0 or self.max_run == run) and (self.max_lumi == 0 or self.max_lumi >= lumi))
            or (self.max_run != 0 and self.max_run > run)
        )
        return above_min and below_max
0072
0073
# default values for the command line options
events_per_file = 100
events_per_lumi = 11655
output_directory = os.getcwd()

# describe the command line interface; the defaults are shown in the --help output
parser = argparse.ArgumentParser(
    description='Convert RAW data from .root format to .raw format.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    'files',
    metavar='FILES',
    type=str,
    nargs='+',
    help='input files in .root format',
)
parser.add_argument(
    '-s', '--source',
    dest='raw_data_collection',
    metavar='TAG',
    type=str,
    default='rawDataCollector',
    help='name of the FEDRawDataCollection to be repacked into RAW format',
)
parser.add_argument(
    '-o', '--output',
    dest='output_directory',
    metavar='PATH',
    type=str,
    default=os.getcwd(),
    help='base path to store the output files; subdirectories based on the run number are automatically created',
)
parser.add_argument(
    '-f', '--events_per_file',
    dest='events_per_file',
    metavar='EVENTS',
    type=int,
    default=events_per_file,
    help='split the output into files with at most EVENTS events',
)
parser.add_argument(
    '-l', '--events_per_lumi',
    dest='events_per_lumi',
    metavar='EVENTS',
    type=int,
    default=events_per_lumi,
    help='process at most EVENTS events in each lumisection',
)
# the string default is converted by argparse through the same type as a user-provided value
parser.add_argument(
    '-r', '--range',
    dest='range',
    metavar='[RUN:LUMI-RUN:LUMI]',
    type=LuminosityBlockRange,
    default='all',
    help='process only the runs and lumisections in the given range',
)
parser.add_argument(
    '-v', '--verbose',
    dest='verbose',
    action='store_true',
    default=False,
    help='print additional information while processing the input files',
)
parser.add_argument(
    '-1', '--one-file-per-lumi',
    dest='one_file_per_lumi',
    action='store_true',
    default=False,
    help='assume that lumisections are not split across files (and disable --events_per_lumi)',
)

# parse the command line, and drop a single trailing '/' from the output path
args = parser.parse_args()
if args.output_directory:
    args.output_directory = args.output_directory.removesuffix('/')

# build the list of input files: names without a protocol that are not under /store/
# and that exist on the local file system are prefixed with 'file:'
files = [
    'file:' + name
    if (':' not in name and not name.startswith('/store/') and os.path.exists(name))
    else name
    for name in args.files
]
0096
# extract the list of runs and lumisections in the input files
class FileInfo:
    """Event count and set of input files associated with one lumisection."""

    def __init__(self):
        # input files that contain events from this lumisection
        self.files = set()
        # total number of events, summed over all the files
        self.events = 0
0102
# the table printed by edmFileUtil starts with this header and ends with an empty line
header  = re.compile(r'^ +Run +Lumi +# Events$')
empty   = re.compile(r'^ *$')
# map each run number to a map from its lumisection numbers to the corresponding FileInfo
content = {}

for f in files:

    # run edmFileUtil --eventsInLumis ...
    print(f'preprocessing input file {f}')
    output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True)
    if args.verbose:
        print(output.stdout)

    # handle error conditions; a negative return code means that the process was killed by a signal
    if output.returncode != 0:
        if output.returncode < 0:
            sys.stderr.write('error: edmFileUtil was killed by signal %d\n' % -output.returncode)
        else:
            sys.stderr.write('error: edmFileUtil exited with error code %d\n' % output.returncode)
        # stderr is always captured here (capture_output=True), even in verbose mode,
        # so it must always be replayed or the actual error message would be lost
        sys.stderr.write('\n')
        sys.stderr.write(output.stderr)
        sys.exit(output.returncode)

    # parse the output of edmFileUtil
    parsing = False
    for line in output.stdout.splitlines():
        if not parsing:
            # skip everything until the header of the table
            if header.match(line):
                parsing = True
            continue

        if empty.match(line):
            # an empty line marks the end of the table
            parsing = False
            continue

        # each row of the table holds three integers: run, lumisection, number of events
        run, lumi, events = tuple(map(int, line.split()))
        if not args.range.is_in_range(run, lumi):
            print(f'  run {run}, lumisection {lumi} is outside of the given range and will be skipped')
            continue
        if events == 0:
            print(f'  run {run}, lumisection {lumi} is empty and will be skipped')
            continue
        print(f'  run {run}, lumisection {lumi} with {events} events will be processed')
        if run not in content:
            content[run] = {}
        if lumi not in content[run]:
            content[run][lumi] = FileInfo()
        # a lumisection may be split across files: accumulate the events and remember every file
        content[run][lumi].events += events
        content[run][lumi].files.add(f)
    print()
0158
# remove the lumisections without any events
# note: empty lumisections should no longer get this far, this is kept as a cross check
for run, lumis in content.items():
    # collect the keys first, as a dictionary cannot change size while being iterated
    empty_lumis = [lumi for lumi, info in lumis.items() if info.events == 0]
    for lumi in empty_lumis:
        lumis.pop(lumi)

# remove the runs that are left without any lumisections
empty_runs = [run for run, lumis in content.items() if not lumis]
for run in empty_runs:
    content.pop(run)
0170
# locate the CMSSW configuration file
config_name = 'HLTrigger/Tools/python/convertToRaw.py'

# read the areas with get() so that a missing CMSSW environment is reported
# with a clear message instead of a KeyError traceback
current_area = os.environ.get('CMSSW_BASE')
release_area = os.environ.get('CMSSW_RELEASE_BASE')
if not current_area and not release_area:
    sys.stderr.write('error: the CMSSW environment is not set up: CMSSW_BASE and CMSSW_RELEASE_BASE are not defined\n')
    sys.exit(1)

# look in the current area first, then fall back to the release area
config_py = None
for area in (current_area, release_area):
    if not area:
        continue
    candidate = area + '/src/' + config_name
    if os.path.exists(candidate):
        config_py = candidate
        break

if config_py is None:
    sys.stderr.write('error: cannot find the configuration file %s\n' % config_name)
    sys.exit(1)
0182
# convert the input data to FED RAW data format
converted_files = []

# process each run
for run in sorted(content):

    # list of the .raw files produced for this run; it is reset for every run so that
    # the EoR cross check and the cff file of a run never include files from previous runs
    converted_files = []

    # create the output directory structure, removing the leftovers from any previous conversion
    run_path = args.output_directory + f'/run{run:06d}'
    shutil.rmtree(run_path, ignore_errors=True)
    os.makedirs(run_path)

    if args.one_file_per_lumi:
        # process the whole run
        lumis = sorted(content[run])
        print('found run %d, lumis %d-%d, with %d events' % (run, min(lumis), max(lumis), sum(content[run][lumi].events for lumi in lumis)))
        cmsRun(config_py, args.verbose, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, rawDataCollection = args.raw_data_collection, outputPath = args.output_directory)
        # collect the .raw files of all the lumisections of this run (the run directory was
        # created empty just above, so everything in it comes from this job)
        converted_files = sorted(glob.glob(run_path + f'/run{run:06d}_ls*_*.raw'))

    else:
        # process lumisections individually, then merge the output
        summary = {
            'data': [0, 0, 0, 0],   # [ 'events', 'files', 'lumisections', 'last lumisection' ]
            'definition': run_path + '/jsd/EoR.jsd',
            'source': socket.getfqdn() + '_' + str(os.getpid())
        }

        for lumi in sorted(content[run]):

            # process individual lumisections, each in its own temporary directory
            print('found run %d, lumi %d, with %d events' % (run, lumi, content[run][lumi].events))
            lumi_base_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}'
            shutil.rmtree(lumi_base_path, ignore_errors=True)
            os.makedirs(lumi_base_path)
            cmsRun(config_py, args.verbose, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, rawDataCollection = args.raw_data_collection, outputPath = lumi_base_path)

            # merge all lumisections data

            # number of events expected to be processed (a negative limit means no limit)
            if args.events_per_lumi < 0:
                expected_events = content[run][lumi].events
            else:
                expected_events = min(args.events_per_lumi, content[run][lumi].events)

            # number of files expected to be created (rounded up)
            expected_files = (expected_events + args.events_per_file - 1) // args.events_per_file

            # find the files produced by the conversion job and move them to the per-run path
            lumi_path = lumi_base_path + f'/run{run:06d}'

            # jsd files: keep the copy from the first lumisection, discard the others
            jsd_path = lumi_path + '/jsd'
            if not os.path.exists(run_path + '/jsd'):
                shutil.move(jsd_path, run_path)
            else:
                shutil.rmtree(jsd_path)

            # lumisection data and EoLS files
            lumi_files = sorted(glob.glob(lumi_path + f'/run{run:06d}_ls{lumi:04d}_*'))
            for lumi_file in lumi_files:
                target = run_path + lumi_file.removeprefix(lumi_path)
                shutil.move(lumi_file, target)
                if lumi_file.endswith('.raw'):
                    converted_files.append(target)

            # read the partial EoR file, check it against the expectations, and update the per-run summary
            eor_file = lumi_path + f'/run{run:06d}_ls0000_EoR.jsn'
            with open(eor_file) as eor_input:
                eor = json.load(eor_input)
            produced_events = int(eor['data'][0])
            produced_files = int(eor['data'][1])
            produced_lumis = int(eor['data'][2])
            produced_last_lumi = int(eor['data'][3])
            assert produced_events == expected_events, f'expected {expected_events} events, found {produced_events}'
            assert produced_files == expected_files, f'expected {expected_files} files, found {produced_files}'
            assert produced_lumis == 1, f'expected 1 lumisection, found {produced_lumis}'
            assert produced_last_lumi == lumi, f'expected last lumisection {lumi}, found {produced_last_lumi}'
            summary['data'][0] += expected_events
            summary['data'][1] += expected_files
            summary['data'][2] += 1
            summary['data'][3] = lumi
            os.remove(eor_file)

            # remove the intermediate directory
            shutil.rmtree(lumi_base_path, ignore_errors=True)

        # write the final EoR file
        # implemented by hand instead of using json.dump() to match the style used by the DAQ tools
        assert len(converted_files) == summary['data'][1], f"expected {summary['data'][1]} .raw files, found {len(converted_files)}"
        eor_file = run_path + f'/run{run:06d}_ls0000_EoR.jsn'
        with open(eor_file, 'w') as file:
            file.write('{\n   "data" : [ "%d", "%d", "%d", "%d" ],\n   "definition" : "%s",\n   "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source']))

    # mark the .raw files as not executable
    for f in converted_files:
        os.chmod(f, 0o644)

    # write a cff file for processing the converted files
    cff_file = args.output_directory + f'/run{run:06d}_cff.py'
    with open(cff_file, 'w') as file:
        file.write("""import FWCore.ParameterSet.Config as cms

from EventFilter.Utilities.FedRawDataInputSource_cfi import source as _source
source = _source.clone(
    eventChunkSize = 200,   # MB
    eventChunkBlock = 200,  # MB
    numBuffers = 4,
    maxBufferedFiles = 4,
    fileListMode = True,
    fileNames = (
%s
    )
)

from EventFilter.Utilities.EvFDaqDirector_cfi import EvFDaqDirector as _EvFDaqDirector
EvFDaqDirector = _EvFDaqDirector.clone(
    buBaseDir = '%s',
    runNumber = %d
)

from EventFilter.Utilities.FastMonitoringService_cfi import FastMonitoringService as _FastMonitoringService
FastMonitoringService = _FastMonitoringService.clone()
""" % ('\n'.join("        '" + f + "'," for f in converted_files), args.output_directory, run))

    # all done