1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
|
#!/usr/bin/env python3
import argparse
import fnmatch
import os
import re
import shutil
import subprocess
import sys
import time
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
import Configuration.Geometry.defaultPhase2ConditionsEra_cff as _settings
# Defaults for the command-line options declared further down, all derived
# from DEFAULT_VERSION of the Phase-2 settings module imported above.
# get_era_and_conditions() returns a pair; the first element is used as the
# default GlobalTag. The second element (_PH2_ERA) is not referenced by any
# other statement visible in this script.
_PH2_GLOBAL_TAG, _PH2_ERA = _settings.get_era_and_conditions(_settings.DEFAULT_VERSION)
# Default geometry name: the literal prefix "Extended" followed by DEFAULT_VERSION
_PH2_GEOMETRY = f"Extended{_settings.DEFAULT_VERSION}"
# Default era name, read from the settings table: properties['Run4'][DEFAULT_VERSION]['Era']
_PH2_ERA_NAME = _settings.properties['Run4'][_settings.DEFAULT_VERSION]['Era']
def print_help():
    """Print a usage summary of the script's command-line options to stdout.

    The option list and defaults mirror the ``argparse`` declarations of this
    script: every option is optional, ``--events`` defaults to 10 and
    ``--era`` is documented alongside ``--globaltag`` and ``--geometry``.
    """
    help_text = """
This script runs HLT test configurations for the CMS Phase2 upgrade.
Arguments:
--globaltag : GlobalTag for the CMS conditions (default: taken from the default Phase2 conditions)
--geometry : Geometry setting for the CMS process (default: "Extended" + default Phase2 version)
--era : Era setting for the CMS process (default: era of the default Phase2 version)
--events : Number of events to process (default: 10)
--threads : Number of threads to use (default: 1)
--parallelJobs : Number of parallel cmsRun and hltDiff executions (default: 4)
--restrictPathsTo : Restrict paths to be run to a user defined subset (e.g. "HLT_Ele*")
--procModifiers : Optionally use one (or more) cmssw processModifier
--cachedInput : Optionally use an existing RAW data file (do not regenerate it from scratch)
--dryRun : Optionally do not run any of the configuration created
Example usage:
hltPhase2UpgradeIntegrationTests --globaltag auto:phase2_realistic_T33 --geometry Extended2026D110 --events 10 --threads 4
"""
    print(help_text)
def run_command(command, log_file=None, workdir=None):
    """Run a shell command and report its exit status.

    Args:
        command: Command line, executed through the shell.
        log_file: Path of a file that receives the command's stdout and
            stderr (the file is truncated first). When ``None`` the output
            is not redirected and goes wherever this process's output goes.
        workdir: Directory to run the command in; ``None`` keeps the
            current working directory.

    Returns:
        0 when the command succeeds, otherwise its non-zero exit code.

    Raises:
        OSError: if ``log_file`` cannot be opened for writing.
    """
    print(f"Running command: {command}")
    try:
        if log_file is None:
            # No log requested: do not try to open(None), just inherit the streams
            subprocess.run(command, shell=True, check=True, cwd=workdir)
        else:
            with open(log_file, "w") as log:
                subprocess.run(command, shell=True, check=True, cwd=workdir, stdout=log, stderr=log)
    except subprocess.CalledProcessError as e:
        print(f"Error running command: {e}")
        return e.returncode  # Return the error code
    return 0  # Return 0 for success
def compare_single_file(root_file, base_root_file, num_events, output_dir):
    """Compare one per-path output file with the reference file using ``hltDiff``.

    Args:
        root_file: Name of the ROOT file to check, relative to ``output_dir``.
        base_root_file: Path of the reference ROOT file (given to ``hltDiff -o``).
        num_events: Number of events that hltDiff is expected to report as matching.
        output_dir: Directory that contains ``root_file``.

    Returns:
        ``None`` when the hltDiff output contains the line stating that all
        ``num_events`` events match and none has different HLT results;
        otherwise an error message string.
    """
    root_path = os.path.join(output_dir, root_file)
    print(f"Comparing {root_path} with {base_root_file} using hltDiff...")

    # Pass the arguments as a list (no shell) so that file names containing
    # spaces or shell metacharacters reach hltDiff unchanged.
    hlt_diff_command = ["hltDiff", "-o", base_root_file, "-n", root_path]
    try:
        result = subprocess.run(hlt_diff_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as exc:
        # hltDiff is missing or not executable: report it like any other failure
        return f"Error: could not run hltDiff for {root_file}: {exc}"

    # Decode leniently: a stray undecodable byte must not abort the comparison
    output = result.stdout.decode("utf-8", errors="replace")
    print(output)  # Print for debug purposes

    # Use a dynamic check based on the number of events configured
    expected_match_string = f"Found {num_events} matching events, out of which 0 have different HLT results"
    if expected_match_string not in output:
        # Surface whatever hltDiff wrote to stderr to help diagnose the failure
        diagnostics = result.stderr.decode("utf-8", errors="replace").strip()
        if diagnostics:
            print(diagnostics)
        return f"Error: {root_file} has different HLT results!"

    return None  # Return None if no issues are found
# Command-line interface: each entry is (flag, keyword arguments for add_argument).
# The options are registered in the order listed here.
parser = argparse.ArgumentParser(description="Run HLT Test Configurations")
_cli_options = (
    ("--globaltag", dict(default=_PH2_GLOBAL_TAG, help="GlobalTag for the CMS conditions")),
    # The geometry default is generated from the default Phase-2 version
    ("--geometry", dict(default=_PH2_GEOMETRY, help="Geometry setting for the CMS process")),
    ("--era", dict(default=_PH2_ERA_NAME, help="Era setting for the CMS process")),
    ("--events", dict(type=int, default=10, help="Number of events to process")),
    ("--parallelJobs", dict(type=int, default=4, help="Number of parallel cmsRun HLT jobs")),
    ("--threads", dict(type=int, default=1, help="Number of threads to use")),
    ("--restrictPathsTo", dict(nargs='+', default=[], help="List of HLT paths to restrict to")),
    ("--cachedInput", dict(default=None, help="Predefined input file to use instead of running TTbar GEN,SIM step")),
    ("--procModifiers", dict(default=None, help="Optional process modifier for cmsDriver")),
    ("--dryRun", dict(action="store_true", help="Only generate configurations without running them")),
)
for _flag, _flag_settings in _cli_options:
    parser.add_argument(_flag, **_flag_settings)
# Step 0: Capture the start time and print the start timestamp
start_time = time.time()
start_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("------------------------------------------------------------")
print(f"Script started at {start_timestamp}")
print("------------------------------------------------------------")

# Parse arguments
try:
    args = parser.parse_args()
except SystemExit:
    # argparse raises SystemExit for -h/--help (status 0) and for invalid
    # arguments (status 2). Show the extended help in both cases, then
    # re-raise so that a usage error is not reported as success.
    print_help()
    raise

global_tag = args.globaltag
era = args.era
geometry = args.geometry
num_events = args.events
num_threads = args.threads
num_parallel_jobs = args.parallelJobs
restrict_paths_to = args.restrictPathsTo
proc_modifiers = args.procModifiers  # Store the procModifiers option
# Show the effective configuration: a centred title, the mandatory settings
# as "label: value" rows, then the optional settings that were actually given.
print(f"{'Configuration Summary':^40}")
print("=" * 40)
summary_rows = (
    ("Global Tag", global_tag),
    ("Geometry", geometry),
    ("Era", era),
    ("Num Events", num_events),
    ("Num Threads", num_threads),
    ("Num Parallel Jobs", num_parallel_jobs),
)
for summary_label, summary_value in summary_rows:
    print(f"{summary_label}: {summary_value}")

# Optional settings are only listed when provided
if restrict_paths_to:
    print(f"Restricting paths to: {', '.join(restrict_paths_to)}")
if proc_modifiers:
    print(f"Proc Modifiers: {proc_modifiers}")

# Report where the input events come from
if args.cachedInput:
    input_note = f"Using cached input file: {args.cachedInput}"
else:
    input_note = "Using regenerated GEN-SIM-DIGI-RAW file from scratch"
print(input_note)
print("=" * 40)
# All generated configurations, logs and ROOT files live in this directory.
# Anything left there from an earlier run is wiped so the run starts clean.
output_dir = "hlt_test_configs"
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)
# Assemble the cmsDriver.py command that creates the base HLT configuration.
# The two variants differ only in the input file and in a few options that
# are added when a cached input file is supplied.
if args.cachedInput:
    hlt_input_file = args.cachedInput
    input_specific_options = (
        f"--mc --nThreads {num_threads} "
        "--processName=HLTX "
        "--inputCommands='keep *, drop *_hlt*_*_HLT, drop triggerTriggerFilterObjectWithRefs_l1t*_*_HLT' "
    )
else:
    # No cached input: read the file written by the TTbar generation step
    hlt_input_file = f"file:{output_dir}/step1.root"
    input_specific_options = f"--nThreads {num_threads} "

base_cmsdriver_command = "".join([
    "cmsDriver.py Phase2 -s L1P2GT,HLT:75e33_timing ",
    f"--conditions {global_tag} -n {num_events} --eventcontent FEVTDEBUGHLT ",
    f"--geometry {geometry} --era {era} --filein {hlt_input_file} --fileout {output_dir}/hlt.root --no_exec ",
    input_specific_options,
    "--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 ",
    '--customise_commands "process.options.wantSummary=True"',
])

# Append the process modifiers when requested
if proc_modifiers:
    base_cmsdriver_command = f"{base_cmsdriver_command} --procModifiers {proc_modifiers}"

# Files produced by the next steps: the base configuration, its fully
# expanded dump, and the log that collects edmConfigDump's stderr
base_config_file = os.path.join(output_dir, "Phase2_L1P2GT_HLT.py")
dumped_config_file = os.path.join(output_dir, "Phase2_dump.py")
log_file = os.path.join(output_dir, "hlt.log")
# Step 1: Run the cmsDriver.py command to generate the base configuration in the output directory
print(f"Running cmsDriver.py to generate the base config: {base_config_file}")
cmsdriver_result = subprocess.run(base_cmsdriver_command, shell=True, cwd=output_dir)
if cmsdriver_result.returncode != 0:
    # Without a base configuration none of the following steps can work
    print(f"cmsDriver.py failed with exit code {cmsdriver_result.returncode}. Exiting...")
    sys.exit(1)

# Step 2: Use edmConfigDump to dump the full configuration
print(f"Dumping the full configuration using edmConfigDump to {dumped_config_file}")
with open(dumped_config_file, "w") as dump_file, open(log_file, "w") as log:
    dump_result = subprocess.run(f"edmConfigDump {base_config_file}", shell=True, stdout=dump_file, stderr=log)
if dump_result.returncode != 0:
    # A failed dump leaves an incomplete file; stop and point at the captured stderr
    print(f"edmConfigDump failed with exit code {dump_result.returncode}. Check {log_file} for details.")
    sys.exit(1)

# Step 3: Extract the list of HLT paths from the dumped configuration
print(f"Extracting HLT paths from {dumped_config_file}...")

# Read the dumped configuration to extract HLT paths
with open(dumped_config_file, "r") as f:
    config_content = f.read()

# Collect every process.HLT_*, process.L1T_* and process.DST_* name that
# appears anywhere in the dumped configuration
unsorted_hlt_paths = re.findall(r"process\.(HLT_[A-Za-z0-9_]+|L1T_[A-Za-z0-9_]+|DST_[A-Za-z0-9_]+)", config_content)

# Remove duplicates and sort alphabetically
hlt_paths = sorted(set(unsorted_hlt_paths))

if not hlt_paths:
    print("No HLT paths found in the schedule!")
    sys.exit(1)

print(f"Found {len(hlt_paths)} HLT paths.")
# Step 3b: When --restrictPathsTo is given, keep only the paths that match
# at least one of the shell-style wildcard patterns
if restrict_paths_to:
    valid_paths = set()
    for wildcard in restrict_paths_to:
        hits = fnmatch.filter(hlt_paths, wildcard)
        if hits:
            valid_paths.update(hits)
        else:
            # A pattern that selects nothing is reported but is not fatal by itself
            print(f"Warning: No paths matched the pattern: {wildcard}")

    # Nothing selected at all: there is nothing to test
    if not valid_paths:
        print("Error: None of the specified patterns matched any paths. Exiting.")
        exit(1)

    # From here on work with the restricted, alphabetically sorted selection
    valid_paths = sorted(valid_paths)
    hlt_paths = valid_paths

# Report the number of paths used by the following steps
print(f"Using {len(hlt_paths)} HLT paths after applying restrictions.")
# Step 4: Make sure the dumped configuration contains a
# "process.schedule = cms.Schedule([...])" statement (optionally written with
# a leading "*" before the list) before generating per-path configurations
schedule_match = re.search(
    r"(process\.schedule\s*=\s*cms\.Schedule\(\*?\s*\[)([\s\S]+?)(\]\s*\))",
    config_content,
)
if schedule_match:
    print("Matched schedule section.")
else:
    print("No schedule match found after tweaking regex! Exiting...")
    exit(1)
# Step 5: Generate N configurations by modifying the dumped config to keep only one path at a time

# The patterns do not depend on the path, so compile them once outside the loop
schedule_pattern = re.compile(r"(process\.schedule\s*=\s*cms\.Schedule\(\*?\s*\[)([\s\S]+?)(\]\s*\))")
output_file_pattern = re.compile(r"fileName = cms\.untracked\.string\('.*'\)")
trigger_path_pattern = re.compile(r"process\.(HLT_|L1T_)")

for path_name in hlt_paths:
    # Create a new configuration file for this path
    config_filename = os.path.join(output_dir, f"Phase2_{path_name}.py")

    def replace_hlt_paths(match):
        """Rebuild the schedule list keeping non-HLT/L1T entries plus the current path.

        ``match`` is a match of ``schedule_pattern``: group 1 is the text up
        to the opening bracket, group 2 the comma-separated entries and
        group 3 the closing bracket and parenthesis.
        """
        all_paths = match.group(2).split(", ")
        wanted_entry = f"process.{path_name}"
        # Compare whitespace-stripped entries for exact equality: a substring
        # test would also keep every path whose name merely starts with the
        # current one, and an unstripped entry (e.g. right after "[ ") would
        # escape the HLT_/L1T_ filter.
        # NOTE(review): DST_ paths are collected by the extraction regex but
        # are not removed by this HLT_/L1T_ filter, so every DST_ entry stays
        # in each generated schedule - confirm this is intended.
        filtered_paths = [
            path for path in all_paths
            if not trigger_path_pattern.match(path.strip()) or path.strip() == wanted_entry
        ]
        return match.group(1) + ", ".join(filtered_paths) + match.group(3)

    # Remove all HLT and L1T paths except the current one from the schedule
    modified_content = schedule_pattern.sub(replace_hlt_paths, config_content)

    # Modify the fileout parameter to save a unique root file for each path
    modified_content = output_file_pattern.sub(
        f"fileName = cms.untracked.string('{output_dir}/{path_name}.root')",
        modified_content,
    )

    # Write the new config to a file
    with open(config_filename, "w") as new_config:
        new_config.write(modified_content)

    print(f"Generated config: {config_filename}")

print(f"Generated {len(hlt_paths)} configuration files in the {output_dir} directory.")
# Step 6: Run cmsDriver.py for TTbar GEN,SIM steps and save the output in output_dir
ttbar_config_file = os.path.join(output_dir, "TTbar_GEN_SIM_step.py")
ttbar_log_file = os.path.join(output_dir, "ttbar_gen_sim.log")
ttbar_command = (
    f"cmsDriver.py TTbar_14TeV_TuneCP5_cfi -s GEN,SIM,DIGI:pdigi_valid,L1TrackTrigger,L1,L1P2GT,DIGI2RAW -n {num_events} "
    f"--conditions {global_tag} --beamspot DBrealisticHLLHC --datatier GEN-SIM-DIGI-RAW "
    f"--eventcontent FEVTDEBUG --geometry {geometry} --era {era} "
    f"--relval 9000,100 --fileout {output_dir}/step1.root --nThreads {num_threads} "
    f"--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 "
    f"--python_filename {ttbar_config_file}"
)

# The input is only generated when no cached file was given and this is not a dry run
if not args.cachedInput and not args.dryRun:
    print("Running TTbar GEN,SIM step...")
    ttbar_status = run_command(ttbar_command, log_file=ttbar_log_file)
    if ttbar_status != 0:
        # The HLT jobs read the file produced here, so continuing is pointless
        print(f"TTbar GEN,SIM step failed with exit code {ttbar_status}. Check {ttbar_log_file} for details.")
        sys.exit(1)

# Directory containing HLT test configurations
hlt_configs_dir = output_dir

# Check if the directory exists
if not os.path.exists(hlt_configs_dir):
    print(f"Directory {hlt_configs_dir} not found! Exiting...")
    sys.exit(1)
# Step 7: Helper that executes cmsRun for a single configuration file
def run_cmsrun(config_file):
    """Run ``cmsRun`` on ``config_file`` and return the resulting exit code.

    The log is written to ``<output_dir>/<name>.log`` where ``<name>`` is the
    file name of ``config_file`` with every "Phase2_" and ".py" removed
    (e.g. "Phase2_HLT_IsoMu24_FromL1TkMuon.py" -> "HLT_IsoMu24_FromL1TkMuon").

    Returns:
        0 when cmsRun succeeds, otherwise the non-zero code reported by
        ``run_command``.
    """
    stem = os.path.basename(config_file).replace("Phase2_", "").replace(".py", "")
    job_log = os.path.join(output_dir, f"{stem}.log")

    status = run_command(f"cmsRun {config_file}", log_file=job_log)
    if status == 0:
        print(f"cmsRun completed for {config_file}")
        return 0

    print(f"cmsRun failed for {config_file} with exit code {status}. Check {job_log} for details.")
    return status
# Step 8: Collect the configurations to run: every "Phase2_*.py" file in the
# configuration directory except the full dump, which is only a template
config_files = [
    name
    for name in os.listdir(hlt_configs_dir)
    if name != "Phase2_dump.py" and name.startswith("Phase2_") and name.endswith(".py")
]
print(f"Found {len(config_files)} configuration files in {hlt_configs_dir}.")

# In dry-run mode the work ends once the configurations exist
if args.dryRun:
    print("Dry run mode activated. All configurations have been created.")
    exit(0)
# Execute cmsRun for every configuration, at most num_parallel_jobs at a
# time, and remember whether any job failed or raised
error_occurred = False
with ThreadPoolExecutor(max_workers=num_parallel_jobs) as executor:
    # Map each submitted job back to the configuration it runs
    futures = {}
    for config_file in config_files:
        job = executor.submit(run_cmsrun, os.path.join(output_dir, config_file))
        futures[job] = config_file

    for future in as_completed(futures):
        config_file = futures[future]
        try:
            result_code = future.result()
        except Exception as exc:
            error_occurred = True
            print(f"cmsRun for {config_file} generated an exception: {exc}")
        else:
            if result_code != 0:
                error_occurred = True
                print(f"cmsRun for {config_file} exited with code {result_code}")

# A single failing job makes the whole test fail
if error_occurred:
    separator = "-" * 40
    print(separator)
    print("One or more cmsRun jobs failed. Exiting with failure.")
    print(separator)
    exit(1)

print("All cmsRun jobs submitted.")
# Step 9: Compare all HLT root files using hltDiff
def compare_hlt_results(input_dir, num_events, max_workers=4):
    """Compare every per-path ROOT file in ``input_dir`` with ``hlt.root``.

    The files checked are those ending in ".root" whose name starts with
    "HLT_" or "L1T_". Each one is handed to ``compare_single_file`` on a
    thread pool. All comparisons are evaluated before failing, so every
    differing file is reported rather than only the first one.

    Args:
        input_dir: Directory holding ``hlt.root`` and the per-path ROOT files.
        num_events: Number of events each comparison is expected to match.
        max_workers: Maximum number of comparisons running concurrently.

    Raises:
        SystemExit: with status 1 when ``hlt.root`` is missing or when at
            least one comparison returns an error message.
    """
    # Sorted so that the submission order does not depend on the directory listing
    root_files = sorted(
        f for f in os.listdir(input_dir)
        if f.endswith(".root") and f.startswith(("HLT_", "L1T_"))
    )

    # Base file (hltrun output) to compare against
    base_root_file = os.path.join(input_dir, "hlt.root")
    if not os.path.exists(base_root_file):
        print(f"Base root file {base_root_file} not found! Exiting...")
        sys.exit(1)

    if not root_files:
        # Nothing to compare is not an error, but it should not go unnoticed
        print(f"Warning: no HLT_*/L1T_* ROOT files found in {input_dir}; nothing to compare.")

    failures = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(compare_single_file, root_file, base_root_file, num_events, input_dir)
            for root_file in root_files
        ]
        # Collect results as they complete; a non-empty result is an error message
        for future in as_completed(futures):
            result = future.result()
            if result:
                failures.append(result)

    if failures:
        print("-" * 40)
        for message in failures:
            print(message)
        print("-" * 40)
        sys.exit(1)

    print("All HLT comparisons passed with no differences.")
# Step 10: Once all cmsRun jobs are completed, perform the hltDiff comparisons,
# using the same degree of parallelism as the cmsRun jobs
print("Performing HLT result comparisons...")
compare_hlt_results(output_dir, num_events, num_parallel_jobs)

# Step 11: Capture the end time and print the end timestamp
end_time = time.time()
end_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("------------------------------------------------------------")
print(f"Script ended at {end_timestamp}")
print("------------------------------------------------------------")

# Step 12: Calculate the total execution time and print it.
# The HH:MM:SS string is built by hand so that the hour count keeps growing
# beyond 24 instead of wrapping around as a time-of-day format would.
total_time = end_time - start_time
elapsed_hours, elapsed_remainder = divmod(int(total_time), 3600)
elapsed_minutes, elapsed_seconds = divmod(elapsed_remainder, 60)
formatted_total_time = f"{elapsed_hours:02d}:{elapsed_minutes:02d}:{elapsed_seconds:02d}"
print("------------------------------------------------------------")
print(f"Total execution time: {formatted_total_time}")
print("------------------------------------------------------------")
print("All steps completed successfully.")
|