hltPhase2UpgradeIntegrationTests

CMSSW/HLTrigger/Configuration/scripts/hltPhase2UpgradeIntegrationTests

Line Code

Line	Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400	`#!/usr/bin/env python3` `import argparse` `import fnmatch` `import os` `import re` `import shutil` `import subprocess` `import sys` `import time` `from datetime import datetime` `from concurrent.futures import ThreadPoolExecutor, as_completed` `import Configuration.Geometry.defaultPhase2ConditionsEra_cff as _settings` `_PH2_GLOBAL_TAG, _PH2_ERA = _settings.get_era_and_conditions(_settings.DEFAULT_VERSION)` `# Automatically generate the default geometry from DEFAULT_VERSION` `_PH2_GEOMETRY = 
f"Extended{_settings.DEFAULT_VERSION}"` `# Get the actual era name from the version key` `_PH2_ERA_NAME = _settings.properties['Run4'][_settings.DEFAULT_VERSION]['Era']` `# Function to display help information` `def print_help():` `help_text = """` `This script runs HLT test configurations for the CMS Phase2 upgrade.` `Arguments:` `--globaltag : GlobalTag for the CMS conditions (required)` `--geometry : Geometry setting for the CMS process (required)` `--events : Number of events to process (default: 1)` `--threads : Number of threads to use (default: 1)` `--parallelJobs : Number of parallel cmsRun and hltDiff executions (default: 4)` `--restrictPathsTo : Restrict paths to be run to a user defined subset (e.g. "HLT_Ele")` `--procModifiers : Optionally use one (or more) cmssw processModifier` `--cachedInput : Optionally use an existing RAW data file (do not regenerate it from scratch)` `--dryRun : Optionally do not run any of the configuration created` `Example usage:` `hltPhase2UpgradeIntegrationTests --globaltag auto:phase2_realistic_T33 --geometry Extended2026D110 --events 10 --threads 4` `"""` `print(help_text)` `# Function to run a shell command and handle errors` `def run_command(command, log_file=None, workdir=None):` `try:` `print(f"Running command: {command}")` `with open(log_file, "w") as log:` `subprocess.run(command, shell=True, check=True, cwd=workdir, stdout=log, stderr=log)` `except subprocess.CalledProcessError as e:` `print(f"Error running command: {e}")` `return e.returncode # Return the error code` `return 0 # Return 0 for success` `# Function to compare the single file HLT results with the respect to a the base` `def compare_single_file(root_file, base_root_file, num_events, output_dir):` `root_path = os.path.join(output_dir, root_file)` `print(f"Comparing {root_path} with {base_root_file} using hltDiff...")` `# Run the hltDiff command` `hlt_diff_command = f"hltDiff -o {base_root_file} -n {root_path}"` `result = subprocess.run(hlt_diff_command, 
shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)` `# Decode and process the output` `output = result.stdout.decode("utf-8")` `print(output) # Print for debug purposes` `# Use a dynamic check based on the number of events configured` `expected_match_string = f"Found {num_events} matching events, out of which 0 have different HLT results"` `# Check if the output contains the expected match string` `if expected_match_string not in output:` `return f"Error: {root_file} has different HLT results!"` `return None # Return None if no issues are found` `# Argument Parser for command-line configuration` `parser = argparse.ArgumentParser(description="Run HLT Test Configurations")` `parser.add_argument("--globaltag", default=_PH2_GLOBAL_TAG, help="GlobalTag for the CMS conditions")` `parser.add_argument("--geometry", default=_PH2_GEOMETRY, help="Geometry setting for the CMS process") # Auto-generated geometry default` `parser.add_argument("--era", default=_PH2_ERA_NAME, help="Era setting for the CMS process") # Convert _PH2_ERA to string` `parser.add_argument("--events", type=int, default=10, help="Number of events to process")` `parser.add_argument("--parallelJobs", type=int, default=4, help="Number of parallel cmsRun HLT jobs")` `parser.add_argument("--threads", type=int, default=1, help="Number of threads to use")` `parser.add_argument("--restrictPathsTo", nargs='+', default=[], help="List of HLT paths to restrict to")` `parser.add_argument("--cachedInput", default=None, help="Predefined input file to use instead of running TTbar GEN,SIM step")` `parser.add_argument("--procModifiers", default=None, help="Optional process modifier for cmsDriver") # New argument for procModifiers` `parser.add_argument("--dryRun", action="store_true", help="Only generate configurations without running them")` `# Step 0: Capture the start time and print the start timestamp` `start_time = time.time()` `start_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")` 
`print("------------------------------------------------------------")` `print(f"Script started at {start_timestamp}")` `print("------------------------------------------------------------")` `# Parse arguments` `try:` `args = parser.parse_args()` `except SystemExit:` `print_help()` `sys.exit(0)` `global_tag = args.globaltag` `era = args.era` `geometry = args.geometry` `num_events = args.events` `num_threads = args.threads` `num_parallel_jobs = args.parallelJobs` `restrict_paths_to = args.restrictPathsTo` `proc_modifiers = args.procModifiers # Store the procModifiers option` `# Print the values in a nice formatted manner` `print(f"{'Configuration Summary':^40}")` `print("=" 40)` `print(f"Global Tag: {global_tag}")` `print(f"Geometry: {geometry}")` `print(f"Era: {era}")` `print(f"Num Events: {num_events}")` `print(f"Num Threads: {num_threads}")` `print(f"Num Parallel Jobs: {num_parallel_jobs}")` `# Print restrictPathsTo if provided` `if restrict_paths_to:` `print(f"Restricting paths to: {', '.join(restrict_paths_to)}")` `# Print procModifiers if provided` `if proc_modifiers:` `print(f"Proc Modifiers: {proc_modifiers}")` `if args.cachedInput:` `print(f"Using cached input file: {args.cachedInput}")` `else:` `print(f"Using regenerated GEN-SIM-DIGI-RAW file from scratch")` `print("=" * 40)` `# Directory where all test configurations will be stored` `output_dir = "hlt_test_configs"` `# If the directory exists, remove it first` `if os.path.exists(output_dir):` `shutil.rmtree(output_dir)` `# Create the directory` `os.makedirs(output_dir)` `# Define the cmsDriver.py command to create the base configuration` `# If cachedInput is provided, use it as the input for the base cmsDriver command` `if args.cachedInput:` `base_cmsdriver_command = (` `f"cmsDriver.py Phase2 -s L1P2GT,HLT:75e33_timing "` `f"--conditions {global_tag} -n {num_events} --eventcontent FEVTDEBUGHLT "` `f"--geometry {geometry} --era {era} --filein {args.cachedInput} --fileout {output_dir}/hlt.root --no_exec "` 
`f"--mc --nThreads {num_threads} "` `f"--processName=HLTX "` `f"--inputCommands='keep , drop _hlt__HLT, drop triggerTriggerFilterObjectWithRefs_l1t__HLT' "` `f"--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 "` `f'--customise_commands "process.options.wantSummary=True"'` `)` `else:` `base_cmsdriver_command = (` `f"cmsDriver.py Phase2 -s L1P2GT,HLT:75e33_timing "` `f"--conditions {global_tag} -n {num_events} --eventcontent FEVTDEBUGHLT "` `f"--geometry {geometry} --era {era} --filein file:{output_dir}/step1.root --fileout {output_dir}/hlt.root --no_exec "` `f"--nThreads {num_threads} "` `f"--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 "` `f'--customise_commands "process.options.wantSummary=True"'` `)` `# Add procModifiers if provided` `if proc_modifiers:` `base_cmsdriver_command += f" --procModifiers {proc_modifiers}"` `# The base configuration file and the dumped configuration file` `base_config_file = os.path.join(output_dir, "Phase2_L1P2GT_HLT.py")` `dumped_config_file = os.path.join(output_dir, "Phase2_dump.py")` `log_file = os.path.join(output_dir, "hlt.log")` `# Step 1: Run the cmsDriver.py command to generate the base configuration in the output directory` `print(f"Running cmsDriver.py to generate the base config: {base_config_file}")` `subprocess.run(base_cmsdriver_command, shell=True, cwd=output_dir)` `# Step 2: Use edmConfigDump to dump the full configuration` `print(f"Dumping the full configuration using edmConfigDump to {dumped_config_file}")` `with open(dumped_config_file, "w") as dump_file, open(log_file, "w") as log:` `subprocess.run(f"edmConfigDump {base_config_file}", shell=True, stdout=dump_file, stderr=log)` `# Step 3: Extract the list of HLT paths from the dumped configuration` `print(f"Extracting HLT paths from {dumped_config_file}...")` `# Read the dumped configuration to extract HLT paths` `with open(dumped_config_file, "r") as f:` `config_content = f.read()` `# Use regex to find all 
HLT and L1T paths defined in process.schedule` `unsorted_hlt_paths = re.findall(r"process\.(HLT_[A-Za-z0-9_]+\|L1T_[A-Za-z0-9_]+\|DST_[A-Za-z0-9_]+)", config_content)` `# Remove duplicates and sort alphabetically` `hlt_paths = sorted(set(unsorted_hlt_paths))` `if not hlt_paths:` `print("No HLT paths found in the schedule!")` `exit(1)` `print(f"Found {len(hlt_paths)} HLT paths.")` `# Step 3b: Restrict paths using wildcard patterns if the option is provided` `if restrict_paths_to:` `valid_paths = set() # Using a set to store matched paths` `# Iterate over each provided pattern` `for pattern in restrict_paths_to:` `# Use fnmatch to match the pattern to hlt_paths` `matched = fnmatch.filter(hlt_paths, pattern)` `valid_paths.update(matched) # Add matches to the set of valid paths` `# If no matches found, emit a warning for that pattern` `if not matched:` `print(f"Warning: No paths matched the pattern: {pattern}")` `# Convert the set to a sorted list` `valid_paths = sorted(valid_paths)` `# If no valid paths remain after filtering, exit` `if not valid_paths:` `print("Error: None of the specified patterns matched any paths. Exiting.")` `exit(1)` `# Update hlt_paths to contain only the valid ones` `hlt_paths = valid_paths` `# Continue using the restricted hlt_paths further down the script` `print(f"Using {len(hlt_paths)} HLT paths after applying restrictions.")` `# Step 4: Broadened Regex for Matching process.schedule` `schedule_match = re.search(` `r"(process\.schedule\s=\scms\.Schedule\(\?\s\[)([\s\S]+?)(\]\s\))",` `config_content` `)` `if not schedule_match:` `print("No schedule match found after tweaking regex! 
Exiting...")` `exit(1)` `else:` `print(f"Matched schedule section.")` `# Step 5: Generate N configurations by modifying the dumped config to keep only one path at a time` `for path_name in hlt_paths:` `# Create a new configuration file for this path` `config_filename = os.path.join(output_dir, f"Phase2_{path_name}.py")` `# Define regex to find all HLT paths in the cms.Schedule and replace them` `def replace_hlt_paths(match):` `all_paths = match.group(2).split(", ")` `# Keep non-HLT/L1T paths and include only the current HLT or L1T path` `filtered_paths = [path for path in all_paths if not re.match(r"process\.(HLT_\|L1T_)", path) or f"process.{path_name}" in path]` `return match.group(1) + ", ".join(filtered_paths) + match.group(3)` `# Apply the regex to remove all HLT and L1T paths except the current one` `modified_content = re.sub(` `r"(process\.schedule\s=\scms\.Schedule\(\?\s\[)([\s\S]+?)(\]\s\))",` `replace_hlt_paths,` `config_content` `)` `# Modify the fileout parameter to save a unique root file for each path` `modified_content = re.sub(` `r"fileName = cms\.untracked\.string\('.'\)",` `f"fileName = cms.untracked.string('{output_dir}/{path_name}.root')",` `modified_content` `)` `# Write the new config to a file` `with open(config_filename, "w") as new_config:` `new_config.write(modified_content)` `print(f"Generated config: {config_filename}")` `print(f"Generated {len(hlt_paths)} configuration files in the {output_dir} directory.")` `# Step 6: Run cmsDriver.py for TTbar GEN,SIM steps and save the output in output_dir` `ttbar_config_file = os.path.join(output_dir, "TTbar_GEN_SIM_step.py")` `ttbar_command = (` `f"cmsDriver.py TTbar_14TeV_TuneCP5_cfi -s GEN,SIM,DIGI:pdigi_valid,L1TrackTrigger,L1,L1P2GT,DIGI2RAW -n {num_events} "` `f"--conditions {global_tag} --beamspot DBrealisticHLLHC --datatier GEN-SIM-DIGI-RAW "` `f"--eventcontent FEVTDEBUG --geometry {geometry} --era {era} "` `f"--relval 9000,100 --fileout {output_dir}/step1.root --nThreads {num_threads} "` 
`f"--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 "` `f"--python_filename {ttbar_config_file}"` `)` `if not args.cachedInput and not args.dryRun:` `print("Running TTbar GEN,SIM step...")` `run_command(ttbar_command, log_file=os.path.join(output_dir, "ttbar_gen_sim.log"))` `# Directory containing HLT test configurations` `hlt_configs_dir = output_dir` `# Check if the directory exists` `if not os.path.exists(hlt_configs_dir):` `print(f"Directory {hlt_configs_dir} not found! Exiting...")` `exit(1)` `# Step 7: Function to run cmsRun on a given HLT config file and save the output` `def run_cmsrun(config_file):` `# Extract the HLT path name from the config file (e.g., "Phase2_HLT_IsoMu24_FromL1TkMuon.py")` `base_name = os.path.basename(config_file).replace("Phase2_", "").replace(".py", "")` `log_file = os.path.join(output_dir, f"{base_name}.log")` `# Run the cmsRun command and log the output` `cmsrun_command = f"cmsRun {config_file}"` `result_code = run_command(cmsrun_command, log_file=log_file)` `if result_code != 0:` `print(f"cmsRun failed for {config_file} with exit code {result_code}. Check {log_file} for details.")` `return result_code # Return the error code` `print(f"cmsRun completed for {config_file}")` `return 0 # Return 0 for success` `# Step 8: Loop through all files in hlt_test_configs and run cmsRun on each in parallel` `config_files = [` `f for f in os.listdir(hlt_configs_dir)` `if f.endswith(".py") and f.startswith("Phase2_") and f != "Phase2_dump.py"` `]` `print(f"Found {len(config_files)} configuration files in {hlt_configs_dir}.")` `##### stop here in case it's dryRun mode` `if args.dryRun: # Check if the --dryRun flag is active` `print("Dry run mode activated. 
All configurations have been created.")` `exit(0)` `# Run cmsRun on all config files in parallel and handle errors` `error_occurred = False` `with ThreadPoolExecutor(max_workers=num_parallel_jobs) as executor:` `futures = {executor.submit(run_cmsrun, os.path.join(output_dir, config_file)): config_file for config_file in config_files}` `for future in as_completed(futures):` `config_file = futures[future]` `try:` `result_code = future.result()` `if result_code != 0:` `error_occurred = True` `print(f"cmsRun for {config_file} exited with code {result_code}")` `except Exception as exc:` `error_occurred = True` `print(f"cmsRun for {config_file} generated an exception: {exc}")` `if error_occurred:` `print("-" 40)` `print("One or more cmsRun jobs failed. Exiting with failure.")` `print("-" * 40)` `exit(1)` `print("All cmsRun jobs submitted.")` `# Step 9: Compare all HLT root files using hltDiff` `def compare_hlt_results(input_dir, num_events, max_workers=4):` `# List all root files starting with "HLT_" or "L1T_" in the output directory` `root_files = [f for f in os.listdir(input_dir) if f.endswith(".root") and (f.startswith("HLT_") or f.startswith("L1T_"))]` `# Base file (hltrun output) to compare against` `base_root_file = os.path.join(input_dir, "hlt.root")` `# Check if base_root_file exists` `if not os.path.exists(base_root_file):` `print(f"Base root file {base_root_file} not found! 
Exiting...")` `exit(1)` `# Use ThreadPoolExecutor to run comparisons in parallel` `with ThreadPoolExecutor(max_workers=max_workers) as executor:` `futures = []` `for root_file in root_files:` `futures.append(executor.submit(compare_single_file, root_file, base_root_file, num_events, input_dir))` `# Collect results as they complete` `for future in as_completed(futures):` `result = future.result()` `if result: # If there is an error message, print it and exit` `print("-" * 40)` `print(result)` `print("-" * 40)` `exit(1)` `print("All HLT comparisons passed with no differences.")` `# Step 10: Once all cmsRun jobs are completed, perform the hltDiff comparisons` `print("Performing HLT result comparisons...")` `compare_hlt_results(output_dir,num_events,num_parallel_jobs) # Adjust max_workers based on your CPU cores` `# Step 11: Capture the end time and print the end timestamp` `end_time = time.time()` `end_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")` `print("------------------------------------------------------------")` `print(f"Script ended at {end_timestamp}")` `print("------------------------------------------------------------")` `# Step 12: Calculate the total execution time and print it` `total_time = end_time - start_time` `formatted_total_time = time.strftime("%H:%M:%S", time.gmtime(total_time))` `print("------------------------------------------------------------")` `print(f"Total execution time: {formatted_total_time}")` `print("------------------------------------------------------------")` `print("All steps completed successfully.")`

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400

#!/usr/bin/env python3
import argparse
import fnmatch
import os
import re
import shutil
import subprocess
import sys
import time
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
import Configuration.Geometry.defaultPhase2ConditionsEra_cff as _settings
# Defaults derived from the Phase-2 settings module for its DEFAULT_VERSION.
# get_era_and_conditions() yields a pair that is unpacked as (global tag, era).
_PH2_GLOBAL_TAG, _PH2_ERA = _settings.get_era_and_conditions(
    _settings.DEFAULT_VERSION
)

# Default geometry name: the literal "Extended" followed by DEFAULT_VERSION
_PH2_GEOMETRY = "Extended{}".format(_settings.DEFAULT_VERSION)

# Era name looked up in the 'Run4' properties table under the same version key
_PH2_ERA_NAME = _settings.properties['Run4'][_settings.DEFAULT_VERSION]['Era']

# Function to display help information
def print_help():
    """Print the script's usage summary (options, defaults and an example) to stdout.

    The text is kept in sync with the argparse definitions of this script:
    no option is mandatory, and the stated defaults are the parser defaults.
    """
    help_text = """
    This script runs HLT test configurations for the CMS Phase2 upgrade.

    Arguments:
    --globaltag       : GlobalTag for the CMS conditions (default: taken from the default Phase2 conditions)
    --geometry        : Geometry setting for the CMS process (default: Extended<default Phase2 version>)
    --era             : Era setting for the CMS process (default: era of the default Phase2 version)
    --events          : Number of events to process (default: 10)
    --threads         : Number of threads to use (default: 1)
    --parallelJobs    : Number of parallel cmsRun and hltDiff executions (default: 4)
    --restrictPathsTo : Restrict paths to be run to a user defined subset (e.g. "HLT_Ele*")
    --procModifiers   : Optionally use one (or more) cmssw processModifier
    --cachedInput     : Optionally use an existing RAW data file (do not regenerate it from scratch)
    --dryRun          : Optionally do not run any of the configuration created

    Example usage:
    hltPhase2UpgradeIntegrationTests --globaltag auto:phase2_realistic_T33 --geometry Extended2026D110 --events 10 --threads 4
    """
    print(help_text)

# Function to run a shell command and handle errors
def run_command(command, log_file=None, workdir=None):
    """Run a shell command and report its exit status.

    Args:
        command: Command line, executed through the shell.
        log_file: Path of a file that receives the command's stdout and
            stderr (truncated first). When None, the command inherits the
            streams of this process instead of being logged.
        workdir: Directory to run the command in (None keeps the current one).

    Returns:
        0 when the command succeeded, otherwise its non-zero exit code.
    """
    try:
        print(f"Running command: {command}")
        if log_file is None:
            # No log requested: previously this path crashed in open(None, "w")
            subprocess.run(command, shell=True, check=True, cwd=workdir)
        else:
            with open(log_file, "w") as log:
                subprocess.run(command, shell=True, check=True, cwd=workdir, stdout=log, stderr=log)
    except subprocess.CalledProcessError as e:
        print(f"Error running command: {e}")
        return e.returncode  # Return the error code

    return 0  # Return 0 for success

# Function to compare a single file's HLT results against the base file
def compare_single_file(root_file, base_root_file, num_events, output_dir):
    """Run hltDiff between one ROOT file and the base file and judge the outcome.

    Args:
        root_file: File name (relative to output_dir) of the file to check.
        base_root_file: Path of the reference ROOT file.
        num_events: Number of events the hltDiff summary is expected to report.
        output_dir: Directory that contains root_file.

    Returns:
        None when hltDiff's stdout contains the "all events match" summary
        line for num_events events, otherwise an error message string.
    """
    candidate = os.path.join(output_dir, root_file)
    print(f"Comparing {candidate} with {base_root_file} using hltDiff...")

    # Capture both streams; only stdout is inspected below
    completed = subprocess.run(
        f"hltDiff -o {base_root_file} -n {candidate}",
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    report = completed.stdout.decode("utf-8")
    print(report)  # Echo the tool output for debugging

    # The success line depends on how many events were configured
    success_marker = f"Found {num_events} matching events, out of which 0 have different HLT results"
    if success_marker in report:
        return None

    return f"Error: {root_file} has different HLT results!"

# Command-line interface: every option has a default, none is mandatory
parser = argparse.ArgumentParser(description="Run HLT Test Configurations")

# Conditions, geometry and era default to the values derived from the Phase-2 settings module
parser.add_argument(
    "--globaltag",
    default=_PH2_GLOBAL_TAG,
    help="GlobalTag for the CMS conditions",
)
parser.add_argument(
    "--geometry",
    default=_PH2_GEOMETRY,
    help="Geometry setting for the CMS process",
)
parser.add_argument(
    "--era",
    default=_PH2_ERA_NAME,
    help="Era setting for the CMS process",
)

# Job size and parallelism
parser.add_argument(
    "--events",
    type=int,
    default=10,
    help="Number of events to process",
)
parser.add_argument(
    "--parallelJobs",
    type=int,
    default=4,
    help="Number of parallel cmsRun HLT jobs",
)
parser.add_argument(
    "--threads",
    type=int,
    default=1,
    help="Number of threads to use",
)

# Optional path selection, input reuse, process modifiers and dry-run switch
parser.add_argument(
    "--restrictPathsTo",
    nargs='+',
    default=[],
    help="List of HLT paths to restrict to",
)
parser.add_argument(
    "--cachedInput",
    default=None,
    help="Predefined input file to use instead of running TTbar GEN,SIM step",
)
parser.add_argument(
    "--procModifiers",
    default=None,
    help="Optional process modifier for cmsDriver",
)
parser.add_argument(
    "--dryRun",
    action="store_true",
    help="Only generate configurations without running them",
)

# Step 0: Remember when the script started (epoch seconds for the final
# duration, formatted local time for the banner) and announce it
start_time = time.time()
start_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(
    "------------------------------------------------------------",
    f"Script started at {start_timestamp}",
    "------------------------------------------------------------",
    sep="\n",
)

# Parse arguments. argparse signals both "--help" and invalid command lines by
# raising SystemExit; in either case the extended help text is shown as well.
try:
    args = parser.parse_args()
except SystemExit as exit_request:
    print_help()
    # Propagate argparse's own status (0 for --help, non-zero for a usage
    # error) so that a bad command line is not reported as a success.
    sys.exit(exit_request.code)

# Expose the parsed options under the module-level names used by the rest of the script
global_tag, era, geometry = args.globaltag, args.era, args.geometry
num_events, num_threads = args.events, args.threads
num_parallel_jobs = args.parallelJobs
restrict_paths_to = args.restrictPathsTo
proc_modifiers = args.procModifiers

# Collect the configuration summary line by line, then print it in one go
summary_lines = [
    f"{'Configuration Summary':^40}",
    "=" * 40,
    f"Global Tag:           {global_tag}",
    f"Geometry:             {geometry}",
    f"Era:                  {era}",
    f"Num Events:           {num_events}",
    f"Num Threads:          {num_threads}",
    f"Num Parallel Jobs:    {num_parallel_jobs}",
]
# Optional entries appear only when the corresponding option was given
if restrict_paths_to:
    summary_lines.append(f"Restricting paths to: {', '.join(restrict_paths_to)}")
if proc_modifiers:
    summary_lines.append(f"Proc Modifiers:       {proc_modifiers}")
# State where the input events come from
summary_lines.append(
    f"Using cached input file: {args.cachedInput}"
    if args.cachedInput
    else "Using regenerated GEN-SIM-DIGI-RAW file from scratch"
)
summary_lines.append("=" * 40)
print("\n".join(summary_lines))

# Working directory that receives every generated configuration, log and ROOT file
output_dir = "hlt_test_configs"

# Always start from an empty directory: remove leftovers of a previous run, then recreate it
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Assemble the cmsDriver.py command for the base configuration as a list of
# option groups; the groups are joined with single spaces at the end.
_driver_options = [
    "cmsDriver.py Phase2 -s L1P2GT,HLT:75e33_timing",
    f"--conditions {global_tag} -n {num_events} --eventcontent FEVTDEBUGHLT",
    f"--geometry {geometry} --era {era}",
]
if args.cachedInput:
    # Cached input: read the user-supplied file, run as MC under the process
    # name HLTX and drop the collections matching the listed *_HLT patterns
    _driver_options += [
        f"--filein {args.cachedInput} --fileout {output_dir}/hlt.root --no_exec",
        f"--mc --nThreads {num_threads}",
        "--processName=HLTX",
        "--inputCommands='keep *, drop *_hlt*_*_HLT, drop triggerTriggerFilterObjectWithRefs_l1t*_*_HLT'",
    ]
else:
    # No cached input: read the step1.root file expected in the output directory
    _driver_options += [
        f"--filein file:{output_dir}/step1.root --fileout {output_dir}/hlt.root --no_exec",
        f"--nThreads {num_threads}",
    ]
# Options shared by both variants
_driver_options += [
    "--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000",
    '--customise_commands "process.options.wantSummary=True"',
]
# Process modifiers are appended last, only when requested
if proc_modifiers:
    _driver_options.append(f"--procModifiers {proc_modifiers}")

base_cmsdriver_command = " ".join(_driver_options)

# The base configuration written by cmsDriver.py, its fully expanded dump, and
# the log that collects edmConfigDump's stderr
base_config_file = os.path.join(output_dir, "Phase2_L1P2GT_HLT.py")
dumped_config_file = os.path.join(output_dir, "Phase2_dump.py")
log_file = os.path.join(output_dir, "hlt.log")

# Step 1: Run the cmsDriver.py command to generate the base configuration in the output directory
print(f"Running cmsDriver.py to generate the base config: {base_config_file}")
driver_result = subprocess.run(base_cmsdriver_command, shell=True, cwd=output_dir)
if driver_result.returncode != 0:
    # Without a base configuration nothing downstream can work; stop here
    # instead of failing later with a misleading "no paths found" message.
    print(f"cmsDriver.py failed with exit code {driver_result.returncode}. Exiting...")
    sys.exit(1)

# Step 2: Use edmConfigDump to dump the full configuration
print(f"Dumping the full configuration using edmConfigDump to {dumped_config_file}")
with open(dumped_config_file, "w") as dump_file, open(log_file, "w") as log:
    dump_result = subprocess.run(f"edmConfigDump {base_config_file}", shell=True, stdout=dump_file, stderr=log)
if dump_result.returncode != 0:
    # A failed dump leaves an empty or partial file behind; report where the error went
    print(f"edmConfigDump failed with exit code {dump_result.returncode}. Check {log_file} for details. Exiting...")
    sys.exit(1)

# Step 3: Collect the trigger path names mentioned in the dumped configuration
print(f"Extracting HLT paths from {dumped_config_file}...")

# Load the whole dump; later steps reuse this text to build the per-path configs
with open(dumped_config_file, "r") as f:
    config_content = f.read()

# Every "process.<name>" reference whose name starts with HLT_, L1T_ or DST_
# counts, wherever it appears in the dump
_trigger_path_re = re.compile(
    r"process\.(HLT_[A-Za-z0-9_]+|L1T_[A-Za-z0-9_]+|DST_[A-Za-z0-9_]+)"
)

# A set drops the repeated mentions; sorting gives a stable, alphabetical order
hlt_paths = sorted(
    {found.group(1) for found in _trigger_path_re.finditer(config_content)}
)

if not hlt_paths:
    print("No HLT paths found in the schedule!")
    exit(1)

print(f"Found {len(hlt_paths)} HLT paths.")

# Step 3b: When --restrictPathsTo was given, keep only the paths selected by
# at least one of the shell-style wildcard patterns
if restrict_paths_to:
    selected = set()

    for wildcard in restrict_paths_to:
        hits = fnmatch.filter(hlt_paths, wildcard)
        if hits:
            selected.update(hits)
        else:
            # A pattern that selects nothing is reported but is not fatal by itself
            print(f"Warning: No paths matched the pattern: {wildcard}")

    # Back to a sorted list so the later steps see a deterministic order
    valid_paths = sorted(selected)

    # Nothing selected at all: there is no work left to do
    if not valid_paths:
        print("Error: None of the specified patterns matched any paths. Exiting.")
        exit(1)

    # From here on only the selected paths are processed
    hlt_paths = valid_paths
    print(f"Using {len(hlt_paths)} HLT paths after applying restrictions.")

# Step 4: Make sure the dump contains a "process.schedule = cms.Schedule(...[ ... ])"
# statement; the per-path configurations are produced by rewriting that list
schedule_match = re.search(
    r"(process\.schedule\s*=\s*cms\.Schedule\(\*?\s*\[)([\s\S]+?)(\]\s*\))",
    config_content,
)

if schedule_match:
    print("Matched schedule section.")
else:
    print("No schedule match found after tweaking regex! Exiting...")
    exit(1)

# Step 5: Generate one configuration per selected path by rewriting the dumped
# config so that its schedule keeps a single HLT_/L1T_ path at a time
for path_name in hlt_paths:
    # Create a new configuration file for this path
    config_filename = os.path.join(output_dir, f"Phase2_{path_name}.py")

    def replace_hlt_paths(match):
        """Rebuild the matched schedule list, keeping only the current trigger path.

        Entries that are not a plain reference to an HLT_/L1T_ path are always
        kept. A plain HLT_/L1T_ reference is kept only when its name equals
        path_name exactly: a substring test would also keep paths whose names
        merely start with path_name. Surrounding whitespace is ignored, so the
        first and last list entries are treated like all the others.
        """
        kept_entries = []
        for entry in re.split(r"\s*,\s*", match.group(2).strip()):
            trigger = re.fullmatch(r"process\.((?:HLT_|L1T_)[A-Za-z0-9_]+)", entry)
            # Anything that is not exactly one trigger-path reference (other
            # paths, or text carrying extra brackets) is passed through untouched
            if trigger is None or trigger.group(1) == path_name:
                kept_entries.append(entry)
        return match.group(1) + ", ".join(kept_entries) + match.group(3)

    # Apply the regex to remove all HLT and L1T paths except the current one
    modified_content = re.sub(
        r"(process\.schedule\s*=\s*cms\.Schedule\(\*?\s*\[)([\s\S]+?)(\]\s*\))",
        replace_hlt_paths,
        config_content
    )

    # Point every fileName = cms.untracked.string('...') parameter to a ROOT
    # file named after this path, so each job writes its own output
    modified_content = re.sub(
        r"fileName = cms\.untracked\.string\('.*'\)",
        f"fileName = cms.untracked.string('{output_dir}/{path_name}.root')",
        modified_content
    )

    # Write the new config to a file
    with open(config_filename, "w") as new_config:
        new_config.write(modified_content)

    print(f"Generated config: {config_filename}")

print(f"Generated {len(hlt_paths)} configuration files in the {output_dir} directory.")

# Step 6: Run cmsDriver.py for TTbar GEN,SIM steps and save the output in output_dir
ttbar_config_file = os.path.join(output_dir, "TTbar_GEN_SIM_step.py")
ttbar_log_file = os.path.join(output_dir, "ttbar_gen_sim.log")
ttbar_command = (
    f"cmsDriver.py TTbar_14TeV_TuneCP5_cfi -s GEN,SIM,DIGI:pdigi_valid,L1TrackTrigger,L1,L1P2GT,DIGI2RAW -n {num_events} "
    f"--conditions {global_tag} --beamspot DBrealisticHLLHC --datatier GEN-SIM-DIGI-RAW "
    f"--eventcontent FEVTDEBUG --geometry {geometry} --era {era} "
    f"--relval 9000,100 --fileout {output_dir}/step1.root --nThreads {num_threads} "
    f"--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 "
    f"--python_filename {ttbar_config_file}"
)

# The input sample is only produced when no cached input was given and the
# script is not in dry-run mode
if not args.cachedInput and not args.dryRun:
    print("Running TTbar GEN,SIM step...")
    ttbar_status = run_command(ttbar_command, log_file=ttbar_log_file)
    if ttbar_status != 0:
        # Every cmsRun job reads the file this step writes; without it they
        # would all fail, so stop right away with a clear message.
        print(f"TTbar GEN,SIM step failed with exit code {ttbar_status}. Check {ttbar_log_file} for details. Exiting...")
        sys.exit(1)

# Directory containing HLT test configurations
hlt_configs_dir = output_dir

# Check if the directory exists
if not os.path.exists(hlt_configs_dir):
    print(f"Directory {hlt_configs_dir} not found! Exiting...")
    sys.exit(1)

# Step 7: Function to run cmsRun on a given HLT config file and save the output
def run_cmsrun(config_file):
    """Run ``cmsRun`` on a single configuration file and log its output.

    The log is written to ``output_dir`` and named after the configuration
    file with "Phase2_" and ".py" removed
    (e.g. "Phase2_HLT_IsoMu24_FromL1TkMuon.py" -> "HLT_IsoMu24_FromL1TkMuon.log").

    Args:
        config_file: Path of the configuration file handed to ``cmsRun``.

    Returns:
        0 when the command succeeded, otherwise the non-zero code reported
        by ``run_command``.
    """
    # Derive the log name from the configuration file name
    stem = os.path.basename(config_file)
    for token in ("Phase2_", ".py"):
        stem = stem.replace(token, "")
    log_path = os.path.join(output_dir, f"{stem}.log")

    # Launch the job; stdout/stderr handling is delegated to run_command
    exit_status = run_command(f"cmsRun {config_file}", log_file=log_path)

    if exit_status == 0:
        print(f"cmsRun completed for {config_file}")
        return 0  # success

    print(f"cmsRun failed for {config_file} with exit code {exit_status}. Check {log_path} for details.")
    return exit_status  # propagate the failure code to the caller
    
# Step 8: Collect the generated "Phase2_*.py" configurations (excluding
# Phase2_dump.py) and run cmsRun on each of them in parallel
config_files = [
    f for f in os.listdir(hlt_configs_dir)
    if f.endswith(".py") and f.startswith("Phase2_") and f != "Phase2_dump.py"
]
print(f"Found {len(config_files)} configuration files in {hlt_configs_dir}.")

##### stop here in case it's dryRun mode
if args.dryRun:  # Check if the --dryRun flag is active
    print("Dry run mode activated. All configurations have been created.")
    # sys.exit() rather than the site-provided exit(), which is meant for interactive use
    sys.exit(0)

# Run cmsRun on all config files in parallel and handle errors.
# A failure does not interrupt the other jobs: every job runs to completion
# and the overall status is evaluated afterwards.
error_occurred = False
with ThreadPoolExecutor(max_workers=num_parallel_jobs) as executor:
    # Map each future back to its configuration file for error reporting
    futures = {executor.submit(run_cmsrun, os.path.join(output_dir, config_file)): config_file for config_file in config_files}

    for future in as_completed(futures):
        config_file = futures[future]
        try:
            result_code = future.result()
            if result_code != 0:
                error_occurred = True
                print(f"cmsRun for {config_file} exited with code {result_code}")
        except Exception as exc:
            # Anything raised inside the worker is reported and counted as a failure
            error_occurred = True
            print(f"cmsRun for {config_file} generated an exception: {exc}")

if error_occurred:
    print("-" * 40)
    print("One or more cmsRun jobs failed. Exiting with failure.")
    print("-" * 40)
    sys.exit(1)

# Reached only after the executor has shut down and no job reported an error
print("All cmsRun jobs completed successfully.")

# Step 9: Compare all HLT root files using hltDiff
def compare_hlt_results(input_dir, num_events, max_workers=4):
    """Compare every per-path ROOT file in ``input_dir`` against ``hlt.root``.

    Each file whose name starts with "HLT_" or "L1T_" and ends with ".root"
    is handed to ``compare_single_file`` together with the base file. The
    comparisons run in a thread pool. A truthy return value from
    ``compare_single_file`` is treated as an error message.

    Args:
        input_dir: Directory holding ``hlt.root`` and the per-path ROOT files.
        num_events: Forwarded unchanged to ``compare_single_file``.
        max_workers: Maximum number of comparisons running concurrently.

    Raises:
        SystemExit: With exit code 1 if ``hlt.root`` is missing or if at
            least one comparison reported an error.
    """
    # List all root files starting with "HLT_" or "L1T_" in the input directory
    root_files = [
        f for f in os.listdir(input_dir)
        if f.endswith(".root") and f.startswith(("HLT_", "L1T_"))
    ]

    # Base file (hltrun output) to compare against
    base_root_file = os.path.join(input_dir, "hlt.root")

    # Without the base file no comparison is possible
    if not os.path.exists(base_root_file):
        print(f"Base root file {base_root_file} not found! Exiting...")
        sys.exit(1)

    # Use ThreadPoolExecutor to run comparisons in parallel
    failed = False
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(compare_single_file, root_file, base_root_file, num_events, input_dir)
            for root_file in root_files
        ]

        # Report every failing comparison instead of stopping at the first
        # one: the executor waits for all submitted jobs anyway, so their
        # results would otherwise be silently discarded.
        for future in as_completed(futures):
            result = future.result()
            if result:  # A truthy result is an error message
                failed = True
                print("-" * 40)
                print(result)
                print("-" * 40)

    if failed:
        sys.exit(1)

    print("All HLT comparisons passed with no differences.")

# Step 10: Once all cmsRun jobs are completed, perform the hltDiff comparisons
print("Performing HLT result comparisons...")
compare_hlt_results(output_dir, num_events, num_parallel_jobs)  # Adjust max_workers based on your CPU cores

# Step 11: Capture the end time and print the end timestamp
end_time = time.time()
end_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("------------------------------------------------------------")
print(f"Script ended at {end_timestamp}")
print("------------------------------------------------------------")

# Step 12: Calculate the total execution time and print it
total_time = end_time - start_time
# Format HH:MM:SS by hand: time.strftime("%H", time.gmtime(...)) wraps back to
# 00 once the elapsed time reaches 24 hours. Output is unchanged for shorter runs.
elapsed_hours, leftover_seconds = divmod(max(0, int(total_time)), 3600)
elapsed_minutes, elapsed_seconds = divmod(leftover_seconds, 60)
formatted_total_time = f"{elapsed_hours:02d}:{elapsed_minutes:02d}:{elapsed_seconds:02d}"
print("------------------------------------------------------------")
print(f"Total execution time: {formatted_total_time}")
print("------------------------------------------------------------")

print("All steps completed successfully.")