Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /HLTrigger/Configuration/scripts/hltPhase2UpgradeIntegrationTests is written in an unsupported language. File is not indexed.

0001 #!/usr/bin/env python3
0002 import argparse
0003 import fnmatch
0004 import os
0005 import re
0006 import shutil
0007 import subprocess
0008 import sys
0009 import time
0010 from datetime import datetime
0011 from concurrent.futures import ThreadPoolExecutor, as_completed
0012 import Configuration.Geometry.defaultPhase2ConditionsEra_cff as _settings
# Default GlobalTag and era returned by the Phase-2 settings module for its
# DEFAULT_VERSION key.
_PH2_GLOBAL_TAG, _PH2_ERA = _settings.get_era_and_conditions(
    _settings.DEFAULT_VERSION
)

# The default geometry name is "Extended" followed by the DEFAULT_VERSION key.
_PH2_GEOMETRY = "Extended{}".format(_settings.DEFAULT_VERSION)

# Value stored under the 'Era' key of the 'Run4' properties for DEFAULT_VERSION;
# used as the default of the --era command-line option.
_PH2_ERA_NAME = _settings.properties["Run4"][_settings.DEFAULT_VERSION]["Era"]
0020 
0021 # Function to display help information
def print_help():
    """Print the extended usage text of this script to stdout.

    The option list mirrors the arguments registered on the argparse parser
    in this file; every option is optional and the documented defaults match
    the parser defaults.
    """
    help_text = """
    This script runs HLT test configurations for the CMS Phase2 upgrade.

    Arguments:
    --globaltag       : GlobalTag for the CMS conditions (default: GlobalTag of the default Phase2 conditions)
    --geometry        : Geometry setting for the CMS process (default: Extended<default Phase2 version>)
    --era             : Era setting for the CMS process (default: era of the default Phase2 version)
    --events          : Number of events to process (default: 10)
    --threads         : Number of threads to use (default: 1)
    --parallelJobs    : Number of parallel cmsRun and hltDiff executions (default: 4)
    --restrictPathsTo : Restrict paths to be run to a user defined subset (e.g. "HLT_Ele*")
    --procModifiers   : Optionally use one (or more) cmssw processModifier
    --cachedInput     : Optionally use an existing RAW data file (do not regenerate it from scratch)
    --dryRun          : Optionally do not run any of the configuration created

    Example usage:
    hltPhase2UpgradeIntegrationTests --globaltag auto:phase2_realistic_T33 --geometry Extended2026D110 --events 10 --threads 4
    """
    print(help_text)
0041 
0042 # Function to run a shell command and handle errors
def run_command(command, log_file=None, workdir=None):
    """Run a shell command and report its exit status.

    Args:
        command: Command line, executed through the shell.
        log_file: Optional path. When given, the file is truncated and both
            stdout and stderr of the command are written to it. When omitted,
            the command inherits the output streams of this process.
        workdir: Optional working directory for the command.

    Returns:
        0 if the command succeeded, otherwise its non-zero exit code.
    """
    print(f"Running command: {command}")
    try:
        if log_file is None:
            # No log requested: opening None would raise TypeError, so let the
            # child write to the inherited stdout/stderr instead.
            subprocess.run(command, shell=True, check=True, cwd=workdir)
        else:
            with open(log_file, "w") as log:
                subprocess.run(command, shell=True, check=True, cwd=workdir, stdout=log, stderr=log)
    except subprocess.CalledProcessError as e:
        print(f"Error running command: {e}")
        return e.returncode  # Propagate the failing exit code to the caller

    return 0  # Success
0053 
0054 # Function to compare the single file HLT results with the respect to a the base
def compare_single_file(root_file, base_root_file, num_events, output_dir):
    """Compare one HLT output file against the reference file with hltDiff.

    Args:
        root_file: Name of the file to check, relative to ``output_dir``.
        base_root_file: Path of the reference file (passed to ``hltDiff -o``).
        num_events: Number of events hltDiff is expected to report as matching.
        output_dir: Directory that contains ``root_file``.

    Returns:
        None when the hltDiff output states that ``num_events`` events matched
        and none of them has different HLT results; otherwise an error message.
        The message distinguishes a non-zero hltDiff exit status from a
        completed comparison whose output lacks the expected summary line.
    """
    root_path = os.path.join(output_dir, root_file)
    print(f"Comparing {root_path} with {base_root_file} using hltDiff...")

    # Run the hltDiff command, capturing both output streams
    hlt_diff_command = f"hltDiff -o {base_root_file} -n {root_path}"
    result = subprocess.run(hlt_diff_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Decode leniently: the text is only searched and echoed, so undecodable
    # bytes must not abort the comparison.
    output = result.stdout.decode("utf-8", errors="replace")
    print(output)  # Print for debug purposes

    # The expected summary line depends on the number of events configured
    expected_match_string = f"Found {num_events} matching events, out of which 0 have different HLT results"
    if expected_match_string in output:
        return None  # No issues found

    if result.returncode != 0:
        # hltDiff itself did not finish cleanly (e.g. missing input file or
        # executable): show its stderr instead of silently dropping it.
        error_output = result.stderr.decode("utf-8", errors="replace").strip()
        if error_output:
            print(error_output)
        return f"Error: hltDiff exited with code {result.returncode} while comparing {root_file}!"

    return f"Error: {root_file} has different HLT results!"
0075 
# Argument parser for the command-line configuration. The defaults of
# --globaltag, --geometry and --era come from the Phase-2 settings resolved at
# the top of this file.
parser = argparse.ArgumentParser(description="Run HLT Test Configurations")
parser.add_argument("--globaltag", default=_PH2_GLOBAL_TAG, help="GlobalTag for the CMS conditions")
parser.add_argument("--geometry", default=_PH2_GEOMETRY, help="Geometry setting for the CMS process")  # Auto-generated geometry default
parser.add_argument("--era", default=_PH2_ERA_NAME, help="Era setting for the CMS process")  # Era name of the default version
parser.add_argument("--events", type=int, default=10, help="Number of events to process")
parser.add_argument("--parallelJobs", type=int, default=4, help="Number of parallel cmsRun HLT jobs")
parser.add_argument("--threads", type=int, default=1, help="Number of threads to use")
parser.add_argument("--restrictPathsTo", nargs='+', default=[], help="List of HLT paths to restrict to")
parser.add_argument("--cachedInput", default=None, help="Predefined input file to use instead of running TTbar GEN,SIM step")
parser.add_argument("--procModifiers", default=None, help="Optional process modifier for cmsDriver")
parser.add_argument("--dryRun", action="store_true", help="Only generate configurations without running them")

# Step 0: Capture the start time and print the start timestamp
start_time = time.time()
start_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("------------------------------------------------------------")
print(f"Script started at {start_timestamp}")
print("------------------------------------------------------------")

# Parse arguments. argparse raises SystemExit both for --help (code 0) and for
# invalid arguments (code 2). The extended help is shown in either case, but
# the original exit status is preserved so that a usage error is not reported
# as success to the calling shell or test harness.
try:
    args = parser.parse_args()
except SystemExit as exit_request:
    print_help()
    sys.exit(exit_request.code)

global_tag = args.globaltag
era = args.era
geometry = args.geometry
num_events = args.events
num_threads = args.threads
num_parallel_jobs = args.parallelJobs
restrict_paths_to = args.restrictPathsTo
proc_modifiers = args.procModifiers  # Optional, forwarded to cmsDriver.py

# Print the effective configuration
print(f"{'Configuration Summary':^40}")
print("=" * 40)
print(f"Global Tag:           {global_tag}")
print(f"Geometry:             {geometry}")
print(f"Era:                  {era}")
print(f"Num Events:           {num_events}")
print(f"Num Threads:          {num_threads}")
print(f"Num Parallel Jobs:    {num_parallel_jobs}")
# Optional settings are listed only when they were provided
if restrict_paths_to:
    print(f"Restricting paths to: {', '.join(restrict_paths_to)}")
if proc_modifiers:
    print(f"Proc Modifiers:       {proc_modifiers}")
if args.cachedInput:
    print(f"Using cached input file: {args.cachedInput}")
else:
    print("Using regenerated GEN-SIM-DIGI-RAW file from scratch")
print("=" * 40)
0132 
# Every generated configuration, log and ROOT file goes into this directory;
# a directory left over from a previous run is wiped first.
output_dir = "hlt_test_configs"
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Assemble the cmsDriver.py command that creates the base configuration.
# With --cachedInput the given file is the input and the job is configured as
# a re-HLT step (--mc, process name HLTX, previous HLT products dropped);
# otherwise the input is the step1.root file produced later by this script.
hlt_input_file = args.cachedInput if args.cachedInput else f"file:{output_dir}/step1.root"

cmsdriver_parts = [
    "cmsDriver.py Phase2 -s L1P2GT,HLT:75e33_timing",
    f"--conditions {global_tag} -n {num_events} --eventcontent FEVTDEBUGHLT",
    f"--geometry {geometry} --era {era} --filein {hlt_input_file} --fileout {output_dir}/hlt.root --no_exec",
]
if args.cachedInput:
    cmsdriver_parts.extend([
        f"--mc --nThreads {num_threads}",
        "--processName=HLTX",
        "--inputCommands='keep *, drop *_hlt*_*_HLT, drop triggerTriggerFilterObjectWithRefs_l1t*_*_HLT'",
    ])
else:
    cmsdriver_parts.append(f"--nThreads {num_threads}")
cmsdriver_parts.extend([
    "--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000",
    '--customise_commands "process.options.wantSummary=True"',
])
# Process modifiers, when requested, go at the very end of the command line
if proc_modifiers:
    cmsdriver_parts.append(f"--procModifiers {proc_modifiers}")

base_cmsdriver_command = " ".join(cmsdriver_parts)

# Files created by the next steps: the cmsDriver.py configuration, its
# expanded dump, and the log receiving the stderr of edmConfigDump.
base_config_file = os.path.join(output_dir, "Phase2_L1P2GT_HLT.py")
dumped_config_file = os.path.join(output_dir, "Phase2_dump.py")
log_file = os.path.join(output_dir, "hlt.log")
0173 
# Step 1: Run the cmsDriver.py command to generate the base configuration in the output directory
print(f"Running cmsDriver.py to generate the base config: {base_config_file}")
cmsdriver_result = subprocess.run(base_cmsdriver_command, shell=True, cwd=output_dir)
if cmsdriver_result.returncode != 0:
    # Without a base configuration nothing below can work; stop with the real cause.
    print(f"cmsDriver.py failed with exit code {cmsdriver_result.returncode}. Exiting...")
    sys.exit(1)

# Step 2: Use edmConfigDump to dump the full configuration
print(f"Dumping the full configuration using edmConfigDump to {dumped_config_file}")
with open(dumped_config_file, "w") as dump_file, open(log_file, "w") as log:
    dump_result = subprocess.run(f"edmConfigDump {base_config_file}", shell=True, stdout=dump_file, stderr=log)
if dump_result.returncode != 0:
    print(f"edmConfigDump failed with exit code {dump_result.returncode}. Check {log_file} for details.")
    sys.exit(1)

# Step 3: Extract the list of HLT paths from the dumped configuration
print(f"Extracting HLT paths from {dumped_config_file}...")

# Read the dumped configuration to extract HLT paths
with open(dumped_config_file, "r") as f:
    config_content = f.read()

# Collect every process.HLT_* / process.L1T_* name referenced anywhere in the dump
unsorted_hlt_paths = re.findall(r"process\.(HLT_[A-Za-z0-9_]+|L1T_[A-Za-z0-9_]+)", config_content)

# Remove duplicates and sort alphabetically
hlt_paths = sorted(set(unsorted_hlt_paths))

if not hlt_paths:
    print("No HLT paths found in the schedule!")
    sys.exit(1)

print(f"Found {len(hlt_paths)} HLT paths.")

# Step 3b: Restrict paths using wildcard patterns if the option is provided
if restrict_paths_to:
    valid_paths = set()  # Union of the paths matched by any pattern

    for pattern in restrict_paths_to:
        # Shell-style wildcard match of the pattern against the known paths
        matched = fnmatch.filter(hlt_paths, pattern)
        valid_paths.update(matched)

        # A pattern without any match is only a warning ...
        if not matched:
            print(f"Warning: No paths matched the pattern: {pattern}")

    valid_paths = sorted(valid_paths)

    # ... but it is an error if no pattern matched anything at all
    if not valid_paths:
        print("Error: None of the specified patterns matched any paths. Exiting.")
        sys.exit(1)

    # From here on only the restricted list is used
    hlt_paths = valid_paths
    print(f"Using {len(hlt_paths)} HLT paths after applying restrictions.")

# Step 4: Make sure the dump contains a process.schedule = cms.Schedule(*[...]) statement,
# since the per-path configurations are derived by rewriting it
schedule_match = re.search(
    r"(process\.schedule\s*=\s*cms\.Schedule\(\*?\s*\[)([\s\S]+?)(\]\s*\))",
    config_content
)

if not schedule_match:
    print("No schedule match found after tweaking regex! Exiting...")
    sys.exit(1)
else:
    print("Matched schedule section.")
0241 
# Step 5: Generate N configurations by modifying the dumped config to keep only one path at a time

# The process.schedule statement, split into prefix / comma-separated entries / suffix
_SCHEDULE_PATTERN = re.compile(r"(process\.schedule\s*=\s*cms\.Schedule\(\*?\s*\[)([\s\S]+?)(\]\s*\))")
# Schedule entries that refer to an HLT or L1T path
_TRIGGER_ENTRY_PATTERN = re.compile(r"process\.(HLT_|L1T_)")
# The output file name parameter(s) of the configuration
_OUTPUT_FILE_PATTERN = re.compile(r"fileName = cms\.untracked\.string\('.*'\)")


def _restrict_schedule_to(content, kept_path):
    """Return ``content`` with all HLT_/L1T_ schedule entries except ``kept_path`` removed.

    Entries that are not HLT_/L1T_ paths are kept. Entries are stripped of
    surrounding whitespace before being tested, and the path to keep is
    compared by exact name, so a path whose name merely starts with
    ``kept_path`` is not retained.
    """
    wanted_entry = f"process.{kept_path}"

    def _filter_entries(match):
        entries = (entry.strip() for entry in match.group(2).split(","))
        kept_entries = [
            entry for entry in entries
            if entry and (_TRIGGER_ENTRY_PATTERN.match(entry) is None or entry == wanted_entry)
        ]
        return f"{match.group(1)} {', '.join(kept_entries)} {match.group(3)}"

    return _SCHEDULE_PATTERN.sub(_filter_entries, content)


for path_name in hlt_paths:
    # Create a new configuration file for this path
    config_filename = os.path.join(output_dir, f"Phase2_{path_name}.py")

    # Remove all HLT and L1T paths except the current one from the schedule
    modified_content = _restrict_schedule_to(config_content, path_name)

    # Modify the fileout parameter to save a unique root file for each path
    modified_content = _OUTPUT_FILE_PATTERN.sub(
        f"fileName = cms.untracked.string('{output_dir}/{path_name}.root')",
        modified_content
    )

    # Write the new config to a file
    with open(config_filename, "w") as new_config:
        new_config.write(modified_content)

    print(f"Generated config: {config_filename}")

print(f"Generated {len(hlt_paths)} configuration files in the {output_dir} directory.")
0275 
# Step 6: Run cmsDriver.py for the TTbar GEN,SIM,DIGI,L1,DIGI2RAW steps and save the output in output_dir
ttbar_config_file = os.path.join(output_dir, "TTbar_GEN_SIM_step.py")
ttbar_command = (
    f"cmsDriver.py TTbar_14TeV_TuneCP5_cfi -s GEN,SIM,DIGI:pdigi_valid,L1TrackTrigger,L1,L1P2GT,DIGI2RAW -n {num_events} "
    f"--conditions {global_tag} --beamspot DBrealisticHLLHC --datatier GEN-SIM-DIGI-RAW "
    f"--eventcontent FEVTDEBUG --geometry {geometry} --era {era} "
    f"--relval 9000,100 --fileout {output_dir}/step1.root --nThreads {num_threads} "
    f"--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 "
    f"--python_filename {ttbar_config_file}"
)

# The input sample is only produced when no cached input was given and the
# configurations are actually going to be run.
if not args.cachedInput and not args.dryRun:
    print("Running TTbar GEN,SIM step...")
    ttbar_log_file = os.path.join(output_dir, "ttbar_gen_sim.log")
    ttbar_exit_code = run_command(ttbar_command, log_file=ttbar_log_file)
    if ttbar_exit_code != 0:
        # Every HLT job reads step1.root; without it they would all fail later
        # with a less obvious error, so stop here.
        print(f"TTbar GEN,SIM step failed with exit code {ttbar_exit_code}. Check {ttbar_log_file} for details.")
        sys.exit(1)

# Directory containing HLT test configurations
hlt_configs_dir = output_dir

# Check if the directory exists
if not os.path.exists(hlt_configs_dir):
    print(f"Directory {hlt_configs_dir} not found! Exiting...")
    sys.exit(1)
0298 
0299 # Step 7: Function to run cmsRun on a given HLT config file and save the output
def run_cmsrun(config_file):
    """Run cmsRun on one configuration file, logging its output next to the configs.

    The log is written to ``<output_dir>/<name>.log`` where ``<name>`` is the
    file name of ``config_file`` without its leading ``Phase2_`` and trailing
    ``.py`` (e.g. ``Phase2_HLT_IsoMu24_FromL1TkMuon.py`` -> ``HLT_IsoMu24_FromL1TkMuon``).

    Args:
        config_file: Path of the configuration to run.

    Returns:
        0 on success, otherwise the exit code reported by ``run_command``.
    """
    base_name = os.path.basename(config_file)
    # Strip only the prefix and the extension: str.replace would also remove
    # the same text from the middle of a path name.
    if base_name.startswith("Phase2_"):
        base_name = base_name[len("Phase2_"):]
    if base_name.endswith(".py"):
        base_name = base_name[:-len(".py")]
    log_file = os.path.join(output_dir, f"{base_name}.log")

    # Run the cmsRun command and log the output
    cmsrun_command = f"cmsRun {config_file}"
    result_code = run_command(cmsrun_command, log_file=log_file)

    if result_code != 0:
        print(f"cmsRun failed for {config_file} with exit code {result_code}. Check {log_file} for details.")
        return result_code  # Return the error code

    print(f"cmsRun completed for {config_file}")
    return 0  # Return 0 for success
0315     
# Step 8: Collect every generated Phase2_*.py configuration (the raw dump is
# not meant to be run) and execute cmsRun on each of them in parallel
config_files = []
for candidate in os.listdir(hlt_configs_dir):
    if candidate == "Phase2_dump.py":
        continue
    if candidate.startswith("Phase2_") and candidate.endswith(".py"):
        config_files.append(candidate)
print(f"Found {len(config_files)} configuration files in {hlt_configs_dir}.")

##### In dry-run mode the configurations are created but never executed
if args.dryRun:
    print("Dry run mode activated. All configurations have been created.")
    exit(0)

# Submit one cmsRun job per configuration and record whether any of them failed
error_occurred = False
with ThreadPoolExecutor(max_workers=num_parallel_jobs) as executor:
    futures = {}
    for config_file in config_files:
        submitted_job = executor.submit(run_cmsrun, os.path.join(output_dir, config_file))
        futures[submitted_job] = config_file

    for future in as_completed(futures):
        config_file = futures[future]
        try:
            result_code = future.result()
        except Exception as exc:
            error_occurred = True
            print(f"cmsRun for {config_file} generated an exception: {exc}")
        else:
            if result_code != 0:
                error_occurred = True
                print(f"cmsRun for {config_file} exited with code {result_code}")

# A single failing job makes the whole test fail
if error_occurred:
    print("-" * 40)
    print("One or more cmsRun jobs failed. Exiting with failure.")
    print("-" * 40)
    exit(1)

print("All cmsRun jobs submitted.")
0351 
0352 # Step 9: Compare all HLT root files using hltDiff
def compare_hlt_results(input_dir, num_events, max_workers=4):
    """Compare every single-path output file in ``input_dir`` against ``hlt.root``.

    All ``HLT_*.root`` and ``L1T_*.root`` files found in ``input_dir`` are
    checked in parallel with ``compare_single_file``. Every failing comparison
    is reported before the process exits, not only the first one to complete.

    Args:
        input_dir: Directory holding ``hlt.root`` and the per-path files.
        num_events: Number of events each comparison is expected to match.
        max_workers: Maximum number of comparisons running concurrently.

    Raises:
        SystemExit: With code 1 if ``hlt.root`` is missing or if any
            comparison returned an error message.
    """
    # Sorted so that jobs are submitted in a reproducible order
    root_files = sorted(
        name for name in os.listdir(input_dir)
        if name.endswith(".root") and name.startswith(("HLT_", "L1T_"))
    )

    # Base file (hltrun output) to compare against
    base_root_file = os.path.join(input_dir, "hlt.root")
    if not os.path.exists(base_root_file):
        print(f"Base root file {base_root_file} not found! Exiting...")
        sys.exit(1)

    # Run the comparisons in parallel and gather all error messages
    failures = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(compare_single_file, root_file, base_root_file, num_events, input_dir)
            for root_file in root_files
        ]
        for future in as_completed(futures):
            message = future.result()
            if message:
                failures.append(message)

    if failures:
        print("-" * 40)
        for message in sorted(failures):
            print(message)
        print("-" * 40)
        sys.exit(1)

    print("All HLT comparisons passed with no differences.")
0381 
# Step 10: All cmsRun jobs have finished at this point; compare their outputs
# with hltDiff, using as many workers as there were parallel cmsRun jobs
print("Performing HLT result comparisons...")
compare_hlt_results(output_dir, num_events, max_workers=num_parallel_jobs)

# Step 11: Record when the script finished
end_time = time.time()
end_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Step 12: Elapsed wall-clock time since start_time, rendered as HH:MM:SS
total_time = end_time - start_time
formatted_total_time = time.strftime("%H:%M:%S", time.gmtime(total_time))

# Both pieces of information are framed by the same horizontal rule
horizontal_rule = "------------------------------------------------------------"
for summary_line in (
    horizontal_rule,
    f"Script ended at {end_timestamp}",
    horizontal_rule,
    horizontal_rule,
    f"Total execution time: {formatted_total_time}",
    horizontal_rule,
):
    print(summary_line)

print("All steps completed successfully.")
0400 print("All steps completed successfully.")