Project CMSSW displayed by LXR

0001 #!/usr/bin/env python3
0002 import argparse
0003 import fnmatch
0004 import os
0005 import re
0006 import shutil
0007 import subprocess
0008 import sys
0009 import time
0010 from datetime import datetime
0011 from concurrent.futures import ThreadPoolExecutor, as_completed
0012 import Configuration.Geometry.defaultPhase2ConditionsEra_cff as _settings
# Default GlobalTag and era returned by the settings module for the default
# Phase-2 version.
_PH2_GLOBAL_TAG, _PH2_ERA = _settings.get_era_and_conditions(
    _settings.DEFAULT_VERSION
)

# The default geometry name is "Extended" followed by the default version key.
_PH2_GEOMETRY = "Extended{}".format(_settings.DEFAULT_VERSION)

# Era name stored under the 'Era' key of the default Run4 version entry.
_PH2_ERA_NAME = _settings.properties["Run4"][_settings.DEFAULT_VERSION]["Era"]
0020
# Function to run a shell command and handle errors
def run_command(command, log_file=None, workdir=None):
    """Run ``command`` through the shell and report its exit status.

    Args:
        command: Shell command line to execute (run with ``shell=True``).
        log_file: Optional path of a file that receives the command's stdout
            and stderr; the file is truncated first. When ``None`` the command
            writes to the streams inherited from this process.
        workdir: Optional working directory in which the command is run.

    Returns:
        0 when the command succeeds, otherwise its non-zero exit code.
    """
    print(f"Running command: {command}")
    try:
        if log_file is None:
            # No log requested: open(None, "w") would raise TypeError, so let
            # the child inherit the parent's output streams instead.
            subprocess.run(command, shell=True, check=True, cwd=workdir)
        else:
            with open(log_file, "w") as log:
                subprocess.run(command, shell=True, check=True, cwd=workdir, stdout=log, stderr=log)
    except subprocess.CalledProcessError as e:
        print(f"Error running command: {e}")
        return e.returncode  # Propagate the failing exit code to the caller

    return 0  # Success
0032
# Function to compare the HLT results of a single file with respect to the base file
def compare_single_file(root_file, base_root_file, num_events, output_dir):
    """Run hltDiff on one ROOT file against the base file and check the report.

    Args:
        root_file: File name of the ROOT file to check, relative to ``output_dir``.
        base_root_file: Path of the reference ROOT file (passed to ``hltDiff -o``).
        num_events: Number of events the report must declare as matching.
        output_dir: Directory that contains ``root_file``.

    Returns:
        ``None`` when the hltDiff standard output contains the line stating
        that ``num_events`` events match and none of them have different HLT
        results; otherwise an error message naming ``root_file``.
    """
    candidate = os.path.join(output_dir, root_file)
    print(f"Comparing {candidate} with {base_root_file} using hltDiff...")

    # Only stdout is inspected below; stderr is captured but not used.
    completed = subprocess.run(
        f"hltDiff -o {base_root_file} -n {candidate}",
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    report = completed.stdout.decode("utf-8")
    print(report)  # Print for debug purposes

    # The expected summary line depends on the configured number of events.
    wanted = f"Found {num_events} matching events, out of which 0 have different HLT results"
    if wanted in report:
        return None  # No issues found

    return f"Error: {root_file} has different HLT results!"
0054
# Command-line interface for configuring the test run
from Configuration.HLT.autoHLT import autoHLT

# Offer only the menus whose autoHLT key mentions "Run4".
_run4_menu_choices = [menu_name for key, menu_name in autoHLT.items() if "Run4" in key]

parser = argparse.ArgumentParser(description="Run HLT Test Configurations")
parser.add_argument(
    "--menu",
    default="75e33_timing",
    choices=_run4_menu_choices,
    help="HLT menu to test",
)
# Conditions, geometry and era default to the values derived from the
# default Phase-2 version at the top of the file.
parser.add_argument(
    "--globaltag",
    default=_PH2_GLOBAL_TAG,
    help="GlobalTag for the CMS conditions",
)
parser.add_argument(
    "--geometry",
    default=_PH2_GEOMETRY,
    help="Geometry setting for the CMS process",
)
parser.add_argument(
    "--era",
    default=_PH2_ERA_NAME,
    help="Era setting for the CMS process",
)
parser.add_argument(
    "--events",
    type=int,
    default=10,
    help="Number of events to process",
)
parser.add_argument(
    "--parallelJobs",
    type=int,
    default=4,
    help="Number of parallel cmsRun HLT jobs",
)
parser.add_argument(
    "--threads",
    type=int,
    default=1,
    help="Number of threads to use",
)
parser.add_argument(
    "--restrictPathsTo",
    nargs='+',
    default=[],
    help="List of HLT paths to restrict to",
)
parser.add_argument(
    "--cachedInput",
    default=None,
    help="Predefined input file to use instead of running TTbar GEN,SIM step",
)
parser.add_argument(
    "--procModifiers",
    default=None,
    help="Optional process modifier for cmsDriver",
)
parser.add_argument(
    "--dryRun",
    action="store_true",
    help="Only generate configurations without running them",
)
0070
# Step 0: Record the start time and announce when the script started
start_time = time.time()
start_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(
    "------------------------------------------------------------",
    f"Script started at {start_timestamp}",
    "------------------------------------------------------------",
    sep="\n",
)

# Parse the command line (done after the banner, so the banner is printed
# even when argument parsing terminates the script)
args = parser.parse_args()

# Unpack the options into module-level names used by the rest of the script
menu, global_tag, era, geometry = args.menu, args.globaltag, args.era, args.geometry
num_events, num_threads, num_parallel_jobs = args.events, args.threads, args.parallelJobs
restrict_paths_to = args.restrictPathsTo
proc_modifiers = args.procModifiers

# Show the effective configuration
print(f"{'Configuration Summary':^40}")
print("=" * 40)
print(f"HLT Menu:             {menu}")
print(f"Global Tag:           {global_tag}")
print(f"Geometry:             {geometry}")
print(f"Era:                  {era}")
print(f"Num Events:           {num_events}")
print(f"Num Threads:          {num_threads}")
print(f"Num Parallel Jobs:    {num_parallel_jobs}")
# Optional settings are listed only when they were provided
if restrict_paths_to:
    print(f"Restricting paths to: {', '.join(restrict_paths_to)}")
if proc_modifiers:
    print(f"Proc Modifiers:       {proc_modifiers}")
# Report where the HLT input comes from
print(
    f"Using cached input file: {args.cachedInput}"
    if args.cachedInput
    else "Using regenerated GEN-SIM-DIGI-RAW file from scratch"
)
print("=" * 40)
0112
# All test configurations are stored in a directory named after the menu;
# any directory left over from a previous run is removed first.
output_dir = f"hlt_test_configs_HLTMENU_{menu}"
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Build the cmsDriver.py command that creates the base configuration.
# Both variants share the same leading and trailing options.
_cmsdriver_head = (
    f"cmsDriver.py Phase2 -s L1P2GT,HLT:{menu} "
    f"--conditions {global_tag} -n {num_events} --eventcontent FEVTDEBUGHLT "
    f"--geometry {geometry} --era {era} "
)
_cmsdriver_tail = (
    "--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 "
    '--customise_commands "process.options.wantSummary=True"'
)

if args.cachedInput:
    # Cached input: read the user-provided file, run under the process name
    # HLTX, and drop the input products listed in --inputCommands.
    base_cmsdriver_command = (
        _cmsdriver_head
        + f"--filein {args.cachedInput} --fileout {output_dir}/hlt.root --no_exec "
        + f"--mc --nThreads {num_threads} "
        + "--processName=HLTX "
        + "--inputCommands='keep *, drop *_hlt*_*_HLT, drop triggerTriggerFilterObjectWithRefs_l1t*_*_HLT' "
        + _cmsdriver_tail
    )
else:
    # No cached input: read the step1.root file written by the TTbar step.
    base_cmsdriver_command = (
        _cmsdriver_head
        + f"--filein file:{output_dir}/step1.root --fileout {output_dir}/hlt.root --no_exec "
        + f"--nThreads {num_threads} "
        + _cmsdriver_tail
    )

# Append the process modifiers when requested
if proc_modifiers:
    base_cmsdriver_command = f"{base_cmsdriver_command} --procModifiers {proc_modifiers}"
0148
# The base configuration file, the dumped configuration file, and the log
# that receives edmConfigDump's stderr
base_config_file = os.path.join(output_dir, "Phase2_L1P2GT_HLT.py")
dumped_config_file = os.path.join(output_dir, "Phase2_dump.py")
log_file = os.path.join(output_dir, "hlt.log")

# Step 1: Run the cmsDriver.py command to generate the base configuration in the output directory
print(f"Running cmsDriver.py to generate the base config: {base_config_file}")
cmsdriver_result = subprocess.run(base_cmsdriver_command, shell=True, cwd=output_dir)
if cmsdriver_result.returncode != 0:
    # Without a base configuration nothing downstream can work; stop here
    # instead of failing later with a misleading "No HLT paths found".
    print(f"cmsDriver.py failed with exit code {cmsdriver_result.returncode}. Exiting...")
    sys.exit(1)

# Step 2: Use edmConfigDump to dump the full configuration
print(f"Dumping the full configuration using edmConfigDump to {dumped_config_file}")
with open(dumped_config_file, "w") as dump_file, open(log_file, "w") as log:
    # stdout is the dumped configuration itself; stderr goes to the log file.
    dump_result = subprocess.run(f"edmConfigDump --prune {base_config_file}", shell=True, stdout=dump_file, stderr=log)
if dump_result.returncode != 0:
    # A failed dump leaves an empty or partial file that must not be parsed.
    print(f"edmConfigDump failed with exit code {dump_result.returncode}. Check {log_file} for details. Exiting...")
    sys.exit(1)
0162
# Step 3: Extract the list of HLT paths from the dumped configuration
print(f"Extracting HLT paths from {dumped_config_file}...")

# Load the whole dumped configuration; later steps reuse this text
with open(dumped_config_file, "r") as f:
    config_content = f.read()

# Collect every "process.<name>" reference whose name starts with HLT_, L1T_,
# DST_ or MC_. The whole dump is scanned, not only the schedule definition.
trigger_path_regex = re.compile(r"process\.((?:HLT|L1T|DST|MC)_[A-Za-z0-9_]+)")
unsorted_hlt_paths = trigger_path_regex.findall(config_content)

# De-duplicate, then order alphabetically
hlt_paths = list(set(unsorted_hlt_paths))
hlt_paths.sort()

if not hlt_paths:
    print("No HLT paths found in the schedule!")
    exit(1)

print(f"Found {len(hlt_paths)} HLT paths.")
0181
# Step 3b: When --restrictPathsTo is given, keep only the paths matching at
# least one of the shell-style wildcard patterns
if restrict_paths_to:
    valid_paths = set()

    for pattern in restrict_paths_to:
        matched = fnmatch.filter(hlt_paths, pattern)
        if matched:
            valid_paths.update(matched)
        else:
            # A pattern that selects nothing is reported but is not fatal
            print(f"Warning: No paths matched the pattern: {pattern}")

    # Back to a sorted list for deterministic processing order
    valid_paths = sorted(valid_paths)

    # Nothing left to test: give up
    if not valid_paths:
        print("Error: None of the specified patterns matched any paths. Exiting.")
        exit(1)

    # From here on the script only works with the restricted selection
    hlt_paths = valid_paths
    print(f"Using {len(hlt_paths)} HLT paths after applying restrictions.")
0209
# Step 4: Locate the process.schedule definition in the dumped configuration.
# Group 1 is the opening "process.schedule = cms.Schedule(*[", group 2 the
# text up to the first "]" that is followed (after optional whitespace) by
# ")", and group 3 that closing "])".
schedule_regex = re.compile(
    r"(process\.schedule\s*=\s*cms\.Schedule\(\*?\s*\[)([\s\S]+?)(\]\s*\))"
)
schedule_match = schedule_regex.search(config_content)

if not schedule_match:
    print("No schedule match found after tweaking regex! Exiting...")
    exit(1)
else:
    print(f"Matched schedule section.")

# Loop-invariant patterns used for every generated configuration
trigger_entry_regex = re.compile(r"process\.(HLT_|L1T_|DST_|MC_)")
output_file_regex = re.compile(r"fileName = cms\.untracked\.string\('.*'\)")

# Step 5: Generate N configurations by modifying the dumped config to keep only one path at a time
for path_name in hlt_paths:
    # Create a new configuration file for this path
    config_filename = os.path.join(output_dir, f"Phase2_{path_name}.py")

    # Match the current path as a whole identifier. A plain substring test
    # would also keep every path whose name merely starts with this one
    # (e.g. HLT_X would keep HLT_X_Y too), defeating the single-path test.
    current_path_regex = re.compile(rf"process\.{re.escape(path_name)}(?![A-Za-z0-9_])")

    # Rewrite the schedule entries: drop every HLT/L1T/DST/MC entry except the current one.
    # The regex is bound as a default argument so the function does not
    # depend on the loop variable's later values.
    def replace_hlt_paths(match, current_path_regex=current_path_regex):
        # Entries are assumed to be separated by ", " in the dumped text.
        all_paths = match.group(2).split(", ")
        # Keep non-HLT/L1T/DST/MC entries and only the current trigger path
        filtered_paths = [
            path for path in all_paths
            if not trigger_entry_regex.match(path) or current_path_regex.search(path)
        ]
        return match.group(1) + ", ".join(filtered_paths) + match.group(3)

    # Apply the rewrite to the schedule definition
    modified_content = schedule_regex.sub(replace_hlt_paths, config_content)

    # Point every matching fileName parameter to a ROOT file unique to this path
    modified_content = output_file_regex.sub(
        f"fileName = cms.untracked.string('{output_dir}/{path_name}.root')",
        modified_content
    )

    # Write the new config to a file
    with open(config_filename, "w") as new_config:
        new_config.write(modified_content)

    print(f"Generated config: {config_filename}")

print(f"Generated {len(hlt_paths)} configuration files in the {output_dir} directory.")
0255
# Step 6: Run cmsDriver.py for TTbar GEN,SIM steps and save the output in output_dir
ttbar_config_file = os.path.join(output_dir, "TTbar_GEN_SIM_step.py")
ttbar_log_file = os.path.join(output_dir, "ttbar_gen_sim.log")
ttbar_command = (
    f"cmsDriver.py TTbar_14TeV_TuneCP5_cfi -s GEN,SIM,DIGI:pdigi_valid,L1TrackTrigger,L1,L1P2GT,DIGI2RAW -n {num_events} "
    f"--conditions {global_tag} --beamspot DBrealisticHLLHC --datatier GEN-SIM-DIGI-RAW "
    f"--eventcontent FEVTDEBUG --geometry {geometry} --era {era} "
    f"--relval 9000,100 --fileout {output_dir}/step1.root --nThreads {num_threads} "
    f"--customise SLHCUpgradeSimulations/Configuration/aging.customise_aging_1000 "
    f"--python_filename {ttbar_config_file}"
)

# The input only needs to be produced when no cached file was supplied and
# the script is not in dry-run mode.
if not args.cachedInput and not args.dryRun:
    print("Running TTbar GEN,SIM step...")
    ttbar_status = run_command(ttbar_command, log_file=ttbar_log_file)
    if ttbar_status != 0:
        # The HLT jobs read the step1.root written by this step, so there is
        # no point in launching them after a failure here.
        print(f"TTbar GEN,SIM step failed with exit code {ttbar_status}. Check {ttbar_log_file} for details.")
        sys.exit(1)
0270
# The HLT test configurations live in the output directory
hlt_configs_dir = output_dir

# Stop right away when that directory is missing
if os.path.exists(hlt_configs_dir) is False:
    print(f"Directory {hlt_configs_dir} not found! Exiting...")
    exit(1)
0278
# Step 7: Function to run cmsRun on a given HLT config file and save the output
def run_cmsrun(config_file):
    """Run ``cmsRun`` on ``config_file`` and log its output in the output directory.

    The log file name is the config file's base name with "Phase2_" and ".py"
    removed, plus a ".log" suffix
    (e.g. "Phase2_HLT_IsoMu24_FromL1TkMuon.py" -> "HLT_IsoMu24_FromL1TkMuon.log").

    Args:
        config_file: Path of the configuration file passed to ``cmsRun``.

    Returns:
        0 on success, otherwise the non-zero exit code reported by ``run_command``.
    """
    stem = os.path.basename(config_file).replace("Phase2_", "").replace(".py", "")
    log_path = os.path.join(output_dir, f"{stem}.log")

    # Delegate execution and logging to run_command
    status = run_command(f"cmsRun {config_file}", log_file=log_path)

    if status == 0:
        print(f"cmsRun completed for {config_file}")
        return 0  # Success

    print(f"cmsRun failed for {config_file} with exit code {status}. Check {log_path} for details.")
    return status  # Propagate the error code
0295
# Step 8: Collect the configurations to run: every "Phase2_*.py" file in the
# configs directory except the full dump
config_files = [
    entry for entry in os.listdir(hlt_configs_dir)
    if entry != "Phase2_dump.py" and entry.startswith("Phase2_") and entry.endswith(".py")
]
print(f"Found {len(config_files)} configuration files in {hlt_configs_dir}.")

##### In dry-run mode the script ends once the configurations exist
if args.dryRun:
    print("Dry run mode activated. All configurations have been created.")
    exit(0)
0307
# Run cmsRun on all config files in parallel, remembering whether any job failed
error_occurred = False
with ThreadPoolExecutor(max_workers=num_parallel_jobs) as executor:
    # Map each submitted job to the config file it runs, for error reporting
    futures = {}
    for config_file in config_files:
        job = executor.submit(run_cmsrun, os.path.join(output_dir, config_file))
        futures[job] = config_file

    # Handle the jobs in the order in which they finish
    for future in as_completed(futures):
        config_file = futures[future]
        try:
            result_code = future.result()
            if result_code != 0:
                error_occurred = True
                print(f"cmsRun for {config_file} exited with code {result_code}")
        except Exception as exc:
            error_occurred = True
            print(f"cmsRun for {config_file} generated an exception: {exc}")

# A single failing job makes the whole test fail
if error_occurred:
    separator = "-" * 40
    print(separator)
    print("One or more cmsRun jobs failed. Exiting with failure.")
    print(separator)
    exit(1)

print("All cmsRun jobs submitted.")
0331
# Step 9: Compare all HLT root files using hltDiff
def compare_hlt_results(input_dir, num_events, max_workers=4):
    """Compare every single-path ROOT file in ``input_dir`` with ``hlt.root``.

    The files checked are those ending in ".root" whose name starts with
    "HLT_", "L1T_", "DST_" or "MC_". Each one is handed to
    ``compare_single_file`` in a thread pool. The script exits with status 1
    when the base file is missing or as soon as one comparison reports an error.

    Args:
        input_dir: Directory holding ``hlt.root`` and the per-path ROOT files.
        num_events: Number of events forwarded to ``compare_single_file``.
        max_workers: Maximum number of concurrent comparisons.
    """
    path_prefixes = ("HLT_", "L1T_", "DST_", "MC_")
    root_files = [
        name for name in os.listdir(input_dir)
        if name.endswith(".root") and name.startswith(path_prefixes)
    ]

    # Reference output to compare against
    base_root_file = os.path.join(input_dir, "hlt.root")
    if not os.path.exists(base_root_file):
        print(f"Base root file {base_root_file} not found! Exiting...")
        exit(1)

    # Run the comparisons in parallel
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(compare_single_file, root_file, base_root_file, num_events, input_dir)
            for root_file in root_files
        ]

        # Inspect the outcomes as they become available
        for future in as_completed(futures):
            result = future.result()
            if not result:
                continue
            # A non-empty result is an error message: report it and exit
            print("-" * 40)
            print(result)
            print("-" * 40)
            exit(1)

    print("All HLT comparisons passed with no differences.")
0364
# Step 10: All cmsRun jobs are done; run the hltDiff comparisons using the
# same degree of parallelism as the cmsRun jobs
print("Performing HLT result comparisons...")
compare_hlt_results(output_dir, num_events, max_workers=num_parallel_jobs)

# Step 11: Record the end time and print the end timestamp
end_time = time.time()
end_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(
    "------------------------------------------------------------",
    f"Script ended at {end_timestamp}",
    "------------------------------------------------------------",
    sep="\n",
)

# Step 12: Report the total execution time as HH:MM:SS
total_time = end_time - start_time
formatted_total_time = time.strftime("%H:%M:%S", time.gmtime(total_time))
print(
    "------------------------------------------------------------",
    f"Total execution time: {formatted_total_time}",
    "------------------------------------------------------------",
    sep="\n",
)

print("All steps completed successfully.")