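"""Job configuration generator for the JetHT validation of the TkAl all-in-one tool.

Builds the single, merge and plot job dictionaries that the validateAlignments
driver submits via Condor.
"""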
import copy
import os
import math
import re
from datetime import date
import Alignment.OfflineValidation.TkAlAllInOneTool.findAndChange as fnc

# Find the number of files in a file list. If the list defines a run number before each file, find the unique runs instead and return the list of runs together with their count.
def findNumberOfUnits(fileList):
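    """Count the processing units in a file list.

    Two input formats are supported; the paths and run number below are
    illustrative examples only:

        /store/data/.../file1.root          one file per line
        315257 /store/data/.../file1.root   "run file" pairs, one per line

    Returns a tuple (runsInFiles, nUnits). For a plain file list runsInFiles is
    empty and nUnits is the number of non-empty lines; otherwise the list of
    unique runs and their count are returned.
    """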

    with open(fileList,"r") as inputFiles:

        fileContent = inputFiles.readlines()
        firstLine = fileContent[0].rstrip()
        runsInFiles = []

        # If each line only contains one file, return the number of files
        if len(firstLine.split()) == 1:
            nInputFiles = sum(1 for line in fileContent if line.rstrip())
            return runsInFiles, nInputFiles

        # We now know that the input file is in the format "run file". Return the unique runs together with their count
        for line in fileContent:
            run = line.split()[0]
            if run not in runsInFiles:
                runsInFiles.append(run)

        return runsInFiles, len(runsInFiles)

def JetHT(config, validationDir):
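    """Define the jobs for the JetHT validation.

    config is the parsed all-in-one configuration dictionary and validationDir
    the base working directory. Returns a flat list of job dictionaries for the
    single, merge and plot steps, including their Condor settings and mutual
    dependencies.
    """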

    # Lists for all jobs and for the merge jobs
    jobs = []
    mergeJobs = []
    runType = "single"

    # Find today's date
    today = date.today()
    dayFormat = today.strftime("%Y-%m-%d")

    # Start with single JetHT jobs
    if runType not in config["validations"]["JetHT"]:
        raise Exception("No 'single' keyword in config for JetHT")

    for datasetName in config["validations"]["JetHT"][runType]:

        for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:
            # Work directory for each alignment
            workDir = "{}/JetHT/{}/{}/{}".format(validationDir, runType, datasetName, alignment)

            # Write local config
            local = {}
            local["output"] = "{}/{}/JetHT/{}/{}/{}".format(config["LFS"], config["name"], runType, datasetName, alignment)
            local["alignment"] = copy.deepcopy(config["alignments"][alignment])
            local["validation"] = copy.deepcopy(config["validations"]["JetHT"][runType][datasetName])
            local["validation"].pop("alignments")

            useCMSdataset = False
            nInputFiles = 1
            runsInFiles = []
            if "dataset" in config["validations"]["JetHT"][runType][datasetName]:
                inputList = config["validations"]["JetHT"][runType][datasetName]["dataset"]

                # Check if the input is a CMS dataset instead of a file list
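                # (illustrative example: a DAS-style dataset path such as /JetHT/Run2018A-TkAlMinBias-v1/ALCARECO matches the pattern below, while a plain file list name like myFileList.txt does not)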
                if re.match( r'^/[^/.]+/[^/.]+/[^/.]+$', inputList ):
                    useCMSdataset = True

                # If it is not, read the number of files in the given file list
                else:
                    runsInFiles, nInputFiles = findNumberOfUnits(inputList)
            else:
                inputList = "needToHaveSomeDefaultFileHere.txt"

            if "filesPerJob" in config["validations"]["JetHT"][runType][datasetName]:
                filesPerJob = config["validations"]["JetHT"][runType][datasetName]["filesPerJob"]
            else:
                filesPerJob = 5

            # If we know which runs can be found in which files, define one condor job per run. Otherwise do file-based splitting.
            oneJobForEachRun = (len(runsInFiles) > 0)
            if oneJobForEachRun:
                nCondorJobs = nInputFiles
                local["runsInFiles"] = runsInFiles
            else:
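                # (illustrative: 12 input files with filesPerJob = 5 give math.ceil(12 / 5) = 3 condor jobs)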
                nCondorJobs = math.ceil(nInputFiles / filesPerJob)

            # Define lines that need to be changed from the template crab configuration
            crabCustomConfiguration = {"overwrite":[], "remove":[], "add":[]}
            crabCustomConfiguration["overwrite"].append("inputList = \'{}\'".format(inputList))
            crabCustomConfiguration["overwrite"].append("jobTag = \'TkAlJetHTAnalysis_{}_{}_{}_{}\'".format(runType, datasetName, alignment, dayFormat))
            crabCustomConfiguration["overwrite"].append("config.Data.unitsPerJob = {}".format(filesPerJob))

            # If a CMS dataset is defined instead of an input file list, make the corresponding changes in the configuration file
            if useCMSdataset:
                crabCustomConfiguration["remove"].append("inputList")
                crabCustomConfiguration["remove"].append("config.Data.userInputFiles")
                crabCustomConfiguration["remove"].append("config.Data.totalUnits")
                crabCustomConfiguration["remove"].append("config.Data.outputPrimaryDataset")
                crabCustomConfiguration["overwrite"].pop(0) # Remove inputList from the overwrite actions, since it is not used for a CMS dataset
                crabCustomConfiguration["add"].append("config.Data.inputDataset = \'{}\'".format(inputList))
                crabCustomConfiguration["add"].append("config.Data.inputDBS = \'global\'")

            local["crabCustomConfiguration"] = crabCustomConfiguration

            # Write job info
            job = {
                "name": "JetHT_{}_{}_{}".format(runType, alignment, datasetName),
                "dir": workDir,
                "exe": "cmsRun",
                "cms-config": "{}/src/Alignment/OfflineValidation/python/TkAlAllInOneTool/JetHT_cfg.py".format(os.environ["CMSSW_BASE"]),
                "run-mode": "Condor",
                "nCondorJobs": nCondorJobs,
                "exeArguments": "validation_cfg.py config=validation.json jobNumber=$JOBNUMBER",
                "dependencies": [],
                "config": local,
            }

            jobs.append(job)

    # Merge jobs for JetHT
    if "merge" in config["validations"]["JetHT"]:
        ##Merge jobs are added to the main job list after the loop
        runType = "merge"

        ##Loop over all requested merge datasets
        for datasetName in config["validations"]["JetHT"][runType]:

            for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:

                #Work directory for each alignment
                workDir = "{}/JetHT/{}/{}/{}".format(validationDir, runType, datasetName, alignment)

                inputDirectory = "{}/{}/JetHT/single/{}/{}".format(config["LFS"], config["name"], datasetName, alignment)
                outputDirectory = "{}/{}/JetHT/{}/{}/{}".format(config["LFS"], config["name"], runType, datasetName, alignment)

                # Configuration for validateAlignments script
                local = {}
                local["output"] = outputDirectory

                # For an EOS directory, remove /eos/cms from the beginning of the LFS path
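                # (illustrative: /eos/cms/store/user/someone/output  ->  /store/user/someone/output)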
                eosInputDirectory = inputDirectory
                eosOutputDirectory = outputDirectory

                if inputDirectory.startswith("/eos/cms"):
                    eosInputDirectory = inputDirectory[8:]
                    eosOutputDirectory = outputDirectory[8:]

                # If the directory name starts with /store, we must be working with eos files
                localRun = "true"
                if eosInputDirectory.startswith("/store"):
                    localRun = "false"

                #Write job info
                job = {
                    "name": "JetHT_{}_{}_{}".format(runType, alignment, datasetName),
                    "dir": workDir,
                    "exe": "addHistograms.sh",
                    "exeArguments": "{} {} {} JetHTAnalysis_merged".format(localRun, eosInputDirectory, eosOutputDirectory),
                    "run-mode": "Condor",
                    "flavour": "espresso",
                    "config": local,
                    "dependencies": [],
                }

                ##Loop over all single jobs and set them as dependencies for the merge job
                for singleJob in jobs:
                    ##Get single job info and append to merge job if requirements are fulfilled
                    singleAlignment, singleDatasetName = singleJob["name"].split("_")[2:]

                    if singleDatasetName in config["validations"]["JetHT"][runType][datasetName]["singles"]:
                        if singleAlignment == alignment:
                            job["dependencies"].append(singleJob["name"])

                mergeJobs.append(job)

        jobs.extend(mergeJobs)

    # Plotting for JetHT
    if "plot" in config["validations"]["JetHT"]:
        ##List with plot jobs, will be added to the job list after looping
        plotJobs = []
        runType = "plot"

        ##Loop over all requested plot datasets
        for datasetName in config["validations"]["JetHT"][runType]:

            #Work and output directories for each dataset
            workDir = "{}/JetHT/{}/{}".format(validationDir, runType, datasetName)
            outputDirectory = "{}/{}/JetHT/{}/{}".format(config["LFS"], config["name"], runType, datasetName)

            # Configuration for validateAlignments script
            local = {}
            if "jethtplot" in config["validations"]["JetHT"][runType][datasetName]:
                local["jethtplot"] = copy.deepcopy(config["validations"]["JetHT"][runType][datasetName]["jethtplot"])
            else:
                # Start from an empty plotting configuration if none is given, so the keys below can be filled without a KeyError
                local["jethtplot"] = {}
            local["output"] = outputDirectory

            # If the pT binning was changed for the validation job, it needs to be changed for plotting as well
            if "profilePtBorders" in config["validations"]["JetHT"]["single"][datasetName]:
                local["jethtplot"]["widePtBinBorders"] = config["validations"]["JetHT"]["single"][datasetName]["profilePtBorders"]

            local["jethtplot"]["alignments"] = {}

            # Draw all the alignments for each dataset in the same plot
            for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:

                inputDirectory = "{}/{}/JetHT/merge/{}/{}".format(config["LFS"], config["name"], datasetName, alignment)

                eosInputFile = inputDirectory + "/JetHTAnalysis_merged.root"

                # If an EOS file path is given, remove /eos/cms from the beginning of the file name
                if eosInputFile.startswith("/eos/cms"):
                    eosInputFile = eosInputFile[8:]

                # If the file name starts with /store, add the CERN EOS path to the file name
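                # (illustrative: /store/user/someone/JetHTAnalysis_merged.root  ->  root://eoscms.cern.ch//store/user/someone/JetHTAnalysis_merged.root)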
                if eosInputFile.startswith("/store"):
                    eosInputFile = "root://eoscms.cern.ch/" + eosInputFile

                local["jethtplot"]["alignments"][alignment] = copy.deepcopy(config["alignments"][alignment])
                local["jethtplot"]["alignments"][alignment]["inputFile"] = eosInputFile
                local["jethtplot"]["alignments"][alignment]["legendText"] = config["alignments"][alignment]["title"]

            # Check that the luminosity-per-IOV file is defined
            if "lumiPerIovFile" not in local["jethtplot"]:
                local["jethtplot"]["lumiPerIovFile"] = fnc.digest_path("Alignment/OfflineValidation/data/lumiPerRun_Run2.txt")

            #Write job info
            job = {
                "name": "JetHT_{}_{}".format(runType, datasetName),
                "dir": workDir,
                "exe": "jetHtPlotter",
                "run-mode": "Condor",
                "flavour": "espresso",
                "config": local,
                "dependencies": [],
            }

            ##Loop over all merge jobs and set them as dependencies for the plot job
            for mergeJob in mergeJobs:
                ##Get merge job info and append to plot job if requirements are fulfilled
                mergeAlignment, mergeDatasetName = mergeJob["name"].split("_")[2:]

                if mergeDatasetName in config["validations"]["JetHT"][runType][datasetName]["merges"]:
                    job["dependencies"].append(mergeJob["name"])

            plotJobs.append(job)

        jobs.extend(plotJobs)

    return jobs