import copy
import os
import math
import re
from datetime import date

import Alignment.OfflineValidation.TkAlAllInOneTool.findAndChange as fnc
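
# This module builds the condor job descriptions for the JetHT validation of the
# TkAl All-In-One tool: one set of "single" analysis jobs per dataset and alignment,
# plus optional "merge" and "plot" jobs that combine and draw the results.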

# Find the number of files in a file list. If the list defines a run number before
# each file, find the number of unique runs instead and return the list of runs
# together with their count.
def findNumberOfUnits(fileList):
    with open(fileList, "r") as inputFiles:
        fileContent = inputFiles.readlines()
        firstLine = fileContent[0].rstrip()
        runsInFiles = []

        # If each line only contains one file, return the number of files
        if len(firstLine.split()) == 1:
            nInputFiles = sum(1 for line in fileContent if line.rstrip())
            return runsInFiles, nInputFiles

        # We now know that the input file is in the format "run file".
        # Return the list of unique runs together with their number.
        for line in fileContent:
            run = line.split()[0]
            if run not in runsInFiles:
                runsInFiles.append(run)

        return runsInFiles, len(runsInFiles)
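
# Illustrative examples of the two accepted file list formats (hypothetical paths).
# A plain list of files returns an empty run list and the file count:
#
#   /store/data/Run2018A/JetHT/ALCARECO/file1.root
#   /store/data/Run2018A/JetHT/ALCARECO/file2.root
#
# gives ([], 2), whereas a "run file" formatted list
#
#   305040 /store/data/Run2018A/JetHT/ALCARECO/file1.root
#   305040 /store/data/Run2018A/JetHT/ALCARECO/file2.root
#   305043 /store/data/Run2018A/JetHT/ALCARECO/file3.root
#
# gives (['305040', '305043'], 2).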

def JetHT(config, validationDir):
    # Lists for all jobs and for merge jobs
    jobs = []
    mergeJobs = []
    runType = "single"

    # Find today's date
    today = date.today()
    dayFormat = today.strftime("%Y-%m-%d")

    # Start with single JetHT jobs
    if runType not in config["validations"]["JetHT"]:
        raise Exception("No 'single' keyword in config for JetHT")

    for datasetName in config["validations"]["JetHT"][runType]:
        for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:
            # Work directory for each alignment
            workDir = "{}/JetHT/{}/{}/{}".format(validationDir, runType, datasetName, alignment)

            # Write local config
            local = {}
            local["output"] = "{}/{}/JetHT/{}/{}/{}".format(config["LFS"], config["name"], runType, datasetName, alignment)
            local["alignment"] = copy.deepcopy(config["alignments"][alignment])
            local["validation"] = copy.deepcopy(config["validations"]["JetHT"][runType][datasetName])
            local["validation"].pop("alignments")

            useCMSdataset = False
            nInputFiles = 1
            runsInFiles = []
            if "dataset" in config["validations"]["JetHT"][runType][datasetName]:
                inputList = config["validations"]["JetHT"][runType][datasetName]["dataset"]

                # Check if the input is a CMS dataset (/primary/processed/tier) instead of a file list
                if re.match(r'^/[^/.]+/[^/.]+/[^/.]+$', inputList):
                    useCMSdataset = True
                # If it is not, read the number of files in the given file list
                else:
                    runsInFiles, nInputFiles = findNumberOfUnits(inputList)
            else:
                inputList = "needToHaveSomeDefaultFileHere.txt"

            if "filesPerJob" in config["validations"]["JetHT"][runType][datasetName]:
                filesPerJob = config["validations"]["JetHT"][runType][datasetName]["filesPerJob"]
            else:
                filesPerJob = 5

            # If we have defined which runs can be found in which files, define one
            # condor job per run number. Otherwise do file-based splitting.
            oneJobForEachRun = (len(runsInFiles) > 0)
            if oneJobForEachRun:
                nCondorJobs = nInputFiles
                local["runsInFiles"] = runsInFiles
            else:
                nCondorJobs = math.ceil(nInputFiles / filesPerJob)
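
            # For example (illustrative numbers): a plain list of 12 files with
            # filesPerJob = 5 gives math.ceil(12 / 5) = 3 condor jobs, while a
            # "run file" list with 3 unique runs gives exactly 3 jobs, one per run.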

            # Define lines that need to be changed from the template crab configuration
            crabCustomConfiguration = {"overwrite": [], "remove": [], "add": []}
            crabCustomConfiguration["overwrite"].append("inputList = '{}'".format(inputList))
            crabCustomConfiguration["overwrite"].append("jobTag = 'TkAlJetHTAnalysis_{}_{}_{}_{}'".format(runType, datasetName, alignment, dayFormat))
            crabCustomConfiguration["overwrite"].append("config.Data.unitsPerJob = {}".format(filesPerJob))

            # If a CMS dataset is defined instead of an input file list, make the
            # corresponding changes in the configuration file
            if useCMSdataset:
                crabCustomConfiguration["remove"].append("inputList")
                crabCustomConfiguration["remove"].append("config.Data.userInputFiles")
                crabCustomConfiguration["remove"].append("config.Data.totalUnits")
                crabCustomConfiguration["remove"].append("config.Data.outputPrimaryDataset")
                crabCustomConfiguration["overwrite"].pop(0)  # Remove inputList from the overwrite actions, since it is not used with a CMS dataset
                crabCustomConfiguration["add"].append("config.Data.inputDataset = '{}'".format(inputList))
                crabCustomConfiguration["add"].append("config.Data.inputDBS = 'global'")
local["crabCustomConfiguration"] = crabCustomConfiguration
# Write job info
job = {
"name": "JetHT_{}_{}_{}".format(runType, alignment, datasetName),
"dir": workDir,
"exe": "cmsRun",
"cms-config": "{}/src/Alignment/OfflineValidation/python/TkAlAllInOneTool/JetHT_cfg.py".format(os.environ["CMSSW_BASE"]),
"run-mode": "Condor",
"nCondorJobs": nCondorJobs,
"exeArguments": "validation_cfg.py config=validation.json jobNumber=$JOBNUMBER",
"dependencies": [],
"config": local,
}
jobs.append(job)
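
    # For reference, a minimal (hypothetical) configuration snippet that the loop
    # above consumes could look like:
    #
    #   "validations": {
    #     "JetHT": {
    #       "single": {
    #         "myDataset": {
    #           "alignments": ["prompt"],
    #           "dataset": "myFileList.txt",
    #           "filesPerJob": 5
    #         }
    #       }
    #     }
    #   }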

    # Merge jobs for JetHT
    if "merge" in config["validations"]["JetHT"]:
        ##Merge jobs are collected into mergeJobs and added to the main job list after the loop
        runType = "merge"

        ##Loop over all merge jobs/IOVs that are requested
        for datasetName in config["validations"]["JetHT"][runType]:
            for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:
                #Work directory for each alignment
                workDir = "{}/JetHT/{}/{}/{}".format(validationDir, runType, datasetName, alignment)

                inputDirectory = "{}/{}/JetHT/single/{}/{}".format(config["LFS"], config["name"], datasetName, alignment)
                outputDirectory = "{}/{}/JetHT/{}/{}/{}".format(config["LFS"], config["name"], runType, datasetName, alignment)

                # Configuration for the validateAlignments script
                local = {}
                local["output"] = outputDirectory

                # For the EOS directory, remove /eos/cms from the beginning of the LFS path
                eosInputDirectory = inputDirectory
                eosOutputDirectory = outputDirectory

                if inputDirectory.startswith("/eos/cms"):
                    eosInputDirectory = inputDirectory[8:]
                    eosOutputDirectory = outputDirectory[8:]

                # If the directory name starts with /store, we must be working with EOS files
                localRun = "true"
                if eosInputDirectory.startswith("/store"):
                    localRun = "false"

                #Write job info
                job = {
                    "name": "JetHT_{}_{}_{}".format(runType, alignment, datasetName),
                    "dir": workDir,
                    "exe": "addHistograms.sh",
                    "exeArguments": "{} {} {} JetHTAnalysis_merged".format(localRun, eosInputDirectory, eosOutputDirectory),
                    "run-mode": "Condor",
                    "flavour": "espresso",
                    "config": local,
                    "dependencies": [],
                }

                ##Loop over all single jobs and set them as dependencies of the merge job
                for singleJob in jobs:
                    ##Get single job info and append it to the merge job if the requirements are fulfilled
                    singleAlignment, singleDatasetName = singleJob["name"].split("_")[2:]

                    if singleDatasetName in config["validations"]["JetHT"][runType][datasetName]["singles"]:
                        if singleAlignment == alignment:
                            job["dependencies"].append(singleJob["name"])

                mergeJobs.append(job)

        jobs.extend(mergeJobs)

    # Plotting for JetHT
    if "plot" in config["validations"]["JetHT"]:
        ##Plot jobs are collected into plotJobs and added to the main job list after the loop
        plotJobs = []
        runType = "plot"

        ##Loop over all plot jobs/IOVs that are requested
        for datasetName in config["validations"]["JetHT"][runType]:
            #Work and output directories for each dataset
            workDir = "{}/JetHT/{}/{}".format(validationDir, runType, datasetName)
            outputDirectory = "{}/{}/JetHT/{}/{}".format(config["LFS"], config["name"], runType, datasetName)

            # Configuration for the validateAlignments script
            local = {}
            if "jethtplot" in config["validations"]["JetHT"][runType][datasetName]:
                local["jethtplot"] = copy.deepcopy(config["validations"]["JetHT"][runType][datasetName]["jethtplot"])
            else:
                local["jethtplot"] = {}  # Ensure the key exists even if no jethtplot configuration is given; it is filled below
            local["output"] = outputDirectory

            # If the pT binning was changed for the validation job, it needs to be changed for plotting as well
            if "profilePtBorders" in config["validations"]["JetHT"]["single"][datasetName]:
                local["jethtplot"]["widePtBinBorders"] = config["validations"]["JetHT"]["single"][datasetName]["profilePtBorders"]

            local["jethtplot"]["alignments"] = {}

            # Draw all the alignments for each dataset in the same plot
            for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:
                inputDirectory = "{}/{}/JetHT/merge/{}/{}".format(config["LFS"], config["name"], datasetName, alignment)
                eosInputFile = inputDirectory + "/JetHTAnalysis_merged.root"

                # If an EOS file path is given, remove /eos/cms from the beginning of the file name
                if eosInputFile.startswith("/eos/cms"):
                    eosInputFile = eosInputFile[8:]

                # If the file name starts with /store, add the CERN EOS path to the file name
                if eosInputFile.startswith("/store"):
                    eosInputFile = "root://eoscms.cern.ch/" + eosInputFile
local["jethtplot"]["alignments"][alignment] = copy.deepcopy(config["alignments"][alignment])
local["jethtplot"]["alignments"][alignment]["inputFile"] = eosInputFile
local["jethtplot"]["alignments"][alignment]["legendText"] = config["alignments"][alignment]["title"]
# Check that luminosity per IOV file is defined
if not "lumiPerIovFile" in local["jethtplot"]:
local["jethtplot"]["lumiPerIovFile"] = fnc.digest_path("Alignment/OfflineValidation/data/lumiPerRun_Run2.txt")
#Write job info
job = {
"name": "JetHT_{}_{}".format(runType, datasetName),
"dir": workDir,
"exe": "jetHtPlotter",
"run-mode": "Condor",
"flavour": "espresso",
"config": local,
"dependencies": [],
}
##Loop over all merge jobs and set them dependencies for the plot job
for mergeJob in mergeJobs:
##Get merge job info and append to plot job if requirements are fulfilled
mergeAlignment, mergeDatasetName = mergeJob["name"].split("_")[2:]
if mergeDatasetName in config["validations"]["JetHT"][runType][datasetName]["merges"]:
job["dependencies"].append(mergeJob["name"])
plotJobs.append(job)
jobs.extend(plotJobs)
return jobs
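
# Minimal usage sketch (illustrative; in production this function is invoked by the
# All-In-One validation driver with the parsed validation configuration):
#
#   import json
#   with open("validation.json") as configFile:  # hypothetical config file
#       config = json.load(configFile)
#   jobs = JetHT(config, "/tmp/jetHtValidation")
#   for job in jobs:
#       print(job["name"], "->", job["dependencies"])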