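"""Create json config files for user-defined IOVs.

Reads a list of datasets, queries DBS for their files, splits the certified
luminosity sections of each IOV into an alignment and a validation sample,
and writes the corresponding json files and CMSSW source (cff) fragments.

Example invocation (file names are illustrative):
    python <this script> --data-txt datasets.txt --json-input golden.json \
        --iov-txt iov_boundaries.txt --N-max-IOV 1000000
"""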
import json
import argparse
import subprocess
import multiprocessing
from dbs.apis.dbsClient import DbsApi
from random import shuffle
import time
import os


def parser():
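    """Parse the command-line options and return them as a Namespace."""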
    parser = argparse.ArgumentParser(description="Create json config files for your defined IOV")

    parser.add_argument("--json-input", type=str, help="Input json file", default="")
    parser.add_argument("--data-txt", type=str, help="Txt file with data set names", required=True)
    parser.add_argument("--N-max-IOV", type=int, help="Maximum number of events per IOV", default=int(1e20))
    parser.add_argument("--rm-bad-runs", type=str, help="Txt file with bad runs to be removed from the json config")
    parser.add_argument("--iov-txt", type=str, help="Txt file with IOV boundaries", default="")
    parser.add_argument("--out-data", type=str, help="Name of skimmed file with list of data file names", default="skimmed_dataset")
    parser.add_argument("--out-dir", type=str, help="Output dir name", default="configs_" + "_".join([str(time.localtime()[i]) for i in range(6)]))

    return parser.parse_args()


def getFileInfo(filename):
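    """Ask edmFileUtil for the (run, lumi, events) triplets contained in
    filename; on failure the bare file name is returned instead."""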
    print("Processing: {}".format(filename))

    try:
        # edmFileUtil prints a header first: skip the first 14 tokens, then
        # read the remaining output as (run, lumi, events) triplets
        edmFileUtilArgs = ['edmFileUtil', '-f', filename, '--eventsInLumis']
        fullRunInfo = subprocess.check_output(edmFileUtilArgs).decode().split()[14:]
        runInfo = [tuple(fullRunInfo[index:index+3]) for index in range(0, len(fullRunInfo), 3)]

    except (subprocess.CalledProcessError, OSError):
        print("Not at CERN: {}".format(filename))
        runInfo = filename

    return runInfo


def getFileList(dataset):
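    """Query DBS for the non-empty files of dataset, provided dasgoclient
    reports it as available at T2_CH_CERN."""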
    filelist = []
    emptyfiles = []
    nEvents = 0

    dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')

    print("Processing: {}".format(dataset))
    sites = subprocess.check_output(["dasgoclient", "--query", "site dataset={}".format(dataset)]).decode().split()

    if "T2_CH_CERN" in sites:
        for f in dbs.listFileArray(dataset=dataset, detail=1):
            filename = f['logical_file_name']
            nevents = f['event_count']

            if nevents != 0:
                filelist.append(filename)
                nEvents += nevents

            else:
                emptyfiles.append(filename)

    else:
        print("Not at CERN: {}".format(dataset))

    return filelist, emptyfiles, nEvents


def fillJson(runJson, listIOV, filelist, nMax, outDir):
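    """Distribute the certified lumi sections of runJson over the IOVs defined
    by listIOV, filling alignment jsons with at most nMax events per IOV and
    validation jsons with the remainder."""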
    # True when value lies inside the inclusive range [lower, upper]
    inRange = lambda lower, value, upper: lower <= int(value) <= upper

    if listIOV:
        lowerBoundaries = [int(run) for run in listIOV[:-1]]
        upperBoundaries = [int(run) - 1 for run in listIOV[1:]]

    else:
        lowerBoundaries = [0]
        upperBoundaries = [float("inf")]

    # Read the per-lumi content of all files in parallel
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    results = [pool.apply_async(getFileInfo, (filename,)) for filename in filelist]
    output = [result.get() for result in results]

    # getFileInfo returns a list on success and the bare file name on failure,
    # so pair each result with its file name before filtering out the failures
    fileInfo = [(info, filename) for (info, filename) in zip(output, filelist) if isinstance(info, list)]
    notAtCern = [info for info in output if isinstance(info, str)]

    with open("{}/filesNotAtCern.txt".format(outDir), "w") as filesNotCern:
        for filename in notAtCern:
            filesNotCern.write(filename + "\n")

    runDic = {}

    # runDic[run][lumi] = (events, file name) for every lumi section found
    for (runInfo, filename) in fileInfo:
        for (run, lumi, events) in runInfo:
            # Skip the header tokens that edmFileUtil prints
            if events not in ["Events", "Lumi"]:
                try:
                    runDic[int(run)][int(lumi)] = (int(events), filename)

                except KeyError:
                    runDic[int(run)] = {int(lumi): (int(events), filename)}


    jsonAlign = [{} for index in lowerBoundaries]
    jsonVali = [{} for index in lowerBoundaries]
    eventsInTotal = [0 for index in lowerBoundaries]
    eventsInAlign = [0 for index in lowerBoundaries]
    eventsInVali = [0 for index in lowerBoundaries]

    if runJson:
        # Shuffle the runs so the alignment/validation split is unbiased
        runJson = list(runJson.items())
        shuffle(runJson)
        filelist = {}

    else:
        return jsonAlign, jsonVali, set(filelist)

    for (run, value) in runJson:
        try:
            # Index of the IOV this run falls into; .index raises ValueError
            # if the run is outside all IOVs
            index = [inRange(lower, run, upper) for (lower, upper) in zip(lowerBoundaries, upperBoundaries)].index(True)

            if int(run) in runDic:
                alignLumi = [[]]
                valiLumi = [[]]

                # Iterate the lumi sections in increasing order so consecutive
                # certified lumis can be merged into [first, last] ranges
                for (lumi, lumiInfo) in sorted(runDic[int(run)].items()):
                    eventsInTotal[index] += lumiInfo[0]

                    if eventsInAlign[index] < nMax:
                        # Uncertified lumi: close the currently open range
                        if True not in [inRange(lower, lumi, upper) for lower, upper in value]:
                            if len(alignLumi[-1]) != 0:
                                alignLumi.append([])
                            continue

                        eventsInAlign[index] += lumiInfo[0]
                        filelist.setdefault(index, set()).add(lumiInfo[1])

                        if len(alignLumi[-1]) == 0:
                            alignLumi[-1] = [lumi, lumi]

                        else:
                            alignLumi[-1][1] = lumi

                    else:
                        # Alignment quota reached: remaining lumis go to validation
                        if True not in [inRange(lower, lumi, upper) for lower, upper in value]:
                            if len(valiLumi[-1]) != 0:
                                valiLumi.append([])
                            continue

                        eventsInVali[index] += lumiInfo[0]
                        if len(valiLumi[-1]) == 0:
                            valiLumi[-1] = [lumi, lumi]

                        else:
                            valiLumi[-1][1] = lumi

                alignLumi = [element for element in alignLumi if len(element) != 0]
                valiLumi = [element for element in valiLumi if len(element) != 0]

                if len(alignLumi) != 0:
                    jsonAlign[index][str(run)] = alignLumi

                if len(valiLumi) != 0:
                    jsonVali[index][str(run)] = valiLumi

        except ValueError:
            # Run outside all IOV boundaries
            pass

    with open("{}/eventsUsed.txt".format(outDir), "w") as eventsUsed:
        for index in range(len(eventsInTotal)):
            eventsUsed.write("Events used in Total for IOV {}: {}\n".format(lowerBoundaries[index], eventsInTotal[index]))
            eventsUsed.write("Events used for Alignment for IOV {}: {}\n".format(lowerBoundaries[index], eventsInAlign[index]))
            eventsUsed.write("Events used for Validation for IOV {}: {}\n".format(lowerBoundaries[index], eventsInVali[index]))

    return jsonAlign, jsonVali, filelist


def main():
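    """Collect the dataset files and write the json and cff files per IOV."""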

    args = parser()

    os.makedirs(args.out_dir, exist_ok=True)

    filelist = []
    emptyfiles = []
    nEvents = []
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())

    # Query DBS for all datasets in parallel
    with open(args.data_txt, "r") as datasets:
        results = [pool.apply_async(getFileList, (dataset.strip(),)) for dataset in datasets]

    for result in results:
        files, empties, events = result.get()
        filelist.extend(files)
        emptyfiles.extend(empties)
        nEvents.append(events)

    with open("{}/emptyFiles.txt".format(args.out_dir), "w") as empty:
        for emptyFile in emptyfiles:
            empty.write(emptyFile + '\n')

    if args.iov_txt:
        with open(args.iov_txt) as fIOV:
            listIOV = [line.strip() for line in fIOV]

    else:
        listIOV = args.iov_txt

    if args.json_input:
        with open(args.json_input) as fJson:
            runJson = json.load(fJson)

    else:
        runJson = args.json_input

    jsonAlign, jsonVali, filelist = fillJson(runJson, listIOV, filelist, args.N_max_IOV, args.out_dir)

    if args.rm_bad_runs is not None:
        with open(args.rm_bad_runs, "r") as badRuns:
            for badRun in badRuns:
                # The json dictionaries are keyed by run number as string
                for dic in jsonAlign:
                    dic.pop(str(int(badRun)), None)

                for dic in jsonVali:
                    dic.pop(str(int(badRun)), None)

    # Template for the cff fragments that configure one PoolSource per IOV
    pyTempl = """import FWCore.ParameterSet.Config as cms
import FWCore.PythonUtilities.LumiList as LumiList

lumiSecs = cms.untracked.VLuminosityBlockRange()
goodLumiSecs = LumiList.LumiList(filename = '{json}').getCMSSWString().split(',')
readFiles = cms.untracked.vstring()
source = cms.Source("PoolSource",
                    lumisToProcess = lumiSecs,
                    fileNames = readFiles)
readFiles.extend([
{filenames}
])
lumiSecs.extend(goodLumiSecs)
maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1))
"""

    if not args.iov_txt:
        # fillJson returns a set of file names without --json-input, otherwise
        # a dict {IOV index: set of file names}; flatten in the latter case
        with open("{}/{}.txt".format(args.out_dir, args.out_data), "w") as outData:
            filenames = filelist if not isinstance(filelist, dict) else set().union(*filelist.values())
            for filename in sorted(filenames):
                outData.write(filename + '\n')

    if args.iov_txt and args.json_input:
        for index, (jsonContent, runNumber) in enumerate(zip(jsonAlign, [int(run) for run in listIOV[:-1]])):
            with open("{}/IOV_Align_{}.json".format(args.out_dir, runNumber), "w") as fAlignJson:
                json.dump(jsonContent, fAlignJson, sort_keys=True, indent=4, separators=(',', ': '))

        for index, (jsonContent, runNumber) in enumerate(zip(jsonVali, [int(run) for run in listIOV[:-1]])):
            with open("{}/IOV_Vali_{}.json".format(args.out_dir, runNumber), "w") as fValiJson:
                json.dump(jsonContent, fValiJson, sort_keys=True, indent=4, separators=(',', ': '))

            # One cff fragment per IOV; IOVs without selected files get an empty list
            with open("{}/{}_since{}_cff.py".format(args.out_dir, args.out_data, runNumber), "w") as outData:
                outData.write(pyTempl.format(json=os.path.abspath("{}/IOV_Vali_{}.json".format(args.out_dir, runNumber)),
                                             filenames=",\n".join("'{}'".format(filename) for filename in sorted(filelist.get(index, [])))))

    if args.json_input:
        # Merge the per-IOV dictionaries into one json each
        mergeJsonAlign = {}
        for jsonDic in jsonAlign:
            mergeJsonAlign.update(jsonDic)

        mergeJsonVali = {}
        for jsonDic in jsonVali:
            mergeJsonVali.update(jsonDic)

        with open("{}/Align.json".format(args.out_dir), "w") as fAlignJson:
            json.dump(mergeJsonAlign, fAlignJson, sort_keys=True, indent=4, separators=(',', ': '))

        with open("{}/Vali.json".format(args.out_dir), "w") as fValiJson:
            json.dump(mergeJsonVali, fValiJson, sort_keys=True, indent=4, separators=(',', ': '))

    # fillJson only writes eventsUsed.txt when a json input is given; write a
    # summary from the DBS event counts otherwise
    if not os.path.exists("{}/eventsUsed.txt".format(args.out_dir)):
        with open("{}/eventsUsed.txt".format(args.out_dir), "w") as eventsUsed:
            eventsUsed.write("Events used for Alignment: {}\n".format(sum(nEvents)))
            eventsUsed.write("Events used for Validation: {}\n".format(0))


if __name__ == "__main__":
    main()