Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 11:57:12

0001 #!/usr/bin/env python
0002 
0003 import json
0004 import argparse
0005 import subprocess
0006 import multiprocessing
0007 from pprint import pprint
0008 from dbs.apis.dbsClient import DbsApi
0009 from random import shuffle
0010 import time
0011 import os
0012 
def parser():
    """Define the command line options of the script and parse them.

    Only ``--data-txt`` is mandatory. ``--json-input`` and ``--iov-txt``
    default to empty (falsy) containers, so a plain truth test tells whether
    the option was given. The default output directory is ``configs_``
    followed by the first six fields of the local time (year to second)
    joined by underscores.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    stamp = time.localtime()
    defaultOutDir = "configs_" + "_".join(str(stamp[field]) for field in range(6))

    ##One entry per option, registered in this order
    options = [
        ("--json-input", dict(type=str, help="Input json file", default={})),
        ("--data-txt", dict(type=str, help="Txt file with data set names", required=True)),
        ("--N-max-IOV", dict(type=int, help="Maximum number of events per IOV", default=1e20)),
        ("--rm-bad-runs", dict(type=str, help="Remove bad runs from json config")),
        ("--iov-txt", dict(type=str, help="Txt file with IOV boundaries", default=[])),
        ("--out-data", dict(type=str, help="Name of skimmed file with list of data file names", default="skimmed_dataset")),
        ("--out-dir", dict(type=str, help="Output dir name", default=defaultOutDir)),
    ]

    argParser = argparse.ArgumentParser(description='Create json config files for your defined IOV')

    for flag, settings in options:
        argParser.add_argument(flag, **settings)

    return argParser.parse_args()
0026 
0027 
##Called in fillJson function in parallel
def getFileInfo(filename):
    """Query ``edmFileUtil`` for the events per lumi section of one file.

    Runs ``edmFileUtil -f <filename> --eventsInLumis`` and groups the
    whitespace separated tokens of its output, after skipping the first 14
    tokens, into consecutive triples.
    NOTE(review): the 14 skipped tokens are presumably the header of the
    printout -- confirm against the edmFileUtil output format.

    Args:
        filename: Name of the file handed to ``edmFileUtil``.

    Returns:
        A list of 3-tuples of tokens on success. If the command cannot be
        started or exits with a non-zero status, the file is reported as
        "Not at CERN" and ``filename`` itself is returned, so the caller can
        tell failures from results by the type of the return value.
    """
    print("Processing: {}".format(filename))

    edmFileUtilArgs = ['edmFileUtil', '-f', filename, '--eventsInLumis']

    ##Only the external call is guarded. Failures of the command itself are
    ##handled here, everything else (e.g. KeyboardInterrupt) propagates
    try:
        commandOutput = subprocess.check_output(edmFileUtilArgs)

    ##File not at CERN
    except (subprocess.CalledProcessError, OSError):
        print("Not at CERN {}".format(filename))
        return filename

    fullRunInfo = commandOutput.split()[14:]

    return [tuple(fullRunInfo[index:index+3]) for index in range(0, len(fullRunInfo), 3)]
0044 
##Called in parallel in the main function
def getFileList(dataset):
    """Collect the files of a data set, provided it is hosted at T2_CH_CERN.

    The hosting sites are obtained with ``dasgoclient`` and the files with
    ``DbsApi.listFileArray``. Files with an event count of zero are reported
    separately from the others.

    Args:
        dataset: Name of the data set; newline characters are removed before
            the file query.

    Returns:
        Tuple ``(filelist, emptyfiles, nEvents)``: names of the files with
        events, names of the files without events and the summed event count
        of the former. All three are empty/zero if ``T2_CH_CERN`` is not
        among the sites of the data set.
    """
    ##Reader used to find the files of the dataset
    dbsReader = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')

    print("Processing: {}".format(dataset))
    siteQuery = ["dasgoclient", "--query", "site dataset={}".format(dataset)]
    hostingSites = subprocess.check_output(siteQuery).split()

    ##Nothing to collect for data sets which are not at CERN
    if "T2_CH_CERN" not in hostingSites:
        print("Not at CERN {}".format(dataset))
        return [], [], 0

    filledFiles, voidFiles, eventSum = [], [], 0

    for entry in dbsReader.listFileArray(dataset=dataset.replace("\n", ""), detail=1):
        logicalName = entry['logical_file_name']
        eventCount = entry['event_count']

        if eventCount == 0:
            voidFiles.append(logicalName)

        else:
            filledFiles.append(logicalName)
            eventSum += eventCount

    return filledFiles, voidFiles, eventSum
0074 
0075 
def fillJson(runJson, listIOV, filelist, nMax, outDir):
    """Split the certified lumi sections of each IOV into an alignment and a validation part.

    The run/lumi/event content of every file is read in parallel with
    ``getFileInfo``. For each run of ``runJson`` lying in an IOV, the lumi
    sections found in the files are visited in increasing order: certified
    lumi sections go to the alignment json until ``nMax`` events have been
    collected for that IOV, and to the validation json afterwards.

    Two files are written to ``outDir``: ``filesNotAtCern.txt`` (files for
    which ``getFileInfo`` returned no information) and, if ``runJson`` is
    given, ``eventsUsed.txt`` (event counts per IOV).

    Args:
        runJson: Dict ``{run: [[firstLumi, lastLumi], ...]}`` with the
            certified lumi ranges (both ends inclusive), or a falsy value if
            no json was given.
        listIOV: List of IOV boundary runs. Consecutive entries delimit one
            IOV ``[listIOV[i], listIOV[i+1] - 1]``, so the last entry only
            closes the last IOV. A falsy value means one IOV for all runs.
        filelist: List of file names to inspect.
        nMax: Number of alignment events per IOV after which further lumi
            sections go to validation.
        outDir: Existing directory for the two text files.

    Returns:
        Tuple ``(jsonAlign, jsonVali, files)``. ``jsonAlign`` and
        ``jsonVali`` hold one dict ``{run: [[firstLumi, lastLumi], ...]}``
        per IOV. ``files`` is a dict ``{IOV index: set of file names}`` of
        the files contributing to the alignment part, or just
        ``set(filelist)`` if ``runJson`` is falsy.
    """
    ##Function for finding run in IOV interval / lumi in certified range
    ##Both ends are inclusive: the upper IOV boundary is already "next IOV - 1"
    ##and the lumi ranges of the json name their first and last lumi section
    def inRange(lower, value, upper):
        return lower <= int(value) <= upper

    ##Boundaries of IOVS
    if listIOV:
        lowerBoundaries = [int(run) for run in listIOV[:-1]]
        upperBoundaries = [int(run)-1 for run in listIOV[1:]]

    else:
        lowerBoundaries = [0.]
        upperBoundaries = [1e20]

    ##Get file information (run number, events) in parallel
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())

    try:
        results = [pool.apply_async(getFileInfo, (filename,)) for filename in filelist]
        output = [result.get() for result in results]

    finally:
        ##Do not leave worker processes behind
        pool.close()
        pool.join()

    ##getFileInfo hands back the file name instead of a list on failure
    notAtCern = [result for result in output if not isinstance(result, list)]

    ##Write out files which are not at CERN
    with open("{}/filesNotAtCern.txt".format(outDir), "w") as filesNotCern:
        for filename in notAtCern:
            filesNotCern.write(filename)
            filesNotCern.write("\n")

    runDic = {}

    ##Fill dic like {runNumber: {lumi: (events, filenames), lumi2: (...)}}
    ##output and filelist have the same order and length, hence failed files
    ##have to be skipped here and not before, to keep info and name aligned
    for (runInfo, filename) in zip(output, filelist):
        if not isinstance(runInfo, list):
            continue

        for (run, lumi, events) in runInfo:
            if events not in ["Events", "Lumi"]:
                runDic.setdefault(int(run), {})[int(lumi)] = (int(events), filename)

    ##json configs for IOV
    jsonAlign = [{} for index in lowerBoundaries]
    jsonVali = [{} for index in lowerBoundaries]
    eventsInTotal = [0 for index in lowerBoundaries]
    eventsInAlign = [0 for index in lowerBoundaries]
    eventsInVali = [0 for index in lowerBoundaries]

    if not runJson:
        return jsonAlign, jsonVali, set(filelist)

    ##Shuffle runJson to have random run number position
    runItems = list(runJson.items())
    shuffle(runItems)
    filesPerIOV = {}

    ##Loop over json input file
    for (run, value) in runItems:
        ##Check if run is in IOV boundaries and check in which IOV
        try:
            index = [inRange(lower, run, upper) for (lower, upper) in zip(lowerBoundaries, upperBoundaries)].index(True)

        except ValueError:
            ##run of json file is not in IOV boundaries
            continue

        ##Check if run is one of files
        if int(run) not in runDic:
            continue

        alignLumi = [[]]
        valiLumi = [[]]

        ##Loop over all lumi section of a run, in increasing order because
        ##consecutive lumi sections are merged into [first, last] ranges
        for (lumi, (lumiEvents, lumiFile)) in sorted(runDic[int(run)].items()):
            eventsInTotal[index] += lumiEvents

            ##Alignment is filled first, validation gets the rest
            forAlignment = eventsInAlign[index] < nMax
            lumiRanges = alignLumi if forAlignment else valiLumi

            ##A lumi section which is not certified closes the current range
            if not any(inRange(lower, lumi, upper) for lower, upper in value):
                if len(lumiRanges[-1]) != 0:
                    lumiRanges.append([])
                continue

            ##Add events from lumi section
            if forAlignment:
                eventsInAlign[index] += lumiEvents
                filesPerIOV.setdefault(index, set()).add(lumiFile)

            else:
                eventsInVali[index] += lumiEvents

            ##Open a new range or extend the current one
            if len(lumiRanges[-1]) == 0:
                lumiRanges[-1] = [lumi, lumi]

            else:
                lumiRanges[-1][1] = lumi

        alignLumi = [element for element in alignLumi if len(element) != 0]
        valiLumi = [element for element in valiLumi if len(element) != 0]

        if len(alignLumi) != 0:
            jsonAlign[index][str(run)] = alignLumi

        if len(valiLumi) != 0:
            jsonVali[index][str(run)] = valiLumi

    ##Write out events for Alignment/Validation
    with open("{}/eventsUsed.txt".format(outDir), "w") as eventsUsed:
        for (lower, total, align, vali) in zip(lowerBoundaries, eventsInTotal, eventsInAlign, eventsInVali):
            eventsUsed.write("Events used in Total for IOV {}: {}".format(lower, total) + "\n")
            eventsUsed.write("Events used for Alignment for IOV {}: {}".format(lower, align) + "\n")
            eventsUsed.write("Events used for Validation for IOV {}: {}".format(lower, vali) + "\n")

    return jsonAlign, jsonVali, filesPerIOV
0198         
0199 
def main():
    """Create the file lists and json configs for the requested data sets.

    Steps:
      1. Parse the command line and create the output directory.
      2. Collect, in parallel, the files of all data sets listed in
         ``--data-txt`` and write the names of files without events to
         ``emptyFiles.txt``.
      3. Read the optional IOV boundaries and the optional input json and
         hand everything to ``fillJson``.
      4. Optionally remove the runs listed in ``--rm-bad-runs`` from the
         resulting jsons.
      5. Write the outputs into the output directory: the skimmed file list
         (without IOVs), one alignment json, validation json and ``_cff.py``
         per IOV (with IOVs and input json), the merged ``Align.json`` and
         ``Vali.json`` (with input json), and ``eventsUsed.txt`` if
         ``fillJson`` has not written it.
    """
    ##Get parser arguments
    args = parser()

    ##create dir for all the configs (without passing the name to a shell)
    if not os.path.isdir(args.out_dir):
        os.makedirs(args.out_dir)

    ##Read out files from datasets which are at CERN in parallel
    filelist = []
    emptyfiles = []
    nEvents = []

    ##Blank lines are not data sets
    with open(args.data_txt, "r") as datasets:
        datasetNames = [line.strip() for line in datasets if line.strip()]

    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())

    try:
        results = [pool.apply_async(getFileList, (dataset,)) for dataset in datasetNames]

        for result in results:
            files, empties, events = result.get()
            filelist.extend(files)
            emptyfiles.extend(empties)
            nEvents.append(events)

    finally:
        ##Do not leave worker processes behind
        pool.close()
        pool.join()

    with open("{}/emptyFiles.txt".format(args.out_dir), "w") as empty:
        for emptyFile in emptyfiles:
            empty.write(emptyFile + '\n')

    ##Load IOV boundaries
    if args.iov_txt:
        with open(args.iov_txt) as fIOV:
            listIOV = [line.strip() for line in fIOV if line.strip()]

    else:
        listIOV = args.iov_txt

    ##Load json file
    if args.json_input:
        with open(args.json_input) as fJson:
            runJson = json.load(fJson)

    else:
        runJson = args.json_input

    ##Fill json configs
    jsonAlign, jsonVali, filelist = fillJson(runJson, listIOV, filelist, args.N_max_IOV, args.out_dir)

    ##fillJson returns {IOV index: files} if a json was used, else a plain set
    filesPerIOV = filelist if isinstance(filelist, dict) else {}

    ##Remove bad runs if wished
    if args.rm_bad_runs is not None:
        with open(args.rm_bad_runs, "r") as badRuns:
            for badRun in badRuns:
                if not badRun.strip():
                    continue

                ##The runs are stored as strings in the json dicts
                runKey = str(int(badRun))

                for dic in jsonAlign + jsonVali:
                    dic.pop(runKey, None)

    ##Template for python configuration files with file names for each IOV
    pyTempl = """import FWCore.ParameterSet.Config as cms
import FWCore.PythonUtilities.LumiList as LumiList

lumiSecs = cms.untracked.VLuminosityBlockRange()
goodLumiSecs = LumiList.LumiList(filename = '{json}').getCMSSWString().split(',')
readFiles = cms.untracked.vstring()
source = cms.Source("PoolSource",
                            lumisToProcess = lumiSecs,
                            fileNames = readFiles)
readFiles.extend([
    {filenames}
])
lumiSecs.extend(goodLumiSecs)
maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1))
    """

    ##Write out skimmed file set:
    if not args.iov_txt:
        if isinstance(filelist, dict):
            ##Merge the files of all IOVs, the dict keys are only indices
            skimmedFiles = set().union(*filelist.values())

        else:
            skimmedFiles = filelist

        with open("{}/{}.txt".format(args.out_dir, args.out_data), "w") as outData:
            for filename in sorted(skimmedFiles):
                outData.write(filename + '\n')

    ##Write json IOV files if wished
    if args.iov_txt and args.json_input:
        for index, runNumber in enumerate([int(run) for run in listIOV[:-1]]):
            alignName = "{}/IOV_Align_{}.json".format(args.out_dir, runNumber)
            valiName = "{}/IOV_Vali_{}.json".format(args.out_dir, runNumber)

            with open(alignName, "w") as fAlignJson:
                json.dump(jsonAlign[index], fAlignJson, sort_keys=True, indent=4, separators=(',', ': '))

            with open(valiName, "w") as fValiJson:
                json.dump(jsonVali[index], fValiJson, sort_keys=True, indent=4, separators=(',', ': '))

            ##Files of exactly this IOV, an IOV may have no files at all
            quotedNames = ["'{}'".format(filename) for filename in sorted(filesPerIOV.get(index, ()))]

            with open("{}/{}_since{}_cff.py".format(args.out_dir, args.out_data, runNumber), "w") as outData:
                outData.write(pyTempl.format(json=os.path.abspath(valiName), filenames=",\n".join(quotedNames)))

    ##Write json files merged over all IOVs
    if args.json_input:
        mergeJsonAlign = {}
        for jsonDic in jsonAlign:
            mergeJsonAlign.update(jsonDic)

        mergeJsonVali = {}
        for jsonDic in jsonVali:
            mergeJsonVali.update(jsonDic)

        with open("{}/Align.json".format(args.out_dir), "w") as fAlignJson:
            json.dump(mergeJsonAlign, fAlignJson, sort_keys=True, indent=4, separators=(',', ': '))

        with open("{}/Vali.json".format(args.out_dir), "w") as fValiJson:
            json.dump(mergeJsonVali, fValiJson, sort_keys=True, indent=4, separators=(',', ': '))

    ##Without json input fillJson writes no event summary, all events are for alignment
    if not os.path.exists("{}/eventsUsed.txt".format(args.out_dir)):
        with open("{}/eventsUsed.txt".format(args.out_dir), "w") as eventsUsed:
            eventsUsed.write("Events used for Alignment: {}".format(sum(nEvents)) + "\n")
            eventsUsed.write("Events used for Validation: {}".format(0) + "\n")

if __name__ == "__main__":
    main()