File indexing completed on 2023-03-17 10:40:27
0001
0002
0003 import Utilities.General.cmssw_das_client as das_client
0004 import json
0005 import os
0006 import sys
0007 import subprocess
0008 import argparse
0009
0010
def parseArguments():
    """Parse the command line arguments.

    Reads the options from sys.argv and returns the argparse namespace with
    the attributes ``input`` (required) and ``output`` (optional, defaults
    to "myFileListWithRuns.txt").
    """
    argumentParser = argparse.ArgumentParser(
        description="Tool to find which runs are included in files. Used to generate input dataset for JetHT validation tool in case of run based splitting for condor jobs.",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Mandatory list of files to look up.
    argumentParser.add_argument(
        "-i", "--input",
        action="store",
        required=True,
        help="Name of the input file list. Has one file name in each line.",
    )

    # Optional destination for the "run file" pairs.
    argumentParser.add_argument(
        "-o", "--output",
        action="store",
        default="myFileListWithRuns.txt",
        help="Name of the output file in which the produced file list is stored",
    )

    parsedArguments = argumentParser.parse_args()
    return parsedArguments
0020
0021
0022
def check_proxy():
    """Check if GRID proxy has been initialized.

    Runs ``voms-proxy-info --exists`` with its standard output and standard
    error discarded.

    Returns:
        True if the command exits with status 0. False if it exits with a
        non-zero status or if it cannot be started at all (for example when
        ``voms-proxy-info`` is not installed or not on PATH).
    """
    try:
        with open(os.devnull, "w") as dump:
            subprocess.check_call(["voms-proxy-info", "--exists"],
                                  stdout=dump, stderr=dump)
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: the command ran and reported a failure.
        # OSError: the command could not be launched; without catching it a
        # missing binary would end the program with an unhandled traceback.
        return False
    return True
0034
0035
def findInJson(jsondict, strings):
    """Find the value stored under a (possibly nested) key path in JSON data.

    Code adapted from dataset.py.

    Args:
        jsondict: Decoded JSON value to search. A dict is searched directly;
            for any other iterable, each dict element is tried in order.
        strings: A single key given as a string, or a sequence of keys that
            are followed one after another. An empty sequence returns
            ``jsondict`` itself.

    Returns:
        The value found at the end of the key path. When searching through a
        list, the result comes from the first element in which the whole
        remaining path can be resolved.

    Raises:
        KeyError: If the key path cannot be resolved. This includes the case
            where ``jsondict`` (or a list element) is a scalar or None, so
            callers only ever need to handle KeyError.
    """
    if isinstance(strings, str):
        strings = [strings]

    if len(strings) == 0:
        return jsondict

    key = strings[0]
    remaining = strings[1:]

    if isinstance(jsondict, dict):
        candidates = [jsondict]
    else:
        try:
            # Only dict elements can hold the key; strings, numbers and None
            # inside the list are skipped instead of raising TypeError.
            candidates = [item for item in jsondict if isinstance(item, dict)]
        except TypeError:
            # jsondict is not iterable (e.g. None or a number).
            candidates = []

    for candidate in candidates:
        if key in candidate:
            try:
                return findInJson(candidate[key], remaining)
            except KeyError:
                # The rest of the path is missing here; try the next candidate.
                pass

    raise KeyError("Can't find " + strings[0])
0060
0061
def getData( dasQuery, dasLimit = 0 ):
    """Get data from a DAS query.

    Code adapted from dataset.py.

    Args:
        dasQuery: Query string handed to ``das_client.get_data``.
        dasLimit: Passed to ``das_client.get_data`` as its second argument
            (default 0).

    Returns:
        Whatever ``findInJson`` finds under the "data" key of the answer.

    Raises:
        KeyError: If the answer contains a non-empty "error" entry under
            "data", its "status" is missing or not 'ok', or it has no "data"
            entry. The message carries the "reason" from the answer (or the
            whole answer if there is none); output longer than 10000
            characters is written to a new ``das_query_output_<i>.txt`` file
            and the message names that file instead.
    """
    dasData = das_client.get_data(dasQuery, dasLimit)
    # The client result is decoded only if it is still JSON text.
    if isinstance(dasData, str):
        jsondict = json.loads(dasData)
    else:
        jsondict = dasData

    try:
        error = findInJson(jsondict, ["data", "error"])
    except KeyError:
        error = None

    # Evaluate the failure conditions lazily, in the same order as a chained
    # "or", so later lookups are skipped once a failure is established.
    failed = bool(error)
    if not failed:
        try:
            failed = findInJson(jsondict, "status") != 'ok'
        except KeyError:
            # An answer without any status is reported as a failed query with
            # the full diagnostic message below.
            failed = True
    if not failed:
        failed = "data" not in jsondict

    if failed:
        try:
            jsonstr = findInJson(jsondict, "reason")
        except KeyError:
            jsonstr = str(jsondict)
        if not isinstance(jsonstr, str):
            # The reason may be structured data; it must be text for the
            # length check and the message concatenation below.
            jsonstr = str(jsonstr)
        if len(jsonstr) > 10000:
            # Pick the first das_query_output_<i>.txt name that is not taken.
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            with open(jsonfile, "w") as theFile:
                theFile.write(jsonstr)
            msg = "The DAS query returned an error.  The output is very long, and has been stored in:\n" + jsonfile
        else:
            msg = "The DAS query returned a error.  Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error.  If so, it may work if you try again later"
        raise KeyError(msg)
    return findInJson(jsondict, "data")
0096
0097
def main():
    """Main program.

    Steps:
      1. Exit with status 1 if no grid proxy is available.
      2. Parse the command line arguments.
      3. Read the input file list (one file name per line, blank lines are
         ignored).
      4. Query DAS for the runs contained in each file.
      5. Write one "<run number> <file name>" line per (run, file) pair to
         the output file, grouped by run in the order runs were first seen.
    """
    if not check_proxy():
        print("Grid proxy is required to connect to DAS. Cannot run the tool without it.")
        print("Please create a proxy via 'voms-proxy-init -voms cms'.")
        sys.exit(1)

    commandLineArguments = parseArguments()

    # Read the list of files; the context manager closes the file even if
    # reading fails.
    with open(commandLineArguments.input, "r") as inputFile:
        strippedLines = [rawLine.rstrip() for rawLine in inputFile]

    # Blank lines (e.g. a trailing empty line) would otherwise be sent to DAS
    # as a query with an empty file name.
    inputFileList = [fileName for fileName in strippedLines if fileName]

    # Map each run number to the list of files that contain it.
    runDictionary = {}
    for inputFileName in inputFileList:
        myData = getData("run file={}".format(inputFileName))

        for dasInstance in myData:
            for jsonDictionary in findInJson(dasInstance, "run"):
                runNumber = jsonDictionary["run_number"]
                runDictionary.setdefault(runNumber, []).append(inputFileName)

    # Write the "run file" pairs to the output file.
    with open(commandLineArguments.output, "w") as outputFile:
        for runNumber, fileNames in runDictionary.items():
            for fileName in fileNames:
                outputFile.write("{} {}\n".format(runNumber, fileName))
0145
0146
if __name__ == "__main__":
    # Run the tool only when this file is executed as a script, not when it
    # is imported as a module.
    main()