Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 11:57:13

0001 #!/usr/bin/env python3
0002 
0003 import Utilities.General.cmssw_das_client as das_client
0004 import json
0005 import os
0006 import sys
0007 import subprocess
0008 import argparse
0009 
0010 ##############################################
def parseArguments():
##############################################
    """Parse the command line arguments.

    Returns the argparse namespace. It has the attribute ``input`` (required,
    name of the input file list) and the attribute ``output`` (name of the
    output file, "myFileListWithRuns.txt" unless given on the command line).
    """

    toolDescription = "Tool to find which runs are included in files. Used to generate input dataset for JetHT validation tool in case of run based splitting for condor jobs."
    argumentParser = argparse.ArgumentParser(
        description=toolDescription,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Mandatory option: the list of files to look up
    argumentParser.add_argument(
        "-i", "--input",
        action="store",
        required=True,
        help="Name of the input file list. Has one file name in each line.",
    )

    # Optional option: where the produced "run file" list is written
    argumentParser.add_argument(
        "-o", "--output",
        action="store",
        default="myFileListWithRuns.txt",
        help="Name of the output file in which the produced file list is stored",
    )

    parsedArguments = argumentParser.parse_args()
    return parsedArguments
0020 
0021 
0022 ##############################################
def check_proxy():
##############################################
    """Check if GRID proxy has been initialized.

    Runs ``voms-proxy-info --exists`` with its standard output and standard
    error discarded.

    Returns:
        True if the command exits with status zero. False if it exits with a
        non-zero status, or if it cannot be started at all (for example when
        the executable is not found).
    """

    try:
        subprocess.check_call(["voms-proxy-info", "--exists"],
                              stdout = subprocess.DEVNULL,
                              stderr = subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: the command ran and reported a non-zero status.
        # OSError: the command could not be launched (e.g. not installed);
        # report this as "no proxy" instead of crashing with a traceback.
        return False
    return True
0034 
0035 ##############################################
def findInJson(jsondict, strings):
##############################################
    """Find a value from a decoded json structure by following a chain of keys.

    Arguments:
        jsondict: The structure to search. A dictionary is searched directly.
            Any other iterable is searched element by element and the first
            element in which the whole remaining key chain can be followed
            wins.
        strings: A single key, or a sequence of keys to follow one after
            another. An empty sequence returns jsondict itself.

    Returns:
        The value found at the end of the key chain.

    Raises:
        KeyError: If the key chain cannot be followed. The message names the
            first key of the chain given to this call.
    """

    if isinstance(strings, str):
        strings = [ strings ]

    if len(strings) == 0:
        return jsondict

    key = strings[0]
    remainingKeys = strings[1:]

    if isinstance(jsondict, dict):
        if key in jsondict:
            try:
                return findInJson(jsondict[key], remainingKeys)
            except KeyError:
                pass
    else:
        try:
            candidates = iter(jsondict)
        except TypeError:
            # Scalars and None contain nothing that could be searched
            candidates = iter(())
        for candidate in candidates:
            try:
                # The membership test is inside the try block on purpose: the
                # candidate may be a number or None, for which "in" raises
                # TypeError. It may also be a string or a list that contains
                # the key but cannot be indexed with it.
                if key in candidate:
                    return findInJson(candidate[key], remainingKeys)
            except (TypeError, KeyError):
                pass
    #if it's not found
    raise KeyError("Can't find " + str(key))
0060 
0061 ##############################################
def getData( dasQuery, dasLimit = 0 ):
##############################################
    """ Get data from DAS query.

    Arguments:
        dasQuery: The query passed to das_client.get_data.
        dasLimit: The limit passed to das_client.get_data.

    Returns:
        The "data" entry of the DAS reply.

    Raises:
        KeyError: If the reply contains an error entry under "data", its
            "status" is missing or not 'ok', or it has no "data" entry. The
            message holds the "reason" entry of the reply if there is one and
            the whole reply otherwise. Output longer than 10000 characters is
            written to a new das_query_output_<i>.txt file in the current
            directory and the message names that file instead.
    """

    dasData = das_client.get_data(dasQuery, dasLimit)
    # The reply is decoded here only if the client returned it as a string
    if isinstance(dasData, str):
        jsondict = json.loads( dasData )
    else:
        jsondict = dasData

    # Check, if the DAS query fails
    try:
        error = findInJson(jsondict,["data","error"])
    except KeyError:
        error = None

    queryFailed = bool(error)
    if not queryFailed:
        try:
            queryFailed = findInJson(jsondict,"status") != 'ok' or "data" not in jsondict
        except KeyError:
            # A reply without any status is reported with the full
            # diagnostic message below, not with a bare "Can't find status"
            queryFailed = True

    if queryFailed:
        try:
            # str() because the reason is concatenated to the message below
            jsonstr = str(findInJson(jsondict,"reason"))
        except KeyError:
            jsonstr = str(jsondict)
        if len(jsonstr) > 10000:
            # Pick the first das_query_output_<i>.txt name that is not taken
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            with open( jsonfile, "w" ) as theFile:
                theFile.write( jsonstr )
            msg = "The DAS query returned an error.  The output is very long, and has been stored in:\n" + jsonfile
        else:
            msg = "The DAS query returned a error.  Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error.  If so, it may work if you try again later"
        raise KeyError(msg)
    return findInJson(jsondict,"data")
0096 
0097 ##############################################
def main():
##############################################
    """ Main program

    Reads the file list named by the --input argument, asks DAS which runs
    each file contains and writes one "<run number> <file name>" line for each
    run and file pair to the file named by the --output argument. Exits with
    status 1 if check_proxy() reports that there is no grid proxy.
    """

    # Before doing anything, check that grid proxy exists
    if not check_proxy():
        print("Grid proxy is required to connect to DAS. Cannot run the tool without it.")
        print("Please create a proxy via 'voms-proxy-init -voms cms'.")
        sys.exit(1)

    # Read the command line argument
    commandLineArguments = parseArguments()

    # Read the file list from the input file
    with open(commandLineArguments.input,"r") as inputFile:
        inputFileList = [rawInputFile.rstrip() for rawInputFile in inputFile]

    # Find which runs are included in each of the files in the file list
    runDictionary = {}  # Dictionary telling which files contain each run
    for inputFileName in inputFileList:

        # A blank line names no file, so there is nothing to ask from DAS
        if not inputFileName:
            continue

        myData = getData("run file={}".format(inputFileName))

        for dasInstance in myData:
            for jsonDictionary in findInJson(dasInstance,"run"):
                runNumber = jsonDictionary["run_number"]
                runDictionary.setdefault(runNumber, []).append(inputFileName)

    # Create an output file indicating which runs can be found from each file
    with open(commandLineArguments.output, "w") as outputFile:
        for runNumber, fileNames in runDictionary.items():
            for fileName in fileNames:
                outputFile.write("{} {}\n".format(runNumber, fileName))
0145 
0146 ##############################################
if __name__ == "__main__":
##############################################
    # Start the tool only when this file is run as a script, not on import
    main()