Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:12:54

0001 from __future__ import print_function
0002 # This CMS code is based on previous work done by Toby Dickenson, as indicated below
0003 #
0004 # for questions: Benedikt.Hegner@cern.ch
0005 
0006 # Copyright 2004 Toby Dickenson
0007 #
0008 # Permission is hereby granted, free of charge, to any person obtaining
0009 # a copy of this software and associated documentation files (the
0010 # "Software"), to deal in the Software without restriction, including
0011 # without limitation the rights to use, copy, modify, merge, publish,
0012 # distribute, sublicense, and/or sell copies of the Software, and to
0013 # permit persons to whom the Software is furnished to do so, subject
0014 # to the following conditions:
0015 #
0016 # The above copyright notice and this permission notice shall be included
0017 # in all copies or substantial portions of the Software.
0018 #
0019 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0020 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0021 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
0022 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
0023 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
0024 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
0025 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0026 
0027 from builtins import range
0028 import sys, os, inspect, copy, struct, dis, imp
0029 import modulefinder
0030 
def packageNameFromFilename(name:str) -> str:
    """Turn a source file path into a dotted package name.

    Every occurrence of "python/" and ".py" is stripped from the path, the
    remainder is split on "/", and the last three components (or fewer, if
    the path is shorter) are joined with dots.
    """
    withoutPythonDir = name.replace("python/", "")
    withoutExtension = withoutPythonDir.replace(".py", "")
    components = withoutExtension.split("/")
    # Only the trailing three path components make up the package name.
    return ".".join(components[-3:])
0033 
0034 
class Color:
    """ANSI escape display sequences used to decorate terminal output."""

    # Foreground colours / emphasis
    info = "\033[1;34m"          # bold blue
    hilight = "\033[31m"         # red
    alternate = "\033[32m"       # green
    extra = "\033[33m"           # yellow
    lessemphasis = "\033[30m"    # black
    deemphasis = "\033[1;30m"    # bold black

    # Background and text decoration
    backlight = "\033[43m"       # yellow background
    underline = "\033[4m"        # underlined text

    # Reset all attributes
    none = "\033[0m"
0046 
# Module-level chain of package names currently being traversed by
# Package.search(); a copy of it is stored with every search hit so the
# import path leading to the hit can be reported.
_stack = []
0048 
class SearchHit:
    """Plain attribute container describing one match found by Package.search().

    Package.search() fills in the attributes `number` (zero-based line index),
    `filename` (dotted package name of the source file), `line` (the matching
    source line) and `stacks` (list of import stacks that lead to the hit).
    """
0051 
class Package(object):
    """One node of the import-dependency tree.

    Instance attributes:
      name          dotted module name of this node
      dependencies  list of Package objects this node imports
      searched      True once search() has visited this node at least once
      stack         empty list; not referenced by the code in this class
      module        the imported module object, or None for the top-level node
      hits          SearchHit objects created for this node by search()
      hit           last SearchHit created by search() (kept for backward
                    compatibility; only exists after a first match)
    """

    def __init__(self,name,top=False):
        """Create the node; unless `top` is true the module `name` is imported.

        A non-empty fromlist ("*") is passed to __import__ so that the leaf
        module of a dotted name is returned rather than its top-level package.
        """
        self.name = name
        self.dependencies = []
        self.searched = False
        self.stack = []
        self.hits = []
        if top:
            self.module = None
        else:
            self.module = __import__(name,[],[],"*")

    def dump(self,level:int):
        """Print this node and, recursively, its dependencies as an indented tree.

        The dependency list is sorted in place by name before printing.
        """
        indent = "  " * level
        print(indent, "+", Color.info, self.name, Color.none)
        # sort dependencies alphabetically
        self.dependencies.sort(key = lambda x: x.name)
        for package in self.dependencies:
            package.dump(level+1)

    def search(self,pattern,result):
        """Recursively search the source of this node and its dependencies.

        On the first visit every source line containing `pattern` produces a
        SearchHit (zero-based line `number`, `filename`, `line`, `stacks`)
        that is appended to `result`.  On every visit, including the first,
        a copy of the current import stack is appended to *each* hit of this
        node, so all hits of a module carry every import path leading to it.

        The source is scanned only once per node; the `searched` flag is
        never reset, so a tree is meant to be searched with a single pattern.
        """
        if self.module:
            if not self.searched:
                filename = None
                source = inspect.getsource(self.module)
                for number, line in enumerate(source.splitlines()):
                    if pattern not in line:
                        continue
                    if filename is None:
                        # resolved lazily: only needed when there is a match
                        filename = packageNameFromFilename(inspect.getsourcefile(self.module))
                    hit = SearchHit()
                    hit.number = number
                    hit.filename = filename
                    hit.line = line
                    hit.stacks = list()
                    self.hits.append(hit)
                    self.hit = hit
                    result.append(hit)
            # record the current import path on every hit of this module
            for hit in self.hits:
                hit.stacks.append(copy.copy(_stack))
        # then go on with dependencies
        _stack.append(self.name)
        try:
            for package in self.dependencies:
                package.search(pattern,result)
        finally:
            # keep the shared stack balanced even if a dependency fails
            _stack.pop()
        self.searched = True
0091 
0092 
class mymf(modulefinder.ModuleFinder):
    """ModuleFinder that additionally records which module imported which.

    Instance attributes:
      _depgraph     dict: importing module name -> {imported module name: 1}
      _types        dict: module name -> file type reported to load_module()
      _last_caller  module object on whose behalf the current import runs
      _localarea    value of $CMSSW_BASE ('' when unset)
      _globalarea   value of $CMSSW_RELEASE_BASE ('' when unset)
    """

    def __init__(self,*args,**kwargs):
        """Initialise the bookkeeping and forward all arguments to ModuleFinder."""
        self._depgraph = {}
        self._types = {}
        self._last_caller = None
        # Read the areas straight from the environment: an unset variable
        # yields '' (os.path.expandvars would leave the literal "$CMSSW_BASE"
        # in place and defeat the emptiness checks in import_module).
        self._localarea = os.environ.get('CMSSW_BASE', '')
        self._globalarea = os.environ.get('CMSSW_RELEASE_BASE', '')
        modulefinder.ModuleFinder.__init__(self,*args,**kwargs)

    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Delegate to ModuleFinder.import_hook while remembering `caller`.

        The previous caller is restored afterwards, also on error, so nested
        imports attribute their dependencies to the right module.
        """
        old_last_caller = self._last_caller
        try:
            self._last_caller = caller
            return modulefinder.ModuleFinder.import_hook(self,name,caller,fromlist, level=level)
        finally:
            self._last_caller = old_last_caller

    def import_module(self,partnam,fqname,parent):
        """Import one module and record the caller -> module dependency.

        "os" and "unittest" are never followed (None is returned).  If the
        module cannot be found below a parent located in the local area, the
        parent is redirected to the global area and the import is retried.
        """
        if partnam in ("os","unittest"):
            return None

        r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent)
        # since the modulefinder is not able to look into the global area when
        # coming from the local area, we force a second try
        if parent and not r and self._localarea != '' and self._globalarea != '':
            if parent.__file__:
                parent.__file__ = parent.__file__.replace(self._localarea,self._globalarea)
            if parent.__path__:
                parent.__path__[0] = parent.__path__[0].replace(self._localarea,self._globalarea)
            r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent)

        # Without a known caller there is no edge to record.
        if r is not None and self._last_caller is not None:
            self._depgraph.setdefault(self._last_caller.__name__,{})[r.__name__] = 1
        return r

    def load_module(self, fqname, fp, pathname, aux_info):
        """Load the module via ModuleFinder and remember its file type.

        `aux_info` is the (suffix, mode, type) triple passed by ModuleFinder.
        """
        (suffix, mode, kind) = aux_info
        r = modulefinder.ModuleFinder.load_module(self, fqname, fp, pathname, (suffix, mode, kind))
        if r is not None:
            self._types[r.__name__] = kind
        return r

    def scan_opcodes_25(self, co, unpack = struct.unpack):
        """
        This is basically just the default opcode scanner from ModuleFinder, but extended to also
        look for "process.load(<module>)" commands. Since the Process object might not necessarily
        be called "process", it scans for a call to a "load" method with a single parameter on
        *any* object. If one is found it checks if the parameter is a string that refers to a valid
        python module in the local or global area. If it does, the scanner assumes this was a call
        to a Process object and yields the module name.
        It's not possible to scan first for Process object declarations to get the name of the
        objects since often (e.g. for customisation functions) the object is passed to a function
        in a different file.

        The ModuleFinder.scan_opcodes_25 implementation this is based on was taken from
        https://hg.python.org/cpython/file/2.7/Lib/modulefinder.py#l364

        NOTE(review): the scanner assumes the Python 2 bytecode layout (3-byte
        instructions, opcodes comparable to one-character strings) and relies on
        the `imp` module; presumably Python 3's ModuleFinder never calls a method
        of this name - confirm before relying on process.load detection.
        """
        # Scan the code, and yield 'interesting' opcode combinations
        # Python 2.5 version (has absolute and relative imports)
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        LOAD_CONST = modulefinder.LOAD_CONST
        IMPORT_NAME = modulefinder.IMPORT_NAME
        STORE_OPS = modulefinder.STORE_OPS
        HAVE_ARGUMENT = modulefinder.HAVE_ARGUMENT
        LOAD_ATTR = chr(dis.opname.index('LOAD_ATTR'))
        LOAD_NAME = chr(dis.opname.index('LOAD_NAME'))
        CALL_FUNCTION = chr(dis.opname.index('CALL_FUNCTION'))
        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME

        try :
            indexOfLoadConst = names.index("load") # This might throw a ValueError
            # These are the opcodes required to access the "load" attribute. This might
            # not even be a function, but I check for that later.
            loadMethodOpcodes = LOAD_ATTR+struct.pack('<H',indexOfLoadConst)
        except ValueError :
            # doesn't look like "load" is used anywhere in this file
            loadMethodOpcodes=None

        while code:
            c = code[0]

            # Check to see if this is a call to a "load" method
            if loadMethodOpcodes is not None and len(code)>=9 : # Need at least 9 codes for the full call
                if code[:3]==loadMethodOpcodes :
                    # The attribute "load" is being accessed, need to make sure this is a function call.
                    # I'll look ahead and see if the CALL_FUNCTION code is used - this could be in a different
                    # place depending on the number of arguments, but I'm only interested in methods with a
                    # single argument so I know exactly where CALL_FUNCTION should be.
                    if code[6]==CALL_FUNCTION :
                        # I know this is calling a method called "load" with one argument. I need
                        # to find out what the argument is. Note that I still don't know if this is
                        # on a cms.Process object.
                        indexInTable=unpack('<H',code[4:6])[0]
                        if code[3]==LOAD_CONST :
                            # The argument is a constant, so retrieve that from the table
                            loadMethodArgument=consts[indexInTable]
                            # I know a load method with one argument has been called on *something*, but I don't
                            # know if it was a cms.Process object. All I can do is check to see if the argument is
                            # a string, and if so if it refers to a python file in the user or global areas.
                            try :
                                loadMethodArgument = loadMethodArgument.replace("/",".")
                                # I can only use imp.find_module on submodules (i.e. each bit between a "."), so try
                                # that on each submodule in turn using the previously found filename. Note that I have
                                # to try this twice, because if the first pass traverses into a package in the local
                                # area but the subpackage has not been checked out it will report that the subpackage
                                # doesn't exist, even though it is available in the global area.
                                try :
                                    parentFilename=[self._localarea+"/python"]
                                    for subModule in loadMethodArgument.split(".") :
                                        moduleInfo=imp.find_module( subModule, parentFilename )
                                        parentFilename=[moduleInfo[1]]
                                    # If control got this far without raising an exception, then it must be a valid python module
                                    yield "import", (None, loadMethodArgument)
                                except ImportError :
                                    # Didn't work in the local area, try in the global area.
                                    parentFilename=[self._globalarea+"/python"]
                                    for subModule in loadMethodArgument.split(".") :
                                        moduleInfo=imp.find_module( subModule, parentFilename )
                                        parentFilename=[moduleInfo[1]]
                                    # If control got this far without raising an exception, then it must be a valid python module
                                    yield "import", (None, loadMethodArgument)
                            except Exception:
                                # Either there was an import error (not a python module) or there was a string
                                # manipulation error (argument not a string). Assume this wasn't a call on a
                                # cms.Process object and move on silently (deliberate best effort).
                                pass

                        elif code[3]==LOAD_NAME :
                            # The argument is a variable. I can get the name of the variable quite easily but
                            # not the value, unless I execute all of the opcodes. Not sure what to do here,
                            # guess I'll just print a warning so that the user knows?
                            print("Unable to determine the value of variable '"+names[indexInTable]+"' to see if it is a proces.load(...) statement in file "+co.co_filename)

                        code=code[9:]
                        continue

            if c in STORE_OPS:
                oparg, = unpack('<H', code[1:3])
                yield "store", (names[oparg],)
                code = code[3:]
                continue
            if code[:9:3] == LOAD_LOAD_AND_IMPORT:
                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
                level = consts[oparg_1]
                if level == -1: # normal import
                    yield "import", (consts[oparg_2], names[oparg_3])
                elif level == 0: # absolute import
                    yield "absolute_import", (consts[oparg_2], names[oparg_3])
                else: # relative import
                    yield "relative_import", (level, consts[oparg_2], names[oparg_3])
                code = code[9:]
                continue
            if c >= HAVE_ARGUMENT:
                code = code[3:]
            else:
                code = code[1:]
0248 
def removeRecursiveLoops( node, verbose=False, currentStack=None ) :
    """Break import cycles in the dependency tree rooted at `node`.

    The tree is walked depth first.  Whenever `node` is already on the path
    leading to it (`currentStack`), the edge from the last node on the path
    to `node` is removed from that node's `dependencies`; otherwise `node`
    is appended to `currentStack` and its dependencies are visited with a
    copy of the path.

    node          object with `name` and `dependencies` attributes
    verbose       if true, print every loop that gets removed
    currentStack  path from the root to the parent of `node` (None = empty)
    """
    if currentStack is None : currentStack=[]

    if node in currentStack :
        duplicateIndex=currentStack.index( node )
        if verbose :
            print("Removing recursive loop in:")
            for index in range(duplicateIndex,len(currentStack)) :
                print("   ",currentStack[index].name,"-->")
            print("   ",node.name)
        currentStack[-1].dependencies.remove(node)
        return

    # No recursive loop found, so continue traversing the tree
    currentStack.append( node )
    # Iterate over a snapshot: a recursive call may remove entries from
    # node.dependencies, which would otherwise make the loop skip siblings.
    for subnode in list(node.dependencies) :
        removeRecursiveLoops( subnode, verbose, currentStack[:] )
0264 
def transformIntoGraph(depgraph,toplevel):
    """Convert the flat dependency dictionary into a tree of Package objects.

    depgraph  dict: module name -> dict whose keys are the imported modules
    toplevel  name of the top-level configuration; its Package is created
              with top=True (i.e. it is not imported)

    Only names with exactly two dots are turned into packages.  Each package
    is created (and therefore imported) once, and the top-level entry is
    never replaced by a dependency carrying the same name.  Recursive loops
    are removed before the top-level Package is returned.
    """
    # create the top level config
    packageDict = {toplevel: Package(toplevel, top = True)}

    def ensurePackage(name):
        # create the Package for a three-component name unless it exists
        if name.count(".") == 2 and name not in packageDict:
            packageDict[name] = Package(name)

    # create package objects
    for key, value in depgraph.items():
        ensurePackage(key)
        for name in value:
            ensurePackage(name)

    # now create dependencies
    for key, value in depgraph.items():
        if key.count(".") == 2 or key == toplevel:
            packageDict[key].dependencies = [packageDict[name] for name in value if name.count(".") == 2]

    removeRecursiveLoops( packageDict[toplevel] )
    # find and return the top level config
    return packageDict[toplevel]
0285 
0286 
def getDependenciesFromPythonFile(filename:str,toplevelname,path):
    """Run the module finder on `filename` and return its dependency dictionary.

    filename      script to analyse (passed to run_script)
    toplevelname  key under which the script's own dependencies are stored
    path          search path handed to the mymf module finder

    The dependencies recorded for "__main__" are made available under
    `toplevelname` as well (same dict object).  A script without any
    recorded import gets an empty dependency dict instead of a KeyError.
    """
    finder = mymf(path)
    finder.run_script(filename)
    globalDependencyDict = finder._depgraph
    # setdefault: "__main__" is absent when the script imported nothing
    globalDependencyDict[toplevelname] = globalDependencyDict.setdefault("__main__", {})
    return globalDependencyDict
0293 
0294 
def getImportTree(filename:str,path):
    """Build and return the dependency tree for the python file `filename`.

    The top-level name is derived from the file name, the flat dependency
    dictionary is collected with getDependenciesFromPythonFile() and then
    converted into a tree by transformIntoGraph().
    """
    topName = packageNameFromFilename(filename)
    # flat mapping: importing module -> imported modules
    flatDependencies = getDependenciesFromPythonFile(filename, topName, path)
    # nested structure rooted at the top-level configuration
    return transformIntoGraph(flatDependencies, topName)