Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-12-01 23:40:20

0001 # This CMS code is based on previous work done by Toby Dickenson, as indicated below
0002 #
0003 # for questions: Benedikt.Hegner@cern.ch
0004 
0005 # Copyright 2004 Toby Dickenson
0006 #
0007 # Permission is hereby granted, free of charge, to any person obtaining
0008 # a copy of this software and associated documentation files (the
0009 # "Software"), to deal in the Software without restriction, including
0010 # without limitation the rights to use, copy, modify, merge, publish,
0011 # distribute, sublicense, and/or sell copies of the Software, and to
0012 # permit persons to whom the Software is furnished to do so, subject
0013 # to the following conditions:
0014 #
0015 # The above copyright notice and this permission notice shall be included
0016 # in all copies or substantial portions of the Software.
0017 #
0018 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0019 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0020 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
0021 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
0022 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
0023 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
0024 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0025 
0026 from builtins import range
0027 import sys, os, inspect, copy, struct, dis, importlib
0028 import modulefinder
0029 
def packageNameFromFilename(name:str) -> str:
    """Turn a "/"-separated python file path into a dotted package name.

    Directory components called exactly "python" are dropped, a trailing
    ".py" extension is removed, and the last three remaining components are
    joined with ".".  Paths with fewer than three components yield a
    correspondingly shorter name.

    Only whole "python" directory components and a real ".py" suffix are
    stripped, so names that merely contain those substrings (for example a
    directory "cpython") are left intact.
    """
    if name.endswith(".py"):
        name = name[:-3]
    # The last component is the file itself; only directories are filtered.
    *directories, leaf = name.split("/")
    components = [part for part in directories if part != "python"]
    components.append(leaf)
    return ".".join(components[-3:])
0032 
0033 
class Color:
    """Named ANSI escape sequences used to colourise terminal output."""

    # emphasis levels
    info = "\033[1;34m"          # bold + blue foreground
    hilight = "\033[31m"         # red foreground
    alternate = "\033[32m"       # green foreground
    extra = "\033[33m"           # yellow foreground

    # decorations
    backlight = "\033[43m"       # yellow background
    underline = "\033[4m"        # underlined text

    # low-key text
    lessemphasis = "\033[30m"    # black foreground
    deemphasis = "\033[1;30m"    # bold + black foreground

    # reset every attribute
    none = "\033[0m"
0045 
0046 _stack = []
0047 
class SearchHit:
    """Plain attribute container; Package.search() fills in the fields
    (number, filename, line, stacks) after creating an instance."""
0050 
class Package(object):
    """One node of the import tree: a python module plus the modules it imports.

    Attributes:
        name          dotted module name
        dependencies  list of Package objects this package imports
        searched      True once search() has scanned this package's source
        stack         list initialised empty (not used by the code visible here)
        module        the imported module object, or None for the top level node
        hits          SearchHit objects created by search() for this package
        hit           most recently created SearchHit (only set after a match)
    """
    def __init__(self,name,top=False):
        """Create the node; unless `top` is true the module `name` is imported."""
        self.name = name
        self.dependencies = []
        self.searched = False
        self.stack = []
        self.hits = []
        if top:
            # the top level node is represented without importing it
            self.module = None
        else:
            self.module = __import__(name,[],[],"*")
    def dump(self,level:int):
        """Print this package and, recursively, its dependencies.

        `level` is the nesting depth; every level indents by two spaces.
        Note that the dependency list is sorted in place by name.
        """
        indent = "  " * level
        print(indent, "+", Color.info, self.name, Color.none)
        # sort dependencies alphabetically
        self.dependencies.sort(key = lambda x: x.name)
        for package in self.dependencies:
            package.dump(level+1)
    def search(self,pattern,result):
        """Recursive search for `pattern` in the source of this package and its dependencies.

        The source of a package is scanned only on the first visit: every line
        containing `pattern` produces one SearchHit (0-based line index in
        `number`, package name in `filename`, the text in `line`) which is
        appended to `result`.  On every visit, including the first one, a copy
        of the current import stack is appended to the `stacks` list of each of
        this package's hits, so a hit ends up with one stack per import path
        leading to the package.
        """
        if self.module:
            if not self.searched:
                filename = None
                for number, line in enumerate(inspect.getsource(self.module).splitlines()):
                    if pattern not in line:
                        continue
                    if filename is None:
                        filename = packageNameFromFilename(inspect.getsourcefile(self.module))
                    hit = SearchHit()
                    hit.number = number
                    hit.filename = filename
                    hit.line = line
                    hit.stacks = list()
                    result.append(hit)
                    self.hits.append(hit)
                    # kept for callers that look at the last hit of a package
                    self.hit = hit
            # record the path we arrived by on every hit of this package,
            # not only on the last one
            for hit in self.hits:
                hit.stacks.append(copy.copy(_stack))
        # then go on with dependencies
        _stack.append(self.name)
        try:
            for package in self.dependencies:
                package.search(pattern,result)
        finally:
            # keep the shared stack balanced even if a nested search fails
            _stack.pop()
        self.searched = True
0090 
0091 
class mymf(modulefinder.ModuleFinder):
    """ModuleFinder that remembers which module imported which.

    Attributes:
        _depgraph     {importer name: {imported name: 1}}
        _types        {module name: module type as reported to load_module}
        _last_caller  module whose import is currently being resolved
        _localarea    value of $CMSSW_BASE ('' when unset)
        _globalarea   value of $CMSSW_RELEASE_BASE ('' when unset)
    """
    def __init__(self,*args,**kwargs):
        """Set up the bookkeeping and forward all arguments to ModuleFinder."""
        self._depgraph = {}
        self._types = {}
        self._last_caller = None
        # Read the environment directly: os.path.expandvars() leaves the literal
        # '$NAME' in place for an unset variable, which would defeat the
        # "area is empty" checks below (e.g. for IB areas without a global area).
        self._localarea = os.environ.get('CMSSW_BASE', '')
        self._globalarea = os.environ.get('CMSSW_RELEASE_BASE', '')
        modulefinder.ModuleFinder.__init__(self,*args,**kwargs)
    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Resolve an import like ModuleFinder does, remembering `caller`
        for the duration of the call so that import_module() can attribute
        the dependency to it."""
        old_last_caller = self._last_caller
        try:
            self._last_caller = caller
            return modulefinder.ModuleFinder.import_hook(self,name,caller,fromlist, level=level)
        finally:
            self._last_caller = old_last_caller

    def import_module(self,partnam,fqname,parent):
        """Import one module and record it as a dependency of the current caller.

        "os" and "unittest" are never followed.  Returns the module object, or
        None if the module is skipped or cannot be found.
        """
        if partnam in ("os","unittest"):
            return None

        r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent)
        # since the modulefinder is not able to look into the global area when coming from the local area, we force a second try
        if r is None and parent and parent.__path__ and self._localarea != '' and self._globalarea != '':
            # NOTE: the parent is redirected to the global area permanently
            if parent.__file__:
                parent.__file__ = parent.__file__.replace(self._localarea,self._globalarea)
            parent.__path__[0] = parent.__path__[0].replace(self._localarea,self._globalarea)
            r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent)

        # without a caller (import_hook() invoked with caller=None) there is no
        # edge to record
        if r is not None and self._last_caller is not None:
            self._depgraph.setdefault(self._last_caller.__name__,{})[r.__name__] = 1
        return r
    def load_module(self, fqname, fp, pathname, aux_info):
        """Load a module like ModuleFinder does and remember its module type
        (the third element of `aux_info`) in `_types`."""
        (suffix, mode, moduleType) = aux_info
        r = modulefinder.ModuleFinder.load_module(self, fqname, fp, pathname, (suffix, mode, moduleType))
        if r is not None:
            self._types[r.__name__] = moduleType
        return r

    def _is_module_in_area(self, area, dottedName):
        """Return True if `dottedName` can be found below "<area>/python".

        Each dot separated part is looked up inside the package found for the
        previous part.  Directories without an __init__.py are accepted as
        (namespace) packages.  Lookup problems are reported as "not found".
        """
        if not area or not dottedName:
            return False
        searchPath = [area+"/python"]
        try :
            for subModule in dottedName.split(".") :
                if not searchPath :
                    # the previous part was a plain module, it has no submodules
                    return False
                moduleInfo=importlib.machinery.PathFinder.find_spec( subModule, searchPath )
                if moduleInfo is None :
                    return False
                searchPath=list(moduleInfo.submodule_search_locations or [])
        except (ImportError, ValueError) :
            return False
        return True

    def scan_opcodes_25(self, co, unpack = struct.unpack):
        """
        This is basically just the default opcode scanner from ModuleFinder, but extended to also
        look for "process.load(<module>)" commands. Since the Process object might not necessarily
        be called "process", it scans for a call to a "load" method with a single parameter on
        *any* object. If one is found it checks if the parameter is a string that refers to a valid
        python module in the local or global area. If it does, the scanner assumes this was a call
        to a Process object and yields the module name.
        It's not possible to scan first for Process object declarations to get the name of the
        objects since often (e.g. for customisation functions) the object is passed to a function
        in a different file.

        The ModuleFinder.scan_opcodes_25 implementation this is based on was taken from
        https://hg.python.org/cpython/file/2.7/Lib/modulefinder.py#l364

        NOTE(review): the scanning below assumes the Python 2 bytecode layout (one opcode
        character followed by a two byte argument). Python 3's ModuleFinder appears to drive
        scanning through scan_opcodes() instead, so this override is presumably never reached
        there - confirm before relying on process.load() detection.
        """
        # Scan the code, and yield 'interesting' opcode combinations
        # Python 2.5 version (has absolute and relative imports)
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        LOAD_CONST = modulefinder.LOAD_CONST
        IMPORT_NAME = modulefinder.IMPORT_NAME
        STORE_OPS = modulefinder.STORE_OPS
        HAVE_ARGUMENT = modulefinder.HAVE_ARGUMENT
        LOAD_ATTR = chr(dis.opname.index('LOAD_ATTR'))
        LOAD_NAME = chr(dis.opname.index('LOAD_NAME'))
        CALL_FUNCTION = chr(dis.opname.index('CALL_FUNCTION'))
        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME

        try :
            indexOfLoadConst = names.index("load") # This might throw a ValueError
            # These are the opcodes required to access the "load" attribute. This might
            # not even be a function, but I check for that later.
            loadMethodOpcodes = LOAD_ATTR+struct.pack('<H',indexOfLoadConst)
        except ValueError :
            # doesn't look like "load" is used anywhere in this file
            loadMethodOpcodes=None

        while code:
            c = code[0]

            # Check to see if this is a call to a "load" method
            if loadMethodOpcodes is not None and len(code)>=9 : # Need at least 9 codes for the full call
                if code[:3]==loadMethodOpcodes :
                    # The attribute "load" is being accessed, need to make sure this is a function call.
                    # I'll look ahead and see if the CALL_FUNCTION code is used - this could be in a different
                    # place depending on the number of arguments, but I'm only interested in methods with a
                    # single argument so I know exactly where CALL_FUNCTION should be.
                    if code[6]==CALL_FUNCTION :
                        # I know this is calling a method called "load" with one argument. I need
                        # to find out what the argument is. Note that I still don't know if this is
                        # on a cms.Process object.
                        indexInTable=unpack('<H',code[4:6])[0]
                        if code[3]==LOAD_CONST :
                            # The argument is a constant, so retrieve that from the table
                            loadMethodArgument=consts[indexInTable]
                            # I know a load method with one argument has been called on *something*, but I don't
                            # know if it was a cms.Process object. All I can do is check to see if the argument is
                            # a string, and if so if it refers to a python file in the user or global areas.
                            # Anything else is assumed not to be a call on a cms.Process object and is
                            # skipped silently.
                            if isinstance(loadMethodArgument, str) :
                                loadMethodArgument = loadMethodArgument.replace("/",".")
                                # The local area is tried first; a package that is only partially checked
                                # out there may still provide the module from the global area.
                                if ( self._is_module_in_area(self._localarea, loadMethodArgument)
                                     or self._is_module_in_area(self._globalarea, loadMethodArgument) ) :
                                    yield "import", (None, loadMethodArgument)

                        elif code[3]==LOAD_NAME :
                            # The argument is a variable. I can get the name of the variable quite easily but
                            # not the value, unless I execute all of the opcodes. Not sure what to do here,
                            # guess I'll just print a warning so that the user knows?
                            print("Unable to determine the value of variable '"+names[indexInTable]+"' to see if it is a proces.load(...) statement in file "+co.co_filename)

                        code=code[9:]
                        continue

            if c in STORE_OPS:
                oparg, = unpack('<H', code[1:3])
                yield "store", (names[oparg],)
                code = code[3:]
                continue
            if code[:9:3] == LOAD_LOAD_AND_IMPORT:
                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
                level = consts[oparg_1]
                if level == -1: # normal import
                    yield "import", (consts[oparg_2], names[oparg_3])
                elif level == 0: # absolute import
                    yield "absolute_import", (consts[oparg_2], names[oparg_3])
                else: # relative import
                    yield "relative_import", (level, consts[oparg_2], names[oparg_3])
                code = code[9:]
                continue
            if c >= HAVE_ARGUMENT:
                code = code[3:]
            else:
                code = code[1:]
0247 
def removeRecursiveLoops( node, verbose=False, currentStack=None ) :
    """Break import cycles in the dependency tree below `node`.

    The tree is walked depth first while `currentStack` holds the chain of
    nodes from the root down to the parent of `node`.  When `node` is already
    part of that chain, the edge from its parent (the last stack entry) to
    `node` closes a loop and is removed from the parent's `dependencies`.

    node          object with `name` and `dependencies` attributes
    verbose       print every loop that gets removed
    currentStack  chain of ancestors; leave at None for the root call
    """
    if currentStack is None : currentStack=[]
    if node in currentStack :
        if verbose :
            duplicateIndex=currentStack.index( node )
            print("Removing recursive loop in:")
            for ancestor in currentStack[duplicateIndex:] :
                print("   ",ancestor.name,"-->")
            print("   ",node.name)
        currentStack[-1].dependencies.remove(node)
        return
    # No recursive loop found, so continue traversing the tree
    currentStack.append( node )
    # Walk a snapshot: the recursive calls may remove entries from
    # node.dependencies, and removing from a list while iterating it would
    # silently skip the following dependency.
    for subnode in list(node.dependencies) :
        removeRecursiveLoops( subnode, verbose, currentStack[:] )
0263 
def transformIntoGraph(depgraph,toplevel):
    """Convert the flat dependency dictionary into a tree of Package objects.

    depgraph  {importer name: {imported name: ...}}
    toplevel  name of the top level config; it is represented by a Package
              created with top=True

    Apart from the top level entry only names containing exactly two dots are
    turned into packages.  Recursive loops are removed before the top level
    Package is returned.
    """
    def isPackageName(name):
        return name.count(".") == 2

    # create the top level config
    packageDict = {toplevel: Package(toplevel, top = True)}

    # create package objects, one per name; an existing entry (in particular
    # the top level one) is never replaced
    for key, value in depgraph.items():
        for name in (key, *value):
            if isPackageName(name) and name not in packageDict:
                packageDict[name] = Package(name)
    # now create dependencies
    for key, value in depgraph.items():
        if isPackageName(key) or key == toplevel:
            packageDict[key].dependencies = [packageDict[name] for name in value if isPackageName(name)]

    removeRecursiveLoops( packageDict[toplevel] )
    # find and return the top level config
    return packageDict[toplevel]
0284 
0285 
def getDependenciesFromPythonFile(filename:str,toplevelname,path):
    """Run the module finder on `filename` and return its dependency dictionary.

    filename      python file to analyse
    toplevelname  key under which the dependencies of the file itself are stored
    path          module search path handed to the finder

    The returned dictionary maps importer names to {imported name: 1}.  The
    entry recorded for "__main__" is additionally made available under
    `toplevelname`; a file without any import gets an empty entry there.
    """
    finder = mymf(path)
    finder.run_script(filename)
    globalDependencyDict = finder._depgraph
    # "__main__" only exists if the script imported something
    globalDependencyDict[toplevelname] = globalDependencyDict.get("__main__", {})
    return globalDependencyDict
0292 
0293 
def getImportTree(filename:str,path):
    """Build the import tree of the python file `filename`.

    The package name derived from `filename` labels the top level node, the
    flat dependencies found with search path `path` are collected and then
    converted into the tree whose top level node is returned.
    """
    topName = packageNameFromFilename(filename)
    flatDependencies = getDependenciesFromPythonFile(filename, topName, path)
    return transformIntoGraph(flatDependencies, topName)