Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-07-23 02:25:32

0001 #!/usr/bin/env python3
0002 from __future__ import print_function
0003 import os
0004 import re
0005 import sys
0006 import atexit
0007 import tempfile
0008 import subprocess
0009 from shutil import copy, rmtree
0010 from collections import defaultdict
0011 
0012 # only needed to locate CMSSW
0013 import FWCore
0014 import FWCore.ParameterSet.Types
0015 
# Base names of the two result files. They are created inside the temporary
# directory and copied to the current directory when tracing is finished.
OUTFILE_TREE = "calltree"
OUTFILE_FILES = "callfiles"
# When True, the call tree is written as one flattened call path per function;
# otherwise as one "callee -> caller" edge per line.
FLAT_OUTPUT = False
# cmsRun always gets special handling, but also trace these scripts
WRAP_SCRIPTS = ["cmsDriver.py" ]
# Path prefixes (compared with str.startswith against the code file name);
# calls into code below these prefixes are not recorded.
IGNORE_DIRS = [
  os.path.dirname(os.__file__),
  FWCore.ParameterSet.Types.__file__,
]
# Prefixes removed from file names before they are written to the output.
STRIPPATHS = [ # we will add the base dir from CMSSWCALLBASE env var here
  os.environ["CMSSW_BASE"] + "/python/", os.environ["CMSSW_RELEASE_BASE"] + "/python/",
  os.environ["CMSSW_BASE"] + "/cfipython/", os.environ["CMSSW_RELEASE_BASE"] + "/cfipython/"]
# Labels describing the traced job; filled by addprefixinfo() and joined with
# ", " to form the prefix of every output line.
PREFIXINFO = []
ARGV0 = "" # set in main
0030 
def addprefixinfo(argv):
  """Derive labels for the traced job and append them to PREFIXINFO.

  The labels are taken from the current working directory and from the name
  of the traced program, `argv[0]`:

  * "wf" plus the matched number if the working directory path contains
    "/<digits>.<digits>_";
  * "online" plus the client name if `argv[0]` looks like
    "<name>_dqm_sourceclient-live_cfg.py";
  * "stepN" if `argv[0]` looks like "stepN_<...>.py";
  * "RECO", "ALCA" or "HARVEST" if such a step name contains one of them.

  If PREFIXINFO is still empty after these checks, `argv[0]` itself is
  appended so that the prefix is never empty.
  """
  # Raw strings: "\d" and "\." are not valid str escapes and trigger
  # warnings in recent Python versions when written in a plain literal.
  cwd = os.path.abspath(os.getcwd())
  wf = re.match(r".*/(\d+\.\d+)_", cwd)
  if wf:
    PREFIXINFO.append("wf")
    PREFIXINFO.append(wf.group(1))
  online = re.match(r"(.*/)?(.*)_dqm_sourceclient-live_cfg\.py", argv[0])
  if online:
    PREFIXINFO.append("online")
    PREFIXINFO.append(online.group(2))
  step = re.match(r"(step\d+)_.*\.py", argv[0])
  if step:
    PREFIXINFO.append(step.group(1))
  processing = re.match(r"step\d+_.*(RECO|ALCA|HARVEST).*\.py", argv[0])
  if processing:
    PREFIXINFO.append(processing.group(1))
  if not PREFIXINFO:
    PREFIXINFO.append(argv[0])
0049 
def setupenv():
  """Prepare a temporary directory that intercepts the wrapped programs.

  Creates a fresh temporary directory containing symlinks named after every
  entry of WRAP_SCRIPTS and "cmsRun", all pointing at this script (ARGV0),
  and puts that directory first in $PATH. Also sets the environment
  variables CMSSWCALLTREE and CMSSWCALLFILES (output files inside the
  temporary directory, created empty here) and CMSSWCALLBASE (the current
  working directory with a trailing "/").

  Returns the path of the temporary directory.
  """
  bindir = tempfile.mkdtemp()
  print("+Setting up in ", bindir)
  # A relative symlink target is resolved relative to the directory holding
  # the link, so a relative ARGV0 would produce dangling links in bindir.
  wrapper = os.path.abspath(ARGV0)
  for s in WRAP_SCRIPTS:
    os.symlink(wrapper, bindir + "/" + s)
  os.symlink(wrapper, bindir + "/cmsRun")
  os.environ["PATH"] = bindir + ":" + os.environ["PATH"]
  os.environ["CMSSWCALLTREE"] = bindir + "/" + OUTFILE_TREE
  os.environ["CMSSWCALLFILES"] = bindir + "/" + OUTFILE_FILES
  os.environ["CMSSWCALLBASE"] = os.path.abspath(os.getcwd()) + "/"
  # Create (or truncate) both output files; later writers open them in
  # append mode.
  for envvar in ("CMSSWCALLTREE", "CMSSWCALLFILES"):
    with open(os.environ[envvar], "w"):
      pass
  return bindir
0065 
def cleanupenv(tmpdir):
  """Save the trace results and remove the temporary directory.

  The files named by the CMSSWCALLTREE and CMSSWCALLFILES environment
  variables are copied into the current directory, then `tmpdir` is deleted
  recursively.
  """
  print("+Cleaning up ", tmpdir)
  for envvar in ("CMSSWCALLTREE", "CMSSWCALLFILES"):
    copy(os.environ[envvar], ".")
  rmtree(tmpdir)
0073 
0074 
def trace_command(argv):
  """Run an arbitrary command with the tracing environment in place.

  If CMSSWCALLTREE is not yet set in the environment, this is the outermost
  invocation: the environment is created with setupenv() before the command
  runs and torn down with cleanupenv() afterwards. Otherwise the command is
  simply run in the environment that is already there.

  Returns the exit status of the command as reported by subprocess.call.
  """
  tmpdir = None
  if "CMSSWCALLTREE" not in os.environ:
    tmpdir = setupenv()

  try:
    return subprocess.call(argv)
  finally:
    # Also reached when the command could not be started at all, so the
    # temporary directory never outlives this call.
    if tmpdir:
      cleanupenv(tmpdir)
0084 
def formatfile(filename):
  """Return `filename` as an absolute path with known prefixes removed.

  Every entry of STRIPPATHS is tried in order; each one that the (possibly
  already shortened) name starts with is cut off the front.
  """
  shortened = os.path.abspath(filename)
  for prefix in STRIPPATHS:
    if not shortened.startswith(prefix):
      continue
    shortened = shortened[len(prefix):]
  return shortened
0091 
def searchinpath(progname, path):
  """Return the first existing file `<entry>/<progname>` for entry in `path`.

  `path` is any iterable of directory names. If no entry contains a regular
  file called `progname`, a message is printed and the process exits with
  status 1 (SystemExit).
  """
  # Materialize first: `path` may be a one-shot iterator, and it is needed
  # again for the messages below. This also makes an empty path well-defined.
  path = list(path)
  for entry in path:
    file_path = os.path.join(entry, progname)
    if os.path.isfile(file_path):
      print("+Found %s as %s in %s." % (progname, file_path, path))
      return file_path
  print("+Cannot find program (%s) in modified $PATH (%s)." % (progname, path))
  sys.exit(1)
0103 
def writeoutput(callgraph, files):
  """Append the recorded call graph and file list to the output files.

  `callgraph` maps a (filename, funcname) pair to the set of
  (filename, funcname) pairs that called it; `files` is an iterable of file
  names. Every output line is prefixed with the labels in PREFIXINFO.

  The file list goes to the file named by $CMSSWCALLFILES, one file per
  line. The graph goes to the file named by $CMSSWCALLTREE: with FLAT_OUTPUT
  as one ";"-joined call path per function followed by " 1", otherwise as
  one "callee -> caller" line per edge.
  """
  progname = ", ".join(PREFIXINFO)
  print("+Done running %s, writing output..." % progname)

  def describe(func):
    # "file::function" label for one graph node
    filename, funcname = func
    return "%s::%s" % (formatfile(filename), funcname)

  def callpath(func):
    # climb up in the call graph until we find a node without callers (this is
    # the entry point, the traced call itself). There may be cycles, but any
    # node is reachable from the entry point, so no backtracking required.
    path = []
    seen = set()
    parents = {func}
    timeout = 100 # go no more than this deep
    while parents:
      if len(parents) == 1:
        func = next(iter(parents))
        seen.add(func)
        path.append(describe(func))
      else:
        # several callers: follow the first one not visited yet and mark the
        # step with "+" to show that the path is not unique.
        for func in parents:
          if func not in seen:
            break
        if func in seen:
          # somehow we got stuck in a loop and can't get out. So maybe
          # backtracking is needed in some situations?
          # Abort with a partial path for now.
          return path
        seen.add(func)
        path.append(describe(func) + "+")
      # .get() rather than indexing: the entry point has no callers recorded,
      # and indexing a defaultdict would insert a key while the caller below
      # is iterating over the graph.
      parents = callgraph.get(func, ())
      timeout -= 1
      if timeout == 0:
        print(seen, path, parents, func)
        raise Exception('Call path too deep, aborting')
    # the last element is the entry point itself; leave it out
    return path[:-1]

  with open(os.environ["CMSSWCALLFILES"], "a") as outfile:
    for f in files:
      print("%s: %s" % (progname, formatfile(f)), file=outfile)
  with open(os.environ["CMSSWCALLTREE"], "a") as outfile:
    # Iterate over snapshots: the graph may still grow while we write (this
    # function can be called while the trace function is installed), and a
    # dict or set must not change size during iteration.
    if FLAT_OUTPUT:
      for func in list(callgraph):
        print("%s: %s 1" % (progname, ";".join(reversed(callpath(func)))), file=outfile)
    else:
      for func, callers in list(callgraph.items()):
        for pfunc in tuple(callers):
          print("%s: %s -> %s" % (progname, describe(func), describe(pfunc)), file=outfile)
0154 
def trace_python(prog_argv, path):
  """Run a Python program in this process while recording its call graph.

  `prog_argv[0]` is looked up with searchinpath() in `path`, compiled and
  executed with a namespace resembling `__main__`; `sys.argv` is replaced by
  `prog_argv`. While it runs, every function call outside IGNORE_DIRS is
  recorded as an edge callee -> caller, together with the callee's file.

  The results are written by writeoutput(), which is registered to run at
  interpreter exit and is also called right before an os.execvpe() issued by
  the traced program.

  This function does not return normally: it ends with sys.exit(0), or
  sys.exit(1) if an OSError is raised while reading or running the program.
  A SystemExit raised by the traced program is swallowed.
  """
  files = set()
  callgraph = defaultdict(set)

  def nop_trace(frame, why, arg):
    pass

  def tracefunc(frame, why, arg):
    if why == 'call':
      code = frame.f_code
      # compared to the `trace` module, we don't attempt to find class names here
      filename = code.co_filename

      for d in IGNORE_DIRS:
        if filename.startswith(d):
          # Entering ignored code: stop recording new calls and watch only
          # this frame, so recording can resume once it returns.
          sys.settrace(nop_trace)
          return wait_for_return

      funcname = code.co_name
      caller = frame.f_back.f_code
      p_filename = caller.co_filename
      p_funcname = caller.co_name

      files.add(filename)
      callgraph[(filename, funcname)].add((p_filename, p_funcname))
    return None

  def wait_for_return(frame, why, arg):
    # local trace function of a frame in ignored code: re-install the real
    # trace function when that frame returns.
    if why == 'return':
      sys.settrace(tracefunc)
    return wait_for_return

  sys.argv = prog_argv
  progname = prog_argv[0]


  file_path = searchinpath(progname, path)
  try:
    with open(file_path) as fp:
      code = compile(fp.read(), progname, 'exec')
      # try to emulate __main__ namespace as much as possible
      main_globals = {
      '__file__': progname,
      '__name__': '__main__',
      '__package__': None,
      '__cached__': None,
      }

      # would be too easy if this covered all the cases...
      atexit.register(lambda: writeoutput(callgraph, files))
      # cmsDriver calls cmsRun via exec (execvpe specifically), so we also need
      # to hook that...
      old_execvpe = os.execvpe
      def exec_hook(*args):
        writeoutput(callgraph, files)
        old_execvpe(*args)
      os.execvpe = exec_hook

      # now turn on the tracing
      sys.settrace(tracefunc)
      try:
        # call form of exec: the `exec code in ns` statement is a syntax
        # error on Python 3
        exec(code, main_globals, main_globals)
      finally:
        sys.settrace(None)

  except OSError as err:
    print("+Cannot run file %r because: %s" % (sys.argv[0], err))
    sys.exit(1)
  except SystemExit:
    pass
  # this is not necessarily reached at all.
  sys.exit(0)
0227 
def help():
  """Print a usage summary and two example command lines to stdout."""
  print("Usage: %s <some cmssw commandline>" % (sys.argv[0]))
  print("  The given programs will be executed, instrumenting calls to %s and cmsRun." % (", ".join(WRAP_SCRIPTS)))
  print("  cmsRun will not actually run cmssw, but all the Python code will be executed and instrumented. The results are written to the files `%s` and `%s` in the same directory." % (OUTFILE_FILES, OUTFILE_TREE))
  if FLAT_OUTPUT:
    print("  The callgraph output file can be processed with Brendan Gregg's FlameGraph tool.")
  else:
    print("  The callgraph output lists edges pointing from each function to the one calling it.")

  print("Examples:")
  print("  %s runTheMatrix.py -l 1000 --ibeos" % sys.argv[0])
  # the indent belongs inside the string, like in the example above
  print("  %s cmsRun rpc_dqm_sourceclient-live_cfg.py" % sys.argv[0])
0240 
def main():
  """Entry point; behaviour depends on the name the script was invoked as.

  * Invoked under the name of one of WRAP_SCRIPTS (through a symlink made by
    setupenv): trace the real script of that name, searched in $PATH without
    the directory holding the symlink.
  * Invoked as "cmsRun": trace the given configuration file, searched in the
    current directory, instead of running cmsRun.
  * Invoked without arguments: print the usage text.
  * Otherwise: run the given command line with the tracing environment.
  """
  global ARGV0
  print("+Running cmsswfiletrace...")
  ARGV0 = sys.argv[0]
  for s in WRAP_SCRIPTS:
    if sys.argv[0].endswith(s):
      print("+Wrapping %s..." % s)
      addprefixinfo(sys.argv)
      tmppath = os.path.dirname(sys.argv[0])
      # A real list rather than filter(): on Python 3 filter() gives a
      # one-shot iterator, but the search path is iterated and also printed.
      path = [
        entry for entry in os.environ["PATH"].split(":")
        if not entry.startswith(tmppath)
      ]
      STRIPPATHS.append(os.environ["CMSSWCALLBASE"])
      trace_python([s] + sys.argv[1:], path)
      return
  if sys.argv[0].endswith('cmsRun'):
    print("+Wrapping cmsRun...")
    addprefixinfo(sys.argv[1:])
    STRIPPATHS.append(os.environ["CMSSWCALLBASE"])
    trace_python(sys.argv[1:], ["."])
    return
  if len(sys.argv) <= 1:
    help()
    return
  # else
  print("+Running command with tracing %s..." % sys.argv[1:])
  trace_command(sys.argv[1:])
0269 
0270 
# Run the tracer only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
0273