Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-26 02:34:11

0001 #!/usr/bin/env python3
0002 import os
0003 import re
0004 import sys
0005 import atexit
0006 import tempfile
0007 import subprocess
0008 from shutil import copy, rmtree
0009 from collections import defaultdict
0010 
0011 # only needed to locate CMSSW
0012 import FWCore
0013 import FWCore.ParameterSet.Types
0014 
# Base names of the two result files: call graph edges and touched source files.
OUTFILE_TREE = "calltree"
OUTFILE_FILES = "callfiles"
# When True, writeoutput emits one flattened call path per function instead of
# one line per caller/callee edge.
FLAT_OUTPUT = False
# cmsRun always gets special handling, but also trace these scripts
WRAP_SCRIPTS = ["cmsDriver.py"]
# Calls into code whose file name starts with one of these prefixes are not
# recorded by the tracer.
IGNORE_DIRS = [os.path.dirname(os.__file__), FWCore.ParameterSet.Types.__file__]
# Path prefixes stripped from file names in the output. main() additionally
# appends the base dir from the CMSSWCALLBASE env var here.
STRIPPATHS = [
  os.environ[base] + subdir
  for subdir in ("/python/", "/cfipython/")
  for base in ("CMSSW_BASE", "CMSSW_RELEASE_BASE")
]
# Labels identifying the traced job; filled in by addprefixinfo().
PREFIXINFO = []
ARGV0 = ""  # set in main
0029 
def addprefixinfo(argv):
  """Fill PREFIXINFO with labels describing the job that is being traced.

  The labels are derived from the current working directory and from the
  script name in ``argv[0]``:

  * ``"wf", <number>`` if the working directory path contains a component
    starting with ``<digits>.<digits>_``,
  * ``"online", <name>`` if the script is ``<name>_dqm_sourceclient-live_cfg.py``,
  * ``stepN`` if the script name starts with ``stepN_``,
  * ``RECO``, ``ALCA`` or ``HARVEST`` if a ``stepN_`` script name contains it.

  If none of the patterns match, the script name itself is used as the label.
  """
  cwd = os.path.abspath(os.getcwd())
  script = argv[0]

  # Raw strings: "\d" and "\." in a normal string literal are invalid escape
  # sequences and trigger a warning on recent Python versions.
  wf = re.match(r".*/(\d+\.\d+)_", cwd)
  if wf:
    PREFIXINFO.append("wf")
    PREFIXINFO.append(wf.group(1))
  online = re.match(r"(.*/)?(.*)_dqm_sourceclient-live_cfg\.py", script)
  if online:
    PREFIXINFO.append("online")
    PREFIXINFO.append(online.group(2))
  step = re.match(r"(step\d+)_.*\.py", script)
  if step:
    PREFIXINFO.append(step.group(1))
  processing = re.match(r"step\d+_.*(RECO|ALCA|HARVEST).*\.py", script)
  if processing:
    PREFIXINFO.append(processing.group(1))
  if not PREFIXINFO:
    # nothing recognized: fall back to the plain script name
    PREFIXINFO.append(script)
0048 
def setupenv():
  """Create the temporary wrapper directory and export the tracing environment.

  A fresh temporary directory is populated with symlinks to this script, one
  named ``cmsRun`` and one for each entry of WRAP_SCRIPTS, and is put at the
  front of ``$PATH``. The environment variables CMSSWCALLTREE, CMSSWCALLFILES
  (output files inside the temporary directory, created empty here) and
  CMSSWCALLBASE (the current working directory, with a trailing slash) are
  set for this process and therefore for its children.

  Returns the path of the temporary directory.
  """
  bindir = tempfile.mkdtemp()
  print("+Setting up in ", bindir)

  # A relative symlink target is resolved against the directory containing the
  # link, not against the current directory. Make the target absolute so the
  # links also work when this script was started via a relative path.
  wrapper = os.path.abspath(ARGV0)
  for s in WRAP_SCRIPTS:
    os.symlink(wrapper, bindir + "/" + s)
  os.symlink(wrapper, bindir + "/cmsRun")

  os.environ["PATH"] = bindir + ":" + os.environ["PATH"]
  os.environ["CMSSWCALLTREE"] = bindir + "/" + OUTFILE_TREE
  os.environ["CMSSWCALLFILES"] = bindir + "/" + OUTFILE_FILES
  os.environ["CMSSWCALLBASE"] = os.path.abspath(os.getcwd()) + "/"

  # start with empty output files; the wrapped processes append to them
  for variable in ("CMSSWCALLTREE", "CMSSWCALLFILES"):
    with open(os.environ[variable], "w"):
      pass
  return bindir
0064 
def cleanupenv(tmpdir):
  """Copy both result files into the current directory, then delete `tmpdir`.

  The result file locations are read from the CMSSWCALLTREE and
  CMSSWCALLFILES environment variables.
  """
  print("+Cleaning up ", tmpdir)
  for variable in ("CMSSWCALLTREE", "CMSSWCALLFILES"):
    copy(os.environ[variable], ".")
  rmtree(tmpdir)
0072 
0073 
def trace_command(argv):
  """Run the command `argv` with the tracing wrappers in place.

  If no tracing environment exists yet (CMSSWCALLTREE is not set), it is
  created before the command runs, and afterwards the results are collected
  and the temporary directory is removed. When the environment already
  exists, the command is simply executed.
  """
  tmpdir = None
  if "CMSSWCALLTREE" not in os.environ:
    tmpdir = setupenv()

  try:
    subprocess.call(argv)
  finally:
    # also collect results and remove the temporary directory when the
    # command could not be started or the run was interrupted
    if tmpdir:
      cleanupenv(tmpdir)
0083 
def formatfile(filename):
  """Return the absolute form of `filename` with known path prefixes removed.

  Every entry of STRIPPATHS is tried in order; whenever the (possibly already
  shortened) name begins with an entry, that leading part is cut off.
  """
  shortened = os.path.abspath(filename)
  for prefix in STRIPPATHS:
    cut = len(prefix)
    if shortened[:cut] == prefix:
      shortened = shortened[cut:]
  return shortened
0090 
def searchinpath(progname, path):
  """Return the first ``<entry>/<progname>`` in `path` that is a regular file.

  `path` is any iterable of directory names, searched in order. If the
  program is not found in any of them (including when `path` is empty), a
  message is printed and the process exits with status 1.
  """
  # Search $PATH by hand: unlike shutil.which, any regular file is accepted
  # here, whether or not it is executable.
  # Materialize first, so one-shot iterators (e.g. a filter object) can be
  # both searched and shown in the messages below.
  path = list(path)
  for entry in path:
    file_path = os.path.join(entry, progname)
    if os.path.isfile(file_path):
      print("+Found %s as %s in %s." % (progname, file_path, path))
      return file_path
  print("+Cannot find program (%s) in modified $PATH (%s)." % (progname, path))
  sys.exit(1)
0102 
def writeoutput(callgraph, files):
  """Append the collected trace data to the two result files.

  `callgraph` maps each called function, as a ``(filename, funcname)`` tuple,
  to the set of functions that called it. `files` is an iterable of the file
  names in which traced functions live.

  Every output line is prefixed with the PREFIXINFO labels. The file named by
  $CMSSWCALLFILES receives one line per file. The file named by
  $CMSSWCALLTREE receives either one ``callee -> caller`` line per edge or,
  if FLAT_OUTPUT is set, one ``;``-separated call path per function.
  """
  progname = ", ".join(PREFIXINFO)
  print("+Done running %s, writing output..." % progname)

  def describe(func):
    # "<shortened file name>::<function name>"
    filename, funcname = func
    return "%s::%s" % (formatfile(filename), funcname)

  def callpath(func):
    # climb up in the call graph until we find a node without callers (this is
    # the entry point, the traced call itself). There may be cycles, but any
    # node is reachable from the entry point, so no backtracking required.
    path = []
    seen = set()
    parents = {func}
    timeout = 100 # go no more than this deep
    while parents:
      if len(parents) == 1:
        func = next(iter(parents))
        seen.add(func)
        path.append(describe(func))
      if len(parents) > 1:
        # several callers: follow the first one not visited yet and mark the
        # ambiguity with a trailing "+"
        for func in parents:
          if not func in seen:
            break
        if func in seen:
          # somehow we got stuck in a loop and can't get out. So maybe
          # backtracking is needed in some situations?
          # Abort with a partial path for now.
          return path
        seen.add(func)
        path.append(describe(func) + "+")
      # Use .get() rather than indexing: callgraph is usually a defaultdict,
      # and indexing it with an entry point (which is not a key) would insert
      # a new key while the caller below is iterating over the dict.
      parents = callgraph.get(func, ())
      timeout -= 1
      if timeout == 0:
        print(seen, path, parents, func)
        raise Exception('Call path too deep, aborting')
    # the last element is the entry point itself, which is not reported
    return path[:-1]

  with open(os.environ["CMSSWCALLFILES"], "a") as outfile:
    for f in files:
      print("%s: %s" % (progname, formatfile(f)), file=outfile)
  with open(os.environ["CMSSWCALLTREE"], "a") as outfile:
    if FLAT_OUTPUT:
      for func in callgraph:
        print("%s: %s 1" % (progname, ";".join(reversed(callpath(func)))), file=outfile)
    else:
      for func, callers in callgraph.items():
        for pfunc in callers:
          print("%s: %s -> %s" % (progname, describe(func), describe(pfunc)), file=outfile)
0153 
def trace_python(prog_argv, path):
  """Execute a Python script in this process while recording its call graph.

  `prog_argv` is the argument vector for the script; `prog_argv[0]` is looked
  up in the directories listed in `path` (via searchinpath) and `sys.argv` is
  replaced by `prog_argv`. The script is compiled and executed as
  ``__main__`` with a trace function installed that records, for every call,
  the called function, its caller and the file of the called function.

  The collected data is handed to writeoutput() at interpreter exit (atexit)
  and also right before any call to os.execvpe, which is replaced by a hook.

  This function does not return: it ends with sys.exit(1) if the script file
  cannot be read (OSError) and with sys.exit(0) otherwise; exceptions other
  than OSError and SystemExit raised by the script propagate.
  """
  # file names of all traced functions
  files = set()
  # (filename, funcname) of a callee -> set of (filename, funcname) of callers
  callgraph = defaultdict(lambda: set())

  def nop_trace(frame, why, arg):
    # global trace function used while inside ignored code: records nothing
    pass

  def tracefunc(frame, why, arg):
    # global trace function: records one edge for each 'call' event
    if why == 'call':
      code = frame.f_code
      # compared to the `trace` module, we don't attempt to find class names here
      filename = code.co_filename

      for d in IGNORE_DIRS:
        if filename.startswith(d):
          # Call into ignored code: switch the global trace function to the
          # no-op and trace this frame locally with wait_for_return, which
          # switches back to tracefunc once this frame returns.
          sys.settrace(nop_trace)
          return wait_for_return

      funcname = code.co_name
      # the caller is the function owning the previous frame
      code = frame.f_back.f_code
      p_filename = code.co_filename
      p_funcname = code.co_name

      files.add(filename)
      callgraph[(filename, funcname)].add((p_filename, p_funcname))
    # no local trace function: only 'call' events are of interest here
    return None

  def wait_for_return(frame, why, arg):
    # local trace function for a frame in ignored code
    if why == 'return':
      sys.settrace(tracefunc)
    return wait_for_return

  # make the script see its own command line
  sys.argv = prog_argv
  progname = prog_argv[0]


  file_path = searchinpath(progname, path)
  try:
    with open(file_path) as fp:
      code = compile(fp.read(), progname, 'exec')
      # try to emulate __main__ namespace as much as possible
      globals = {
      '__file__': progname,
      '__name__': '__main__',
      '__package__': None,
      '__cached__': None,
      }

      # would be too easy if this covered all the cases...
      atexit.register(lambda: writeoutput(callgraph, files))
      # cmsDriver calls cmsRun via exec (execvpe specifically), so we also need
      # to hook that...
      # NOTE(review): the hook stays installed; os.execvpe is never restored.
      old_execvpe = os.execvpe
      def exec_hook(*args):
        writeoutput(callgraph, files)
        old_execvpe(*args)
      os.execvpe = exec_hook

      # now turn on the tracing
      sys.settrace(tracefunc)
      try:
        exec(code, globals, globals)
      finally:
        sys.settrace(None)

  except OSError as err:
    print("+Cannot run file %r because: %s" % (sys.argv[0], err))
    sys.exit(1)
  except SystemExit:
    # NOTE(review): the exit status requested by the script is discarded here;
    # the process exits with status 0 below.
    pass
  # this is not necessarily reached at all.
  sys.exit(0)
0226 
def help():
  """Print a usage summary and two example invocations to stdout."""
  print("Usage: %s <some cmssw commandline>" % (sys.argv[0]))
  print("  The given programs will be executed, instrumenting calls to %s and cmsRun." % (", ".join(WRAP_SCRIPTS)))
  print("  cmsRun will not actually run cmssw, but all the Python code will be executed and instrumented. The results are written to the files `%s` and `%s` in the same directory." % (OUTFILE_FILES, OUTFILE_TREE))
  if FLAT_OUTPUT:
    print("  The callgraph output file can be processed with Brendan Gregg's FlameGraph tool.")
  else:
    print("  The callgraph output lists edges pointing from each function to the one calling it.")

  print("Examples:")
  print("  %s runTheMatrix.py -l 1000 --ibeos" % sys.argv[0])
  # the indent belongs inside the string so both examples line up
  print("  %s cmsRun rpc_dqm_sourceclient-live_cfg.py" % sys.argv[0])
0239 
def main():
  """Entry point: dispatch on the name this script was invoked under.

  * Invoked through a symlink named like one of WRAP_SCRIPTS: run the real
    script of that name (found in $PATH, skipping the wrapper directory)
    under the Python tracer.
  * Invoked as ``cmsRun``: run the given configuration file from the current
    directory under the Python tracer instead of starting cmsRun.
  * Invoked under its own name without arguments: print the help text.
  * Otherwise: run the given command line with the wrappers set up.
  """
  global ARGV0
  print("+Running cmsswfiletrace...")
  ARGV0 = sys.argv[0]
  for s in WRAP_SCRIPTS:
    if sys.argv[0].endswith(s):
      print("+Wrapping %s..." % s)
      addprefixinfo(sys.argv)
      # Drop the wrapper directory from the search path, otherwise we would
      # find ourselves again. Build a real list (not a lazy filter object) so
      # it can be iterated and printed by the callee.
      tmppath = os.path.dirname(sys.argv[0])
      path = [
        entry for entry in os.environ["PATH"].split(":")
        if not entry.startswith(tmppath)
      ]
      STRIPPATHS.append(os.environ["CMSSWCALLBASE"])
      trace_python([s] + sys.argv[1:], path)
      return
  if sys.argv[0].endswith('cmsRun'):
    print("+Wrapping cmsRun...")
    if len(sys.argv) <= 1:
      # without a configuration file there is nothing to trace
      print("+No configuration file given to cmsRun.")
      sys.exit(1)
    addprefixinfo(sys.argv[1:])
    STRIPPATHS.append(os.environ["CMSSWCALLBASE"])
    trace_python(sys.argv[1:], ["."])
    return
  if len(sys.argv) <= 1:
    help()
    return
  # else
  print("+Running command with tracing %s..." % sys.argv[1:])
  trace_command(sys.argv[1:])
0268 
0269 
# Run the tracer only when executed as a script (directly or through one of
# the wrapper symlinks), not when imported as a module.
if "__main__" == __name__:
  main()
0272