Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:10:08

0001 #!/usr/bin/env python3
0002 
import argparse
import collections
import inspect
import json
import logging
import os
import resource
import shutil
import signal
import subprocess
import sys
import time
0014 
# Record layout shared by every message this script emits:
# timestamp, logger name (padded to 20), level (padded to 8), message.
LOG_FORMAT = '%(asctime)s: %(name)-20s - %(levelname)-8s - %(message)s'

# Configure the root handler once, then use a dedicated named logger
# whose verbosity can be lowered later (see the -q command line switch).
logging.basicConfig(format=LOG_FORMAT)
log = logging.getLogger("mbProfile")
log.setLevel(logging.INFO)
0019 
def read_procfs(ppath, only_ppid=True):
    """Collect a snapshot of one process from its procfs directory.

    Args:
        ppath: Path of the process directory, e.g. ``/proc/1234``.
        only_ppid: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        A dict with the keys ``statm``, ``stat`` and ``cmdline`` (raw file
        contents, stripped; NUL separators in ``cmdline`` replaced by
        spaces), ``status`` (the ``status`` file parsed into a dict),
        ``pid`` and ``parent_pid`` (ints taken from ``status``).
        Returns None if anything could not be read or parsed; the failure
        is logged as a warning.
    """
    def read(f):
        with open(os.path.join(ppath, f)) as fd:
            return fd.read()

    def read_status():
        st = {}

        with open(os.path.join(ppath, "status")) as fd:
            for line in fd:
                key, sep, value = line.partition(":")
                # Blank or otherwise malformed lines carry no "key: value"
                # pair; skip them instead of failing the whole snapshot.
                if not sep:
                    continue

                st[key] = value.strip()

        return st

    try:
        dct = {}

        dct["statm"] = read("statm").strip()
        dct["stat"] = read("stat").strip()
        dct["cmdline"] = read("cmdline").strip().replace("\0", " ")

        status = read_status()
        dct["status"] = status
        dct["pid"] = int(status["Pid"])
        dct["parent_pid"] = int(status["PPid"])

        return dct
    except Exception:
        # Best effort: the process may have exited while we were reading.
        # Only Exception is caught so that KeyboardInterrupt/SystemExit
        # still propagate.
        log.warning("Exception in read_procfs.", exc_info=True)
        return None
0055 
def build_process_list():
    """Yield a snapshot dict for each numeric entry found under /proc.

    Entries whose name is not all digits are ignored, and so are
    processes for which read_procfs() returned nothing.
    """
    for entry in os.listdir("/proc/"):
        if entry.isdigit():
            snapshot = read_procfs(os.path.join("/proc", entry))
            if snapshot:
                yield snapshot
0064 
def get_children(ppid):
    """Select all processes which are descendant from ppid (exclusive).

    Args:
        ppid: Pid of the root process; may be None.

    Returns:
        A list of process dicts (as produced by build_process_list()) for
        every descendant of ``ppid``, in breadth-first order. Each returned
        dict gains a ``children`` key holding the pids of its direct
        children. The list is empty when ``ppid`` is None or is not among
        the processes currently listed.
    """

    pid_dct = {}
    for proc in build_process_list():
        proc["_children"] = []
        pid_dct[proc["pid"]] = proc

    # fill in children array
    for pid, proc in pid_dct.items():
        parent = pid_dct.get(proc["parent_pid"])
        if parent is not None:
            parent["_children"].append(pid)

    # now just walk down the tree
    if ppid is None or ppid not in pid_dct:
        # process has quit, we exit
        return []

    accepted = []
    to_accept = collections.deque([ppid])

    while to_accept:
        head = pid_dct[to_accept.popleft()]

        # "_children" doubles as the not-yet-visited marker: it is removed
        # on the first visit. procfs is read without any locking, so the
        # snapshot can be inconsistent and contain a loop; a node reached a
        # second time is skipped instead of being processed again.
        children = head.pop("_children", None)
        if children is None:
            continue

        # do not include the monitoring pid
        if head["pid"] != ppid:
            accepted.append(head)

        head["children"] = children
        to_accept.extend(children)

    return accepted
0103 
class Profile(object):
    """Periodic resource snapshot of this process's descendants.

    Every call to update() refreshes the rusage counters of terminated
    children and the procfs/smaps data of all live descendants of the
    current process, and, when an output file was requested, appends the
    snapshot to it as one JSON document per line.
    """

    def __init__(self, args):
        """Open the output file (if ``args.file`` is set) and take a first snapshot.

        Args:
            args: Parsed command line options; only ``args.file`` is read
                by this class.
        """
        self.time = time.time()
        self.final = False
        self.pid = None
        self.known_pids = {}

        self.ru = {}
        self.ru_diff = {}

        self._offset_ru = None
        self._args = args

        if self._args.file:
            self._file = open(self._args.file, "w")
        else:
            self._file = None

        self.update()

    def update_ru(self):
        """Refresh ``ru`` and ``ru_diff`` from RUSAGE_CHILDREN.

        The first reading is remembered as the baseline; ``ru_diff`` holds
        the current value minus that baseline for each listed field.
        """
        fields_to_subtract = (
            "ru_utime", "ru_stime", "ru_maxrss", "ru_minflt", "ru_majflt", "ru_nswap",
            "ru_inblock", "ru_oublock", "ru_msgsnd", "ru_msgrcv", "ru_nsignals", "ru_nvcsw", "ru_nivcsw",
        )

        rusage = resource.getrusage(resource.RUSAGE_CHILDREN)
        self.ru = rusage

        if self._offset_ru is None:
            self._offset_ru = rusage

        for field in fields_to_subtract:
            current = getattr(self.ru, field)
            base = getattr(self._offset_ru, field)

            self.ru_diff[field] = current - base

    @staticmethod
    def _sum_smaps(lines):
        """Sum the smaps counters found in ``lines``.

        Returns a ``(private, shared, pss)`` tuple in the unit used by the
        file (kB). All ``Shared*`` and ``Private*`` fields are added up.
        For PSS only the exact ``Pss:`` field is counted: newer kernels
        also emit ``Pss_Dirty:``, which is already part of ``Pss:`` and
        must not be added a second time.
        """
        private, shared, pss = 0, 0, 0

        for line in lines:
            if line.startswith("Shared"):
                shared += int(line.split()[1])
            elif line.startswith("Private"):
                private += int(line.split()[1])
            elif line.startswith("Pss:"):
                pss += int(line.split()[1])

        return private, shared, pss

    # this is taken from: http://github.com/pixelb/scripts/commits/master/scripts/ps_mem.py
    def read_smaps(self, proc_dict):
        """Add ``smaps_shared``/``smaps_private``/``smaps_pss`` (bytes) to ``proc_dict``.

        The values are read from ``/proc/<pid>/smaps`` of ``proc_dict["pid"]``.
        Errors from opening or parsing the file propagate to the caller.
        """
        fp = os.path.join("/proc/%d" % proc_dict["pid"], "smaps")
        with open(fp) as fd:
            private, shared, pss = self._sum_smaps(fd)

        # smaps reports kB; store bytes
        proc_dict["smaps_shared"] = shared * 1024
        proc_dict["smaps_private"] = private * 1024
        proc_dict["smaps_pss"] = pss * 1024

    def update_proc(self):
        """Refresh ``known_pids`` with the current descendants of this process.

        Processes seen in an earlier snapshot but missing now keep their
        last recorded data and are flagged ``running = False``.
        """
        procs = get_children(os.getpid())

        # we can only do it here, permission-wise
        # ie only for owned processes
        for proc in procs:
            try:
                self.read_smaps(proc)
            except Exception:
                # best effort: the process may be gone or not readable;
                # KeyboardInterrupt/SystemExit are deliberately not caught
                log.warning("Exception in read_smaps.", exc_info=True)

        # we need to mark not-running ones as such
        stopped = set(self.known_pids.keys())
        for proc in procs:
            proc["running"] = True

            pid = proc["pid"]
            self.known_pids[pid] = proc

            stopped.discard(pid)

        for pid in stopped:
            self.known_pids[pid]["running"] = False

    def update(self):
        """Take a snapshot and, if an output file is open, append it as one JSON line."""
        self.time = time.time()

        self.update_ru()
        self.update_proc()

        if self._file:
            json.dump(self.to_dict(), self._file)
            self._file.write("\n")
            # flush so the file is usable even if we get killed
            self._file.flush()

        log.info("Written profile to: %s, took=%.03f", self._args.file, time.time() - self.time)

    def to_dict(self):
        """Return the current snapshot as an ordered, JSON-serialisable dict."""
        dct = collections.OrderedDict()
        dct['time']         = self.time
        dct['pid']          = self.pid
        dct['final']        = self.final

        dct['ru_diff']      = dict(self.ru_diff)
        dct['ru']           = dict((k, v) for k, v in inspect.getmembers(self.ru) if k.startswith('ru_'))
        dct['known_pids']   = dict(self.known_pids)
        return dct

    def finish(self):
        """Write the last snapshot (flagged ``final``) and close the output file.

        Without an output file the accumulated ``ru_diff`` is logged instead.
        """
        self.final = True
        self.update()

        if self._file:
            self._file.close()
            self._file = None
        else:
            log.info("ru_diff: %s", self.ru_diff)
0218 
0219 
# Seconds between two SIGALRM-driven snapshots.
ALARM_TIMER = 1
# Object whose update() is called on every alarm; nothing is called while unset.
ALARM_P_OBJECT = None


def handle_alarm(num, frame):
    """SIGALRM handler: refresh the registered profile, then re-arm the alarm.

    Args:
        num: Signal number (unused).
        frame: Interrupted stack frame (unused).
    """
    target = ALARM_P_OBJECT
    if target:
        target.update()

    # alarms are one-shot, so schedule the next one
    signal.alarm(ALARM_TIMER)
0228 
def run_and_monitor(args):
    """Start the command in ``args.pargs`` and profile it until it exits.

    A Profile is created, the command is launched as a child process and a
    recurring SIGALRM (period ALARM_TIMER seconds) triggers a snapshot.
    Once the child has exited the final snapshot is written.

    Args:
        args: Parsed command line options; ``args.pargs`` is the command
            to run, the rest is handed to Profile.
    """
    global ALARM_P_OBJECT

    profile = Profile(args)

    proc = subprocess.Popen(args.pargs)
    profile.pid = proc.pid

    ALARM_P_OBJECT = profile

    signal.signal(signal.SIGALRM, handle_alarm)
    signal.alarm(ALARM_TIMER)

    proc.wait()

    # Cancel the pending alarm before the final snapshot: a SIGALRM arriving
    # while finish() is writing would re-enter update() from the handler and
    # interleave two JSON records in the output file.
    signal.alarm(0)
    profile.finish()
0243 
def find_and_write_html(p, args):
    """Copy the HTML viewer files into ``p`` and write ``mbGraph.json`` there.

    ``mbGraph.js`` and ``mbGraph.html`` are looked up under
    ``src/DQMServices/Components/data/html`` of ``$CMSSW_BASE`` and then
    ``$CMSSW_RELEASE_BASE``; the first existing copy is used. A file that
    cannot be found is reported with a warning and skipped.

    Args:
        p: Target directory; created if missing. An empty string means the
            current directory.
        args: Parsed command line options; ``args.file`` and ``args.i`` are
            recorded in ``mbGraph.json``.
    """
    # create the dir if necessary
    if p:
        os.makedirs(p, exist_ok=True)

    # An unset environment variable yields None, which os.path.join()
    # rejects; only search the locations that are actually defined.
    html_paths = [
        os.path.join(base, "src/DQMServices/Components/data/html")
        for base in (os.getenv("CMSSW_BASE"), os.getenv("CMSSW_RELEASE_BASE"))
        if base
    ]

    def find_file(f):
        fails = []
        for html_dir in html_paths:
            x = os.path.join(html_dir, f)
            if os.path.exists(x):
                return x
            fails.append(x)

        log.warning("Could not find html file: %s (%s)", f, fails)
        return None

    for f in ['mbGraph.js', 'mbGraph.html']:
        target_fn = os.path.join(p, f)
        source_fn = find_file(f)
        if source_fn:
            log.info("Copying %s to %s", source_fn, target_fn)
            shutil.copyfile(source_fn, target_fn)

    # create json file
    target_fn = os.path.join(p, "mbGraph.json")
    log.info("Creating %s", target_fn)
    with open(target_fn, "w") as fp:
        dct = {
            "file": os.path.basename(args.file),
            "interval": args.i,
            "env": {
                "CMSSW_GIT_HASH": os.getenv("CMSSW_GIT_HASH"),
                "CMSSW_RELEASE_BASE": os.getenv("CMSSW_RELEASE_BASE"),
                "SCRAM_ARCH": os.getenv("SCRAM_ARCH"),
            },
        }

        json.dump(dct, fp, indent=2)
0287 
0288 
# Script entry point: parse the options, optionally write the HTML helper
# files, then run and profile the given command.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Profile child processes and produce data for rss and such graphs.")
    parser.add_argument("-f", "--file", type=str, default="performance.json", help="Filename to write.", metavar="performance.json")
    parser.add_argument("-i", type=int, help="Time interval between profiles.", default=15)
    parser.add_argument('-q', action='store_true', help="Reduce logging.")
    parser.add_argument('-w', action='store_true', help="Write html helper files for rendering the performance file.")
    parser.add_argument('pargs', nargs=argparse.REMAINDER)

    args = parser.parse_args()

    if not args.pargs:
        # no command to profile was given
        parser.print_help()
        sys.exit(-1)
    elif args.pargs[0] == "--":
        # compat with 2.6
        args.pargs = args.pargs[1:]

    # the sampling period used by handle_alarm() / run_and_monitor()
    ALARM_TIMER = args.i

    if args.q:
        log.setLevel(logging.WARNING)

    if args.w:
        p = os.path.dirname(args.file)
        find_and_write_html(p, args)

    ## do some signal magic
    #signal.signal(signal.SIGINT, handle_signal)
    #signal.signal(signal.SIGTERM, handle_signal)

    run_and_monitor(args)
0320