File indexing completed on 2024-04-06 12:10:08
0001
0002
0003 import os
0004 import collections
0005 import logging
0006 import resource
0007 import time
0008 import argparse
0009 import subprocess
0010 import signal
0011 import json
0012 import inspect
0013 import shutil
0014
# Layout of every log line: timestamp, logger name (padded to 20 columns),
# level name (padded to 8 columns) and the message itself.
LOG_FORMAT = '%(asctime)s: %(name)-20s - %(levelname)-8s - %(message)s'

# Configure the root logger with that layout.
logging.basicConfig(format=LOG_FORMAT)

# Logger shared by the whole script; INFO unless the -q option lowers it.
log = logging.getLogger("mbProfile")
log.setLevel(logging.INFO)
0019
def read_procfs(ppath, only_ppid=True):
    """Collect a snapshot of one process from its procfs directory.

    Args:
        ppath: Path of the process directory (e.g. ``/proc/1234``).  The
            files ``statm``, ``stat``, ``cmdline`` and ``status`` are read
            from it.
        only_ppid: Unused; kept so that existing callers keep working.

    Returns:
        A dict with the keys ``statm``, ``stat`` and ``cmdline`` (stripped
        file contents, with the NUL separators of ``cmdline`` turned into
        spaces), ``status`` (dict built from the ``key: value`` lines of
        ``status``), ``pid`` and ``parent_pid`` (ints taken from the
        ``Pid`` and ``PPid`` status entries).  ``None`` is returned when
        the snapshot could not be taken; the failure is logged as a warning.
    """

    def read(name):
        # Whole content of one file inside the process directory.
        with open(os.path.join(ppath, name)) as fd:
            return fd.read()

    def read_status():
        # Turn the "Key:<whitespace>value" lines of the status file into a dict.
        st = {}
        with open(os.path.join(ppath, "status")) as fd:
            for line in fd:
                key, sep, value = line.partition(":")
                if not sep:
                    # Blank or malformed line: skip it instead of giving up
                    # on the whole process.
                    continue
                st[key] = value.strip()
        return st

    try:
        dct = {}

        dct["statm"] = read("statm").strip()
        dct["stat"] = read("stat").strip()
        # Replace the NULs before stripping: str.strip() does not remove
        # NUL characters, so a trailing NUL would otherwise end up as a
        # trailing space.
        dct["cmdline"] = read("cmdline").replace("\0", " ").strip()

        status = read_status()
        dct["status"] = status
        dct["pid"] = int(status["Pid"])
        dct["parent_pid"] = int(status["PPid"])

        return dct
    except Exception:
        # Best effort: a process may disappear while it is being read.
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # still propagate.
        log.warning("Exception in read_procfs.", exc_info=True)
        return None
0055
def build_process_list():
    """Yield a snapshot dict for every numeric entry found in ``/proc``.

    Each numeric directory name is passed to ``read_procfs``; entries for
    which it returns nothing are left out.
    """
    for entry in os.listdir("/proc/"):
        if entry.isdigit():
            info = read_procfs(os.path.join("/proc", entry))
            if info:
                yield info
0064
def get_children(ppid):
    """Return all processes descending from ``ppid``, ``ppid`` itself excluded.

    The process table is read once through ``build_process_list`` and
    walked breadth-first starting at ``ppid``.  Every visited process dict
    (including the one of ``ppid``) gets a ``children`` entry holding the
    pids of its direct children.  An empty list is returned when ``ppid``
    is ``None`` or is not part of the process table.
    """
    by_pid = {}
    for info in build_process_list():
        info["_children"] = []
        by_pid[info["pid"]] = info

    # Register every process with its parent, when the parent is known.
    for pid, info in by_pid.items():
        parent = by_pid.get(info["parent_pid"])
        if parent is not None:
            parent["_children"].append(pid)

    if ppid is None or ppid not in by_pid:
        return []

    descendants = []
    pending = collections.deque([ppid])

    while pending:
        node = by_pid[pending.popleft()]

        # The starting process is only used as the root of the walk.
        if node["pid"] != ppid:
            descendants.append(node)

        pending.extend(node.get("_children", []))
        # Publish the temporary child list under its final name.
        node["children"] = node.pop("_children")

    return descendants
0103
class Profile(object):
    """Resource usage snapshot of the processes started by this script.

    Every ``update()`` samples ``getrusage(RUSAGE_CHILDREN)`` and the procfs
    data of all processes descending from the current one.  When an output
    file was requested, each snapshot is appended to it as one JSON document
    per line.
    """

    def __init__(self, args):
        """Open the output file (if any) and take the first snapshot.

        Args:
            args: Parsed command line options.  ``args.file`` is the output
                path; a falsy value disables writing.
        """
        self.time = time.time()
        self.final = False
        self.pid = None
        self.known_pids = {}

        self.ru = {}
        self.ru_diff = {}

        self._offset_ru = None
        self._args = args

        if self._args.file:
            self._file = open(self._args.file, "w")
        else:
            self._file = None

        self.update()

    def update_ru(self):
        """Sample the children's rusage and its difference to the first sample."""
        fields_to_subtract = (
            "ru_utime", "ru_stime", "ru_maxrss", "ru_minflt", "ru_majflt", "ru_nswap",
            "ru_inblock", "ru_oublock", "ru_msgsnd", "ru_msgrcv", "ru_nsignals", "ru_nvcsw", "ru_nivcsw",
        )

        rusage = resource.getrusage(resource.RUSAGE_CHILDREN)
        self.ru = rusage

        # The very first sample becomes the baseline all later ones are
        # compared against.
        if self._offset_ru is None:
            self._offset_ru = rusage

        for field in fields_to_subtract:
            current = getattr(self.ru, field)
            base = getattr(self._offset_ru, field)

            self.ru_diff[field] = current - base

    @staticmethod
    def _sum_smaps(lines):
        """Add up the shared, private and PSS figures found in smaps lines.

        Args:
            lines: Iterable of text lines in ``/proc/<pid>/smaps`` format.

        Returns:
            Tuple ``(shared, private, pss)`` in the unit used by the lines
            (the caller treats it as kB).
        """
        shared = private = pss = 0

        for line in lines:
            if line.startswith("Shared"):
                shared += int(line.split()[1])
            elif line.startswith("Private"):
                private += int(line.split()[1])
            elif line.startswith("Pss:"):
                # Match the exact "Pss:" field only: newer kernels also emit
                # Pss_* breakdown lines (e.g. Pss_Dirty), and counting those
                # as well would inflate the total.
                pss += int(line.split()[1])

        return shared, private, pss

    def read_smaps(self, proc_dict):
        """Add the smaps memory totals of a process to its snapshot dict.

        Args:
            proc_dict: Process snapshot; ``proc_dict["pid"]`` selects the
                smaps file.  The keys ``smaps_shared``, ``smaps_private``
                and ``smaps_pss`` are added, in bytes.

        Raises:
            Whatever opening or parsing ``/proc/<pid>/smaps`` raises, e.g.
            when the process is gone or the file is not readable.
        """
        fp = os.path.join("/proc/%d" % proc_dict["pid"], "smaps")
        with open(fp) as fd:
            shared, private, pss = self._sum_smaps(fd)

        proc_dict["smaps_shared"] = shared * 1024
        proc_dict["smaps_private"] = private * 1024
        proc_dict["smaps_pss"] = pss * 1024

    def update_proc(self):
        """Refresh ``known_pids`` with the current descendants of this process."""
        procs = get_children(os.getpid())

        for proc in procs:
            try:
                self.read_smaps(proc)
            except Exception:
                # Best effort: keep the process entry even without smaps
                # data.  KeyboardInterrupt and SystemExit are not caught.
                log.warning("Exception in read_smaps.", exc_info=True)

        # Every previously known pid that is not seen again is flagged as
        # no longer running; its last snapshot is kept.
        stopped = set(self.known_pids.keys())
        for proc in procs:
            proc["running"] = True

            pid = proc["pid"]
            self.known_pids[pid] = proc
            stopped.discard(pid)

        for pid in stopped:
            self.known_pids[pid]["running"] = False

    def update(self):
        """Take a new snapshot and, if an output file is open, append it."""
        self.time = time.time()

        self.update_ru()
        self.update_proc()

        if self._file:
            json.dump(self.to_dict(), self._file)
            self._file.write("\n")
            # Flush so that each snapshot reaches the file right away.
            self._file.flush()

            log.info("Written profile to: %s, took=%.03f", self._args.file, time.time() - self.time)

    def to_dict(self):
        """Return the current snapshot as an ordered, JSON-serializable dict."""
        dct = collections.OrderedDict()
        dct['time'] = self.time
        dct['pid'] = self.pid
        dct['final'] = self.final

        dct['ru_diff'] = dict(self.ru_diff)
        # Every ru_* member of the last rusage sample.
        dct['ru'] = {k: v for k, v in inspect.getmembers(self.ru) if k.startswith('ru_')}
        dct['known_pids'] = dict(self.known_pids)
        return dct

    def finish(self):
        """Take the final snapshot and close the output file.

        Without an output file the rusage differences are logged instead.
        """
        self.final = True
        self.update()

        if self._file:
            self._file.close()
            self._file = None
        else:
            log.info("ru_diff: %s", self.ru_diff)
0218
0219
# Seconds between two alarm-driven profile updates; the script entry point
# overrides it with the value of the -i option.
ALARM_TIMER = 1

# Profile refreshed by the alarm handler; None until run_and_monitor()
# registers one.
ALARM_P_OBJECT = None


def handle_alarm(num, frame):
    """SIGALRM handler: refresh the registered profile and re-arm the alarm.

    Args:
        num: Signal number (unused).
        frame: Interrupted stack frame (unused).
    """
    target = ALARM_P_OBJECT
    if target:
        target.update()

    signal.alarm(ALARM_TIMER)
0228
def run_and_monitor(args):
    """Run the command given in ``args.pargs`` and profile it until it exits.

    A ``Profile`` is created, the command is started, and SIGALRM is used to
    refresh the profile every ``ALARM_TIMER`` seconds while waiting for the
    command.  Once it has exited, the final snapshot is taken.

    Args:
        args: Parsed command line options.  ``args.pargs`` is the command
            line to execute; the whole object is passed on to ``Profile``.
    """
    profile = Profile(args)

    proc = subprocess.Popen(args.pargs)
    profile.pid = proc.pid

    global ALARM_P_OBJECT
    ALARM_P_OBJECT = profile

    signal.signal(signal.SIGALRM, handle_alarm)
    signal.alarm(ALARM_TIMER)

    proc.wait()

    # Stop the periodic updates before the final snapshot.  Otherwise an
    # alarm firing during finish() would re-enter Profile.update() while it
    # is writing or closing the output file.  The signal is ignored first so
    # that an alarm re-armed by a handler run that was already pending
    # cannot terminate the process.
    signal.signal(signal.SIGALRM, signal.SIG_IGN)
    signal.alarm(0)

    profile.finish()
0243
def find_and_write_html(p, args):
    """Copy the mbGraph viewer files into ``p`` and write ``mbGraph.json``.

    ``mbGraph.js`` and ``mbGraph.html`` are looked up below the directories
    named by the ``CMSSW_BASE`` and ``CMSSW_RELEASE_BASE`` environment
    variables, in that order.  A file that cannot be found is reported with
    a warning and skipped.

    Args:
        p: Target directory; created when it is non-empty and missing.  An
            empty string makes the files land in the current directory.
        args: Parsed command line options; ``args.file`` and ``args.i`` are
            recorded in ``mbGraph.json``.
    """
    if p and not os.path.exists(p):
        os.makedirs(p)

    # os.getenv() returns None for an unset variable, which os.path.join()
    # rejects; such locations are simply left out of the search.
    html_paths = [
        os.path.join(base, "src/DQMServices/Components/data/html")
        for base in (os.getenv("CMSSW_BASE"), os.getenv("CMSSW_RELEASE_BASE"))
        if base is not None
    ]

    def find_file(f):
        # First existing candidate for f, or None (after a warning listing
        # every path that was tried).
        fails = []
        for directory in html_paths:
            x = os.path.join(directory, f)
            if os.path.exists(x):
                return x
            fails.append(x)

        log.warning("Could not find html file: %s (%s)", f, fails)
        return None

    for f in ['mbGraph.js', 'mbGraph.html']:
        target_fn = os.path.join(p, f)
        source_fn = find_file(f)
        if source_fn:
            log.info("Copying %s to %s", source_fn, target_fn)
            shutil.copyfile(source_fn, target_fn)

    target_fn = os.path.join(p, "mbGraph.json")
    log.info("Creating %s", target_fn)
    with open(target_fn, "w") as fp:
        dct = {
            "file": os.path.basename(args.file),
            "interval": args.i,
            "env": {
                "CMSSW_GIT_HASH": os.getenv("CMSSW_GIT_HASH"),
                "CMSSW_RELEASE_BASE": os.getenv("CMSSW_RELEASE_BASE"),
                "SCRAM_ARCH": os.getenv("SCRAM_ARCH"),
            },
        }

        json.dump(dct, fp, indent=2)
0287
0288
if __name__ == "__main__":
    # Script entry point: parse the options, optionally write the html
    # helper files, then run and profile the given command.
    parser = argparse.ArgumentParser(description="Profile child processes and produce data for rss and such graphs.")
    parser.add_argument("-f", "--file", type=str, default="performance.json", help="Filename to write.", metavar="performance.json")
    parser.add_argument("-i", type=int, help="Time interval between profiles.", default=15)
    parser.add_argument('-q', action='store_true', help="Reduce logging.")
    parser.add_argument('-w', action='store_true', help="Write html helper files for rendering the performance file.")
    parser.add_argument('pargs', nargs=argparse.REMAINDER)

    args = parser.parse_args()

    # argparse.REMAINDER keeps a leading "--" separator; drop it before
    # checking whether a command was given at all, so that a lone "--" is
    # treated like a missing command.
    if args.pargs and args.pargs[0] == "--":
        args.pargs = args.pargs[1:]

    if not args.pargs:
        parser.print_help()
        # Same effect as sys.exit(-1); the sys module is not imported by
        # this script, so calling sys.exit here would raise NameError.
        raise SystemExit(-1)

    # Interval used by the SIGALRM handler.
    ALARM_TIMER = args.i

    if args.q:
        log.setLevel(logging.WARNING)

    if args.w:
        p = os.path.dirname(args.file)
        find_and_write_html(p, args)

    run_and_monitor(args)
0320