Last active
October 28, 2016 19:27
-
-
Save joshisa/9101d3e318e4f611a0d1a64ceb7aa71c to your computer and use it in GitHub Desktop.
Helps enumerate and summarize running notebooks on your Jupyter server
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Modified from http://stackoverflow.com/questions/34685825/jupyter-notebook-memory-usage-for-each-notebook | |
| # | |
| # Usage: | |
| # Default Mode: extrovert=False. Snoop only your own notebook instance (the current process) | |
| # df_mem_breakdown, df_mem_summary, ports = get_proc_info() | |
| # df_mem_breakdown, df_mem_summary, ports = get_proc_info(False) | |
| # | |
| # Extrovert Mode: Snoop all of your sibling notebooks | |
| # df_mem_breakdown, df_mem_summary, ports = get_proc_info(True) | |
| # | |
| !pip install --user psutil | |
| !pip install --user tabulate | |
| import os | |
| import pwd | |
| import psutil | |
| import re | |
| import string | |
| import requests | |
| import socket | |
| import argparse | |
| import tabulate | |
| import pandas as pd | |
| from collections import Counter | |
# Position of the real UID in a whitespace-split "Uid:" line of
# /proc/<pid>/status (field 0 is the "Uid:" label itself).
UID = 1
# Captures the kernel id from a ".../kernel-<id>.json" connection-file argument.
regex = re.compile(r'.+kernel-(.+)\.json')
# Captures an explicit "port=<n>" argument of a notebook server process.
port_regex = re.compile(r'port=(\d+)')

_MEM_COLUMNS = ['user', 'pid', 'process_Type', 'memory_GB', 'kernel_ID']
_SUMMARY_COLUMNS = ['R', 'java/Scala', 'Python', 'memory_GB (Sum)']


def _read_cmdline(pid):
    """Return the command line of process *pid* as text, or None if unreadable."""
    try:
        with open(os.path.join('/proc', pid, 'cmdline'), 'rb') as fh:
            raw = fh.read()
    except (IOError, OSError):  # proc has already terminated / not accessible
        return None
    # Arguments in /proc/<pid>/cmdline are NUL-separated; turn the separators
    # into spaces so multi-word needles such as 'ipython notebook' can match.
    return raw.decode('utf-8', 'replace').replace('\0', ' ').strip()


def _owner_name(pid):
    """Return the user name owning process *pid*, or "unknown" if undeterminable."""
    uid = None
    try:
        with open('/proc/{0}/status'.format(int(pid))) as fh:
            for ln in fh:
                if ln.startswith('Uid:'):
                    uid = int(ln.split()[UID])
                    break
    except (IOError, OSError):
        return "unknown"
    if uid is None:
        return "unknown"
    try:
        return pwd.getpwuid(uid).pw_name
    except KeyError:  # uid has no entry in the password database
        return "unknown"


def get_proc_info(extrovert = False):
    """Enumerate Jupyter/IPython kernel processes and summarize their memory use.

    Parameters
    ----------
    extrovert : bool, optional
        When False (default) only the current process (``os.getpid()``) is
        inspected.  When True every numeric entry under ``/proc`` is scanned.

    Returns
    -------
    df_mem : pandas.DataFrame
        One row per detected kernel process with columns
        ``user, pid, process_Type, memory_GB, kernel_ID``.  Empty (but with
        the columns present) when no kernel process was found.
    df_summary : pandas.DataFrame
        A single row with the number of R, java and python kernel processes
        and the sum of their memory in GB.
    ports : list of int
        Ports of detected notebook server processes.  A server without an
        explicit ``port=<n>`` argument is assigned 8888, 8889, ... in the
        order encountered.
    """
    if extrovert:
        pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]
    else:
        pids = [str(os.getpid())]

    rows = []
    memsum = []
    ports = []
    process_types = []
    default_port = 8888

    for pid in pids:
        cmdline = _read_cmdline(pid)
        if not cmdline:
            continue

        # jupyter notebook server processes
        if 'jupyter-notebook' in cmdline or 'ipython notebook' in cmdline:
            port_match = port_regex.search(cmdline)
            if port_match:
                ports.append(int(port_match.group(1)))
            else:
                ports.append(default_port)
                default_port += 1

        # kernel processes
        if ('jupyter' in cmdline or 'ipython' in cmdline) and 'kernel' in cmdline:
            # Reduce the command line to the kernel id and drop any
            # non-printable residue; join so the result is a real string on
            # Python 3 (where filter() would return an iterator).
            kernel_ID = ''.join(
                ch for ch in regex.sub(r'\1', cmdline) if ch in string.printable
            ).strip()

            try:
                process = psutil.Process(int(pid))
                proc_name = process.name()
                # memory_info()[0] is the first reported counter (rss), in bytes.
                mem = process.memory_info()[0] / float(1e9)
            except psutil.Error:
                # Process vanished or is not accessible; skip it rather than
                # abort the whole scan.
                continue

            process_types.append(proc_name)
            memsum.append(mem)
            # user, pid, process type, memory, kernel_ID
            rows.append([_owner_name(pid), pid, proc_name, mem, kernel_ID])

    # Passing columns to the constructor keeps the schema even when no kernel
    # was found (assigning .columns on an empty frame raises ValueError).
    df_mem = pd.DataFrame(rows, columns=_MEM_COLUMNS)

    types = Counter(process_types)
    # Interpreter executables are often named python3 / python3.x, so count
    # every process name starting with "python".
    python_count = sum(
        count for name, count in types.items() if name.startswith('python')
    )
    df_summary = pd.DataFrame(
        [[types["R"], types["java"], python_count, sum(memsum)]],
        columns=_SUMMARY_COLUMNS,
    )
    return df_mem, df_summary, ports
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment