Skip to content

Instantly share code, notes, and snippets.

@joshisa
Last active October 28, 2016 19:27
Show Gist options
  • Select an option

  • Save joshisa/9101d3e318e4f611a0d1a64ceb7aa71c to your computer and use it in GitHub Desktop.

Select an option

Save joshisa/9101d3e318e4f611a0d1a64ceb7aa71c to your computer and use it in GitHub Desktop.
Helps enumerate and summarize running notebooks on your Jupyter server
# Modified from http://stackoverflow.com/questions/34685825/jupyter-notebook-memory-usage-for-each-notebook
#
# Usage:
# Default Mode: Extrovert = False. Snoop only your own notebook instance
# df_mem_breakdown, df_mem_summary, ports = get_proc_info()
# df_mem_breakdown, df_mem_summary, ports = get_proc_info(False)
#
# Extrovert Mode: Snoop all of your sibling notebooks
# df_mem_breakdown, df_mem_summary, ports = get_proc_info(True)
#
!pip install --user psutil
!pip install --user tabulate
import os
import pwd
import psutil
import re
import string
import requests
import socket
import argparse
import tabulate
import pandas as pd
from collections import Counter
# Index into the whitespace-split 'Uid:' line of /proc/<pid>/status.
# Field 0 is the 'Uid:' label itself, so index 1 is the first numeric id
# (the real UID according to proc(5)).
UID = 1
# Captures the text between the last 'kernel-' and '.json' in a command line;
# get_proc_info uses that capture as the kernel ID.
regex = re.compile(r'.+kernel-(.+)\.json')
# Captures the digits that follow 'port=' in a notebook server command line.
port_regex = re.compile(r'port=(\d+)')
def _read_cmdline(pid):
    """Return the command line of process *pid* as text, or None if unreadable.

    *pid* is the process id as a string (a directory name under /proc).
    """
    try:
        with open(os.path.join('/proc', pid, 'cmdline'), 'rb') as fh:
            raw = fh.read()
    except IOError:  # proc has already terminated (or is not readable)
        return None
    # The file is bytes with NUL-separated arguments (see proc(5)); decode it
    # and turn the separators into spaces so that substring tests such as
    # 'ipython notebook' can match across two arguments.
    return raw.decode('utf-8', 'replace').replace('\0', ' ').strip()


def _lookup_username(pid):
    """Return the user name owning process *pid*, or "unknown" if unavailable."""
    uid = None
    try:
        with open('/proc/{0}/status'.format(int(pid))) as fh:
            for ln in fh:
                if ln.startswith('Uid:'):
                    uid = int(ln.split()[UID])
                    break
    except (IOError, ValueError, IndexError):
        # Process vanished or the status line was not in the expected shape.
        return "unknown"
    if uid is None:
        return "unknown"
    try:
        return pwd.getpwuid(uid).pw_name
    except KeyError:  # uid has no entry in the password database
        return "unknown"


def get_proc_info(extrovert = False):
    """Enumerate Jupyter/IPython kernel processes and summarize their memory.

    Args:
        extrovert: When False (default) only the current process
            (``os.getpid()``) is inspected. When True every numeric entry
            under ``/proc`` is inspected.

    Returns:
        A tuple ``(df_mem, df_summary, ports)``:

        * ``df_mem`` -- DataFrame with one row per detected kernel process and
          columns ``user, pid, process_Type, memory_GB, kernel_ID``.
        * ``df_summary`` -- single-row DataFrame with columns
          ``R, java/Scala, Python, memory_GB (Sum)`` counting kernel processes
          by process name and summing their memory.
        * ``ports`` -- list of int ports, one per detected notebook server.

        Both DataFrames keep their column labels even when no process matched.
    """
    if extrovert:
        pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]
    else:
        pids = [str(os.getpid())]

    mem_rows = []        # one [user, pid, name, mem, kernel_ID] row per kernel
    memsum = []          # per-kernel memory in GB, summed for the summary
    ports = []           # running notebook server ports
    process_types = []   # process names of the detected kernels
    default_port = 8888

    for pid in pids:
        cmdline = _read_cmdline(pid)
        if not cmdline:
            continue

        # jupyter notebook processes
        if 'jupyter-notebook' in cmdline or 'ipython notebook' in cmdline:
            port_match = port_regex.search(cmdline)
            if port_match:
                ports.append(int(port_match.group(1)))
            else:
                # No explicit port= argument: guess 8888 for the first such
                # server and the next integer for each further one.
                ports.append(default_port)
                default_port += 1

        if ('jupyter' in cmdline or 'ipython' in cmdline) and 'kernel' in cmdline:
            # kernel: take only the captured id; fall back to the whole
            # command line when the connection-file pattern is absent.
            kernel_match = regex.search(cmdline)
            kernel_ID = kernel_match.group(1) if kernel_match else cmdline
            kernel_ID = ''.join(ch for ch in kernel_ID if ch in string.printable)

            # memory
            try:
                process = psutil.Process(int(pid))
                proc_name = process.name()
                # First field of memory_info() (RSS per the psutil docs), in GB.
                mem = process.memory_info()[0] / float(1e9)
            except psutil.Error:
                # Process exited or became inaccessible after cmdline was read.
                continue

            # user name for pid
            uname = _lookup_username(pid)

            process_types.append(proc_name)
            memsum.append(mem)
            # user, pid, process type, memory, kernel_ID
            mem_rows.append([uname, pid, proc_name, mem, kernel_ID])

    # Passing columns= to the constructor keeps the labels on an empty frame;
    # assigning .columns afterwards raises when there are no rows.
    df_mem = pd.DataFrame(
        mem_rows,
        columns=['user', 'pid', 'process_Type', 'memory_GB', 'kernel_ID'])

    types = Counter(process_types)
    # Interpreter process names carry version suffixes (python3, python3.11),
    # so count by prefix rather than by the exact name 'python'.
    python_count = sum(
        count for name, count in types.items() if name.startswith('python'))
    df_summary = pd.DataFrame(
        [[types["R"], types["java"], python_count, sum(memsum)]],
        columns=['R', 'java/Scala', 'Python', 'memory_GB (Sum)'])
    return df_mem, df_summary, ports
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment