Last active
July 30, 2018 20:39
-
-
Save kylebarron/1de85eda04d3ee0114e1c9bc094b8226 to your computer and use it in GitHub Desktop.
Email user with notification if he/she is using all memory on a node
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3
"""
---------------------------------------------------------------------
Program: bigmem.py
Author: Kyle Barron <[email protected]>
Created: 3/21/2018, 11:56:17 AM
Updated: 3/21/2018, 11:56:20 AM
Purpose: Email user with notification if he/she is using all memory on a node
"""
import socket
import subprocess
from datetime import datetime
from os import system
from os.path import expanduser
from pathlib import Path
from textwrap import dedent

import pandas as pd
import psutil
def main(mem_pct=95):
    """Run the per-user memory check when the node is nearly out of memory.

    Args:
        mem_pct: system-wide memory usage percentage, as reported by
            psutil.virtual_memory().percent, at or above which
            find_memory_usage() is called.
    """
    node_pct_used = psutil.virtual_memory().percent
    node_is_full = node_pct_used >= mem_pct
    if node_is_full:
        find_memory_usage()
def find_memory_usage():
    """Find memory usage on this node by user and notify the heaviest user.

    Sums the virtual memory size (vms) of every process per user, expresses
    it as an integer percentage of total physical memory, and picks the user
    with the largest share together with that user's largest process.

    Calls send_email() if the offending user has not been notified in the
    last day, according to ~/.email_memory_notifications.csv (created if it
    does not exist). Users below 50% are never notified.
    """
    # Collect (name, vms, user) per process. Processes can exit or be
    # inaccessible while we iterate; skip them instead of aborting the run.
    # All three values are read before the record is stored so a failure
    # never leaves a partial record behind.
    records = []
    for proc in psutil.process_iter():
        try:
            with proc.oneshot():
                record = (
                    proc.name(),
                    proc.memory_info().vms,
                    proc.username(),
                )
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
        records.append(record)

    if not records:
        # Nothing readable, so there is no user to rank.
        return

    df = pd.DataFrame(records, columns=['name', 'vms', 'user'])

    # Per-user total as an integer percentage of physical memory.
    total_mem = psutil.virtual_memory().total
    df_user = df.groupby('user')['vms'].aggregate('sum').to_frame()
    df_user['pct_mem'] = (df_user['vms'] / total_mem * 100).astype(int)
    top_user = df_user.sort_values('pct_mem', ascending=False).iloc[0]
    user = top_user.name
    pct_mem = top_user['pct_mem']

    # Get name of program with highest memory usage for that user
    user_procs = df[df['user'] == user].sort_values('vms', ascending=False)
    offending_program = user_procs['name'].iloc[0]

    # Create ~/.email_memory_notifications.csv if not exists
    log_path = expanduser('~/.email_memory_notifications.csv')
    Path(log_path).touch()

    # Check if user has already been notified within the last day
    notif = pd.read_csv(log_path, header=None, names=['user', 'time'])
    # errors='coerce': a malformed row becomes NaT (and is then ignored)
    # rather than raising on every subsequent run.
    notif['time'] = pd.to_datetime(
        notif['time'], format='%Y-%m-%d %H:%M:%S.%f', errors='coerce')
    age_days = (pd.Timestamp(datetime.now()) - notif['time']).dt.days

    # If notified within the last day, exit program
    if user in notif.loc[age_days == 0, 'user'].values:
        return

    # Otherwise, send email to user
    if pct_mem >= 75:
        mem_use = 'very high'
    elif pct_mem >= 50:
        mem_use = 'high'
    else:
        return
    send_email(
        to_user=user,
        offending_program=offending_program,
        mem_use=mem_use)
def send_email(to_user, offending_program, mem_use):
    """Send a memory-usage email to a user and log the notification.

    Only ``mem_use == 'very high'`` sends anything. ``'high'`` is accepted
    but currently does nothing (a separate message for someone using
    between 50-75% of memory, where someone else is using the rest, could
    be added later). Any other value is ignored.

    The message is piped to the ``mail`` command. When ``mail`` exits
    successfully, a ``user,timestamp`` row is appended to
    ~/.email_memory_notifications.csv; nothing is logged if ``mail`` cannot
    be started or exits with a non-zero status.

    Args:
        to_user: username of person to send email to
        offending_program: name of program with highest memory usage
        mem_use: 'very high' (or, not yet implemented, 'high')
    """
    if mem_use != 'very high':
        # 'high' has no message yet; unknown levels are ignored.
        return

    hostname = socket.gethostname()
    very_high_msg = """\
    Your jobs on {hostname}, especially {offending_program}, are asking for
    more memory than is available on {hostname}, and are therefore running
    very slowly. They may not complete in finite time.
    You should probably kill them and restart them on a machine with
    more available memory. You can use "showload" to find a machine with
    available memory and "top -a" to see how your job is doing on the
    current machine.
    Mohan and I are always available to discuss ways to make Stata
    programs
    more efficient. Also see:
    http://www.nber.org/stata/efficient
    dan feenberg
    617-863-0343
    This is an automated email."""
    # Dedent the template before interpolating so that unusual characters
    # in the process name cannot affect the whitespace stripping.
    mail_msg = dedent(very_high_msg).format(
        hostname=hostname, offending_program=offending_program)

    # NOTE(review): mail goes to 'feenberg+<user>@nber.org', not to
    # '<user>@nber.org' -- confirm this is the intended recipient.
    mail_to = 'feenberg+' + to_user + '@nber.org'
    mail_sub = 'Out of Memory Notification'

    # Pass arguments as a list and the body on stdin: process names are
    # chosen by arbitrary users and must never be spliced into a shell
    # command line.
    try:
        result = subprocess.run(
            ['mail', '-s', mail_sub, mail_to],
            input=mail_msg + '\n',
            universal_newlines=True)
    except OSError:
        # `mail` is missing or not executable: nothing was sent.
        return
    if result.returncode != 0:
        # Do not record a notification that was not actually delivered.
        return

    # Always write microseconds: str(datetime.now()) omits them when they
    # are zero, which the '%Y-%m-%d %H:%M:%S.%f' reader cannot parse.
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    with open(expanduser('~/.email_memory_notifications.csv'), 'a') as log:
        log.write(f'{to_user},{stamp}\n')
# Run the memory check only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment