Last active
May 2, 2018 09:54
-
-
Save microlinux/0b3f47cb657cb61bb89b to your computer and use it in GitHub Desktop.
OpenVZ container abusive process killer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
""" | |
nfcpud: a daemon for killing abusive container processes on openvz nodes | |
requires: python daemon module (centos: yum install python-daemon) | |
nfcpud measures average process cpu usage over 5 second intervals. based on | |
parameters defined in config files, processes are killed if they meet any of 3 | |
criteria. processes belonging to the node itself are exempted. | |
cpu usage is measured relative to total available cpu power. if a process is | |
measured at 50% cpu usage, the remaining 50% could be either idle or used by | |
other processes. | |
to run, execute nfcpud.py | |
to install, create the following directory structure: | |
/opt/nforge/ | |
/opt/nforge/bin/ | |
/opt/nforge/etc/nfcpud/ | |
/opt/nforge/log/ | |
/opt/nforge/run/ | |
activity is logged to /opt/nforge/log/nfcpud.log (don't forget to rotate!) | |
create banned.conf and nfcpud.conf in /opt/nforge/etc/nfcpud/ | |
place nfcpud.py in /opt/nforge/bin/ | |
banned.conf: this file is not required, processes listed one per line are | |
killed immediately | |
e.g. minecraft | |
java | |
superduperircbouncer | |
nfcpud.conf: this file is required, all listed variables must be defined | |
defines user and group to run as, must be able to kill any | |
process | |
e.g. user = root | |
group = root | |
defines two criteria for killing processes, hard limit and soft | |
limit | |
processes exceeding the "hard limit" are killed immediately | |
e.g. hard_limit = 60 | |
this would immediately kill processes exceeding 60% cpu | |
usage | |
processes exceeding the "soft limit" X times within Y checks are | |
killed, allows processes to burst between the soft and hard limits | |
e.g. soft_limit = 40 | |
soft_limit_count = 6 | |
soft_limit_checks = 24 | |
this would kill processes that burst over 40% cpu usage 6 | |
times within 120 seconds (24 checks x 5 secs), hard limit | |
still applies | |
""" | |
from collections import namedtuple | |
from daemon import DaemonContext | |
from glob import glob | |
from grp import getgrnam | |
from lockfile import FileLock | |
from os import kill | |
from os.path import isfile | |
from pwd import getpwnam | |
from re import match | |
from signal import SIGKILL | |
from sys import argv, stderr, stdout | |
from textwrap import TextWrapper | |
from time import sleep, strftime | |
from traceback import format_exc | |
class Logger(object):
    """Buffered, timestamped, line-wrapped writer that appends to a log file.

    Messages are formatted into an in-memory buffer by write() and appended
    to the file by flush().
    """

    # Text types accepted as a single message. Under Python 2 this includes
    # unicode, which would otherwise be iterated character by character.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self, file):
        """Remember the log path and make sure the file can be opened.

        file: path of the log file; it is created if it does not exist.
        Raises IOError/OSError if the file cannot be opened for appending.
        """
        self.buffer = ''
        self.file = file
        # Open once so an unwritable path fails here, not on the first flush.
        with open(self.file, 'a+'):
            pass

    def __del__(self):
        # Write out anything still buffered when the logger is collected.
        self.flush()

    def flush(self):
        """Append the buffered text to the log file and clear the buffer."""
        if self.buffer:
            with open(self.file, 'a+') as handle:
                handle.write(self.buffer)
            self.buffer = ''

    def write(self, message, flush=True):
        """Format message into the buffer, optionally flushing to disk.

        message: a string (split into lines) or an iterable of lines.
        flush:   when true (the default) the buffer is written immediately.

        Every line is prefixed with a month/day time stamp and wrapped at
        100 columns; continuation lines carry the wrapper's indent.
        """
        stamp = strftime('%m/%d %H:%M:%S')
        wrapper = TextWrapper(subsequent_indent=' ', width=100)
        if isinstance(message, self._string_types):
            message = message.splitlines()
        wrapped = []
        for line in message:
            wrapped.extend(wrapper.wrap('%s %s' % (stamp, line)))
        # Join once instead of growing the buffer string line by line.
        self.buffer += ''.join('%s\n' % piece for piece in wrapped)
        if flush:
            self.flush()
class nfcpud(object):
    """Daemon that kills abusive container processes on an OpenVZ node.

    Each check scans /proc for container processes, kills the ones whose
    name is listed in banned.conf, measures per-process cpu usage over a
    sampling period and then applies the hard and soft limits defined in
    nfcpud.conf.
    """

    # Settings that nfcpud.conf must define.
    REQUIRED_SETTINGS = ('user', 'group', 'hard_limit', 'soft_limit',
                         'soft_limit_count', 'soft_limit_checks')

    # Settings that are compared against numbers and must parse as numbers.
    NUMERIC_SETTINGS = ('hard_limit', 'soft_limit', 'soft_limit_count',
                        'soft_limit_checks')

    # The kernel reports at most 15 characters of the command name in the
    # "Name:" line of /proc/<pid>/status (see proc(5)).
    COMM_MAX = 15

    def __init__(self):
        """Load the configuration and prepare the daemon context.

        Exits with status 1 if the lock file already exists. Raises
        whatever load_config() raises for a missing or invalid config.
        """
        if isfile('/opt/nforge/run/nfcpud.lock'):
            print('existing lock file found')
            raise SystemExit(1)
        self.load_config('/opt/nforge/etc/nfcpud/nfcpud.conf')
        self.context = DaemonContext(working_directory='/opt/nforge/',
                                     umask=0o002,
                                     pidfile=FileLock('/opt/nforge/run/nfcpud'),
                                     uid=getpwnam(self.config.user).pw_uid,
                                     gid=getgrnam(self.config.group).gr_gid,
                                     stdout=stdout)

    def load_config(self, config_file):
        """Parse "name = value" lines from config_file into self.config.

        Blank lines and lines starting with '#' are ignored. Values made
        only of digits become int, digits.digits become float, anything
        else stays a string. Only the first '=' separates name and value.
        If a name is repeated the last value wins.

        Raises ValueError for a line without '=', for a missing required
        setting, or for a limit setting that is not a number.
        """
        names = []
        settings = {}
        with open(config_file) as handle:
            for raw in handle:
                entry = raw.strip()
                if not entry or entry.startswith('#'):
                    continue
                name, sep, value = entry.partition('=')
                name = name.strip()
                value = value.strip()
                if not sep or not name:
                    raise ValueError('invalid line in %s: %r'
                                     % (config_file, entry))
                if match(r'^[0-9]+$', value):
                    value = int(value)
                elif match(r'^[0-9]+\.[0-9]+$', value):
                    value = float(value)
                if name not in settings:
                    names.append(name)
                settings[name] = value
        missing = [name for name in self.REQUIRED_SETTINGS
                   if name not in settings]
        if missing:
            raise ValueError('missing settings in %s: %s'
                             % (config_file, ', '.join(missing)))
        for name in self.NUMERIC_SETTINGS:
            if not isinstance(settings[name], (int, float)):
                raise ValueError('setting %s in %s must be a number'
                                 % (name, config_file))
        self.config = namedtuple('Config', names)._make(
            [settings[name] for name in names])

    def _kill(self, pid):
        """Send SIGKILL to pid; return True if the signal was delivered."""
        try:
            kill(pid, SIGKILL)
        except OSError:
            # Already gone or not permitted: nothing to record.
            return False
        return True

    def banned_kill(self):
        """Kill every known process whose program name is in banned.conf.

        Killed processes move from self.procs to self.ban_killed. The file
        is read on every call, so edits take effect on the next check.
        """
        banned = set()
        with open('/opt/nforge/etc/nfcpud/banned.conf') as handle:
            for line in handle:
                name = line.strip()
                if name:
                    banned.add(name)
                    # Long names can only ever match in truncated form.
                    banned.add(name[:self.COMM_MAX])
        # Iterate over a copy: entries are removed while looping.
        for pid, info in list(self.procs.items()):
            if info['prog'] in banned and self._kill(pid):
                self.ban_killed[pid] = info
                del self.procs[pid]

    def get_procs(self):
        """Collect container processes from /proc into self.procs.

        Each entry maps pid -> {'prog', 'ctid', 'cmd'}. Processes whose
        container id is 0 are skipped; a process that disappears or
        cannot be parsed while being read is skipped as well.
        """
        for proc_dir in glob('/proc/[0-9]*'):
            try:
                with open(proc_dir + '/status') as handle:
                    status = [line.split() for line in handle]
                # NOTE(review): the fixed line count and the positions used
                # below (0 = name, 3 = pid, 11 = container id) presumably
                # match the status layout of the targeted OpenVZ kernel -
                # confirm before running on a different kernel.
                if len(status) != 46:
                    continue
                ctid = int(status[11][1])
                if ctid == 0:
                    continue
                with open(proc_dir + '/cmdline') as handle:
                    cmd = handle.read().replace('\x00', ' ')
                self.procs[int(status[3][1])] = {'prog': status[0][1],
                                                 'ctid': ctid,
                                                 'cmd': cmd}
            except (IOError, OSError, IndexError, ValueError):
                continue

    @staticmethod
    def _parse_cpu_line(line):
        """Return (total, idle) jiffies from the first line of /proc/stat.

        The values after the "cpu" label are user, nice, system, idle, ...
        so idle is the fourth value.
        """
        cpu = [float(value) for value in line.split()[1:]]
        return (sum(cpu), cpu[3])

    @staticmethod
    def _parse_proc_stat(text):
        """Return utime + stime in jiffies from a /proc/<pid>/stat record.

        The command name is wrapped in parentheses and may itself contain
        spaces or ')', so fields are counted from the last ')' onward.
        """
        fields = text.rsplit(')', 1)[1].split()
        # fields[0] is the state (field 3 of the record), which puts
        # utime (field 14) and stime (field 15) at offsets 11 and 12.
        return float(fields[11]) + float(fields[12])

    def get_sys_jiff(self):
        """Return (total jiffies, idle jiffies) for the whole system."""
        with open('/proc/stat') as handle:
            return self._parse_cpu_line(handle.readline())

    def get_proc_jiff(self):
        """Return {pid: user + system jiffies} for every pid in self.procs.

        Pids whose stat file cannot be read or parsed are left out.
        """
        jiffs = {}
        for pid in self.procs:
            try:
                with open('/proc/%s/stat' % pid) as handle:
                    jiffs[pid] = self._parse_proc_stat(handle.read())
            except (IOError, OSError, IndexError, ValueError):
                continue
        return jiffs

    def get_stats(self, period=5):
        """Measure cpu usage of every known process over period seconds.

        Sets self.procs[pid]['cpu'] to the share of all cpu time spent by
        the process during the period (percent, one decimal) and
        self.sys_cpu to the non-idle share. Processes that could not be
        sampled both times are removed from self.procs.
        """
        total_jiff_1, idle_jiff_1 = self.get_sys_jiff()
        proc_jiff_1 = self.get_proc_jiff()
        sleep(period)
        total_jiff_2, idle_jiff_2 = self.get_sys_jiff()
        proc_jiff_2 = self.get_proc_jiff()
        total_jiff = total_jiff_2 - total_jiff_1
        idle_jiff = idle_jiff_2 - idle_jiff_1
        # Iterate over a copy: entries are removed while looping.
        for pid in list(self.procs):
            if pid in proc_jiff_1 and pid in proc_jiff_2:
                if total_jiff > 0:
                    used = proc_jiff_2[pid] - proc_jiff_1[pid]
                    self.procs[pid]['cpu'] = round(used / total_jiff * 100.0, 1)
                else:
                    # No measurable time elapsed; report no usage rather
                    # than dividing by zero.
                    self.procs[pid]['cpu'] = 0.0
            else:
                del self.procs[pid]
        if total_jiff > 0:
            self.sys_cpu = round(100.0 - (idle_jiff / total_jiff * 100.0), 1)
        else:
            self.sys_cpu = 0.0

    def hard_limit_kill(self):
        """Kill every process whose cpu usage exceeds the hard limit.

        Killed processes move from self.procs to self.hard_limit_killed.
        """
        for pid, info in list(self.procs.items()):
            if info['cpu'] > self.config.hard_limit and self._kill(pid):
                self.hard_limit_killed[pid] = info
                del self.procs[pid]

    def soft_limit_kill(self):
        """Kill processes that exceeded the soft limit too often.

        Every check in which a process is above soft_limit is recorded in
        self.history as [run_id, cpu]. A process is killed once it has
        soft_limit_count such records within the last soft_limit_checks
        checks (the current one included). Killed processes move from
        self.procs to self.soft_limit_killed together with their records
        under the 'entries' key.
        """
        # Records made in this run or earlier are outside the window.
        newest_expired = self.run_id - self.config.soft_limit_checks
        for pid, info in self.procs.items():
            if info['cpu'] > self.config.soft_limit:
                self.history.setdefault(pid, []).append(
                    [self.run_id, info['cpu']])
        for pid in list(self.history):
            if pid not in self.procs:
                # The process is gone (exited or killed earlier in this
                # check). Forget it so that a later process reusing the
                # pid neither inherits its record nor gets signalled.
                del self.history[pid]
                continue
            entries = [entry for entry in self.history[pid]
                       if entry[0] > newest_expired]
            if not entries:
                del self.history[pid]
                continue
            self.history[pid] = entries
            if (len(entries) >= self.config.soft_limit_count
                    and self._kill(pid)):
                info = self.procs.pop(pid)
                info['entries'] = entries
                self.soft_limit_killed[pid] = info
                del self.history[pid]

    def _check(self):
        """Run one complete check and log every kill it performs."""
        self.ban_killed = {}
        self.hard_limit_killed = {}
        self.soft_limit_killed = {}
        self.procs = {}
        self.run_log.write('starting check %s' % self.run_id)
        self.get_procs()
        self.run_log.write('found %s processes' % len(self.procs))
        if isfile('/opt/nforge/etc/nfcpud/banned.conf'):
            self.banned_kill()
        for info in self.ban_killed.values():
            self.run_log.write("killed banned process '%s' in container %s"
                               % (info['prog'], info['ctid']))
        # get_stats() sleeps for the sampling period, which paces the loop.
        self.get_stats()
        self.hard_limit_kill()
        for info in self.hard_limit_killed.values():
            self.run_log.write(
                "hard killed process '%s' in container %s at %s%% cpu"
                % (info['prog'], info['ctid'], info['cpu']))
        self.soft_limit_kill()
        for info in self.soft_limit_killed.values():
            self.run_log.write("soft killed process '%s' in container %s"
                               % (info['prog'], info['ctid']))
        self.run_log.write('checks complete')

    def run(self):
        """Open the log files and run checks forever."""
        self.run_log = Logger('/opt/nforge/log/nfcpud.log')
        self.debug_log = Logger('/opt/nforge/log/nfcpud_debug.log')
        self.history = {}
        self.run_id = 1
        while True:
            self._check()
            self.run_id += 1
if __name__ == '__main__':
    # Start the daemon. Any error during start-up or inside the main loop is
    # reported with a traceback and exit status 254. Only Exception is
    # caught, so a deliberate SystemExit (such as the status 1 raised when a
    # lock file already exists) keeps its own exit status.
    try:
        daemon = nfcpud()
        with daemon.context:
            daemon.run()
    except Exception:
        print(format_exc())
        raise SystemExit(254)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment