Last active
August 29, 2015 14:02
-
-
Save Packetslave/186a2d3c916c61968f7b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
""" | |
Twitter Coding Challenge | |
#SREcon 2014 | |
Collect samples of Linux kernel network statistics and report the minimum, | |
maximum, and average delta for each over a given interval. | |
Uses the Google gflags module for parsing command-line arguments. | |
Available on PyPI (pip install python-gflags) | |
Usage: netstat.py [options] | |
--stats: comma-separated list of stats to sample | |
(default: '') | |
--count: samples to collect | |
(default: '5') | |
(an integer) | |
--interval: seconds to sleep between samples | |
(default: '1') | |
(an integer) | |
--[no]names: include the name of each stat in the output | |
(default: 'false') | |
--[no]debug: log debug info | |
(default: 'false') | |
--netstat_file: for testing | |
(default: '/proc/net/netstat') | |
The output is designed to be machine readable: | |
$ netstat.py --stats=InOctets,OutOctets --count=5 --interval=1 | |
52 312 182 | |
100 732 395 | |
$ netstat.py --stats=InOctets,OutOctets --count=5 --interval=1 --names | |
InOctets 52 312 182 | |
OutOctets 100 732 395 | |
Written and tested using Python 2.7 using PEP8 style. Python 3 is untested. | |
""" | |
__author__ = 'Brian Landers <[email protected]>' | |
import collections | |
import itertools | |
import logging | |
import sys | |
import time | |
import gflags | |
# Global flag registry; values are populated once main() parses argv.
FLAGS = gflags.FLAGS

# What to sample and how often.
gflags.DEFINE_string(
    'stats', '', 'comma-separated list of stats to sample')
gflags.DEFINE_integer(
    'interval', 1, 'seconds to sleep between samples')
gflags.DEFINE_integer(
    'count', 5, 'samples to collect')

# Output formatting.
gflags.DEFINE_boolean(
    'names', False, 'include the name of each stat in the output')

# Testing and diagnostics.
gflags.DEFINE_string(
    'netstat_file', '/proc/net/netstat', 'for testing')
gflags.DEFINE_boolean(
    'debug', False, 'log debug info')
def get_raw_stats(filename):
    """Read the kernel network stats from /proc and return them in a dict.

    The file is expected to consist of pairs of lines: a header line
    holding a section label followed by the stat names, then a line
    holding the same label followed by one integer per name. The leading
    label on each line is discarded, so stat names are not qualified by
    their section; if two sections share a name, the later one wins.

    Every complete (header, values) pair in the file is parsed, rather
    than only the first two, and a short or empty file yields a smaller
    (possibly empty) dict instead of raising IndexError.

    Args:
        filename: path to the file to read, for testing in isolation

    Returns:
        dict (string->int) mapping stat to current value

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if a value on a values line is not an integer.
    """
    with open(filename) as stats:
        # Read the file in one shot so we get a consistent view
        contents = stats.readlines()

    raw_stats = {}
    # Even-indexed lines are headers, odd-indexed lines are their values.
    # A trailing unpaired line is ignored because zip() stops at the
    # shorter sequence.
    for header, values in zip(contents[0::2], contents[1::2]):
        # [1:] drops the section label that starts each line.
        names = header.split()[1:]
        numbers = [int(x) for x in values.split()[1:]]
        # The builtin zip() works on both Python 2 and Python 3, unlike
        # itertools.izip, which only exists on Python 2.
        raw_stats.update(zip(names, numbers))
    return raw_stats
def sample(filename, stats, interval, count):
    """Collect each kernel stat and track the deltas.

    Takes 'count' samples, sleeping 'interval' seconds between
    consecutive samples, and records how much each requested stat changed
    from one sample to the next.

    Note that the 'all' value for each kernel stat will hold at most
    'count' - 1 entries, since the first sample has no delta. It holds
    fewer if the stat was missing from some samples.

    Args:
        filename: path to the file to read, for testing in isolation
        stats: list of kernel stats to sample
        interval: time in seconds between each sample
        count: number of samples to collect

    Returns:
        dict mapping each requested stat name to a dict with the keys:
        - avg: the average (mean) delta per second (NOTE: not per
          interval!), using integer (floor) division; 0 when no time
          elapsed (count <= 1 or interval == 0)
        - min: the smallest non-zero delta, or 0 if every delta was zero
          or no delta was recorded
        - max: the numerically largest delta (never below 0)
        - all: sorted list of all delta values (for testing)
    """
    # min_delta only gains an entry once a non-zero delta is seen, so a
    # legitimate minimum can never be confused with "not set yet".
    min_delta = {}
    max_delta = collections.defaultdict(int)
    all_deltas = collections.defaultdict(list)

    # Grab the first sample outside the loop so that the min/max stats
    # have a valid initial value to compare to. Otherwise, we end up
    # with a drastically skewed max delta from the first value.
    logging.info('sample #1 of %i', count)
    old_vals = get_raw_stats(filename)

    for i in range(1, count):
        # Sleep before each follow-up sample rather than after it, so no
        # time is wasted sleeping once the final sample has been taken.
        time.sleep(interval)
        logging.info('sample #%i of %i', i + 1, count)
        values = get_raw_stats(filename)

        for stat in stats:
            if stat not in values:
                logging.warning('%s not found in sample!', stat)
                continue
            if stat not in old_vals:
                # The stat was absent from every earlier sample, so there
                # is nothing to diff against yet; use this as the baseline.
                old_vals[stat] = values[stat]
                continue

            delta = values[stat] - old_vals[stat]
            logging.debug(
                'Delta for %s is %i (%i - %i)',
                stat, delta, values[stat], old_vals[stat])

            if delta > max_delta[stat]:
                logging.debug('%s: New max delta (%i)', stat, delta)
                max_delta[stat] = delta

            # Zero deltas are skipped: letting one through would reset the
            # minimum and discard a smaller non-zero delta seen earlier.
            if delta and (stat not in min_delta or delta < min_delta[stat]):
                logging.debug('%s: New min delta (%i)', stat, delta)
                min_delta[stat] = delta

            all_deltas[stat].append(delta)
            old_vals[stat] = values[stat]

    # Total time covered by the deltas; zero if fewer than two samples
    # were requested or the interval is zero.
    elapsed = (count - 1) * interval

    out = {}
    for stat in stats:
        total = sum(all_deltas[stat])
        out[stat] = {
            # Average is per second, not per interval. Floor division
            # keeps the integer result identical on Python 2 and 3.
            'avg': total // elapsed if elapsed > 0 else 0,
            'max': max_delta[stat],
            'min': min_delta.get(stat, 0),
            'all': sorted(all_deltas[stat]),
        }
    return out
def main(argv):
    """Main Entry Point.

    Parses the command-line flags, collects the samples and prints one
    line per requested stat to stdout in the form "min max avg",
    prefixed with the stat name when --names is set.

    Args:
        argv: full argument vector, including the program name

    Exits with status 1 (after printing usage to stderr) if the flags
    cannot be parsed or if --stats names no stats at all.
    """
    try:
        FLAGS(argv)
    except gflags.FlagsError as ex:
        # sys.stderr.write() behaves the same on Python 2 and Python 3,
        # unlike the Python-2-only "print >>" statement.
        sys.stderr.write(
            '%s\n\nUsage: %s\n%s\n' % (ex, sys.argv[0], FLAGS))
        sys.exit(1)

    logging.basicConfig(
        format='%(asctime)-15s %(levelname)-8s %(message)s',
        level=logging.DEBUG if FLAGS.debug else logging.INFO)

    # ''.split(',') yields [''], so drop blank entries (and stray
    # whitespace around names) instead of sampling a stat named ''.
    stats = [name.strip() for name in FLAGS.stats.split(',') if name.strip()]
    if not stats:
        sys.stderr.write(
            'no stats requested; use --stats=Name1,Name2\n\n'
            'Usage: %s\n%s\n' % (sys.argv[0], FLAGS))
        sys.exit(1)

    samples = sample(FLAGS.netstat_file, stats, FLAGS.interval, FLAGS.count)

    # We deliberately don't iterate over the samples dict here, because we
    # want the display order to be the same as was specified in
    # FLAGS.stats. Another option would be to use an OrderedDict as the
    # return value of sample()
    for stat in stats:
        vals = samples[stat]
        logging.debug('%s: %s', stat, vals)

        prefix = stat + ' ' if FLAGS.names else ''
        # A single parenthesised argument prints identically on Python 2
        # (print statement) and Python 3 (print function).
        print("%s%i %i %i" % (
            prefix, vals['min'], vals['max'], vals['avg']))
# Run only when executed as a script; main() receives the complete argument
# vector (program name included) and parses the flags from it.
if __name__ == '__main__':
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment