Skip to content

Instantly share code, notes, and snippets.

@byronyi
Created May 11, 2015 07:37
Show Gist options
  • Select an option

  • Save byronyi/f2cba140972bba72fcaf to your computer and use it in GitHub Desktop.

Select an option

Save byronyi/f2cba140972bba72fcaf to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import division, print_function, unicode_literals
import os
from datetime import datetime
from tempfile import mkdtemp
import joblib
import tempfile
from joblib import Memory
from joblib import Parallel, delayed
from joblib.pool import has_shareable_memory
import numpy as np
import scipy.sparse as sp
from matplotlib import pyplot as plt
import pandas as pd
# Cache parsed traces on disk so that repeated get_data() calls for the same
# path do not re-read the CSV. The directory comes from mkdtemp(), so the
# cache starts out empty on every run.
#
# The directory is passed positionally: joblib renamed this keyword from
# ``cachedir`` to ``location`` (deprecated in 0.12, later removed), but the
# first positional parameter is the cache directory under either name.
cachedir = tempfile.mkdtemp()
memory = Memory(cachedir, verbose=0)
@memory.cache()
def get_data(path):
    """Load one whitespace-delimited trace file.

    The 'Start' column becomes the index, and the address/port columns
    ('SrcIPaddress', 'SrcP', 'DstIPaddress', 'DstP') are converted to
    pandas Categoricals. The call is wrapped by ``memory.cache()``.

    Returns a ``(frame, trace_name)`` tuple, where ``trace_name`` is the last
    ``os.sep``-separated component of ``path`` cut at its first '.'.
    """
    clock_width = len('00:00:00')

    def parse_clock(stamp):
        # Only the leading HH:MM:SS part of the field is parsed.
        return datetime.strptime(stamp[:clock_width], '%H:%M:%S')

    # NOTE(review): no ``parse_dates`` argument is given, so it should be
    # confirmed whether pandas actually applies ``date_parser`` to the index.
    frame = pd.read_csv(
        path,
        delim_whitespace=True,
        index_col='Start',
        date_parser=parse_clock,
    )

    file_name = path.split(os.sep)[-1]
    trace_name = file_name.split('.')[0]

    for column in ('SrcIPaddress', 'SrcP', 'DstIPaddress', 'DstP'):
        frame[column] = pd.Categorical(frame[column])

    return frame, trace_name
def plot_traffic_distribution_over_last_octet(path):
    """Plot the number of trace rows per last octet of the source address.

    Rows are counted per integer value of the text after the final '.' in
    'SrcIPaddress'. The line plot is written to '<trace name>-last_octet.eps'
    in the current working directory.
    """
    df, dname = get_data(path)
    last_octet = df['SrcIPaddress'].astype(str).map(lambda x: x.split('.')[-1])
    ts = df.set_index(last_octet.astype(int)).groupby(level=0).size()

    # Draw on an explicit figure/axes pair and always close it: pyplot keeps
    # every figure alive until closed, and this runs once per trace file.
    fig, ax = plt.subplots(figsize=(12, 4))
    try:
        ts.plot(ax=ax)
        ax.set_xlabel(dname)
        fig.savefig('{}-last_octet.eps'.format(dname), format='eps', dpi=1000)
    finally:
        plt.close(fig)
def plot_all_trace_traffic_over_time(trace_files):
    """Plot normalized traffic for every trace, one subplot row per trace.

    For each path in ``trace_files`` the 'Octets' column is summed per index
    value (in order of first appearance) and divided by the largest of those
    sums. All rows share the y axis. The figure is written to 'all-traf.eps'
    in the current working directory.

    Does nothing when ``trace_files`` is empty.
    """
    if not trace_files:
        # A subplot grid needs at least one row; there is nothing to draw.
        return

    # squeeze=False keeps ``axes`` two-dimensional even for a single trace;
    # without it a lone Axes object is returned and cannot be iterated.
    fig, axes = plt.subplots(len(trace_files), sharey=True, squeeze=False,
                             figsize=(20, 2 * len(trace_files)))
    try:
        for ax, path in zip(axes[:, 0], trace_files):
            df, dname = get_data(path)
            df_series = df.groupby(level=0, sort=False)['Octets'].sum()
            # ``.values`` replaces ``as_matrix()``, which pandas 1.0 removed.
            ax.plot(df_series.values / df_series.max())
            ax.set_title('{} normalized traffic vs time'.format(dname))
        fig.savefig('all-traf.eps', format='eps', dpi=1000)
    finally:
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)
def get_avg_pkt_size(path):
    """Return the mean of the 'Octets' column for one trace.

    Returns a dict with keys 'trace' (the trace name reported by
    ``get_data``) and 'pkt_size' (the mean of 'Octets' over all rows).
    NOTE(review): this equals an average packet size only if each row is a
    single packet - confirm against the trace format.
    """
    # get_data returns a (frame, name) pair; it must be unpacked before the
    # frame's columns can be accessed.
    df, dname = get_data(path)
    return {
        'trace': dname,
        'pkt_size': df['Octets'].mean(),
    }
def get_port_table(path, ports):
    """Summarize the traffic of one trace that touches any of ``ports``.

    A row counts when its 'SrcP' or its 'DstP' (compared as integers) is in
    ``ports``.

    Returns a dict with keys:
      'trace'      - the trace name reported by ``get_data``
      'traffic'    - sum of 'Octets' over the matching rows
      'percentage' - that sum as a share of all 'Octets', formatted '{:.2%}'
    """
    df, dname = get_data(path)
    wanted = list(ports)

    # Combine the two masks as plain arrays so the row selection is purely
    # positional and does not depend on the frame's index labels.
    mask = (df['SrcP'].astype(int).isin(wanted).values |
            df['DstP'].astype(int).isin(wanted).values)

    port_traffic = df.loc[mask, 'Octets'].sum()
    total_traffic = df['Octets'].sum()

    # A trace with no traffic at all would otherwise divide by zero.
    share = port_traffic / total_traffic if total_traffic else 0.0

    return {
        'trace': dname,
        'traffic': port_traffic,
        'percentage': "{:.2%}".format(share),
    }
def plot_traffic_distribution_over_port(path):
    """Plot total 'Octets' per source port for the first 1000 port numbers.

    The y axis is logarithmic. The figure is written to
    '<trace name>-port.eps' in the current working directory.
    """
    df, dname = get_data(path)

    # Total octets per integer source port. Grouping by a plain array keeps
    # this independent of the frame's index.
    per_port = df['Octets'].groupby(df['SrcP'].astype(int).values).sum()

    # Dense vector with one slot per possible port number (0..65535).
    distribution = np.zeros(65536, dtype=per_port.dtype)
    distribution[per_port.index.values] = per_port.values

    fig, ax = plt.subplots(figsize=(16, 3))
    try:
        # +1 so ports with zero traffic remain plottable on the log axis
        ax.semilogy(distribution[:1000] + 1)
        ax.set_title(dname)
        ax.set_xlabel('Src Port Number')
        ax.set_ylabel('Traffic')
        fig.savefig('{}-port.eps'.format(dname), format='eps', dpi=1000)
    finally:
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)
def main():
    """Run every analysis over the trace files found under './data'.

    A file is treated as a trace when its name contains 'trace'. Outputs are
    written to the current working directory: per-trace last-octet plots (only
    for paths containing 'gi'), the combined traffic-over-time plot,
    'mean_pkt_size.csv', per-trace port plots, and one '<app>-traffic.csv' per
    application port group.
    """
    # Find trace files
    trace_files = []
    for path, _, files in os.walk('./data'):
        for f in files:
            if 'trace' in f:
                trace_files.append(os.path.join(path, f))

    if not trace_files:
        # Every step below needs at least one trace to work on.
        print('No trace files found under ./data')
        return

    # Task 1
    for path in trace_files:
        if 'gi' in path:
            plot_traffic_distribution_over_last_octet(path)

    # Plot trace traffic as time series
    plot_all_trace_traffic_over_time(trace_files)

    # Average packet size of traces
    df = pd.DataFrame([get_avg_pkt_size(path) for path in trace_files]).set_index('trace')
    df.to_csv('mean_pkt_size.csv')

    # Plot traffic distribution over all well known ports
    for path in trace_files:
        plot_traffic_distribution_over_port(path)

    # Traffic distribution over some applications
    app_ports = {
        'dns': [53],
        'dhcp': [546, 547],
        'mail': [25, 110, 465, 587, 995],
        'http': [80, 443],
        'db': [1433, 1434, 1521, 3306, 5432, 6379, 11211],
        'flash': [843],
        'rsync': [873],
        'ipv6nfs': [973],
    }
    for app, ports in app_ports.items():
        traffic_col = '{}-traffic'.format(app)
        df = pd.DataFrame([get_port_table(path, ports) for path in trace_files])
        # Rename by column name and select explicitly: the order in which
        # pandas lays out dict keys as columns differs between versions, so
        # assigning labels by position could swap the two columns.
        df = df.set_index('trace').rename(columns={'traffic': traffic_col})
        df[[traffic_col, 'percentage']].to_csv('{}-traffic.csv'.format(app))
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment