Created
February 11, 2010 16:00
-
-
Save aflaxman/301647 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
from networkx import Graph | |
from pylab import * | |
class MsgData:
    """Message/category graph built from a CSV export of SMS reports.

    Every CSV row becomes a graph node keyed by the integer in its '#'
    column (the remaining columns are stored as node attributes), and an
    edge links that node to the code of each category listed in the row's
    'CATEGORY' column.
    """

    def __init__(self, fname='haiti_sms.csv',
                 start_date='JAN 12',
                 end_date='FEB 11'):
        """Read `fname` and populate the graph and the category table.

        fname      -- path of the CSV file; its first line is skipped and
                      the following line is used as the column header.
        start_date -- first day of the reporting window, e.g. 'JAN 12'
                      (`streams` appends ' 2010' before parsing it).
        end_date   -- end of the reporting window, same format.

        Raises ValueError if a non-blank category entry contains no '.'
        separating its code from its description.
        """
        self.start_date = start_date
        self.end_date = end_date
        # category code -> category description
        self.categories = {}
        # nodes are message ids and category codes; edges tag messages
        self.data = Graph()

        # `with` guarantees the file is closed even if a row fails to parse.
        with open(fname) as f:
            f.readline()  # discard the line that precedes the header row
            for d in csv.DictReader(f):
                d_id = int(d.pop('#'))
                for c in d['CATEGORY'].split(','):
                    # skip blank entries left by stray or trailing commas
                    if not c.strip():
                        continue
                    # split on the first '.' only, so that a description
                    # which itself contains a period is kept intact
                    c_code, c_str = c.split('.', 1)
                    c_code = c_code.strip()
                    c_str = c_str.strip()
                    # NOTE(review): passing the attribute dict positionally
                    # looks like the networkx 1.x calling convention --
                    # confirm against the installed networkx version.
                    self.data.add_node(d_id, d)
                    self.data.add_edge(c_code, d_id)
                    self.categories[c_code] = c_str

    def streams(self):
        """Return `(bins, streams)` of report counts over time.

        `bins` is the array of histogram bin edges running from the start
        date to the end date (both taken in 2010) in steps of .01 date
        units.  `streams` holds one histogram of 'INCIDENT DATE' values per
        category, in sorted order of category code.  With no categories
        the result is `(bins, [])`.
        """
        # The bin edges are identical for every category: compute them once.
        bins = arange(datestr2num('%s 2010' % self.start_date),
                      datestr2num('%s 2010' % self.end_date), .01)
        streams = []
        for c in sorted(self.categories):
            times_c = [datestr2num(self.data.node[n]['INCIDENT DATE'])
                       for n in self.data[c]]
            stream_c, _ = histogram(times_c, bins)
            streams.append(stream_c)
        return bins, streams
def txt_col(col):
    """Return a half-brightness (r, g, b) tuple for the colour `col`.

    Only the first three components of `col` are used, so an RGBA input
    yields an RGB result.
    """
    return tuple(.5 * col[channel] for channel in range(3))
def plot_report_streams(D, cmap=cm.spectral):
    """Plot one smoothed, vertically stacked report stream per category.

    D    -- object providing `streams()`, `categories`, `start_date` and
            `end_date` (a MsgData instance).
    cmap -- callable mapping a number in [0, 1) to a colour; stream i is
            drawn with `cmap(i / number_of_streams)`.

    Stream i is drawn with baseline y = i, and each successive stream is
    shifted a further .1 to the left along the date axis.  The category
    name (the text after the last '| ' of its description) is written at
    both ends of every stream.
    """
    t, streams = D.streams()
    n = float(len(streams))
    x = t.copy()  # copy, because x is shifted in place below

    # Smooth each histogram with a normal-density kernel (mean 0, sd 4).
    k = normpdf(arange(-10, 10, .1), 0, 4)
    streams = [np.convolve(s_c, k) for s_c in streams]

    # Sort the categories once; streams() yields them in the same order.
    labels = [c_str.split('| ')[-1]
              for c_code, c_str in sorted(D.categories.items())]

    for i, s_c in enumerate(streams):
        color = cmap(i/n)
        # Trim the convolution tails; presumably this makes the curve the
        # same length as x -- confirm if the kernel grid is changed.
        y = i + s_c[99:-99]
        plot_date(x, y, '-',
                  linewidth=3,
                  alpha=1, color=color, zorder=.5-i)
        # translucent band between the curve and its baseline y = i
        fill(hstack((x, x[::-1])),
             hstack((y, (i+zeros(len(x)))[::-1])),
             linewidth=2,
             color=color, alpha=.25, zorder=-i)
        text(x[0], i+.4,
             labels[i],
             color=txt_col(color), va='top', ha='right', fontsize=8, rotation=30)
        text(x[-1]+.01, i,
             labels[i],
             color=txt_col(color), va='bottom', ha='left', fontsize=8, rotation=30)
        x -= .1

    yticks([])
    # Parse the window with the same explicit year that MsgData.streams
    # uses; without it the parser falls back to the current year and the
    # ticks no longer line up with the plotted data.
    days = arange(datestr2num('%s 2010' % D.start_date),
                  datestr2num('%s 2010' % D.end_date))
    xticks(.5+days,
           ['%d-%d' % (num2date(d).month, num2date(d).day) for d in days],
           fontsize=8, rotation=75, ha='center')

    # widen the view to leave room for the rotated labels
    l, r, b, t = axis()
    axis([l-4.5, r+4.5, b-4, t])
def hist_reports(D):
    """ Draw a bar chart of the number of reports in each category.

    Bars are ordered from the most to the least frequent category and are
    labelled with the text after the last '| ' of the category description.
    """
    ranked = sorted(([len(D.data[code]), code] for code in D.categories),
                    reverse=True)
    counts = [pair[0] for pair in ranked]
    names = [D.categories[pair[1]].split('| ')[-1] for pair in ranked]
    positions = arange(len(ranked))

    bar(positions, counts)
    xticks(positions+.5, names, fontsize=8, rotation=45, ha='right')
    ylabel('Number of Reports')
    title('Number of Reports by Type')

    # keep the current vertical limits, pad the horizontal ones
    x_lo, _, y_lo, y_hi = axis()
    axis([x_lo-.5, len(ranked)+.5, y_lo, y_hi])
# Script entry point: load the default data set, draw the per-category
# report streams and save the figure.  The guard must compare against
# '__main__' -- the value Python gives __name__ when a file is run as a
# script -- otherwise this block never executes.
if __name__ == '__main__':
    D = MsgData()
    plot_report_streams(D)
    savefig('reports.png')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment