aflaxman · February 11, 2010 16:00
diff --git a/gistfile2.txt b/gistfile2.txt
diff --git a/plot_ushahidi_streams.py b/plot_ushahidi_streams.py
 import csv
 from networkx import Graph
 from pylab import *

 class MsgData:
    """Ushahidi SMS reports read from a CSV export, indexed by category.

    Reports and categories live together in one graph (``self.data``):
    each report is a node keyed by its integer id and carries the
    remaining CSV columns as node data, each category is a node keyed by
    its code string, and an edge joins a report to every category it was
    filed under.  ``self.categories`` maps category code -> label.

    NOTE(review): ``add_node(id, dict)`` and ``Graph.node`` look like the
    networkx 1.x spellings -- confirm against the installed version.
    """

    def __init__(self, fname='haiti_sms.csv',
                 start_date='JAN 12',
                 end_date='FEB 11'):
        """Read `fname` and build the report/category graph.

        fname -- path of the CSV file.  Its first line is skipped and the
                 following line is used as the column header; the columns
                 '#' and 'CATEGORY' are read here and 'INCIDENT DATE' is
                 read by streams().
        start_date, end_date -- date strings without a year; streams()
                 appends ' 2010' to them to bound the histogram.
        """
        self.start_date = start_date
        self.end_date = end_date

        self.categories = {}
        self.data = Graph()

        # The context manager closes the file even when a row fails to parse.
        with open(fname) as f:
            f.readline()  # skip the line that precedes the header row
            for d in csv.DictReader(f):
                d_id = int(d.pop('#'))
                for c in d['CATEGORY'].split(','):
                    # Ignore empty or whitespace-only entries, e.g. the
                    # piece after a trailing comma.
                    if not c.strip():
                        continue

                    # Split on the first '.' only, so a label that itself
                    # contains a period does not break the unpacking.
                    c_code, c_str = c.split('.', 1)

                    c_code = c_code.strip()
                    c_str = c_str.strip()

                    self.data.add_node(d_id, d)
                    self.data.add_edge(c_code, d_id)

                    self.categories[c_code] = c_str

    def streams(self):
        """Return ``(bin_edges, counts)`` of report times per category.

        The span from start_date to end_date (both suffixed with ' 2010')
        is cut into bins of width 0.01 in date-number units.  `counts`
        holds one histogram array per category, ordered by sorted category
        code.  When there are no categories the list is empty and the bin
        edges are still returned.
        """
        # The edges are identical for every category, so build them once.
        bins = arange(datestr2num('%s 2010' % self.start_date),
                      datestr2num('%s 2010' % self.end_date), .01)

        streams = []
        for c in sorted(self.categories):
            # Neighbours of a category node are the reports filed under it.
            times_c = [datestr2num(self.data.node[n]['INCIDENT DATE'])
                       for n in self.data[c]]
            stream_c, _ = histogram(times_c, bins)
            streams.append(stream_c)
        return bins, streams

 def txt_col(col):
    """Return the first three channels of `col`, each halved, as a tuple.

    Used to derive a darker text colour from a colormap colour; any
    fourth (alpha) channel is dropped.
    """
    return tuple(.5*col[channel] for channel in range(3))

 def plot_report_streams(D, cmap=cm.spectral):
    """Draw one smoothed report-count stream per category, stacked upward.

    D    -- object providing streams(), categories, start_date and
            end_date (see MsgData).
    cmap -- colormap; called with i/n to pick the colour of stream i.

    Stream i is drawn on the baseline y = i, and every successive stream
    is shifted a further 0.1 to the left on the date axis.  Each stream
    is labelled at both ends with the part of its category label that
    follows the last '| '.
    """
    edges, streams = D.streams()
    n = float(len(streams))
    x = edges.copy()

    # Smooth every histogram with a normal-density kernel (mean 0, sd 4)
    # sampled on arange(-10, 10, .1).
    k = normpdf(arange(-10,10,.1), 0, 4)
    streams = [np.convolve(s_c, k) for s_c in streams]

    # Sort the labels once instead of twice per stream.  MsgData.streams()
    # orders its output by sorted category code, so labels[i] belongs to
    # streams[i].
    labels = [c_str.split('| ')[-1]
              for c_code, c_str in sorted(D.categories.items())]

    for i, stream in enumerate(streams):
        # Trimming 99 samples from each end of the full convolution leaves
        # one value per bin edge, so y lines up with x.
        y = i + stream[99:-99]
        colour = cmap(i/n)

        plot_date(x, y, '-',
                  linewidth=3,
                  alpha=1, color=colour, zorder=.5-i)
        fill(hstack((x, x[::-1])),
             hstack((y, (i+zeros(len(x)))[::-1])),
             linewidth=2,
             color=colour, alpha=.25, zorder=-i)
        text(x[0], i+.4, labels[i],
             color=txt_col(colour), va='top', ha='right', fontsize=8, rotation=30)
        text(x[-1]+.01, i, labels[i],
             color=txt_col(colour), va='bottom', ha='left', fontsize=8, rotation=30)
        x -= .1

    yticks([])
    # Build the tick dates from the same '<date> 2010' strings that
    # MsgData.streams() uses.  A date string without a year is completed
    # by the parser with the current year, which would move the ticks away
    # from the data whenever this runs outside 2010.
    days = arange(datestr2num('%s 2010' % D.start_date),
                  datestr2num('%s 2010' % D.end_date))
    xticks(.5+days,
           ['%d-%d' % (num2date(d).month, num2date(d).day) for d in days],
           fontsize=8, rotation=75, ha='center')
    left, right, bottom, top = axis()
    # Widen the view so the rotated labels at both ends stay visible.
    axis([left-4.5, right+4.5, bottom-4, top])
    
 def hist_reports(D):
    """Draw a bar chart of the number of reports filed under each category.

    Bars run from the most to the least reported category; each one is
    labelled with the text after the last '| ' of its category label.
    """
    ranked = sorted(([len(D.data[code]), code] for code in D.categories),
                    reverse=True)
    heights = [count for count, code in ranked]
    labels = [D.categories[code].split('| ')[-1] for count, code in ranked]

    positions = arange(len(ranked))
    bar(positions, heights)
    xticks(positions+.5, labels, fontsize=8, rotation=45, ha='right')
    ylabel('Number of Reports')
    title('Number of Reports by Type')

    # Keep the current vertical limits; pad the horizontal ones by half
    # a bar on each side.
    x_lo, x_hi, y_lo, y_hi = axis()
    axis([x_lo-.5, len(ranked)+.5, y_lo, y_hi])

 if __name__ == '__main__':
    # A script's module name is '__main__' (never 'main'), so the original
    # comparison could not succeed.  When run as a script: load the default
    # data set, draw the category streams and save the figure.
    D = MsgData()
    plot_report_streams(D)
    savefig('reports.png')
	import csv
	from networkx import Graph
	from pylab import *

	class MsgData:
	    """Ushahidi SMS reports read from a CSV export, indexed by category.

	    Reports and categories live together in one graph (``self.data``):
	    each report is a node keyed by its integer id and carries the
	    remaining CSV columns as node data, each category is a node keyed by
	    its code string, and an edge joins a report to every category it was
	    filed under.  ``self.categories`` maps category code -> label.

	    NOTE(review): ``add_node(id, dict)`` and ``Graph.node`` look like the
	    networkx 1.x spellings -- confirm against the installed version.
	    """

	    def __init__(self, fname='haiti_sms.csv',
	                 start_date='JAN 12',
	                 end_date='FEB 11'):
	        """Read `fname` and build the report/category graph.

	        fname -- path of the CSV file.  Its first line is skipped and the
	                 following line is used as the column header; the columns
	                 '#' and 'CATEGORY' are read here and 'INCIDENT DATE' is
	                 read by streams().
	        start_date, end_date -- date strings without a year; streams()
	                 appends ' 2010' to them to bound the histogram.
	        """
	        self.start_date = start_date
	        self.end_date = end_date

	        self.categories = {}
	        self.data = Graph()

	        # The context manager closes the file even when a row fails to parse.
	        with open(fname) as f:
	            f.readline()  # skip the line that precedes the header row
	            for d in csv.DictReader(f):
	                d_id = int(d.pop('#'))
	                for c in d['CATEGORY'].split(','):
	                    # Ignore empty or whitespace-only entries, e.g. the
	                    # piece after a trailing comma.
	                    if not c.strip():
	                        continue

	                    # Split on the first '.' only, so a label that itself
	                    # contains a period does not break the unpacking.
	                    c_code, c_str = c.split('.', 1)

	                    c_code = c_code.strip()
	                    c_str = c_str.strip()

	                    self.data.add_node(d_id, d)
	                    self.data.add_edge(c_code, d_id)

	                    self.categories[c_code] = c_str

	    def streams(self):
	        """Return ``(bin_edges, counts)`` of report times per category.

	        The span from start_date to end_date (both suffixed with ' 2010')
	        is cut into bins of width 0.01 in date-number units.  `counts`
	        holds one histogram array per category, ordered by sorted category
	        code.  When there are no categories the list is empty and the bin
	        edges are still returned.
	        """
	        # The edges are identical for every category, so build them once.
	        bins = arange(datestr2num('%s 2010' % self.start_date),
	                      datestr2num('%s 2010' % self.end_date), .01)

	        streams = []
	        for c in sorted(self.categories):
	            # Neighbours of a category node are the reports filed under it.
	            times_c = [datestr2num(self.data.node[n]['INCIDENT DATE'])
	                       for n in self.data[c]]
	            stream_c, _ = histogram(times_c, bins)
	            streams.append(stream_c)
	        return bins, streams

	def txt_col(col):
	    """Return the first three channels of `col`, each halved, as a tuple.

	    Used to derive a darker text colour from a colormap colour; any
	    fourth (alpha) channel is dropped.
	    """
	    # The multiplication signs were missing here ('.5col[0]'), which is
	    # a syntax error.
	    return (.5*col[0], .5*col[1], .5*col[2])

	def plot_report_streams(D, cmap=cm.spectral):
	    """Draw one smoothed report-count stream per category, stacked upward.

	    D    -- object providing streams(), categories, start_date and
	            end_date (see MsgData).
	    cmap -- colormap; called with i/n to pick the colour of stream i.

	    Stream i is drawn on the baseline y = i, and every successive stream
	    is shifted a further 0.1 to the left on the date axis.  Each stream
	    is labelled at both ends with the part of its category label that
	    follows the last '| '.
	    """
	    edges, streams = D.streams()
	    n = float(len(streams))
	    x = edges.copy()

	    # Smooth every histogram with a normal-density kernel (mean 0, sd 4)
	    # sampled on arange(-10, 10, .1).
	    k = normpdf(arange(-10,10,.1), 0, 4)
	    streams = [np.convolve(s_c, k) for s_c in streams]

	    # Sort the labels once instead of twice per stream.  MsgData.streams()
	    # orders its output by sorted category code, so labels[i] belongs to
	    # streams[i].  The separator is the plain '| ' (no backslash).
	    labels = [c_str.split('| ')[-1]
	              for c_code, c_str in sorted(D.categories.items())]

	    for i, stream in enumerate(streams):
	        # Trimming 99 samples from each end of the full convolution leaves
	        # one value per bin edge, so y lines up with x.
	        y = i + stream[99:-99]
	        colour = cmap(i/n)

	        plot_date(x, y, '-',
	                  linewidth=3,
	                  alpha=1, color=colour, zorder=.5-i)
	        fill(hstack((x, x[::-1])),
	             hstack((y, (i+zeros(len(x)))[::-1])),
	             linewidth=2,
	             color=colour, alpha=.25, zorder=-i)
	        text(x[0], i+.4, labels[i],
	             color=txt_col(colour), va='top', ha='right', fontsize=8, rotation=30)
	        text(x[-1]+.01, i, labels[i],
	             color=txt_col(colour), va='bottom', ha='left', fontsize=8, rotation=30)
	        x -= .1

	    yticks([])
	    # Build the tick dates from the same '<date> 2010' strings that
	    # MsgData.streams() uses.  A date string without a year is completed
	    # by the parser with the current year, which would move the ticks away
	    # from the data whenever this runs outside 2010.
	    days = arange(datestr2num('%s 2010' % D.start_date),
	                  datestr2num('%s 2010' % D.end_date))
	    xticks(.5+days,
	           ['%d-%d' % (num2date(d).month, num2date(d).day) for d in days],
	           fontsize=8, rotation=75, ha='center')
	    left, right, bottom, top = axis()
	    # Widen the view so the rotated labels at both ends stay visible.
	    axis([left-4.5, right+4.5, bottom-4, top])

	def hist_reports(D):
	    """Draw a bar chart of the number of reports filed under each category.

	    Bars run from the most to the least reported category; each one is
	    labelled with the text after the last '| ' of its category label.
	    """
	    hist = [[len(D.data[c]), c] for c in D.categories]
	    hist = sorted(hist, reverse=True)

	    left = arange(len(hist))
	    bar(left, [h for h,c in hist])
	    # Split on the plain '| ' separator; the backslash-escaped form
	    # would never match the labels.
	    xticks(left+.5,
	           [D.categories[c].split('| ')[-1] for h,c in hist],
	           fontsize=8, rotation=45, ha='right')
	    ylabel('Number of Reports')
	    title('Number of Reports by Type')
	    # Keep the current vertical limits; pad the horizontal ones by half
	    # a bar on each side.
	    l,r,b,t = axis()
	    axis([l-.5, len(hist)+.5, b, t])

	if __name__ == '__main__':
	    # A script's module name is '__main__' (never 'main'), so the original
	    # comparison could not succeed.  When run as a script: load the default
	    # data set, draw the category streams and save the figure.
	    D = MsgData()
	    plot_report_streams(D)
	    savefig('reports.png')