Skip to content

Instantly share code, notes, and snippets.

@pib
Forked from andrewsomething/datehist.py
Created July 9, 2012 15:29
Show Gist options
  • Save pib/3077180 to your computer and use it in GitHub Desktop.
Save pib/3077180 to your computer and use it in GitHub Desktop.
Plot a histogram of mailinglist activity
#!/usr/bin/env python
"""
Plot histogram from list of dates
Usage
=====
Point to mbox file.
Ex.1: plot mailinglist activity::
datehist.py -t `ubuntu-motu list` ubuntu-motu.mbox
"""
import sys
import time
import datetime
import mailbox
import email.utils
import numpy
from matplotlib import pyplot
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
from matplotlib.dates import epoch2num, date2num
from optparse import OptionParser
def num_now():
    """
    Give the present moment as a matplotlib date number.

    The value comes from ``datetime.datetime.now()`` passed through
    ``date2num``.
    """
    current = datetime.datetime.now()
    return date2num(current)
def read_dates(filename):
    """
    Collect the date of every message in an mbox file.

    Prints *filename*, opens it with ``mailbox.mbox`` and converts the
    ``Date`` header of each message into a matplotlib date number via
    ``epoch2num``.

    The UTC offset given in the header is honoured (``parsedate_tz`` +
    ``mktime_tz``), so the result does not depend on the timezone of the
    machine running the script. Messages whose ``Date`` header is missing
    or cannot be turned into a timestamp are skipped instead of aborting
    the whole run.

    :param filename: path to the mbox file.
    :returns: list of date numbers, in mailbox order, one per message
        that carries a usable date.
    """
    print(filename)
    dates = []
    for message in mailbox.mbox(filename):
        raw_date = message['date']
        if not raw_date:
            # No Date header at all: nothing to plot for this message.
            continue
        parsed = email.utils.parsedate_tz(raw_date)
        if parsed is None:
            # Header present but not a recognisable date.
            continue
        try:
            stamp = email.utils.mktime_tz(parsed)
        except (ValueError, OverflowError):
            # Syntactically valid header with out-of-range fields.
            continue
        dates.append(epoch2num(stamp))
    return dates
def plot_datehist(dates, bins, title=None):
    """
    Draw a histogram of matplotlib date numbers as a bar chart.

    The counts returned by ``numpy.histogram`` are divided by the bin
    width so the y axis shows a rate rather than a raw count. The x axis
    runs from the first bin edge to ``num_now()``.

    :param dates: sequence of matplotlib date numbers.
    :param bins: passed straight to ``numpy.histogram`` as its ``bins``.
    :param title: optional title for the axes; omitted when falsy.
    :returns: the ``Figure`` holding the plot.
    """
    (hist, bin_edges) = numpy.histogram(dates, bins)
    width = bin_edges[1] - bin_edges[0]

    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    # The x positions are the *left* edges of the bins, so the bars must be
    # edge-aligned explicitly; the default alignment of ``bar`` is 'center'
    # on current matplotlib, which would shift every bar by half a bin.
    ax.bar(bin_edges[:-1], hist / width, width=width, align='edge')
    ax.set_xlim(bin_edges[0], num_now())
    ax.set_ylabel('Events [1/day]')
    if title:
        ax.set_title(title)

    # set x-ticks in date
    # see: http://matplotlib.sourceforge.net/examples/api/date_demo.html
    ax.xaxis.set_major_locator(YearLocator())
    ax.xaxis.set_major_formatter(DateFormatter('%Y'))
    ax.xaxis.set_minor_locator(MonthLocator())

    # format the coords message box
    ax.format_xdata = DateFormatter('%Y-%m-%d')
    ax.grid(True)
    fig.autofmt_xdate()
    return fig
def main():
    """
    Command-line entry point.

    Parses ``sys.argv`` with ``optparse``. With ``--doc`` the module
    docstring is printed and nothing else happens; no mbox path is needed
    in that case. Otherwise exactly one positional argument (the mbox
    path) is required: its dates are read, plotted, and the figure is
    either saved to ``--out`` or shown in a GUI window.
    """
    parser = OptionParser(usage='%prog [options] <path/to/mbox>')
    parser.add_option("-o", "--out", default=None,
                      help="output file. open gui if not specified.")
    parser.add_option("-b", "--bins", default=50, type=int,
                      help="number of bins for histogram. (default: 50)")
    parser.add_option("-t", "--title")
    parser.add_option("-d", "--doc", default=False, action="store_true",
                      help="print document")
    (opts, args) = parser.parse_args()

    # Handle --doc before validating positional arguments so the
    # documentation can be shown without supplying a dummy mbox path.
    if opts.doc:
        print(__doc__)
        return

    if len(args) != 1:
        # parser.error() prints the usage message and exits the process.
        parser.error("You must provide the path to an mbox file.")
    filename = args[0]

    dates = read_dates(filename)
    fig = plot_datehist(dates, opts.bins, title=opts.title)
    if opts.out:
        fig.savefig(opts.out)
    else:
        pyplot.show()
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if '__main__' == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment