-
-
Save Jorge-C/3613794 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Cool histograms from your Gmail account.

Remember to turn on IMAP, and if you're using two-step verification
(you should), you'll have to generate a password just for this script
(see the "security" tab of your account configuration).
"""
from collections import Counter | |
from itertools import izip | |
import imaplib | |
import getpass | |
import matplotlib.pyplot as plt | |
from email.utils import mktime_tz, parsedate_tz | |
from datetime import datetime | |
import pytz | |
def _plotby_helper(iterable, xlim=None, width=0.7):
    """Plot how often each distinct value occurs in *iterable*.

    The values are tallied with ``Counter``; the distinct values become the
    bar positions and their frequencies the bar heights.

    iterable -- hashable values to count (e.g. hours, weekdays).
    xlim     -- optional x-axis limits, forwarded to ``_plot_helper``.
    width    -- bar width, forwarded to ``_plot_helper``.
    """
    counts = Counter(iterable)
    if not counts:
        # Unpacking zip() of nothing into two names raises ValueError, so
        # an empty input is reported instead of crashing.
        print('nothing to plot')
        return
    # zip()/items() behave the same as izip()/iteritems() here and are not
    # tied to one major Python version.
    x, y = zip(*counts.items())
    _plot_helper(x, y, xlim, width)
def _plot_helper(x, y, xlim=None, width=20):
    """Show a bar chart of *y* against *x* in a new figure.

    x     -- bar positions (numbers or datetimes).
    y     -- bar heights, one per entry of *x*.
    xlim  -- optional x-axis limits; applied only when not None.
    width -- bar width in x-axis units.

    Blocks in ``plt.show()`` until the window is closed (when an
    interactive backend is in use).
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    if xlim is not None:
        ax.set_xlim(xlim)
    # Pin the bars to start at their x value.  The library default changed
    # from 'edge' to 'center' in matplotlib 2.0, which would push the first
    # bar partly outside limits such as [0, 24] and make a month's bar
    # straddle the previous month.
    ax.bar(x, y, width=width, align='edge')
    fig.autofmt_xdate()
    plt.show()
class EmailStats(object):
    """Download the Date header of every message in a Gmail mailbox and
    plot histograms of when the messages were sent.

    Attributes:
        mailboxname -- name of the IMAP mailbox that is read.
        email       -- login name used for the IMAP session.
        data        -- raw response list of the IMAP FETCH (None until fetched).
        dtimes      -- timezone-aware datetimes, one per parsable message.
        imap        -- the imaplib connection object (logged out after
                       construction).
    """

    def __init__(self, mailboxname, email, timezone="Europe/Madrid"):
        """Connect, fetch all Date headers, disconnect and parse them.

        timezone is the timezone you're usually in, so that the
        messages will appear at your right local time.

        Prompts for the password on the terminal.  Raises whatever
        imaplib raises on login, select, search or fetch failures.
        """
        self.mailboxname = mailboxname
        self.email = email
        self.dtimes = None
        self.data = None
        self.imap = None
        self._connect()
        try:
            self._fetch_dates()
        finally:
            # Always log out, even when selecting or fetching failed, so
            # the server-side session is not left dangling.
            self._disconnect()
        self.parse_dates(timezone)

    def _connect(self):
        """Open an SSL connection to Gmail's IMAP server and log in."""
        self.imap = imaplib.IMAP4_SSL("imap.gmail.com")
        password = getpass.getpass("Enter your password: ")
        self.imap.login(self.email, password)

    def _disconnect(self):
        """Log out of the IMAP session."""
        self.imap.logout()

    def _fetch_dates(self):
        """Fetch the Date header of every message into ``self.data``.

        Raises ``imaplib.IMAP4.error`` when the mailbox cannot be selected.
        """
        status, detail = self.imap.select(mailbox=self.mailboxname,
                                          readonly=True)
        if status != 'OK':
            # select() reports a missing mailbox through its return value;
            # without this check the failure only shows up later as an
            # unrelated-looking SEARCH state error.
            raise imaplib.IMAP4.error(
                'cannot select mailbox %r: %r' % (self.mailboxname, detail))
        print('searching...')
        status, items = self.imap.search(None, 'All')
        email_id_list = (items[0] or b'').split()
        print('%d messages in  %s' % (len(email_id_list), self.mailboxname))
        if not email_id_list:
            # FETCH with an empty message set is rejected by the server.
            self.data = []
            return
        print('fetching headers...')
        # imaplib hands back bytes on Python 3; b',' joins those as well as
        # the plain strings returned on Python 2.
        email_ids = b','.join(email_id_list)
        status, self.data = self.imap.fetch(email_ids,
                                            "(BODY[HEADER.FIELDS (DATE)])")
        fetched = sum(1 for part in self.data if isinstance(part, tuple))
        print('%d headers fetched from %s' % (fetched, self.mailboxname))

    def _date_values(self):
        """Return the text after ``Date:`` for every fetched message.

        Only tuple entries of ``self.data`` carry a header payload; the
        other entries (such as the closing ``)``) are skipped.  A message
        without a Date header yields an empty string.
        """
        values = []
        for part in self.data or ():
            if not isinstance(part, tuple):
                continue
            header = part[1]
            if isinstance(header, bytes):
                header = header.decode('ascii', 'replace')
            # Split on the first colon instead of slicing six characters,
            # so "Date:" without a following space is handled as well.
            _, _, value = header.partition(':')
            values.append(value.strip())
        return values

    @staticmethod
    def _to_timestamp(value):
        """Return UTC epoch seconds for an RFC 2822 date string.

        Returns None when the string is empty or cannot be parsed.
        """
        if not value:
            return None
        try:
            parsed = parsedate_tz(value)
            if parsed is None:
                return None
            return mktime_tz(parsed)
        except (IndexError, OverflowError, TypeError, ValueError):
            return None

    def parse_dates(self, timezone):
        """Convert the fetched headers to datetimes stored in ``self.dtimes``.

        timezone -- name of the zone (as understood by pytz) the resulting
                    datetimes are expressed in.

        Messages whose Date header is missing or unparsable are skipped.
        """
        print('reformatting dates...')
        raw_dates = self._date_values()
        print('converting dates...')
        local_tz = pytz.timezone(timezone)  # built once, not per message
        stamps = [self._to_timestamp(value) for value in raw_dates]
        self.dtimes = [
            datetime.fromtimestamp(stamp, pytz.utc).astimezone(local_tz)
            for stamp in stamps if stamp is not None]
        skipped = len(stamps) - len(self.dtimes)
        if skipped:
            print('%d messages skipped (missing or unparsable Date header)'
                  % skipped)

    def plot_all(self):
        """Plot the number of messages per calendar month."""
        per_month = Counter((d.year, d.month) for d in self.dtimes)
        if not per_month:
            # Nothing to unpack into months/counts.
            print('no messages to plot')
            return
        keys = sorted(per_month)
        # Each bar is positioned at the first day of its month.
        months = [datetime(year, month, 1) for year, month in keys]
        counts = [per_month[key] for key in keys]
        _plot_helper(months, counts)

    def plotby_hours(self):
        """Plot the number of messages per hour of the day."""
        _plotby_helper((d.hour for d in self.dtimes), [0, 24])

    def plotby_minutes(self):
        """Plot the number of messages per minute of the hour."""
        _plotby_helper((d.minute for d in self.dtimes), [0, 60])

    def plotby_weekday(self):
        """Plot the number of messages per weekday (0 is Monday)."""
        _plotby_helper((d.weekday() for d in self.dtimes), [0, 7])
if __name__ == '__main__':
    # Account to log in with; replace the placeholder before running.
    email = '<your-email>@gmail.com'
    # Mailbox to analyse. Alternatives: "[Gmail]/Sent", or a label name
    # such as "my_label".
    mailbox_name = '[Gmail]/All'
    st = EmailStats(mailbox_name, email)
    st.plot_all()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment