Created
November 10, 2019 10:18
-
-
Save tirkarthi/3a5f968d9442d62f12c0f43667caa314 to your computer and use it in GitHub Desktop.
A script that plots, by hour of day, the emails I sent for CPython to the python-bugs mailing list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import calendar | |
from collections import Counter | |
import datetime | |
import gzip | |
import os | |
from urllib.request import urlretrieve | |
from urllib.error import HTTPError | |
import matplotlib.pylab as plt | |
# calendar.month_name is indexable by month number; index 0 is an empty
# placeholder, which is why callers slice it with [1:].
months = calendar.month_name

# Name of the locally cached archive for one month, e.g. "2018-November.txt.gz".
FILENAME_FORMAT = "{year}-{month}.txt.gz"

# Remote location of the monthly python-bugs-list archive; takes the same
# {year} and {month} fields as FILENAME_FORMAT.
BASE_URL = "https://mail.python.org/pipermail/python-bugs-list/{year}-{month}.txt.gz"
def process_file(filename):
    """Collect the local-time hours of off-hours messages in one archive.

    Scans the gzip-compressed text archive *filename* for messages whose
    ``From:`` header matches the hard-coded sender, reads the ``Date:``
    header on the line that immediately follows, and shifts that UTC
    timestamp by +05:30.

    A message's hour (0-23) is recorded only when, after the shift, it falls
    before 08:00, at or after 18:00, or on a Saturday or Sunday.

    Args:
        filename: Path to a ``.txt.gz`` archive.

    Returns:
        A list of integer hours, one per recorded message, in file order.

    Raises:
        ValueError: If a ``Date:`` header following a matching sender line
            is not in the ``"%a, %d %b %Y %H:%M:%S +0000"`` format.
    """
    sender_header = "From: report at bugs.python.org (Karthikeyan Singaravelan)"
    date_prefix = "Date: "
    date_format = "%a, %d %b %Y %H:%M:%S +0000"
    local_offset = datetime.timedelta(hours=5, minutes=30)

    # Decode leniently: only the ASCII header lines above are inspected, so
    # an undecodable byte elsewhere in a message should not abort the scan.
    with gzip.open(filename, "rt", encoding="utf-8", errors="replace") as f:
        lines = list(f)

    hours = []
    # Pairing each line with its successor means a sender line that happens
    # to be the very last line of the file is simply never examined, instead
    # of indexing past the end of the list.
    for header, following in zip(lines, lines[1:]):
        if sender_header not in header:
            continue
        if not following.startswith(date_prefix):
            # No Date header directly after the sender line; nothing to parse.
            continue
        timestamp = following[len(date_prefix):].strip()
        sent = datetime.datetime.strptime(timestamp, date_format) + local_offset
        # weekday() is 0 for Monday, so Saturday is 5 and Sunday is 6.
        is_weekend = sent.weekday() >= 5
        is_off_hours = sent.hour < 8 or sent.hour >= 18
        if is_off_hours or is_weekend:
            hours.append(sent.hour)
    return hours
def plot(emails_per_hour):
    """Show a bar chart of message counts per hour, smallest count first.

    Args:
        emails_per_hour: Mapping of hour to number of messages.
    """
    # One stable sort of the (hour, count) pairs keeps each bar and its tick
    # label aligned; hours with equal counts stay in the mapping's own order.
    ranked = sorted(emails_per_hour.items(), key=lambda pair: pair[1])
    labels = [hour for hour, _ in ranked]
    counts = [count for _, count in ranked]
    plt.bar(range(len(counts)), counts, tick_label=labels)
    plt.show()
def process_mailing_list():
    """Tally recorded message hours across the 2018 and 2019 archives.

    For every month of both years, the archive is downloaded into the
    current directory unless a file with the expected name already exists,
    and is then passed to ``process_file``. Months whose download fails
    with an HTTP error are skipped.

    Returns:
        A ``Counter`` mapping hour to the number of recorded messages.
    """
    hours = []
    for year in (2018, 2019):
        for month in months[1:]:
            month = month.capitalize()
            filename = FILENAME_FORMAT.format(year=year, month=month)
            if not os.path.exists(filename):
                url = BASE_URL.format(year=year, month=month)
                # Download under a temporary name and rename on success, so
                # an interrupted transfer is never mistaken for a cached
                # archive on a later run.
                partial = filename + ".part"
                try:
                    urlretrieve(url, partial)
                except HTTPError:
                    # Best effort: presumably no archive is published for
                    # this month (e.g. it lies in the future) - skip it.
                    continue
                os.replace(partial, filename)
            # Process the archive whether it was cached or just downloaded;
            # otherwise a first run would yield an empty tally.
            hours.extend(process_file(filename))
    return Counter(hours)
def main():
    """Build the per-hour tally from the mailing-list archives and chart it."""
    plot(process_mailing_list())


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment