This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# About: Extension of gmail_mbox_parser.py that parses Gmail Google Takeout exports in .mbox format and clusters senders, so you can quickly triage which information sources may no longer be relevant. It also adds command-line arguments and exports per-sender statistics to a .csv file.
# Based on https://gist.github.com/benwattsjones/060ad83efd2b3afc8b229d41f9b246c4, expanded to add command-line arguments, cluster senders, and export a .csv file with those statistics.
import re | |
import argparse | |
import mailbox | |
from collections import Counter |