Created
April 17, 2024 10:52
-
-
Save NiceRath/5e4606be0ff8c96bcbfe4740f1c3e3bf to your computer and use it in GitHub Desktop.
Script to analyze Google Workspace/Gmail Mailboxes (Mailing Lists, Top Senders)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Analyze Google Workspace/Gmail mailboxes (mailing lists, top senders).
#
# Usage: <script> [TAKEOUT_DIR]
#   TAKEOUT_DIR  directory that contains one sub-directory per user
#                (named like the mail address, i.e. containing an '@');
#                defaults to the current directory.
#
# NOTES:
#   to use on backup files created by Google Takeout: https://support.google.com/accounts/answer/3024190?hl=en
#   creates lists of top N mail senders & distribution-lists @ /tmp
#   can be used to create Google Vault retentions to clean-up old mails or spam: https://support.google.com/vault/answer/2990828?hl=en
#
# Per-user results are cached in /tmp (<user>.mlists, .cntmlists, .mfrom,
# .mfromclean, .cntmfrom); an existing cache file is reused, delete it to
# force a re-scan of that user.

set -eo pipefail
# Unmatched globs expand to nothing instead of the literal pattern.
shopt -s nullglob

readonly OUT_DIR='/tmp'
readonly USER_MBOX_PREFIX='All'  # may differ..

# grep in the C locale where "no line matched" (exit 1) counts as success.
# Real grep errors (exit >= 2) are still returned, so 'set -e'/'pipefail'
# only abort the run on genuine failures, not on empty results.
grep_allow_empty() {
  local rc=0
  LC_ALL=C grep "$@" || rc=$?
  (( rc <= 1 )) || return "$rc"
}

# Run "$2..." with stdout captured in "$1.part" and rename it to "$1" once
# the command succeeded, so an interrupted run never leaves a truncated
# cache file behind that a later run would silently reuse.
cache_to() {
  local target="$1"
  shift
  "$@" > "${target}.part"
  mv -- "${target}.part" "${target}"
}

# stdin: one item per line.
# stdout: 'uniq -c' style "<count> <item>" rows, highest count first.
# $1 (optional): maximum number of rows to emit; 0/absent = unlimited.
# awk consumes the whole stream (unlike 'head'), so 'sort' can never be
# killed by SIGPIPE, which 'pipefail' would report as a failure.
count_desc() {
  local limit="${1:-0}"
  sort | uniq -c | sort -nr \
    | awk -v limit="${limit}" 'limit == 0 || NR <= limit'
}

# $@: mbox files. stdout: third space-separated field of every line that
# contains "Mailing-list".
# -a: never treat an mbox as binary; -h: no "file:" prefix (the file names
# may contain spaces, which would shift the fields seen by 'cut').
list_mailing_lists() {
  grep_allow_empty -ahF -e 'Mailing-list' -- "$@" | cut -d ' ' -f3
}

# $@: mbox files. stdout: all raw "From:" header lines.
list_from_headers() {
  grep_allow_empty -ah -e '^From:' -- "$@"
}

# stdin: raw "From:" lines. stdout: addresses of senders that have no
# pretty display name ("From: user@example.com"). Encoded or folded
# headers are skipped.
extract_plain_senders() {
  grep_allow_empty -Ev -e 'UTF-8|iso-8859|=$' \
    | grep_allow_empty -v -e '<' \
    | cut -d ' ' -f2 \
    | grep_allow_empty -Ev -e '=|"' \
    | grep_allow_empty -e '@'
}

# stdin: raw "From:" lines. stdout: the raw address of senders that have a
# pretty display name ("From: Name <user@example.com>").
# 'sed -n ... p' prints each matching line exactly once.
extract_named_senders() {
  grep_allow_empty -Ev -e 'UTF-8|iso-8859' \
    | grep_allow_empty -e '<' \
    | sed -n -E 's/.*<(.*)>.*/\1/p' \
    | grep_allow_empty -Ev -e '[[:space:]]|=|"' \
    | grep_allow_empty -e '@'
}

# $1: file with raw "From:" lines. stdout: all usable sender addresses.
clean_senders() {
  extract_plain_senders < "$1"
  extract_named_senders < "$1"
}

# $1: user directory name (relative to the current directory).
# Builds the per-user cache/result files in OUT_DIR; does nothing when the
# user has no "<USER_MBOX_PREFIX>*.mbox" file.
process_user() {
  local user="$1"
  local base="${OUT_DIR}/${user}"
  local -a mboxes=("./${user}/${USER_MBOX_PREFIX}"*.mbox)
  (( ${#mboxes[@]} > 0 )) || return 0

  # mails to/from mailing lists
  echo "$(date '+%Y/%m/%d %H:%M:%S') PROCESSING ${user} (Mailing-Lists)"
  if [[ ! -f "${base}.mlists" ]]; then
    cache_to "${base}.mlists" list_mailing_lists "${mboxes[@]}"
  fi
  if [[ ! -f "${base}.cntmlists" ]]; then
    cache_to "${base}.cntmlists" count_desc < "${base}.mlists"
  fi

  # mail senders
  echo "$(date '+%Y/%m/%d %H:%M:%S') PROCESSING ${user} (From)"
  if [[ ! -f "${base}.mfrom" ]]; then
    cache_to "${base}.mfrom" list_from_headers "${mboxes[@]}"
  fi
  if [[ ! -f "${base}.cntmfrom" || ! -f "${base}.mfromclean" ]]; then
    cache_to "${base}.mfromclean" clean_senders "${base}.mfrom"
    cache_to "${base}.cntmfrom" count_desc 100 < "${base}.mfromclean"
  fi
}

# $1 (optional): path to the takeout; if no path is provided we assume it
# is in the current directory.
main() {
  local takeout_dir="${1:-.}"
  local user

  cd -- "${takeout_dir}" || {
    printf 'cannot change into takeout directory: %s\n' "${takeout_dir}" >&2
    exit 1
  }

  # Glob instead of parsing 'ls': safe for names with whitespace.
  for user in *@*; do
    process_user "${user}"
  done

  echo "FULL COUNT"
  echo "Mailing Lists"
  # The trailing /dev/null keeps 'cat' valid when no per-user file exists.
  cat -- "${OUT_DIR}"/*.mlists /dev/null | count_desc > "${OUT_DIR}/full.cntlists"
  echo "Mailing From"
  cat -- "${OUT_DIR}"/*.mfromclean /dev/null | count_desc 10000 > "${OUT_DIR}/full.cntfrom"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment