Skip to content

Instantly share code, notes, and snippets.

@shkesar
Created May 2, 2016 06:19
Show Gist options
  • Save shkesar/be9009d83b82548edd16cd5a7d300457 to your computer and use it in GitHub Desktop.
Save shkesar/be9009d83b82548edd16cd5a7d300457 to your computer and use it in GitHub Desktop.
NLP in Python to test sentiment in a mailbox
# One-time setup step (presumably the config.py that the install script runs):
# download NLTK's "stopwords" corpus, which the analysis code later reads
# through nltk.corpus.stopwords.
import nltk
nltk.download('stopwords')
# Install the interpreter and Python packages the analysis needs, fetch the
# NLTK data, then run the analysis itself.
echo "Installing dependencies"
brew install python
pip install ipython
pip install pandas
pip install nltk
# The analysis code imports textblob, so it has to be installed too.
pip install textblob
# printf rather than echo: bash's builtin echo prints "\n" literally unless
# it is given -e, so the intended blank lines never appeared.
printf '\n\nConfiguring ...\n'
ipython --quiet config.py
ipython sent_analysis.py
import os
from mailbox import mbox

import pandas as pd
def store_content(message, body=None):
    """Return a copy of the global ``df`` with one row added for *message*.

    The module-level ``df`` itself is not modified; the caller is expected to
    rebind it to the returned frame.

    Args:
        message: An ``email.message.Message`` (or subclass) to record.
        body: Payload to store in the "body" column.  When ``None`` the
            decoded payload of *message* itself is used.

    Returns:
        A new DataFrame consisting of ``df`` plus one row for *message*.

    Raises:
        ValueError: If *message* carries no headers at all, in which case
            there is nothing meaningful to record.
    """
    # "is None" rather than truthiness: an explicitly supplied empty body
    # must be kept, not replaced by the container message's payload.
    if body is None:
        body = message.get_payload(decode=True)

    # len() of a Message is its header count.
    if len(message) == 0:
        raise ValueError("message has no headers")

    contents = {
        "subject": message['subject'] or "",
        "body": body,
        "from": message['from'],
        "to": message['to'],
        "date": message['date'],
        "labels": message['X-Gmail-Labels'],
        "epilogue": message.epilogue,
    }
    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    # and likewise returns a new frame.
    row = pd.DataFrame([contents])
    return pd.concat([df, row], ignore_index=True)
# Create an empty DataFrame with the relevant columns
df = pd.DataFrame(
    columns=("subject", "body", "from", "to", "date", "labels", "epilogue"))

# Import your downloaded mbox file.  The mailbox module does not expand "~"
# itself, and create=False makes a wrong path fail loudly instead of
# creating a new, empty mailbox file.
box = mbox(os.path.expanduser('~/Desktop/Sent.mbox/mbox'), create=False)

# Messages that raised while being inspected or stored, kept for inspection.
fails = []
for message in box:
    try:
        if message.get_content_type() == 'text/plain':
            df = store_content(message)
        elif message.is_multipart():
            # Store the first plain-text part.  walk() also descends into
            # nested containers (e.g. multipart/alternative inside
            # multipart/mixed), which a scan of only the top-level parts
            # would miss.
            for part in message.walk():
                if part.get_content_type() == 'text/plain':
                    df = store_content(message, part.get_payload(decode=True))
                    break
    except Exception:
        # Best effort: remember the message and carry on.  Catching
        # Exception (not a bare except) lets Ctrl-C still stop the run.
        fails.append(message)
from nltk.corpus import stopwords
from collections import Counter

# All subjects, lower-cased and space-terminated, as one string.  Joining
# avoids the repeated string concatenation of Series.sum() and yields ""
# (not the integer 0) when there are no mails, so split() below stays valid.
subject_word_bag = "".join(subject.lower() + " " for subject in df.subject)

# A set gives constant-time membership tests.  The stop words are used as
# returned by NLTK (no unicode() call), so this runs on Python 2 and 3.
stops = set(stopwords.words('english')) | {'re:', 'fwd:', '-'}

# The bag is already lower-cased, so the words can be compared directly.
subject_words = [word for word in subject_word_bag.split() if word not in stops]
from textblob import TextBlob


def _to_text(value):
    """Return *value* as text, decoding byte strings as ASCII.

    Undecodable bytes are dropped, matching the previous
    ``unicode(s, errors='ignore')`` under the default ASCII encoding, while
    values that are already text pass through unchanged (so this also works
    on Python 3, where ``unicode`` does not exist).
    """
    if isinstance(value, bytes):
        return value.decode('ascii', 'ignore')
    return value


# Polarity score of every subject line, as computed by TextBlob.
df['feels'] = df.subject.apply(
    lambda s: TextBlob(_to_text(s)).sentiment.polarity)

# Add the scores with the builtin sum(); the old accumulator was itself named
# "sum" and shadowed that builtin for the rest of the module.
total_polarity = sum(df['feels'])

# df starts with zero rows and gains exactly one per stored mail, so its
# length is the mail count; the former "- 1" under-reported it by one.
# Parenthesised single-argument print works on both Python 2 and 3.
print("Total mails - " + str(len(df['subject'])))
print(total_polarity)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment