shkesar · May 2, 2016 06:19
diff --git a/config.py b/config.py

 # Fetch the NLTK "stopwords" corpus that sent_analysis.py loads through
 # nltk.corpus.stopwords.
 import nltk

 nltk.download("stopwords")
diff --git a/run.sh b/run.sh
 # Install the interpreter and every package the analysis imports, then
 # fetch the NLTK data (config.py) and run the analysis itself.
 echo "Installing dependencies"
 brew install python
 pip install ipython
 pip install pandas
 pip install nltk
 # sent_analysis.py imports textblob, so it has to be installed as well.
 pip install textblob
 # printf expands \n in every POSIX shell; echo only does so in some of them.
 printf '\n\nConfiguring ...\n'
 ipython --quiet config.py
 ipython sent_analysis.py
diff --git a/sent_analysis.py b/sent_analysis.py
 from mailbox import mbox
 import pandas as pd

 def store_content(message, body=None):
     """Return the module-level ``df`` with one row added for *message*.

     Args:
         message: A mailbox message; it is read through header lookups
             (``message['subject']`` etc.), ``get_payload``, ``epilogue``
             and ``len``.
         body: Body to store for the message. When ``None`` the message's
             own decoded payload is used instead.

     Returns:
         A new DataFrame made of ``df`` plus one row for *message*. ``df``
         itself is returned unchanged when the message has no headers, so
         callers can always rebind ``df`` to the result.
     """
     # Only fall back when no body was supplied: an empty body is still a
     # body and must not be replaced by the container's payload.
     if body is None:
         body = message.get_payload(decode=True)

     # len() of a message is its header count; a header-less message has
     # nothing worth recording. Hand back df rather than an implicit None.
     if not len(message):
         return df

     contents = {
         "subject": message['subject'] or "",
         "body": body,
         "from": message['from'],
         "to": message['to'],
         "date": message['date'],
         "labels": message['X-Gmail-Labels'],
         "epilogue": message.epilogue,
     }
     # DataFrame.append no longer exists in pandas 2.x; concat is available in
     # every pandas release. Lay the new row out in df's column order (plus
     # any key df does not have yet) so the existing column order is kept.
     columns = list(df.columns) + [
         key for key in contents if key not in df.columns]
     row = pd.DataFrame([contents], columns=columns)
     return pd.concat([df, row], ignore_index=True)

 # Create an empty DataFrame with the relevant columns
 df = pd.DataFrame(
     columns=("subject", "body", "from", "to", "date", "labels", "epilogue"))

 # Import your downloaded mbox file
 box = mbox('~/Desktop/Sent.mbox/mbox')

 # Store every message that has a plain-text body. Messages that are neither
 # text/plain nor multipart are skipped; messages that raise while being
 # processed are collected in `fails` for later inspection.
 fails = []
 for message in box:
     try:
         stored = None
         if message.get_content_type() == 'text/plain':
             stored = store_content(message)
         elif message.is_multipart():
             # plaintext from multipart messages: first text/plain part wins
             for part in message.get_payload():
                 if part.get_content_type() == 'text/plain':
                     stored = store_content(
                         message, part.get_payload(decode=True))
                     break
         # Guard against store_content() handing back None, so that a single
         # odd message cannot wipe out the rows collected so far.
         if stored is not None:
             df = stored
     except Exception:
         # Not a bare except: Ctrl-C and SystemExit must still stop the run.
         fails.append(message)

 from nltk.corpus import stopwords

 from collections import Counter

 # join() is linear and yields "" for an empty mailbox, whereas summing the
 # strings is quadratic and yields the number 0 when there are no rows.
 subject_word_bag = " ".join(subject.lower() for subject in df.subject)

 # A set keeps the per-word membership test below constant-time.
 stops = set(unicode(word) for word in stopwords.words('english'))
 stops.update(['re:', 'fwd:', '-'])
 subject_words = [
     word for word in subject_word_bag.split() if word.lower() not in stops]

 # Score each subject line; polarity is TextBlob's sentiment value for it.
 from textblob import TextBlob
 df['feels'] = df.subject.apply(
     lambda s: TextBlob(unicode(s, errors='ignore')).sentiment.polarity)

 # Use the builtin sum() instead of shadowing it with a variable.
 total_polarity = sum(df['feels'])

 # Every row of df is one stored mail (there is no header row), so the row
 # count is the mail count.
 print("Total mails - " + str(len(df)))
 print(total_polarity)
	# Install the interpreter and every package the analysis imports, then
	# fetch the NLTK data (config.py) and run the analysis itself.
	echo "Installing dependencies"
	brew install python
	pip install ipython
	pip install pandas
	pip install nltk
	# sent_analysis.py imports textblob, so it has to be installed as well.
	pip install textblob
	# printf expands \n in every POSIX shell; echo only does so in some of them.
	printf '\n\nConfiguring ...\n'
	ipython --quiet config.py
	ipython sent_analysis.py
	from mailbox import mbox
	import pandas as pd

	def store_content(message, body=None):
	    """Return the module-level ``df`` with one row added for *message*.

	    Args:
	        message: A mailbox message; it is read through header lookups
	            (``message['subject']`` etc.), ``get_payload``, ``epilogue``
	            and ``len``.
	        body: Body to store for the message. When ``None`` the message's
	            own decoded payload is used instead.

	    Returns:
	        A new DataFrame made of ``df`` plus one row for *message*. ``df``
	        itself is returned unchanged when the message has no headers, so
	        callers can always rebind ``df`` to the result.
	    """
	    # Only fall back when no body was supplied: an empty body is still a
	    # body and must not be replaced by the container's payload.
	    if body is None:
	        body = message.get_payload(decode=True)

	    # len() of a message is its header count; a header-less message has
	    # nothing worth recording. Hand back df rather than an implicit None.
	    if not len(message):
	        return df

	    contents = {
	        "subject": message['subject'] or "",
	        "body": body,
	        "from": message['from'],
	        "to": message['to'],
	        "date": message['date'],
	        "labels": message['X-Gmail-Labels'],
	        "epilogue": message.epilogue,
	    }
	    # DataFrame.append no longer exists in pandas 2.x; concat is available in
	    # every pandas release. Lay the new row out in df's column order (plus
	    # any key df does not have yet) so the existing column order is kept.
	    columns = list(df.columns) + [
	        key for key in contents if key not in df.columns]
	    row = pd.DataFrame([contents], columns=columns)
	    return pd.concat([df, row], ignore_index=True)

	# Create an empty DataFrame with the relevant columns
	df = pd.DataFrame(
	    columns=("subject", "body", "from", "to", "date", "labels", "epilogue"))

	# Import your downloaded mbox file
	box = mbox('~/Desktop/Sent.mbox/mbox')

	# Store every message that has a plain-text body. Messages that are neither
	# text/plain nor multipart are skipped; messages that raise while being
	# processed are collected in `fails` for later inspection.
	fails = []
	for message in box:
	    try:
	        stored = None
	        if message.get_content_type() == 'text/plain':
	            stored = store_content(message)
	        elif message.is_multipart():
	            # plaintext from multipart messages: first text/plain part wins
	            for part in message.get_payload():
	                if part.get_content_type() == 'text/plain':
	                    stored = store_content(
	                        message, part.get_payload(decode=True))
	                    break
	        # Guard against store_content() handing back None, so that a single
	        # odd message cannot wipe out the rows collected so far.
	        if stored is not None:
	            df = stored
	    except Exception:
	        # Not a bare except: Ctrl-C and SystemExit must still stop the run.
	        fails.append(message)

	from nltk.corpus import stopwords

	from collections import Counter

	# join() is linear and yields "" for an empty mailbox, whereas summing the
	# strings is quadratic and yields the number 0 when there are no rows.
	subject_word_bag = " ".join(subject.lower() for subject in df.subject)

	# A set keeps the per-word membership test below constant-time.
	stops = set(unicode(word) for word in stopwords.words('english'))
	stops.update(['re:', 'fwd:', '-'])
	subject_words = [
	    word for word in subject_word_bag.split() if word.lower() not in stops]

	# Score each subject line; polarity is TextBlob's sentiment value for it.
	from textblob import TextBlob
	df['feels'] = df.subject.apply(
	    lambda s: TextBlob(unicode(s, errors='ignore')).sentiment.polarity)

	# Use the builtin sum() instead of shadowing it with a variable.
	total_polarity = sum(df['feels'])

	# Every row of df is one stored mail (there is no header row), so the row
	# count is the mail count.
	print("Total mails - " + str(len(df)))
	print(total_polarity)