Created
May 25, 2016 22:31
-
-
Save githoov/752a593c59ae7695b7ed5fb5b741377c to your computer and use it in GitHub Desktop.
Analysis of Chat Text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# preliminaries | |
from nltk.sentiment.vader import SentimentIntensityAnalyzer | |
from nltk.stem.porter import PorterStemmer | |
from nltk import tokenize, pos_tag | |
import csv | |
import re | |
# read in data
# csv.reader must be fed text rows on Python 3, so the file is opened in text
# mode with newline="" (as the csv documentation requires) rather than the
# Python 2 style "rb" mode, which yields bytes and makes the reader fail.
# NOTE(review): assumes the export is UTF-8 encoded -- confirm.
# The handle gets its own name so it can be closed once `comments` has been
# consumed; `comments` itself remains a lazy, single-pass reader.
comments_file = open(
    "/Users/scott/Downloads/issue_comments.csv", newline="", encoding="utf-8"
)
comments = csv.reader(comments_file)

# fire up porter stemmer and sentiment analyzer
st = PorterStemmer()
sid = SentimentIntensityAnalyzer()
def get_mentions(line):
    """Return every '@'-prefixed run of non-whitespace characters in *line*.

    Matches come back as a list of strings in order of appearance, each one
    including its leading '@'. The list is empty when nothing matches.
    """
    mention_pattern = re.compile('@{1}\\S+')
    return mention_pattern.findall(line)
def tokenize_lines(lines):
    """Split *lines* into sentences using NLTK's ``sent_tokenize``."""
    sentences = tokenize.sent_tokenize(lines)
    return sentences
def tokenize_sentence(line):
    """Word-tokenize text, one token list per sentence.

    Args:
        line: Either a single string, which is first split into sentences
            with ``tokenize.sent_tokenize``, or an iterable of sentence
            strings (for example the result of ``tokenize_lines``).

    Returns:
        A list with one entry per sentence; each entry is the result of
        ``tokenize.word_tokenize`` for that sentence.
    """
    if isinstance(line, str):
        # A raw string has to be cut into sentences before word-tokenizing;
        # iterating it directly would walk it character by character.
        sentences = tokenize.sent_tokenize(line)
    else:
        sentences = line
    return [tokenize.word_tokenize(sent) for sent in sentences]
def stem_text(text):
    """Stem each space-separated piece of *text* and rejoin with spaces.

    The text is split on the single-space character only, every resulting
    piece is passed through the module-level stemmer ``st``, and the stemmed
    pieces are joined back together with single spaces.
    """
    stemmed_words = []
    for word in text.split(' '):
        stemmed_words.append(st.stem(word))
    return ' '.join(stemmed_words)
def get_polarity(line):
    """Return the scores ``sid.polarity_scores`` computes for *line*.

    ``sid`` is the module-level ``SentimentIntensityAnalyzer`` instance.
    """
    scores = sid.polarity_scores(line)
    return scores
if __name__ == "__main__":
    # The original scratch statements referenced `foo`, which is never
    # defined in this file, and discarded their results.
    # NOTE(review): presumably `foo` was one row of `comments` and column 5
    # holds the comment text -- confirm against the CSV layout.
    for foo in comments:
        if len(foo) <= 5:
            # Row has no column 5 (blank or malformed line); nothing to analyse.
            continue
        # Word-by-word stemming of the comment text.
        print(stem_text(foo[5]))
        # Stemmer applied to the whole text as a single token, as in the
        # original exploration.
        print(st.stem(foo[5]))
        # '@'-prefixed mentions found in the comment text.
        print(get_mentions(foo[5]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment