Created
July 8, 2013 15:27
-
-
Save fnielsen/5949814 to your computer and use it in GitHub Desktop.
AFINN scoring on different dimensions http://fnielsen.tumblr.com/post/54918264238/afinn-scoring-on-different-dimensions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Sample text, lexicon location and tokenisation for the AFINN scoring below.

import os.path
import re

# Example text to score, taken from http://rainbowdash.net/notice/2764341
text = (
    "@zeldatra I'm surprised you got my hair spot on though, "
    "considering how I exaggerate it so much. "
    "Thanks! Although I dunno why?"
)

# Location of the AFINN word list under the user's home directory.
FILENAME_AFINN = os.path.join(
    os.path.expanduser('~'), 'data', 'AFINN', 'AFINN-111.txt')

# Find the words in the text.
# A word is a run of letters/digits (no underscore); further runs joined by a
# single hyphen are kept in the same token, so "short-sighted" stays in one
# piece.  A hyphen that is not followed by a letter/digit is not part of the
# word (the previous pattern attached the hyphen plus trailing punctuation or
# whitespace instead).
pattern_word = re.compile(r'[^\W_]+(?:-[^\W_]+)*', flags=re.UNICODE)
words = pattern_word.findall(text)
# Read in the dictionary with valence:
# https://gist.github.com/fnielsen/4183541
# Each non-empty line is "<word><TAB><integer valence>".  The file is read as
# bytes and decoded explicitly as UTF-8 so the result does not depend on the
# platform's default encoding, and the handle is closed as soon as the
# dictionary is built.
afinn = {}
with open(FILENAME_AFINN, 'rb') as afinn_file:
    for raw_line in afinn_file:
        line = raw_line.strip().decode('utf-8')
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line).
            continue
        word, score = line.split('\t')
        afinn[word] = int(score)

# Per-word scores.  Real lists (not lazy iterators) are required because each
# sequence is both summed here and iterated again when the table is printed.
# Words missing from the dictionary score 0; lookup is case-insensitive.
valences = [float(afinn.get(w.lower(), 0)) for w in words]
# Arousal is the strength of the valence regardless of sign.
arousals = [abs(v) for v in valences]
# Positive / negative parts of the valence, both reported as non-negative.
positives = [v if v > 0 else 0.0 for v in valences]
negatives = [-v if v < 0 else 0.0 for v in valences]

# Totals over the whole text.
valence = sum(valences)
arousal = sum(arousals)
positive = sum(positives)
negative = sum(negatives)
# Ambivalence: how much positive and negative valence cancelled each other.
ambivalence = arousal - abs(valence)
def format_row(header, items, total):
    """Print one HTML table row to standard output.

    The row consists of a ``<th>`` cell holding ``header`` followed by a
    colon, one ``<td>`` cell per element of ``items`` and a final ``<td>``
    cell holding ``total``.  Closing tags are omitted.

    Args:
        header: Row label; must be a string (it is concatenated directly).
        items: Iterable of cell values; each is converted with ``str``.
        total: Value for the last cell; converted with ``str``.

    Returns:
        None. The row is printed, not returned.
    """
    cells = '<td>'.join(str(item) for item in items)
    print('<tr><th>' + header + ':<td>' + cells + '<td>' + str(total))
# Emit the style sheet for the result table.
print("""
<style type="text/css">
table {
border-width: 1px;
border-spacing: 2px;
border-style: solid;
border-color: black;
border-collapse: collapse;
background-color: white;
}
table th {
border-width: 1px;
padding: 2px;
border-style: inset;
border-color: gray;
background-color: white;
}
table td {
border-width: 1px;
padding: 2px;
border-style: inset;
border-color: gray;
background-color: white;
}
</style>
""")

# Layout switch: False prints one row per word (the default); True prints the
# transposed table with one column per word and one row per dimension.
WORDS_AS_COLUMNS = False

if WORDS_AS_COLUMNS:
    print('<table>')  # style="border:1px #000 solid; border-collapse: collapse;">')
    format_row('Words', words, 'Total')
    format_row('Valences', valences, valence)
    format_row('Arousal', arousals, arousal)
    format_row('Positive', positives, positive)
    format_row('Negative', negatives, negative)
    # Ambivalence is only defined for the whole text, so per-word cells stay empty.
    format_row('Ambivalence', [''] * len(words), ambivalence)
    print('</table>')
else:
    print('<table>')  # style="border:1px #000 solid; border-collapse: collapse;">')
    # Column titles.
    print('<tr><td>' + "<td>".join(['Word', 'Valence', 'Arousal', 'Positive', 'Negative', 'Ambivalence']))
    # One row per word; there is no per-word ambivalence, so rows have five cells.
    for items in zip(words, valences, arousals, positives, negatives):
        print('<tr><td>' + '<td>'.join(map(str, items)))
    # Final row with the totals, including the text-level ambivalence.
    print('<tr><td>' + '<td>'.join(map(str, ['Total', valence, arousal, positive, negative, ambivalence])))
    print('</table>')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment