Skip to content

Instantly share code, notes, and snippets.

@magnusnissel
Last active August 29, 2015 14:25
Show Gist options
  • Select an option

  • Save magnusnissel/1674886e4d0ef9f99d93 to your computer and use it in GitHub Desktop.

Select an option

Save magnusnissel/1674886e4d0ef9f99d93 to your computer and use it in GitHub Desktop.
A simple class to score text with LabMT and pandas based on the code at http://neuro.imm.dtu.dk/wiki/LabMT
import pandas as pd
import re
class LabMTScorer:
    """Score text for happiness with the LabMT word list and pandas.

    Based on the example provided by Finn Årup Nielsen
    at http://neuro.imm.dtu.dk/wiki/LabMT

    Attributes:
        avg_happiness: Mean of the ``happiness_average`` column.
        happiness: Dict mapping each word (the first column of the table) to
            its ``happiness_average`` minus ``avg_happiness``.
    """

    # Location used when no explicit source is passed to the constructor.
    DEFAULT_SOURCE = ('http://www.plosone.org/article/'
                      'fetchSingleRepresentation.action?'
                      'uri=info:doi/10.1371/journal.pone.0026752.s001')

    # A token is a maximal run of ASCII letters, digits, hyphens and
    # apostrophes; everything else is treated as a separator.
    _TOKEN_RE = re.compile(r"[0-9A-Za-z\-']+")

    def __init__(self, source=None):
        """Load the word list and build the mean-centred lookup table.

        Args:
            source: Anything ``pandas.read_csv`` accepts (path, URL or
                file-like object). The data must be tab-separated, with two
                lines before the header row, the word in the first column
                and a ``happiness_average`` column. Defaults to
                ``DEFAULT_SOURCE``.
        """
        if source is None:
            source = self.DEFAULT_SOURCE
        labmt_csv = pd.read_csv(source, skiprows=2, sep='\t', index_col=0)
        happiness_average = labmt_csv['happiness_average']
        self.avg_happiness = happiness_average.mean()
        # Centre the values so that unknown words, which score 0.0, count as
        # exactly average rather than as extremely unhappy.
        self.happiness = (happiness_average - self.avg_happiness).to_dict()

    def score_tokens(self, tokens):
        """Return the mean centred happiness of ``tokens``.

        Tokens are lower-cased before lookup; tokens missing from the word
        list contribute 0.0 but still count towards the mean.

        Args:
            tokens: Sized collection of word strings.

        Returns:
            The average score as a float, or 0.0 when ``tokens`` is empty.
        """
        count = len(tokens)
        if count == 0:
            # Nothing to average; report a neutral score instead of raising
            # ZeroDivisionError.
            return 0.0
        total = sum(self.happiness.get(tok.lower(), 0.0) for tok in tokens)
        return total / count

    def tokenize(self, text):
        """Split ``text`` into word tokens.

        Uses ``findall`` rather than ``split`` so that leading or trailing
        punctuation does not produce empty-string tokens, which would
        otherwise inflate the token count and dilute the score.

        Args:
            text: The string to tokenize.

        Returns:
            List of non-empty tokens in order of appearance.
        """
        return self._TOKEN_RE.findall(text)

    def score_text(self, text):
        """Tokenize ``text`` and return its mean centred happiness score."""
        return self.score_tokens(self.tokenize(text))
def test():
    """Print the score of two sample sentences as a quick smoke test.

    Builds a scorer with no explicit source, so the word list is loaded
    from the scorer's default location.
    """
    scorer = LabMTScorer()
    samples = (
        "He was a bad, bad man. Utterly corrupt.",
        "She's the best. So awesome and great.",
    )
    for sample in samples:
        print(sample, scorer.score_text(sample))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment