joshz · July 19, 2012 20:10
diff --git a/gistfile1.py b/gistfile1.py
 #!/usr/bin/python 
 #
 # wget http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/6010/zip/imm6010.zip
 # unzip imm6010.zip

 import math
 import re
 import sys
 reload(sys)
 sys.setdefaultencoding('utf-8')

 # AFINN-111 is as of June 2011 the most recent version of AFINN.
 # Each non-blank line is parsed as "<word or phrase>\t<integer valence>".
 filenameAFINN = 'AFINN/AFINN-111.txt'

 # Map every AFINN entry to its integer valence. The file is read inside a
 # ``with`` block so the handle is closed as soon as the table is built, and
 # plain tuple unpacking in a generator expression replaces the tuple-parameter
 # lambda. Blank lines (e.g. a trailing newline at the end of the file) are
 # skipped instead of breaking the two-field unpack.
 with open(filenameAFINN) as afinn_file:
     afinn = dict(
         (word, int(score))
         for word, score in (
             line.strip().split('\t') for line in afinn_file if line.strip()
         )
     )

 # Word splitter pattern: any run of non-word characters separates two tokens.
 pattern_split = re.compile(r"\W+")

 def sentiment(text):
    """
    Return a float for sentiment strength based on the input text.

    Positive values are positive valence, negative values are negative
    valence. The text is lower-cased and split on runs of non-word
    characters; every token is scored with its AFINN valence (0 when the
    token is not in the word list) and the summed valence is divided by
    sqrt(N), where N is the number of tokens.

    Returns 0.0 when the text contains no tokens at all.
    """
    # re.split() yields empty strings when the text starts or ends with a
    # separator (e.g. "stupid!"); drop them so that surrounding punctuation
    # does not inflate N and dilute the score.
    words = [word for word in pattern_split.split(text.lower()) if word]
    if not words:
        return 0.0
    # A real list rather than map(): its truthiness and len() do not depend
    # on map() returning a list.
    scores = [afinn.get(word, 0) for word in words]
    # How should you weight the individual word sentiments?
    # You could do N, sqrt(N) or 1 for example. Here sqrt(N) is used.
    return float(sum(scores)) / math.sqrt(len(scores))



 if __name__ == '__main__':
    # Single sentence example:
    text = "Finn is stupid and idiotic"
    print("%6.2f %s" % (sentiment(text), text))

    # No negation and booster words handled in this approach
    text = "Finn is only a tiny bit stupid and not idiotic"
    print("%6.2f %s" % (sentiment(text), text))

    # Example with downloading from Twitter:
    import contextlib
    import json as simplejson
    import urllib

    query = "groupon"
    # urlencode() escapes the query, so terms containing spaces, '#' or '&'
    # still produce a valid URL; for "groupon" the URL is unchanged.
    # NOTE(review): this is an unauthenticated search endpoint -- confirm it
    # still responds before relying on this example.
    url = ("http://search.twitter.com/search.json?"
           + urllib.urlencode({'q': query}))
    # closing() releases the HTTP connection once the JSON has been parsed.
    with contextlib.closing(urllib.urlopen(url)) as response:
        jsont = simplejson.load(response)
    # Debugging aid: dump the fields of the first result.
    #for item in jsont['results'][:1]:
        #for k, v in item.iteritems():
            #print("{:<20} {}".format(k, v))
    # A response without a 'results' key is treated like an empty result set.
    sentiments = [sentiment(tweet['text'])
                  for tweet in jsont.get('results', [])]
    if sentiments:
        print("%6.2f %s" % (sum(sentiments)/math.sqrt(len(sentiments)), query))
    else:
        # Without this guard an empty result set divides by sqrt(0).
        print("No results found for %s" % query)
	#!/usr/bin/python
	#
	# wget http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/6010/zip/imm6010.zip
	# unzip imm6010.zip

	import math
	import re
	import sys
	reload(sys)
	sys.setdefaultencoding('utf-8')

	# AFINN-111 is as of June 2011 the most recent version of AFINN.
	# Each non-blank line is parsed as "<word or phrase>\t<integer valence>".
	filenameAFINN = 'AFINN/AFINN-111.txt'

	# Map every AFINN entry to its integer valence. The file is read inside a
	# ``with`` block so the handle is closed as soon as the table is built, and
	# plain tuple unpacking in a generator expression replaces the
	# tuple-parameter lambda. Blank lines (e.g. a trailing newline at the end
	# of the file) are skipped instead of breaking the two-field unpack.
	with open(filenameAFINN) as afinn_file:
	    afinn = dict(
	        (word, int(score))
	        for word, score in (
	            line.strip().split('\t') for line in afinn_file if line.strip()
	        )
	    )

	# Word splitter pattern: any run of non-word characters separates two tokens.
	pattern_split = re.compile(r"\W+")

	def sentiment(text):
	    """
	    Return a float for sentiment strength based on the input text.

	    Positive values are positive valence, negative values are negative
	    valence. The text is lower-cased and split on runs of non-word
	    characters; every token is scored with its AFINN valence (0 when the
	    token is not in the word list) and the summed valence is divided by
	    sqrt(N), where N is the number of tokens.

	    Returns 0.0 when the text contains no tokens at all.
	    """
	    # re.split() yields empty strings when the text starts or ends with a
	    # separator (e.g. "stupid!"); drop them so that surrounding punctuation
	    # does not inflate N and dilute the score.
	    words = [word for word in pattern_split.split(text.lower()) if word]
	    if not words:
	        return 0.0
	    # A real list rather than map(): its truthiness and len() do not
	    # depend on map() returning a list.
	    scores = [afinn.get(word, 0) for word in words]
	    # How should you weight the individual word sentiments?
	    # You could do N, sqrt(N) or 1 for example. Here sqrt(N) is used.
	    return float(sum(scores)) / math.sqrt(len(scores))



	if __name__ == '__main__':
	    # Single sentence example:
	    text = "Finn is stupid and idiotic"
	    print("%6.2f %s" % (sentiment(text), text))

	    # No negation and booster words handled in this approach
	    text = "Finn is only a tiny bit stupid and not idiotic"
	    print("%6.2f %s" % (sentiment(text), text))

	    # Example with downloading from Twitter:
	    import contextlib
	    import json as simplejson
	    import urllib

	    query = "groupon"
	    # urlencode() escapes the query, so terms containing spaces, '#' or '&'
	    # still produce a valid URL; for "groupon" the URL is unchanged.
	    # NOTE(review): this is an unauthenticated search endpoint -- confirm
	    # it still responds before relying on this example.
	    url = ("http://search.twitter.com/search.json?"
	           + urllib.urlencode({'q': query}))
	    # closing() releases the HTTP connection once the JSON has been parsed.
	    with contextlib.closing(urllib.urlopen(url)) as response:
	        jsont = simplejson.load(response)
	    # Debugging aid: dump the fields of the first result.
	    #for item in jsont['results'][:1]:
	        #for k, v in item.iteritems():
	            #print("{:<20} {}".format(k, v))
	    # A response without a 'results' key is treated like an empty result set.
	    sentiments = [sentiment(tweet['text'])
	                  for tweet in jsont.get('results', [])]
	    if sentiments:
	        print("%6.2f %s" % (sum(sentiments)/math.sqrt(len(sentiments)), query))
	    else:
	        # Without this guard an empty result set divides by sqrt(0).
	        print("No results found for %s" % query)
No results found