Skip to content

Instantly share code, notes, and snippets.

View ravikiranj's full-sized avatar

Ravikiran Janardhana ravikiranj

View GitHub Profile
@ravikiranj
ravikiranj / tw-18.py
Created June 2, 2012 19:25
Sample Feature Vector for SVM
Sentence = AT_USER i heard about that contest! congrats girl!!
Feature Vector
==============
'hey', ..., 'heard', 'congrats', ..., 'bombs', 'strange', 'australian', 'women', 'drink', 'head', 'hurts', 'bloodwork'
0 1 1 0 0 0 0 0 0 0 0
@ravikiranj
ravikiranj / tw-17.py
Created June 2, 2012 19:20
SVM Feature Vector extraction and Training the classifier
# NOTE(review): this gist preview is truncated and the page scrape stripped
# all indentation -- the snippet is not runnable as-is; see the full tw-17.py.
# Purpose: for each tweet build a binary feature vector (one slot per word of
# the sorted feature list) plus a parallel class-label list, for SVM training.
def getSVMFeatureVectorAndLabels(tweets, featureList):
# Sort once so every tweet's vector uses the same feature ordering.
sortedFeatures = sorted(featureList)
map = {}
feature_vector = []
labels = []
for t in tweets:
label = 0
map = {}
#Initialize empty map
# presumably each sorted feature is zeroed in `map` here -- body is cut off
for w in sortedFeatures:
@ravikiranj
ravikiranj / tw-16.py
Created May 9, 2012 04:16
Simple SVM Demo output
.*
optimization finished, #iter = 5
nu = 0.176245
obj = -2.643822, rho = 0.164343
nSV = 3, nBSV = 0
*
optimization finished, #iter = 1
nu = 0.254149
obj = -2.541494, rho = 0.000000
nSV = 2, nBSV = 0
@ravikiranj
ravikiranj / tw-15.py
Last active October 4, 2015 13:07
Simple SVM Demo
# Minimal libsvm demo: four training samples over three binary features,
# with class labels 0/1/2.  (Gist preview is truncated; the training call
# presumably follows below this excerpt.)
import svm
from svmutil import *
#training data
labels = [0, 1, 1, 2]
samples = [[0, 1, 0], [1, 1, 1], [1, 1, 0], [0, 0, 0]]
#SVM params
# Default svm_parameter with only the soft-margin cost C overridden.
param = svm_parameter()
param.C = 10
@ravikiranj
ravikiranj / tw-14.py
Created May 9, 2012 04:02
Maximum Entropy Classifier
#Max Entropy Classifier
# Train an NLTK maximum-entropy classifier with the GIS algorithm (at most
# 10 iterations), then classify one sample tweet.  Python 2 syntax (print
# statement); processTweet, getFeatureVector and extract_features are
# defined in sibling snippets of this series.
MaxEntClassifier = nltk.classify.maxent.MaxentClassifier.train(training_set, 'GIS', trace=3, \
encoding=None, labels=None, sparse=True, gaussian_prior_sigma=0, max_iter = 10)
testTweet = 'Congrats @ravikiranj, i heard you wrote a new tech post on sentiment analysis'
processedTestTweet = processTweet(testTweet)
print MaxEntClassifier.classify(extract_features(getFeatureVector(processedTestTweet)))
Output
=======
positive
@ravikiranj
ravikiranj / tw-13.py
Created May 9, 2012 03:47
Bad Classification
# Example of a misclassification: a clearly negative tweet that the trained
# Naive Bayes classifier nevertheless labels "positive" (see output below).
testTweet = 'I am so badly hurt'
processedTestTweet = processTweet(testTweet)
print NBClassifier.classify(extract_features(getFeatureVector(processedTestTweet)))
Output
======
positive
@ravikiranj
ravikiranj / tw-12.py
Created May 9, 2012 03:36
Most Informative Features
# print informative features about the classifier
# Show the 10 features whose presence/absence most strongly predicts a class.
# NOTE: show_most_informative_features() prints its table itself and returns
# None, so wrapping the call in `print` also emitted a stray "None" line --
# call it directly instead.
NBClassifier.show_most_informative_features(10)
Output
======
Most Informative Features
contains(twitter) = False positi : neutra = 2.3 : 1.0
contains(car) = False positi : negati = 2.3 : 1.0
contains(hurts) = False positi : negati = 2.3 : 1.0
contains(articles) = False positi : neutra = 1.4 : 1.0
@ravikiranj
ravikiranj / tw-11.py
Created May 9, 2012 03:33
Instantiate Classifier and Classify Tweet
# Train the classifier
# training_set is the list of (feature-dict, sentiment) pairs built earlier.
NBClassifier = nltk.NaiveBayesClassifier.train(training_set)
# Test the classifier
# Run one unseen tweet through the same preprocess -> tokenize -> featurize
# pipeline used at training time, then classify it (Python 2 print syntax).
testTweet = 'Congrats @ravikiranj, i heard you wrote a new tech post on sentiment analysis'
processedTestTweet = processTweet(testTweet)
print NBClassifier.classify(extract_features(getFeatureVector(processedTestTweet)))
Output
======
@ravikiranj
ravikiranj / tw-10.py
Last active October 4, 2015 13:07
Bulk feature extraction
#Read the tweets one by one and process it
# NOTE(review): gist preview is truncated and the scrape stripped indentation;
# the loop body below is incomplete.  'rb' mode with csv.reader is Python 2
# style, and the file handle is never closed -- a `with` block would be safer.
inpTweets = csv.reader(open('data/sampleTweets.csv', 'rb'), delimiter=',', quotechar='|')
stopWords = getStopWordList('data/feature_list/stopwords.txt')
featureList = []
# Get tweet words
tweets = []
for row in inpTweets:
# Column 0 is the sentiment label, column 1 the raw tweet text -- TODO
# confirm against data/sampleTweets.csv; the rest of the loop is cut off.
sentiment = row[0]
tweet = row[1]
@ravikiranj
ravikiranj / tw-8.py
Created May 8, 2012 21:46
Extract Features Method
#get feature list stored in a file (for reuse)
# getFeatureList is a project helper defined elsewhere; featureList is the
# module-level vocabulary consumed by extract_features below.
featureList = getFeatureList('data/sampleTweetFeatureList.txt')
#start extract_features
def extract_features(tweet, feature_list=None):
    """Map a tokenized tweet to a binary word-presence feature dict.

    Args:
        tweet: iterable of word tokens from one tweet.
        feature_list: optional vocabulary to test against; defaults to the
            module-level ``featureList`` loaded above (backward compatible
            with the original zero-argument-vocabulary behavior).

    Returns:
        dict mapping 'contains(<word>)' -> bool for every vocabulary word.
    """
    if feature_list is None:
        feature_list = featureList  # fall back to the module-level vocabulary
    tweet_words = set(tweet)  # set for O(1) membership tests per feature word
    features = {}
    for word in feature_list:
        # Key format must stay exactly 'contains(%s)' -- the trained NLTK
        # classifiers in the sibling snippets key on these feature names.
        features['contains(%s)' % word] = (word in tweet_words)
    return features