Skip to content

Instantly share code, notes, and snippets.

@ravikiranj
Created June 2, 2012 19:20
Show Gist options
  • Select an option

  • Save ravikiranj/2859639 to your computer and use it in GitHub Desktop.

Select an option

Save ravikiranj/2859639 to your computer and use it in GitHub Desktop.
SVM Feature Vector extraction and Training the classifier
def getSVMFeatureVectorAndLabels(tweets, featureList):
    """Build binary bag-of-words vectors and integer labels for tweets.

    Args:
        tweets: iterable of sequences where item 0 is an iterable of the
            tweet's words and item 1 is its opinion string.
        featureList: iterable of feature words; duplicates are collapsed.

    Returns:
        A dict with two parallel lists:
        'feature_vector' -- one list of 0/1 ints per tweet, with one slot
            per unique feature in sorted order (1 if the tweet has it);
        'labels' -- 0 for 'positive', 1 for 'negative', 2 for 'neutral'
            and 0 for any other opinion value.
    """
    sortedFeatures = sorted(set(featureList))
    # Column of each feature, computed once so that every vector shares the
    # same, deterministic (sorted) layout regardless of dict ordering.
    feature_index = {feature: i for i, feature in enumerate(sortedFeatures)}
    label_codes = {'positive': 0, 'negative': 1, 'neutral': 2}

    feature_vector = []
    labels = []
    for t in tweets:
        tweet_words = t[0]
        tweet_opinion = t[1]
        # Fresh all-zero row per tweet (a real list, never a dict view)
        values = [0] * len(sortedFeatures)
        for word in tweet_words:
            # process the word (remove repetitions and punctuations);
            # replaceTwoOrMore is defined outside this function
            word = replaceTwoOrMore(word)
            word = word.strip('\'"?,.')
            # flag the feature as present if the cleaned word is a feature
            index = feature_index.get(word)
            if index is not None:
                values[index] = 1
        feature_vector.append(values)
        # Unrecognised opinions keep the historical default label of 0
        labels.append(label_codes.get(tweet_opinion, 0))
    # return the list of feature_vector and labels
    return {'feature_vector': feature_vector, 'labels': labels}
#end
#Train the classifier
# The helper is defined as getSVMFeatureVectorAndLabels (capital "A");
# the previous lowercase spelling raised NameError.
result = getSVMFeatureVectorAndLabels(tweets, featureList)
# svm_problem takes the labels first, then the feature vectors
problem = svm_problem(result['labels'], result['feature_vector'])
#'-q' option suppress console output
param = svm_parameter('-q')
param.kernel_type = LINEAR
classifier = svm_train(problem, param)
# Persist the trained model to classifierDumpFile
svm_save_model(classifierDumpFile, classifier)
#Test the classifier
# NOTE(review): getSVMFeatureVector is not defined in this snippet;
# presumably it lives elsewhere in the project -- confirm before running.
test_feature_vector = getSVMFeatureVector(test_tweets, featureList)
#p_labels contains the final labeling result
# A list of zeros is passed as the labels argument, one entry per test vector
p_labels, p_accs, p_vals = svm_predict([0] * len(test_feature_vector), test_feature_vector, classifier)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment