Created
June 2, 2012 19:20
-
-
Save ravikiranj/2859639 to your computer and use it in GitHub Desktop.
SVM Feature Vector extraction and Training the classifier
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def getSVMFeatureVectorAndLabels(tweets, featureList):
    """Build binary bag-of-words feature vectors and integer labels for SVM training.

    Args:
        tweets: iterable of items where item[0] is an iterable of word
            strings and item[1] is the opinion string for that tweet.
        featureList: iterable of feature words; duplicates are collapsed.

    Returns:
        A dict with two parallel lists:
            'feature_vector': one list of 0/1 ints per tweet, with columns in
                sorted order of the unique features (1 when the cleaned word
                appears in the tweet).
            'labels': 0 for 'positive', 1 for 'negative', 2 for 'neutral'.
                Any other opinion value falls back to 0.
    """
    # Fix the column order once so every row lines up with the same feature,
    # instead of relying on dict iteration order.
    sortedFeatures = sorted(set(featureList))
    column_of = {}
    for column, feature in enumerate(sortedFeatures):
        column_of[feature] = column
    width = len(sortedFeatures)

    opinion_to_label = {'positive': 0, 'negative': 1, 'neutral': 2}

    feature_vector = []
    labels = []
    for t in tweets:
        tweet_words = t[0]
        tweet_opinion = t[1]

        # Start from an all-zero row and flag the features that are present.
        row = [0] * width
        for word in tweet_words:
            # Normalize the word the same way before the lookup:
            # replaceTwoOrMore is defined elsewhere (per the original comment
            # it removes character repetitions), then strip punctuation.
            word = replaceTwoOrMore(word)
            word = word.strip('\'"?,.')
            column = column_of.get(word)
            if column is not None:
                row[column] = 1
        # Append a real list (not a dict view) so it is usable on Python 2 and 3.
        feature_vector.append(row)

        # Unknown opinions keep the original default label of 0.
        labels.append(opinion_to_label.get(tweet_opinion, 0))

    # Return the feature vectors and their labels.
    return {'feature_vector': feature_vector, 'labels': labels}
#end
# Train the classifier.
# NOTE: the function is defined as getSVMFeatureVectorAndLabels (capital "A");
# the previous spelling "...andLabels" raised NameError.
result = getSVMFeatureVectorAndLabels(tweets, featureList)
problem = svm_problem(result['labels'], result['feature_vector'])
# The '-q' option suppresses console output.
param = svm_parameter('-q')
param.kernel_type = LINEAR
classifier = svm_train(problem, param)
# Persist the trained model to classifierDumpFile.
svm_save_model(classifierDumpFile, classifier)

# Test the classifier.
# getSVMFeatureVector is not defined in this snippet; presumably it builds the
# same kind of vectors without labels -- confirm against its definition.
test_feature_vector = getSVMFeatureVector(test_tweets, featureList)
# p_labels contains the final labeling result.
# A list of zeros is passed as the first (labels) argument, one per test
# vector; NOTE(review): these look like placeholders for unknown true labels,
# in which case p_accs is not meaningful -- confirm against the LIBSVM docs.
p_labels, p_accs, p_vals = svm_predict(
    [0] * len(test_feature_vector), test_feature_vector, classifier)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment