Skip to content

Instantly share code, notes, and snippets.

@kopylovvlad
Created November 6, 2017 16:50
Show Gist options
  • Save kopylovvlad/0ef81f868137b3aec57c11bf6e74545d to your computer and use it in GitHub Desktop.
Save kopylovvlad/0ef81f868137b3aec57c11bf6e74545d to your computer and use it in GitHub Desktop.
from sklearn.naive_bayes import *
from sklearn.dummy import *
from sklearn.ensemble import *
from sklearn.neighbors import *
from sklearn.tree import *
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.calibration import *
from sklearn.linear_model import *
from sklearn.multiclass import *
from sklearn.svm import *
import pandas
import csv
data = pandas.read_csv('spam.csv', encoding='latin-1')
train_data = data[:4400] # 4400 items
test_data = data[4400:] # 1172 items
classifier = OneVsRestClassifier(SVC(kernel='linear'))
vectorizer = TfidfVectorizer()
# train
vectorize_text = vectorizer.fit_transform(train_data.v2)
classifier.fit(vectorize_text, train_data.v1)
# score
# vectorize_text = vectorizer.transform(test_data.v2)
# score = classifier.score(vectorize_text, test_data.v1)
# print(score) # 98,8
csv_arr = []
for index, row in test_data.iterrows():
answer = row[0]
text = row[1]
vectorize_text = vectorizer.transform([text])
predict = classifier.predict(vectorize_text)[0]
if predict == answer:
result = 'right'
else:
result = 'wrong'
csv_arr.append([len(csv_arr), text, answer, predict, result])
# write csv
with open('test_score.csv', 'w', newline='') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=';',
quotechar='"', quoting=csv.QUOTE_MINIMAL)
spamwriter.writerow(['#', 'text', 'answer', 'predict', result])
for row in csv_arr:
spamwriter.writerow(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment