Skip to content

Instantly share code, notes, and snippets.

@sobamchan
Created March 22, 2020 01:23
Show Gist options
  • Save sobamchan/09f0e57edba1da8b53991f0ebfd289c3 to your computer and use it in GitHub Desktop.
Save sobamchan/09f0e57edba1da8b53991f0ebfd289c3 to your computer and use it in GitHub Desktop.
import numpy as np
from lineflow import datasets
from sklearn.svm import SVC
import sister
def main():
    """Train a linear-kernel SVM on IMDB sentence embeddings and print its test score.

    Loads the "train" and "test" splits of the lineflow IMDB dataset, turns
    every review into a single vector with ``sister.MeanEmbedding``, fits an
    ``SVC(kernel="linear")`` on the training vectors, and prints the value
    returned by ``clf.score`` on the test vectors.
    """
    train = datasets.Imdb("train")
    test = datasets.Imdb("test")

    # Transpose the datasets into parallel (texts, labels) tuples.
    # NOTE(review): assumes every item is a (text, label) pair, which is what
    # the two-name unpacking below requires -- confirm against lineflow.
    train_texts, train_labels = zip(*train.all())
    test_texts, test_labels = zip(*test.all())

    # One embedder instance is shared by both splits so train and test
    # vectors come from the same embedding model.
    sentence_embedding = sister.MeanEmbedding("en")
    train_x = np.array([sentence_embedding(t) for t in train_texts])
    test_x = np.array([sentence_embedding(t) for t in test_texts])

    clf = SVC(kernel="linear")
    clf.fit(train_x, train_labels)
    print(clf.score(test_x, test_labels))
# Run the experiment only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment