Skip to content

Instantly share code, notes, and snippets.

@strikaco
Created November 29, 2017 04:41
Show Gist options
  • Save strikaco/ab58fb774750b28104bfe278a9dd5412 to your computer and use it in GitHub Desktop.
Save strikaco/ab58fb774750b28104bfe278a9dd5412 to your computer and use it in GitHub Desktop.
ML Demo
from django.test import TestCase
from pandas import DataFrame
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
import numpy
import os
# Create your tests here.
class LearnTest(TestCase):
    """Smoke test that trains a bag-of-words naive Bayes text classifier."""

    def test_learn(self):
        """Fit the pipeline on the corpus under ``common/enron3/`` and classify samples.

        Every immediate subdirectory of the corpus root is treated as one
        class: the directory name is the label and each file inside it is one
        training document.  The fitted pipeline is then asked to label a few
        hand-written example sentences, and the test checks that it returns
        one known label per example.
        """
        corpus_root = 'common/enron3/'

        # Only the first os.walk() entry is needed: it lists the root's
        # immediate subdirectories.  Indexing into the fully materialised walk
        # (entries 1 and 2) would silently pick up a nested directory whenever
        # the first class folder has children, and would raise IndexError when
        # the corpus is missing.
        _, class_names, _ = next(os.walk(corpus_root), (corpus_root, [], []))
        class_names = sorted(class_names)  # deterministic order across platforms
        self.assertTrue(
            class_names,
            'no class directories found under %r' % corpus_root,
        )

        rows = []
        index = []
        for class_name in class_names:
            print(class_name)
            class_dir = os.path.join(corpus_root, class_name)
            _, _, filenames = next(os.walk(class_dir), (class_dir, [], []))
            for filename in filenames:
                document_path = os.path.join(class_dir, filename)
                # latin-1 maps every byte to a character, so reading never
                # fails on the raw e-mail bytes.
                with open(document_path, 'r', encoding='latin-1') as handle:
                    rows.append({'text': handle.read(), 'class': class_name})
                index.append(filename)
        self.assertTrue(
            rows,
            'no training documents found under %r' % corpus_root,
        )

        df = DataFrame(rows, index=index)
        # Shuffle by position rather than by label: reindexing on the file
        # names fails as soon as two classes contain a file with the same name.
        df = df.iloc[numpy.random.permutation(len(df))]

        pipeline = Pipeline([
            ('count_vectorizer', CountVectorizer()),
            ('tfidf_transformer', TfidfTransformer()),
            ('classifier', MultinomialNB()),
        ])
        examples = [
            'Hello dear sir have you heard the news',
            'Free Viagra call today!',
            "I'm going to attend the Linux users group tomorrow.",
        ]

        pipeline.fit(df['text'].values, df['class'].values)
        predictions = pipeline.predict(examples)
        print(predictions)

        # Minimal sanity checks so the test can actually fail: one label per
        # example, and every label is one of the classes that were loaded.
        self.assertEqual(len(predictions), len(examples))
        for label in predictions:
            self.assertIn(label, class_names)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment