theo agustinustheo

agustinustheo / bagOfWords.py

Created July 30, 2019 07:01

Creating bag of words for SMS Classifier blog

	# Build the bag-of-words: tokenize every SMS message, pool all tokens into
	# one list, then count how often each token occurs.
	all_words = []

	for message in sms_data:
	    # extend() adds every token of this message in a single call.
	    all_words.extend(word_tokenize(message))

	# Rebind the name from the raw token list to its frequency distribution.
	all_words = nltk.FreqDist(all_words)

agustinustheo / extractFeatures.py

Last active July 30, 2019 16:24

Extracting features from bag-of-words for SMS Classifier blog.

	# Fit a label encoder on the class labels and convert them to encoded values.
	encoder = LabelEncoder()
	Y = encoder.fit_transform(classes)

	# Pair every SMS message with its encoded label.
	messages = [(text, label) for text, label in zip(sms_data, Y)]

agustinustheo / randomFeatures.py

Created July 30, 2019 16:22

Randomizing features to be used as a training set for SMS Classifier blog

	# Seed NumPy's global random generator so the shuffle is reproducible.
	# np.random.seed is a function and has to be *called*; assigning to it
	# (np.random.seed = seed) overwrites the function and seeds nothing.
	seed = 1
	np.random.seed(seed)

	# Shuffle the (text, label) pairs in place.
	np.random.shuffle(messages)

	# Call find_features on each SMS message, keeping its label alongside.
	featuresets = [(find_features(text), label) for (text, label) in messages]

agustinustheo / determineAlgorithms.py

Last active July 30, 2019 16:35

Determining Algorithms that are going to be used for SMS Classifier blog

	# sklearn's model_selection module is imported here; it provides helpers for
	# splitting data into training and testing datasets.
	from sklearn import model_selection

	# NOTE(review): no split is performed in this snippet -- the complete
	# featureset is assigned to `training`, so nothing is held out for testing.
	training = featuresets

	# Names of the models to train (presumably paired with `classifiers` below
	# -- confirm against the full script).
	names = ["Logistic Regression"]

	classifiers = [

agustinustheo / trainingClassifiers.py

Created July 30, 2019 16:31

Training the text classifiers for SMS Classifier blog

	# Train one classifier per (name, model) pair and pickle each trained
	# classifier to "<name> Classifier.pickle" in the current directory.
	for name, model in models:
	    # Wrap the model in SklearnClassifier and train it on the training set.
	    nltk_model = SklearnClassifier(model)
	    classifier = nltk_model.train(training)
	    # The `with` block guarantees the file is flushed and closed; the
	    # previous `f.close` (no parentheses) never actually closed it.
	    with open(name + ' Classifier.pickle', 'wb') as f:
	        pickle.dump(classifier, f)

agustinustheo / classifyText.py

Created July 30, 2019 16:38

Classify text for SMS Classifier blog

	# Load the pickled classifier for `name`. The `with` block closes the file
	# even when unpickling raises, which the manual open/close did not.
	# NOTE(security): pickle.load can execute arbitrary code -- only load
	# classifier files that come from a trusted source.
	with open("sms_classifier_pickle/" + name + ' Classifier.pickle', "rb") as classifier_s:
	    sms_classifier = pickle.load(classifier_s)

	# Pass the message through preproccess_text and find_features, then classify
	# the resulting features with the loaded classifier.
	result = sms_classifier.classify(find_features(preproccess_text('ENTER MESSAGE TO CLASSIFY HERE')))

agustinustheo / librariesUsed.py

Created July 30, 2019 16:43

Libraries used in SMS Classifier blog

	import os
	import re
	import sys
	import nltk
	import random
	import pickle
	import numpy as np
	import pandas as pd
	from sklearn.svm import SVC
	from sklearn.naive_bayes import MultinomialNB

agustinustheo / app.py

Created October 26, 2020 20:03

Start off Flask project

agustinustheo / faunadb_entity.py

Created October 26, 2020 21:57

Script to get data from FaunaDB by index. Usually returns a single document.

	import os
	from faunadb import query as q
	from faunadb.objects import Ref
	from faunadb.client import FaunaClient

	def get(index, data):
	try:
	serverClient = FaunaClient(secret=os.environ.get("FAUNA_SERVER_SECRET"))
	res = serverClient.query(q.get(q.match(q.index(index), data)))
	res["data"]["ref_id"] = res["ref"].id()

agustinustheo / get_multiple.py

Created October 26, 2020 22:04

Function to get multiple documents from FaunaDB

	def get_multiple(index, data=None):
	try:
	serverClient = FaunaClient(secret=os.environ.get("FAUNA_SERVER_SECRET"))
	res_arr = []
	if data is None:
	res = serverClient.query(
	q.map_(
	q.lambda_("data", q.get(q.var("data"))),
	q.paginate(q.match(q.index(index)))
	)