@felipessalvatore
Created October 26, 2016 02:03
Multinomial logistic classifier applied to the iris dataset
#--------TENSORFLOW-------------
#MULTINOMIAL LOGISTIC CLASSIFIER
#APPLIED TO THE IRIS DATASET
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import scipy as sp
from sklearn.datasets import load_iris
%matplotlib inline
#loading the iris dataset from sklearn
iris = load_iris()
#given a dataset, this function normalizes each column
#(subtract the column mean, divide by the column range)
def normalize(Xdata):
    l = []
    for i in range(len(Xdata.T)):
        l.append(np.array([(j - (np.sum(Xdata.T[i])) / len(Xdata.T[i])) /
                           (max(Xdata.T[i]) - min(Xdata.T[i])) for j in Xdata.T[i]]))
    return np.array(l).T
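#quick sanity check (not in the original gist): after normalize, each
#column should have mean ~0 and a max-min range of exactly 1
_demo = normalize(np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]]))
assert np.allclose(_demo.mean(axis=0), 0.0)
assert np.allclose(_demo.max(axis=0) - _demo.min(axis=0), 1.0)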
#functions to shuffle the dataset and the labels together
def randomize(dataset, labels):
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation]
    shuffled_labels = labels[permutation]
    print(permutation.shape, len(permutation))
    return shuffled_dataset, shuffled_labels

def randomize_in_place(list1, list2, init):
    #seeding with the same value before each shuffle keeps
    #both arrays in the same shuffled order
    np.random.seed(seed=init)
    np.random.shuffle(list1)
    np.random.seed(seed=init)
    np.random.shuffle(list2)
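#illustrative check (not in the original gist): with the same seed both
#arrays end up in the same shuffled order, so rows stay aligned
_a, _b = np.arange(5), np.arange(5)
randomize_in_place(_a, _b, 0)
assert np.array_equal(_a, _b)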
#given an array of integer class labels, return an
#array of one-hot encodings
def one_hot_encoding(target):
    num_labels = len(set(target))
    return (np.arange(num_labels) == target[:, None]).astype(np.float32)
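#illustrative example (added): integer ids become one-hot rows,
#e.g. [0, 2, 1] -> [[1,0,0], [0,0,1], [0,1,0]]
assert np.array_equal(one_hot_encoding(np.array([0, 2, 1])),
                      np.array([[1, 0, 0], [0, 0, 1], [0, 1, 0]], np.float32))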
#the dataset normalized
all_X = normalize(iris['data'])
#the target of the classification
labels = one_hot_encoding(iris['target'])
#shuffling both X and L
randomize_in_place(all_X,labels,0)
# Each x in X is a vector of size 4 (the four iris features) and each
# label l is a one-hot vector of size 3 (the three classes), so the
# weight matrix W is 4x3.
# Given x_i in X we do the following:
# 1) compute x_i * W + b = r1;
#    r1 is known as the 'score' or 'logit'
# 2) then apply the softmax function to
#    turn the scores into probabilities,
#    i.e., S(r1) = r2
# 3) finally, compare the probabilities r2 with
#    l_i (l_i in L) using the cross-entropy,
#    i.e., D(r2, l_i) = r3
# Let N be the size of the dataset;
# the final function has the form
# (sum_i D(S(x_i * W + b), l_i)) / N
#this is the cost function we want to minimize
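#a minimal numpy sketch (added, not part of the original gist) of the
#loss described above; the helper names _softmax and _cross_entropy
#are illustrative only
def _softmax(z):
    e = np.exp(z - np.max(z))  # shift by max(z) for numerical stability
    return e / e.sum()

def _cross_entropy(p, l):
    return -np.sum(l * np.log(p))  # D(p, l) with one-hot l

#loss of a single sample with a random W and zero bias:
_W, _b = np.random.randn(4, 3), np.zeros(3)
_sample_loss = _cross_entropy(_softmax(all_X[0].dot(_W) + _b), labels[0])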
#split: 130 train / 10 validation / 10 test
train_labels = labels[:130]
valid_labels = labels[130:140]
test_labels = labels[140:]
graph = tf.Graph()
with graph.as_default():
    #CONSTANTS
    tf_train_dataset = tf.constant(all_X[:130], dtype='float32')
    tf_train_labels = tf.constant(labels[:130], dtype='float32')
    tf_valid_dataset = tf.constant(all_X[130:140], dtype='float32')
    tf_valid_labels = tf.constant(labels[130:140], dtype='float32')
    tf_test_dataset = tf.constant(all_X[140:], dtype='float32')
    tf_test_labels = tf.constant(labels[140:], dtype='float32')
    #VARIABLES
    weights = tf.Variable(
        tf.truncated_normal([4, 3]))
    biases = tf.Variable(tf.zeros([3]))
    #SCORE
    logits = tf.matmul(tf_train_dataset, weights) + biases
    #LOSS FUNCTION
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels,
                                                logits=logits))
    # Optimizer: gradient descent.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    # Predictions, i.e., S(X*W + b)
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
#number of steps for the optimization
num_steps = 801
#fast way to compare an array of probability vectors
#with an array of one-hot encodings:
#the position with the highest value should match.
# divide by the number of samples and multiply by 100
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
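#illustrative usage (added): only the first of the two rows below has its
#argmax in the right position, so the accuracy is 50%
assert accuracy(np.array([[0.1, 0.9], [0.3, 0.7]]),
                np.array([[0.0, 1.0], [1.0, 0.0]])) == 50.0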
with tf.Session(graph=graph) as session:
    #initialize all variables
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in range(num_steps):
        _, l, predictions = session.run([optimizer, loss, train_prediction])
        if (step % 100 == 0):
            print('Loss at step %d: %f' % (step, l))
            print('Training accuracy: %.1f%%' % accuracy(predictions, train_labels))
            print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))