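# Linear regression with TensorFlow 1.x and scikit-learn preprocessing.
# Gist by @Nick-Harvey, created January 29, 2017.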
from __future__ import print_function
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import os
os.chdir('/Users/Nick/Git/Machinelearning')
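# (Hardcoded working directory; adjust to wherever first1500.csv lives.)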
# Load our dataset
dataset = pd.read_csv('first1500.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values
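# Assumed layout of first1500.csv (inferred from the indexing above):
# column 0 = city name, columns 1-2 = numeric features, column 3 = target.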
# Fill in gaps in the data by imputing missing values
# (Imputer was later replaced by sklearn.impute.SimpleImputer in scikit-learn >= 0.20)
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values=0, strategy='mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
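# Note: missing_values=0 means literal zeros in columns 1-2 are treated as
# missing and replaced by the column mean of the non-missing entries,
# e.g. a column [1, 0, 3] becomes [1, 2, 3] after transform.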
# Categorize city names (encode the city column as one-hot dummy variables)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
onehotencoder = OneHotEncoder(categorical_features=[0])
X = onehotencoder.fit_transform(X).toarray()
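# After encoding, each city becomes its own 0/1 dummy column. With
# categorical_features=[0], the dummy columns are stacked to the left and
# the remaining numeric columns follow them.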
# Split the dataset into our train and test sets
# (sklearn.cross_validation was renamed sklearn.model_selection in 0.18)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# The TensorFlow model below is univariate (scalar weight and bias), so we
# regress on a single numeric feature; using the last column here is an
# arbitrary choice, not something the original data dictates.
X_train = X_train[:, -1]
X_test = X_test[:, -1]
# Set our learning rate, number of training epochs, and logging interval
learning_rate = 0.01
training_epochs = 100
display_step = 50
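# With 100 epochs and display_step = 50, progress prints at epochs 50 and 100.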
# Alias numpy's random module, used below to draw the initial weights
rng = numpy.random
# tf Graph Input
X = tf.placeholder("float")
Y = tf.placeholder("float")
# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")
# Construct a linear model: pred = W*X + b
# (tf.mul was renamed tf.multiply in TensorFlow 1.0)
pred = tf.add(tf.multiply(X, W), b)
# Mean squared error over the training set
n_samples = X_train.shape[0]
cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples)
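# cost = sum((pred - Y)^2) / (2n), i.e. half the mean squared error; the 1/2
# cancels the factor of 2 in the gradient of the squared term.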
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
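# Each sess.run(optimizer, ...) call takes one gradient-descent step:
# W <- W - learning_rate * d(cost)/dW, and likewise for b.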
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Fit all training data, one sample at a time (stochastic gradient descent)
    for epoch in range(training_epochs):
        for (x, y) in zip(X_train, y_train):
            sess.run(optimizer, feed_dict={X: x, Y: y})

        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: X_train, Y: y_train})
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c),
                  "W=", sess.run(W), "b=", sess.run(b))

    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: X_train, Y: y_train})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

    # Graphic display of the training fit
    plt.plot(X_train, y_train, 'ro', label='Original data')
    plt.plot(X_train, sess.run(W) * X_train + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()

    print("Testing... (mean squared loss comparison)")
    testing_cost = sess.run(
        tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * X_test.shape[0]),
        feed_dict={X: X_test, Y: y_test})  # same cost function as above
    print("Testing cost=", testing_cost)
    print("Absolute mean squared loss difference:", abs(
        training_cost - testing_cost))

    plt.plot(X_test, y_test, 'bo', label='Testing data')
    plt.plot(X_train, sess.run(W) * X_train + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
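# Environment note: this script targets TensorFlow 1.x (tf.placeholder /
# tf.Session) and an older scikit-learn where Imputer and the
# categorical_features argument still exist.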