from __future__ import print_function
# Written against the TensorFlow 1.x API (tf.placeholder / tf.Session);
# on TensorFlow 2.x, use `import tensorflow.compat.v1 as tf` and call
# tf.disable_v2_behavior() to run this script unchanged.
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import os

os.chdir('/Users/Nick/Git/Machinelearning')
# Load the dataset: all but the last column as features, column 3 as the target
dataset = pd.read_csv('first1500.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values
# Fill gaps in the numeric columns: zeros are treated as missing and
# replaced with the column mean
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values=0, strategy='mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
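# Equivalent on newer scikit-learn (sketch -- Imputer was deprecated in 0.20
# and removed in 0.22 in favor of SimpleImputer):
# from sklearn.impute import SimpleImputer
# imputer = SimpleImputer(missing_values=0, strategy='mean')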
# Encode city names: LabelEncoder maps each city to an integer, then
# OneHotEncoder expands column 0 into one binary column per city
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
onehotencoder = OneHotEncoder(categorical_features=[0])
X = onehotencoder.fit_transform(X).toarray()
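# Note: `categorical_features` was removed in scikit-learn 0.22; newer code
# would wrap OneHotEncoder in a ColumnTransformer instead. Dropping one dummy
# column (X = X[:, 1:]) avoids the dummy variable trap with closed-form
# solvers, though gradient descent as used below tolerates the redundancy.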
# Split the dataset into train and test sets (sklearn.cross_validation was
# removed in scikit-learn 0.20; train_test_split now lives in model_selection)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
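# The univariate model below expects a scalar feature, but one-hot encoding
# left X with several columns; feeding the full matrix breaks the broadcast
# in the epoch-cost evaluation and the plots. Keep a single numeric column so
# the shapes line up. (Which column to regress on is an assumption -- the
# original gist trains on the full matrix.)
X_train = X_train[:, -1]
X_test = X_test[:, -1]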
# Set our learning rate and training schedule: log every `display_step` epochs
learningrate = 0.01
trainingepochs = 100
display_step = 50

# Alias numpy's random module for the random weight initialization below
rng = numpy.random
# tf Graph input: unshaped float placeholders accept a scalar or a batch
X = tf.placeholder("float")
Y = tf.placeholder("float")

# Model weights, initialized from a standard normal
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")

# Linear model: pred = W*X + b (tf.mul was renamed tf.multiply in TF 1.0)
pred = tf.add(tf.multiply(X, W), b)
# Mean squared error over the training set
n_samples = X_train.shape[0]
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)
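# In math: J(W, b) = (1/(2n)) * sum_i (W*x_i + b - y_i)^2. The extra factor
# of 1/2 only rescales the gradient; it does not change the minimizing W, b.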
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learningrate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Fit all training data, one gradient step per sample (stochastic GD)
    for epoch in range(trainingepochs):
        for (x, y) in zip(X_train, y_train):
            sess.run(optimizer, feed_dict={X: x, Y: y})

        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: X_train, Y: y_train})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c),
                  "W=", sess.run(W), "b=", sess.run(b))

    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: X_train, Y: y_train})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

    # Graphic display
    plt.plot(X_train, y_train, 'ro', label='Original data')
    plt.plot(X_train, sess.run(W) * X_train + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()

    print("Testing... (mean square loss comparison)")
    testing_cost = sess.run(
        tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * X_test.shape[0]),
        feed_dict={X: X_test, Y: y_test})  # same cost function as above
    print("Testing cost=", testing_cost)
    print("Absolute mean square loss difference:", abs(training_cost - testing_cost))

    plt.plot(X_test, y_test, 'bo', label='Testing data')
    plt.plot(X_train, sess.run(W) * X_train + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
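
# Sanity check (not in the original gist): fit the same single-feature data
# with ordinary least squares from scikit-learn and compare the slope and
# intercept against the W and b learned by gradient descent above.
from sklearn.linear_model import LinearRegression
lr = LinearRegression().fit(X_train.reshape(-1, 1), y_train)
print("sklearn slope:", lr.coef_[0], "intercept:", lr.intercept_)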