from __future__ import print_function
# Written against the TensorFlow 1.x API (tf.placeholder / tf.Session);
# on TensorFlow 2.x, use `import tensorflow.compat.v1 as tf` and call
# tf.disable_v2_behavior() to run this script unchanged.
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import os

os.chdir('/Users/Nick/Git/Machinelearning')
# Load the dataset: all but the last column as features, column 3 as the target
dataset = pd.read_csv('first1500.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values
# Fill gaps in the numeric columns: zeros are treated as missing and
# replaced with the column mean
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values=0, strategy='mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
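# Equivalent on newer scikit-learn (sketch -- Imputer was deprecated in 0.20
# and removed in 0.22 in favor of SimpleImputer):
# from sklearn.impute import SimpleImputer
# imputer = SimpleImputer(missing_values=0, strategy='mean')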
# Encode city names: LabelEncoder maps each city to an integer, then
# OneHotEncoder expands column 0 into one binary column per city
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
onehotencoder = OneHotEncoder(categorical_features=[0])
X = onehotencoder.fit_transform(X).toarray()
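# Note: `categorical_features` was removed in scikit-learn 0.22; newer code
# would wrap OneHotEncoder in a ColumnTransformer instead. Dropping one dummy
# column (X = X[:, 1:]) avoids the dummy variable trap with closed-form
# solvers, though gradient descent as used below tolerates the redundancy.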
# Split the dataset into train and test sets (sklearn.cross_validation was
# removed in scikit-learn 0.20; train_test_split now lives in model_selection)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
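# The univariate model below expects a scalar feature, but one-hot encoding
# left X with several columns; feeding the full matrix breaks the broadcast
# in the epoch-cost evaluation and the plots. Keep a single numeric column so
# the shapes line up. (Which column to regress on is an assumption -- the
# original gist trains on the full matrix.)
X_train = X_train[:, -1]
X_test = X_test[:, -1]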
# Set our learning rate and training schedule: log every `display_step` epochs
learningrate = 0.01
trainingepochs = 100
display_step = 50

# Alias numpy's random module for the random weight initialization below
rng = numpy.random
# tf Graph input: unshaped float placeholders accept a scalar or a batch
X = tf.placeholder("float")
Y = tf.placeholder("float")

# Model weights, initialized from a standard normal
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")

# Linear model: pred = W*X + b (tf.mul was renamed tf.multiply in TF 1.0)
pred = tf.add(tf.multiply(X, W), b)
# Mean squared error over the training set
n_samples = X_train.shape[0]
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)
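# In math: J(W, b) = (1/(2n)) * sum_i (W*x_i + b - y_i)^2. The extra factor
# of 1/2 only rescales the gradient; it does not change the minimizing W, b.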
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learningrate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Fit all training data, one gradient step per sample (stochastic GD)
    for epoch in range(trainingepochs):
        for (x, y) in zip(X_train, y_train):
            sess.run(optimizer, feed_dict={X: x, Y: y})

        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: X_train, Y: y_train})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c),
                  "W=", sess.run(W), "b=", sess.run(b))

    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: X_train, Y: y_train})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

    # Graphic display
    plt.plot(X_train, y_train, 'ro', label='Original data')
    plt.plot(X_train, sess.run(W) * X_train + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()

    print("Testing... (mean square loss comparison)")
    testing_cost = sess.run(
        tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * X_test.shape[0]),
        feed_dict={X: X_test, Y: y_test})  # same cost function as above
    print("Testing cost=", testing_cost)
    print("Absolute mean square loss difference:", abs(training_cost - testing_cost))

    plt.plot(X_test, y_test, 'bo', label='Testing data')
    plt.plot(X_train, sess.run(W) * X_train + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
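
# Sanity check (not in the original gist): fit the same single-feature data
# with ordinary least squares from scikit-learn and compare the slope and
# intercept against the W and b learned by gradient descent above.
from sklearn.linear_model import LinearRegression
lr = LinearRegression().fit(X_train.reshape(-1, 1), y_train)
print("sklearn slope:", lr.coef_[0], "intercept:", lr.intercept_)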