hackintoshrao · May 6, 2018 02:21
diff --git a/tf_housing_load_plot.py b/tf_housing_load_plot.py
 import pandas as pd
 # import library to split the data
 from sklearn.model_selection import train_test_split

 %matplotlib inline
 import matplotlib.pyplot as plt


 def read_data(source="https://goo.gl/PQXxZ8"):
     """
     Load the housing CSV and return its "Size" and "Price" columns.

     Args:
         source: Anything ``pd.read_csv`` accepts (URL, local path or
             file-like object). Defaults to the URL the script has
             always downloaded from, so existing calls are unaffected.

     Returns:
         A ``(size_data, price_data)`` tuple with the values of the
         "Size" column and the "Price" column, in file order.

     Raises:
         KeyError: if the CSV has no "Size" or "Price" column.
     """
     data = pd.read_csv(source)
     size_data = data["Size"].values
     price_data = data["Price"].values
     return size_data, price_data



 def split_test_train(size, price, test_size=0.33, random_state=None):
     """
     Split the size/price samples into a training and a testing subset.

     Args:
         size: Feature values (house sizes).
         price: Target values, aligned one-to-one with ``size``.
         test_size: Forwarded to ``train_test_split``. Defaults to 0.33,
             the value this function previously hard-coded.
         random_state: Forwarded to ``train_test_split``. ``None`` (the
             default) keeps the previous behaviour of a different
             shuffle on every run; pass an int for a reproducible split.

     Returns:
         The tuple ``(size_train, size_test, price_train, price_test)``.
     """
     size_train, size_test, price_train, price_test = train_test_split(
         size, price, test_size=test_size, random_state=random_state
     )
     return size_train, size_test, price_train, price_test
  
 def normalize(array):
     """
     Standardize a data set to zero mean and unit standard deviation.

     Args:
         array: An object that supports arithmetic and exposes ``mean()``
             and ``std()`` (e.g. a NumPy array).

     Returns:
         ``(array - mean) / std``. If every value is identical (std is 0)
         the centred values, which are all zero, are returned instead of
         the NaNs a 0/0 division would produce.
     """
     centered = array - array.mean()
     spread = array.std()
     # A constant data set has no spread; skip the division so the result
     # stays finite instead of turning into NaN.
     if spread == 0:
         return centered
     return centered / spread

 # read the data from the csv file.
 size, price = read_data()
 # split the data into testing and training set.
 X_train, X_test, Y_train, Y_test = split_test_train(size, price)
 # print the length of test and train dataset.
 print("Length of the training data: ", len(X_train))
 print("Length of the testing data: ", len(X_test))


 # Capture the training statistics before X_train / Y_train are overwritten.
 # The test set must be scaled with these same values: normalizing it with
 # its own mean/std would put train and test on different scales.
 size_mean, size_std = X_train.mean(), X_train.std()
 price_mean, price_std = Y_train.mean(), Y_train.std()

 # normalize the training data
 X_train = normalize(X_train)
 Y_train = normalize(Y_train)

 # normalize the testing data using the training mean and std
 X_test = (X_test - size_mean) / size_std
 Y_test = (Y_test - price_mean) / price_std

 # plot the normalized data
 plt.scatter(X_train, Y_train, label='Samples data')
 plt.draw()
	import pandas as pd
	# import library to split the data
	from sklearn.model_selection import train_test_split

	%matplotlib inline
	import matplotlib.pyplot as plt


	def read_data(source="https://goo.gl/PQXxZ8"):
	    """
	    Load the housing CSV and return its "Size" and "Price" columns.

	    Args:
	        source: Anything ``pd.read_csv`` accepts (URL, local path or
	            file-like object). Defaults to the URL the script has
	            always downloaded from, so existing calls are unaffected.

	    Returns:
	        A ``(size_data, price_data)`` tuple with the values of the
	        "Size" column and the "Price" column, in file order.

	    Raises:
	        KeyError: if the CSV has no "Size" or "Price" column.
	    """
	    data = pd.read_csv(source)
	    size_data = data["Size"].values
	    price_data = data["Price"].values
	    return size_data, price_data



	def split_test_train(size, price, test_size=0.33, random_state=None):
	    """
	    Split the size/price samples into a training and a testing subset.

	    Args:
	        size: Feature values (house sizes).
	        price: Target values, aligned one-to-one with ``size``.
	        test_size: Forwarded to ``train_test_split``. Defaults to 0.33,
	            the value this function previously hard-coded.
	        random_state: Forwarded to ``train_test_split``. ``None`` (the
	            default) keeps the previous behaviour of a different
	            shuffle on every run; pass an int for a reproducible split.

	    Returns:
	        The tuple ``(size_train, size_test, price_train, price_test)``.
	    """
	    size_train, size_test, price_train, price_test = train_test_split(
	        size, price, test_size=test_size, random_state=random_state
	    )
	    return size_train, size_test, price_train, price_test

	def normalize(array):
	    """
	    Standardize a data set to zero mean and unit standard deviation.

	    Args:
	        array: An object that supports arithmetic and exposes ``mean()``
	            and ``std()`` (e.g. a NumPy array).

	    Returns:
	        ``(array - mean) / std``. If every value is identical (std is 0)
	        the centred values, which are all zero, are returned instead of
	        the NaNs a 0/0 division would produce.
	    """
	    centered = array - array.mean()
	    spread = array.std()
	    # A constant data set has no spread; skip the division so the result
	    # stays finite instead of turning into NaN.
	    if spread == 0:
	        return centered
	    return centered / spread

	# read the data from the csv file.
	size, price = read_data()
	# split the data into testing and training set.
	X_train, X_test, Y_train, Y_test = split_test_train(size, price)
	# print the length of test and train dataset.
	print("Length of the training data: ", len(X_train))
	print("Length of the testing data: ", len(X_test))


	# Capture the training statistics before X_train / Y_train are overwritten.
	# The test set must be scaled with these same values: normalizing it with
	# its own mean/std would put train and test on different scales.
	size_mean, size_std = X_train.mean(), X_train.std()
	price_mean, price_std = Y_train.mean(), Y_train.std()

	# normalize the training data
	X_train = normalize(X_train)
	Y_train = normalize(Y_train)

	# normalize the testing data using the training mean and std
	X_test = (X_test - size_mean) / size_std
	Y_test = (Y_test - price_mean) / price_std

	# plot the normalized data
	plt.scatter(X_train, Y_train, label='Samples data')
	plt.draw()