Skip to content

Instantly share code, notes, and snippets.

@hackintoshrao
Last active May 6, 2018 02:21
Show Gist options
  • Save hackintoshrao/c11102268ecec9ad74d9c6069e7f5eac to your computer and use it in GitHub Desktop.
Save hackintoshrao/c11102268ecec9ad74d9c6069e7f5eac to your computer and use it in GitHub Desktop.
tf_housing_load_plot.py
import pandas as pd
# import library to split the data
from sklearn.model_selection import train_test_split
%matplotlib inline
import matplotlib.pyplot as plt
def read_data(source="https://goo.gl/PQXxZ8"):
    """
    Read the housing CSV data and return its size and price columns.

    Args:
        source: Where to read the CSV from. It is handed straight to
            ``pandas.read_csv``, so a URL, a local path or a file-like
            object all work. Defaults to the originally hard-coded URL.

    Returns:
        A ``(size_data, price_data)`` tuple holding the ``.values`` of the
        "Size" and "Price" columns respectively.

    Raises:
        KeyError: if the CSV has no "Size" or "Price" column.
    """
    data = pd.read_csv(source)
    size_data = data["Size"].values
    price_data = data["Price"].values
    return size_data, price_data
def split_test_train(size, price, test_size=0.33, random_state=None):
    """
    Split the size/price samples into training and testing subsets.

    Args:
        size: Feature values (house sizes).
        price: Target values (house prices), aligned with ``size``.
        test_size: Portion of the samples reserved for testing; forwarded
            to ``train_test_split``. Defaults to 0.33 (33%).
        random_state: Seed forwarded to ``train_test_split``. Leave as
            ``None`` for a different shuffle on each call, or pass an int
            to make the split reproducible.

    Returns:
        A ``(size_train, size_test, price_train, price_test)`` tuple.
    """
    size_train, size_test, price_train, price_test = train_test_split(
        size, price, test_size=test_size, random_state=random_state
    )
    return size_train, size_test, price_train, price_test
# Normalize a data set
def normalize(array):
    """
    Standardize ``array`` to zero mean and unit standard deviation.

    Args:
        array: An object exposing ``mean()`` and ``std()`` and supporting
            element-wise subtraction and division (e.g. a NumPy array).

    Returns:
        ``(array - array.mean()) / array.std()``. When the standard
        deviation is zero (every value identical) the centred values are
        returned as-is, avoiding a division by zero that would fill the
        result with NaN.
    """
    centered = array - array.mean()
    spread = array.std()
    # A constant data set has no spread to scale by; its centred values are
    # already all zero, so return them rather than dividing by zero.
    if spread == 0:
        return centered
    return centered / spread
# read the data from the csv file.
size, price = read_data()
# split the data into testing and training set.
X_train, X_test, Y_train, Y_test = split_test_train(size, price)
# print the length of test and train dataset.
print("Length of the training data: ", len(X_train))
print("Length of the testing data: ", len(X_test))
# Capture the training statistics before X_train / Y_train are overwritten:
# the test set must be scaled with the SAME mean and std as the training
# set, otherwise the two sets end up on different scales.
train_x_mean, train_x_std = X_train.mean(), X_train.std()
train_y_mean, train_y_std = Y_train.mean(), Y_train.std()
# normalize the training data
X_train = normalize(X_train)
Y_train = normalize(Y_train)
# normalize the testing data using the training statistics
X_test = (X_test - train_x_mean) / train_x_std
Y_test = (Y_test - train_y_mean) / train_y_std
# plot the normalized data
plt.scatter(X_train, Y_train, label='Samples data')
# without a legend the label passed to scatter() is never displayed
plt.legend()
plt.draw()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment