Last active
January 11, 2017 10:11
-
-
Save codeboy101/b9a437c52ab61fc665b49c61e5224811 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import pandas as pd | |
# Load the CSV (it has no header row) and label its two columns.
df = pd.read_csv('dataset.txt', header=None)
df.columns = ['size', 'price']

# Number of rows in the dataset.
length = df['size'].shape[0]

# Rescale every column by dividing it by its own maximum value.
for column in df.columns:
    column_max = max(df[column])
    df[column] = df[column].div(column_max)
def linear_regression(theta_0, theta_1, dataset):
    """Return the mean squared error of the line ``theta_0 + theta_1 * size``.

    Args:
        theta_0: Intercept of the candidate line.
        theta_1: Slope of the candidate line.
        dataset: Table exposing ``'size'`` and ``'price'`` columns by key
            (for example a pandas DataFrame).

    Returns:
        The mean of the squared differences between the predicted and the
        actual prices over every row of ``dataset``.

    Raises:
        ValueError: If ``dataset`` contains no rows.
    """
    # Read from the ``dataset`` argument (not a module-level frame) so the
    # cost is computed for the data the caller actually passed in.
    sizes = np.asarray(dataset['size'], dtype=float)
    prices = np.asarray(dataset['price'], dtype=float)
    if sizes.size == 0:
        raise ValueError('dataset must contain at least one row')

    residuals = (theta_0 + theta_1 * sizes) - prices
    return (residuals ** 2).mean()
def gradient_descent(theta_0, theta_1, alpha, dataset=None):
    """Perform a single batch gradient-descent step on the line parameters.

    Args:
        theta_0: Current intercept.
        theta_1: Current slope.
        alpha: Learning rate that scales the step.
        dataset: Table exposing ``'size'`` and ``'price'`` columns by key.
            When omitted, the module-level ``df`` is used, which keeps
            existing three-argument calls working unchanged.

    Returns:
        A two-element list ``[new_theta_0, new_theta_1]``.

    Raises:
        ValueError: If the dataset contains no rows.
    """
    if dataset is None:
        dataset = df

    sizes = np.asarray(dataset['size'], dtype=float)
    prices = np.asarray(dataset['price'], dtype=float)
    if sizes.size == 0:
        raise ValueError('dataset must contain at least one row')

    # Prediction error for every row at the current parameters.
    residuals = (theta_0 + theta_1 * sizes) - prices

    # ``mean()`` always divides in floating point, so the step cannot be
    # truncated to zero the way an integer ``1/length`` factor could be.
    step_0 = residuals.mean()
    step_1 = (residuals * sizes).mean()

    return [theta_0 - alpha * step_0, theta_1 - alpha * step_1]
def gradient_descent_runner(theta_0, theta_1, alpha, num_iterations):
    """Apply ``gradient_descent`` repeatedly, starting from the given parameters.

    Args:
        theta_0: Initial intercept.
        theta_1: Initial slope.
        alpha: Learning rate forwarded to every ``gradient_descent`` call.
        num_iterations: Number of steps to take; with zero iterations the
            initial parameters are returned unchanged.

    Returns:
        A two-element list ``[theta_0, theta_1]`` holding the parameters
        after the last step.
    """
    current_theta_0, current_theta_1 = theta_0, theta_1
    for _ in range(num_iterations):
        # Feed the latest estimates back into the next step. Passing the
        # initial values every time would make all iterations identical,
        # so the result would never move past a single step.
        current_theta_0, current_theta_1 = gradient_descent(
            current_theta_0, current_theta_1, alpha
        )
    return [current_theta_0, current_theta_1]
# Report the cost of the all-zero starting line.
print('cost before gradient descent = {}'.format(linear_regression(0, 0, df)))

# Starting parameters and hyperparameters for the optimisation run.
init_theta_0 = 0
init_theta_1 = 0
alpha = 0.0001
num_iterations = 1000

# Seed the slope with init_theta_1; passing init_theta_0 for both arguments
# would silently ignore any non-zero initial slope.
theta_0, theta_1 = gradient_descent_runner(init_theta_0, init_theta_1, alpha, num_iterations)
print('cost after gradient descent = {}'.format(linear_regression(theta_0, theta_1, df)))

# Compare the fitted line with the actual value for the row at position 7.
predicted_price = (df.iloc[7]['size'] * theta_1) + theta_0
real_price = df.iloc[7]['price']
print('real price = {} ; predicted price = {}'.format(real_price, predicted_price))
print('difference in prediciton = {}'.format(predicted_price - real_price))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment