Skip to content

Instantly share code, notes, and snippets.

@gfredtech
Created November 19, 2019 16:50
Show Gist options
  • Save gfredtech/918fadc8320ab9809ebfce624b140863 to your computer and use it in GitHub Desktop.
Save gfredtech/918fadc8320ab9809ebfce624b140863 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
def create_lookup_tables(values):
    """
    Create forward and reverse lookup tables for a collection of values.

    Duplicates are collapsed and the distinct values are sorted before
    integer codes are assigned, so the codes are deterministic regardless
    of the order of the input.

    :param values: Iterable of hashable, mutually comparable values
        (duplicates allowed)
    :return: A tuple of dicts (value_to_int, int_to_value)
    """
    unique_vals = sorted(set(values))
    # Position in the sorted order is the integer code of each value.
    int_to_ft = dict(enumerate(unique_vals))
    ft_to_int = {val: idx for idx, val in int_to_ft.items()}
    return ft_to_int, int_to_ft
if __name__ == '__main__':
    # Script entry point: load the listings CSV, integer-encode the two
    # categorical columns, fit a linear regression on the price column and
    # print a prediction for one hand-written sample.
    data = pd.read_csv('data - data.csv')

    # Map each distinct area / region value to an integer code.
    area_to_int, int_to_area = create_lookup_tables(data['area'].to_list())
    region_to_int, int_to_region = create_lookup_tables(data['region'].to_list())
    print(area_to_int)

    # Build the feature matrix straight from the columns so that its size
    # always follows the number of rows in the CSV (no hard-coded row count)
    # and does not depend on the DataFrame's index labels.
    # Column order: area_size, num_rooms, num_bathrooms, area code, region code.
    main_data = np.column_stack((
        data['area_size'].to_numpy(dtype=float),
        data['num_rooms'].to_numpy(dtype=float),
        data['num_bathrooms'].to_numpy(dtype=float),
        data['area'].map(area_to_int).to_numpy(dtype=float),
        data['region'].map(region_to_int).to_numpy(dtype=float),
    ))
    main_target = data['priceGHS'].to_numpy(dtype=float)

    # Fixed random_state keeps the train/test partition reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        main_data, main_target, random_state=0
    )

    model = LinearRegression()
    model.fit(X_train, y_train)
    # NOTE: this score is computed on the training split; X_test / y_test
    # are produced by the split above but are not evaluated here.
    print(model.score(X_train, y_train))

    # Prediction for a single sample, using the same column order as
    # main_data; area code 0 and region code 0 are the first entries of the
    # sorted lookup tables.
    x = np.array([304593, 5, 2, 0, 0])
    y_pred = model.predict(x.reshape(1, -1))
    print(y_pred)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment