Maria MariaLavrovskaya

MariaLavrovskaya / airbnb_2.py

Created October 13, 2019 11:45

airbnb_2


	# Correlation matrix of the numeric columns, shown as a colour-coded table.
	# The correlation itself is computed by pandas (DataFrame.corr); matplotlib is
	# only needed by Styler.background_gradient for the 'PuOr' colormap.
	# Numeric/bool columns are selected explicitly because pandas 2.0+ no longer
	# drops non-numeric columns silently in corr() and raises on them instead.
	data.select_dtypes(include=['number', 'bool']).corr().style.background_gradient(cmap='PuOr')

MariaLavrovskaya / airbnb_1.py

Last active October 13, 2019 11:42

airbnb_1

	import pandas as pd
	# numpy.median is the function SciPy used to re-export as scipy.median; that
	# top-level alias was deprecated and is not available in recent SciPy releases.
	from numpy import median
	from scipy.stats import mode
	# Load the listings file (expected in the current working directory)
	data=pd.read_csv('AB_NYC_2019.csv')
	# Quick look at the first rows and the transposed summary statistics
	# (these bare expressions only display output in a notebook/REPL)
	data.head()
	data.describe().T

	# Preparing one of the columns for the median calculation.
	# dropna() returns a new DataFrame instead of modifying in place, so the
	# result has to be assigned for the missing values to actually be removed.
	reviews = data.loc[:, ['number_of_reviews']].dropna()

MariaLavrovskaya / MLR_movies_11.py

Last active June 19, 2019 11:38

	# Side-by-side table of the actual and the predicted target values
	df = pd.DataFrame({'Actual': Y_test.values.flatten(), 'Predicted': y_pred.flatten()})
	df.head(25)

	# sklearn.metrics has no median_squared_error (importing it raises
	# ImportError; the library only offers median_absolute_error), so the median
	# of the squared residuals is computed directly from the table built above.
	from sklearn.metrics import mean_squared_error, r2_score
	print("Mean squared error: %.2f" % mean_squared_error(Y_test, y_pred))
	squared_errors = (df['Actual'] - df['Predicted']) ** 2
	print("Median squared error: %.2f" % squared_errors.median())

MariaLavrovskaya / MLR_movies_11.py

Created June 14, 2019 12:07

	# Put the observed targets next to the model's predictions for inspection
	actual_values = Y_test.values.flatten()
	predicted_values = y_pred.flatten()
	df = pd.DataFrame({'Actual': actual_values, 'Predicted': predicted_values})
	df.head(25)

	# Report the mean squared error of the predictions on the test split
	from sklearn.metrics import mean_squared_error, r2_score
	mse = mean_squared_error(Y_test, y_pred)
	print("Mean squared error: %.2f" % mse)

MariaLavrovskaya / MLR_movies_10.py

Created June 14, 2019 12:06

	from sklearn import linear_model
	from sklearn.model_selection import train_test_split

	# Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible
	X_train, X_test, Y_train, Y_test = train_test_split(
		X_2, y, test_size=0.25, random_state=0
	)
	# Linear regression without an intercept term.
	# Do not keep fit_intercept=False if one column was removed after dummy encoding.
	regr = linear_model.LinearRegression(fit_intercept=False)
	# fit() returns the estimator itself, so training and prediction can be chained
	y_pred = regr.fit(X_train, Y_train).predict(X_test)

MariaLavrovskaya / MLR_movies_9.py

Created June 14, 2019 12:05

	# Final feature matrix with all independent variables: the scaled column(s)
	# come first, followed by the one-hot encoded columns, joined column-wise
	feature_blocks = (scaled_columns, X_train_ohe)
	X_2 = np.concatenate(feature_blocks, axis=1)

MariaLavrovskaya / MLR_movies_8.py

Created June 14, 2019 12:03

	# Standardising the continuous variable with StandardScaler

	columns_to_scale = np.array(df_1['runtime'])
	# Create the scaler:
	scaler = StandardScaler()
	# The scaler is fed a 2-D array, so the values are reshaped into a single column
	scaled_columns = scaler.fit_transform(columns_to_scale.reshape(-1, 1))

MariaLavrovskaya / MLR_movies_7.py

Created June 14, 2019 11:59

	# From integer labels to dummy (one-hot) columns
	from sklearn.preprocessing import OneHotEncoder
	# Dense output is requested in a version-tolerant way: scikit-learn 1.2 renamed
	# the `sparse` keyword to `sparse_output`, and the old name was later removed.
	try:
		ohe = OneHotEncoder(sparse_output=False)
	except TypeError:
		# Older scikit-learn releases only know the original keyword
		ohe = OneHotEncoder(sparse=False)
	X_train_ohe = ohe.fit_transform(X_train_le)

MariaLavrovskaya / MLR_movies_6.py

Created June 14, 2019 11:57

	# Treating categorical variables with one-hot encoding.
	# A LabelEncoder instance is created here; presumably the label-encoded
	# columns are turned into dummy columns in a later step -- TODO confirm.
	from sklearn import preprocessing
	le = preprocessing.LabelEncoder()


	# LabelEncoder for a number of columns
	class MultiColumnLabelEncoder:

	def __init__(self, columns = None):
	self.columns = columns # list of column to encode

MariaLavrovskaya / MLR_movies_5.py

Created June 14, 2019 11:54

	# Discard every row that has at least one missing entry (df_1 is modified in place)
	df_1.dropna(axis=0, how='any', inplace=True)
	# Sanity check: number of missing cells still present after the drop
	remaining_missing = df_1.isna().values.sum()
	print(remaining_missing)