MariaLavrovskaya’s gists

MariaLavrovskaya / svm_1.py

Created October 28, 2019 15:11

svm_1


	# Data preparation: remove the columns listed below, then discard every row
	# that still contains at least one missing value.
	data_1 = data.drop(
		columns=['id', 'name', 'host_id', 'host_name', 'neighbourhood', 'latitude',
			'last_review', 'longitude', 'room_type'],
	).dropna(how='any')
	data_1.head()

	# Label encoding of the labels: create the encoder and pick the label column.
	from sklearn.preprocessing import LabelEncoder
	area_encoder = LabelEncoder()
	data_y = data_1['neighbourhood_group']

MariaLavrovskaya / airbnb_21.py

Created October 15, 2019 06:57

airbnb_21

	# Fit an ordinary least-squares model on a train/test split of (X_2, y).
	from sklearn import linear_model
	from sklearn.model_selection import train_test_split
	# Hold out 25% of the rows for testing; random_state=0 makes the split reproducible.
	X_train, X_test, Y_train, Y_test = train_test_split(X_2, y,test_size = 0.25, random_state = 0)
	# Create linear regression object
	regr = linear_model.LinearRegression(fit_intercept=True) # Keep the intercept: do not set fit_intercept=False if one column was removed after dummy encoding
	# Train the model using the training sets
	regr.fit(X_train, Y_train)
	# Predict the target for the held-out rows
	y_pred = regr.predict(X_test)

	# Checking between observed and predicted data

MariaLavrovskaya / airbnb_20.py

Created October 15, 2019 06:56

airbnb_20

	# Resulting feature matrix with all independent variables: the scaled columns
	# and the one-hot encoded columns are joined side by side (axis=1), so both
	# inputs must have the same number of rows.
	X_2 = np.concatenate((scaled_columns,X_train_ohe),axis=1)

MariaLavrovskaya / airbnb_18.py

Created October 15, 2019 06:55

airbnb_18

	# Treating continuous variables with StandardScaler

	# NOTE(review): the df_1 built in the airbnb_11 snippet selects no 'runtime'
	# column -- confirm which column is meant to be scaled here.
	columns_to_scale = np.array(df_1['runtime'])
	# Initiate scaler:
	scaler = StandardScaler()
	# [:, np.newaxis] turns the 1-D array into a single-column 2-D array before fitting.
	scaled_columns = scaler.fit_transform(columns_to_scale[:, np.newaxis])

MariaLavrovskaya / airbnb_17.py

Created October 15, 2019 06:54

airbnb_17

	# From labels to dummy (one-hot) columns, returned as a dense array.
	from sklearn.preprocessing import OneHotEncoder
	# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
	# removed the old name, so try the new keyword first and fall back to the
	# old one on releases that do not know it (unknown keywords raise TypeError).
	try:
		ohe = OneHotEncoder(sparse_output=False)
	except TypeError:
		ohe = OneHotEncoder(sparse=False)
	X_train_ohe = ohe.fit_transform(X_train_le)

MariaLavrovskaya / airbnb_16.py

Created October 15, 2019 06:52

airbnb_16

	from sklearn.preprocessing import StandardScaler
	from sklearn import preprocessing
	from sklearn.preprocessing import LabelEncoder
	# Module-level LabelEncoder instance, created through the `preprocessing` namespace.
	le = preprocessing.LabelEncoder()


	# LabelEncoder for a number of columns
	class MultiColumnLabelEncoder:

	def __init__(self, columns = None):

MariaLavrovskaya / airbnb_14.py

Created October 15, 2019 06:49

airbnb_14

	# Split df_1 into two parts: the dependent variable that is predicted and the
	# independent variables used to predict it.
	y = df_1["price"]  # dependent variable
	X = df_1.drop(columns=['price', 'reviews_per_month'])  # feature matrix

MariaLavrovskaya / airbnb_12.py

Created October 15, 2019 06:47

airbnb_12

	# Report missing values: the grand total first, then the per-column counts.
	print(df_1.isna().values.sum())
	print(df_1.isna().sum())

	# Drop, in place, every row that has at least one missing value.
	df_1.dropna(axis=0, how='any', inplace=True)
	print(df_1.isna().values.sum())  # re-count the missing values after dropna()

MariaLavrovskaya / airbnb_11.py

Created October 15, 2019 06:44

airbnb_11

	# Build df_1 from the seven listed columns of `data`, keeping all rows.
	df_1 = data.loc[:, ['neighbourhood_group', 'neighbourhood','room_type', 'price', 'minimum_nights',
	'number_of_reviews', 'reviews_per_month']]

MariaLavrovskaya / airbnb_12.py

Created October 13, 2019 12:35

airbnb_12

	from scipy import stats
	# Observed frequencies: the first four values of each of the first five rows
	# of the contingency table, stacked into a 5x4 array.
	# NOTE(review): confirm that columns 0:4 hold only category counts and no totals column.
	f_obs = np.array([contingency_table.iloc[row][0:4].values for row in range(5)])
	# Keep the first three items of the result: statistic, p-value, degrees of freedom.
	stats.chi2_contingency(f_obs)[0:3]
	### In view of this, we reject the null hypothesis and accept the alternative hypothesis, which states that
	### there is a direct relationship between the location and the type of property listed on the AIRBNB website.

Maria MariaLavrovskaya