This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data preparation: remove the columns that are not used below.
# NOTE(review): `data` is not defined in this fragment; it must be loaded by an earlier cell.
data_1 = data.drop(
    columns=[
        'id', 'name', 'host_id', 'host_name', 'neighbourhood', 'latitude',
        'last_review', 'longitude', 'room_type',
    ],
)
# Discard every row that still contains at least one missing value.
data_1.dropna(how='any', inplace=True)
data_1.head()  # preview of the first rows
# Label encoding of the labels
from sklearn.preprocessing import LabelEncoder
area_encoder = LabelEncoder()
data_y = data_1['neighbourhood_group']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state makes the split repeatable.
# NOTE(review): X_2 and y are not defined in this fragment; they must come from other cells.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_2, y, test_size=0.25, random_state=0
)

# Create linear regression object.
# Do not use fit_intercept=False if you have removed 1 column after dummy encoding.
regr = linear_model.LinearRegression(fit_intercept=True)

# Train the model using the training set, then predict on the held-out rows.
regr.fit(X_train, Y_train)
y_pred = regr.predict(X_test)
# Checking between observed and predicted data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Resulting feature matrix with all of the independent variables:
# scaled_columns and X_train_ohe are joined side by side (column-wise).
X_2 = np.concatenate((scaled_columns, X_train_ohe), axis=1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Treating continuous variables with StandardScaler
# NOTE(review): 'runtime' is not among the columns selected into df_1 where df_1 is
# built from `data`; confirm the intended column name before running this cell.
columns_to_scale = np.array(df_1['runtime'])
# Initiate scaler
scaler = StandardScaler()
# fit_transform is given a 2-D array, so the 1-D vector is reshaped into one column.
scaled_columns = scaler.fit_transform(columns_to_scale.reshape(-1, 1))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# From labels to dummy variables
from sklearn.preprocessing import OneHotEncoder

# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and removed
# in 1.4. Try the current keyword first and fall back for older installations so a
# dense array is returned either way.
try:
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(sparse=False)
# NOTE(review): X_train_le is not defined in this fragment; it must come from another cell.
X_train_ohe = ohe.fit_transform(X_train_le)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder

# Module-level LabelEncoder instance.
le = preprocessing.LabelEncoder()
# LabelEncoder for a number of columns
class MultiColumnLabelEncoder: | |
def __init__(self, columns = None): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split into two parts: the independent variables used for prediction
# and the dependent variable that is predicted.
X = df_1.drop(columns=['price', 'reviews_per_month'])  # feature matrix
y = df_1["price"]  # dependent variable
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Missing values before cleaning: the grand total first, then the count per column.
print(df_1.isnull().values.sum())
print(df_1.isnull().sum())
# Drop every row of the dataset that has at least one missing value.
df_1.dropna(how='any', inplace=True)
# Check for missing values again after dropna().
print(df_1.isnull().values.sum())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep only the columns used in the analysis. The explicit copy makes df_1 an
# independent frame, since it is later modified in place (dropna with inplace=True).
df_1 = data.loc[:, ['neighbourhood_group', 'neighbourhood', 'room_type', 'price',
                    'minimum_nights', 'number_of_reviews', 'reviews_per_month']].copy()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Observed frequencies: the first five rows and first four columns of the
# contingency table, taken with one 2-D positional slice instead of row by row.
f_obs = contingency_table.iloc[:5, :4].values
from scipy import stats
# Keep the first three results of the test: statistic, p-value and degrees of freedom.
stats.chi2_contingency(f_obs)[0:3]
# Therefore we reject the null hypothesis and accept the alternative hypothesis,
# which states that there is a direct relationship between the location and the
# type of property offered for rent on the AIRBNB site.