This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#importing the libraries | |
import pandas as pd | |
import numpy as np | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import statsmodels.api as sm | |
%matplotlib inline | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LinearRegression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#importing the libraries | |
import pandas as pd | |
import numpy as np | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import statsmodels.api as sm | |
%matplotlib inline | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LinearRegression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Using Pearson Correlation
# Correlate only numeric/boolean columns. pandas >= 2.0 no longer drops
# non-numeric columns silently in DataFrame.corr() and raises instead, so the
# selection is made explicit here (df appears to also hold text columns such
# as genre/director -- confirm against the loading code).
plt.figure(figsize=(12,10))
cor = df.select_dtypes(include=["number", "bool"]).corr()
# Annotated heatmap of the pairwise Pearson coefficients.
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Using Pearson Correlation
# Correlate only numeric/boolean columns. pandas >= 2.0 no longer drops
# non-numeric columns silently in DataFrame.corr() and raises instead, so the
# selection is made explicit here (df appears to also hold text columns such
# as genre/director -- confirm against the loading code).
plt.figure(figsize=(12,10))
cor = df.select_dtypes(include=["number", "bool"]).corr()
# Annotated heatmap of the pairwise Pearson coefficients.
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use the "title" column as the row index. This mutates df in place.
df.set_index("title", inplace=True) #setting the index name
# Keep only the columns used in the rest of the analysis. The explicit .copy()
# makes df_1 an independent frame, so the in-place dropna() applied to it
# later does not trigger SettingWithCopyWarning or depend on view/copy rules.
df_1 = df.loc[:, ['imdb_rating', 'genre', 'runtime', 'best_pic_nom',
                  'top200_box', 'director', 'actor1']].copy()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report missing data in df_1: first the total number of null cells,
# then the null count of each individual column.
null_mask = df_1.isnull()
print(null_mask.values.sum())
print(null_mask.sum())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove every row that has at least one missing value, modifying df_1 itself.
df_1.dropna(axis=0, how='any', inplace=True)
# Print the total null count again to check the result of dropna().
remaining_nulls = df_1.isnull().values.sum()
print(remaining_nulls)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Treating categorical variables: label-encode them first (one-hot encoding is applied afterwards)
from sklearn import preprocessing | |
le = preprocessing.LabelEncoder() | |
# LabelEncoder for a number of columns | |
class MultiColumnLabelEncoder: | |
def __init__(self, columns = None): | |
self.columns = columns # list of column to encode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#From labels to dummy
from sklearn.preprocessing import OneHotEncoder

# Request a dense ndarray instead of a SciPy sparse matrix. The keyword was
# renamed from `sparse` to `sparse_output` in scikit-learn 1.2 and the old
# spelling was removed in 1.4, so try the new name and fall back to the old.
try:
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:  # scikit-learn < 1.2 does not know `sparse_output`
    ohe = OneHotEncoder(sparse=False)
# Learn the categories of X_train_le and expand each into indicator columns.
X_train_ohe = ohe.fit_transform(X_train_le)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Treating continuous variables with StandardScaler
# StandardScaler is not imported by the import cell at the top, so bring it
# into scope here to avoid a NameError when this cell runs.
from sklearn.preprocessing import StandardScaler

# 1-D array holding the values of the 'runtime' column.
columns_to_scale = np.array(df_1['runtime'])
#Initiate Scaler:
scaler = StandardScaler()
# fit_transform expects 2-D input (n_samples, n_features): reshape the 1-D
# array into a single feature column before fitting and scaling.
scaled_columns = scaler.fit_transform(columns_to_scale.reshape(-1, 1))
OlderNewer