Skip to content

Instantly share code, notes, and snippets.

View MariaLavrovskaya's full-sized avatar

Maria MariaLavrovskaya

  • London, United Kingdom
View GitHub Profile
# Missing-data report for df_1: the first print is the total number of
# null cells in the whole frame, the second is the null count per column.
print(df_1.isna().to_numpy().sum())
print(df_1.isna().sum())
# Move the "title" column into the row index (mutates df in place).
df.set_index("title", inplace=True)
# Keep all rows, restricted to the columns listed below.
df_1 = df.loc[
    :,
    [
        'imdb_rating',
        'genre',
        'runtime',
        'best_pic_nom',
        'top200_box',
        'director',
        'actor1',
    ],
]
# Heatmap of the pairwise Pearson correlation (the DataFrame.corr default)
# between the numeric columns of df.
plt.figure(figsize=(12,10))
# numeric_only=True limits the matrix to numeric columns. Since pandas 2.0,
# corr() no longer drops non-numeric columns silently and raises a ValueError
# if the frame contains text columns.
# NOTE(review): requires pandas >= 1.5, where this keyword was introduced.
cor = df.corr(numeric_only=True)
# annot=True writes each coefficient inside its cell.
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)
plt.show()
# Heatmap of the pairwise Pearson correlation (the DataFrame.corr default)
# between the numeric columns of df.
plt.figure(figsize=(12,10))
# numeric_only=True limits the matrix to numeric columns. Since pandas 2.0,
# corr() no longer drops non-numeric columns silently and raises a ValueError
# if the frame contains text columns.
# NOTE(review): requires pandas >= 1.5, where this keyword was introduced.
cor = df.corr(numeric_only=True)
# annot=True writes each coefficient inside its cell.
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)
plt.show()
#importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
#importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression