Skip to content

Instantly share code, notes, and snippets.

View aniruddha27's full-sized avatar

Aniruddha Bhandari aniruddha27

View GitHub Profile
# --- Distribution of the target column (SalePrice) ---------------------------
import seaborn as sns

# Summary statistics of the raw target values.
train['SalePrice'].describe()

# Histogram with a KDE overlay of the raw target values.
# `sns.distplot` is deprecated (seaborn >= 0.11); `histplot` with `kde=True`
# and `stat='density'` is the documented replacement for the same figure.
sns.histplot(train['SalePrice'], kde=True, stat='density')
plt.xticks(rotation=30);
print('Skewness = ',train['SalePrice'].skew())

# Natural-log transform of the target, then the same skewness check and plot
# on the transformed values for comparison.
target = np.log(train['SalePrice'])
print('Skewness = ',target.skew())
sns.histplot(target, kde=True, stat='density');
# Pairwise correlation matrix of the numeric columns of `train`.
# The frame is restricted to numeric/bool columns explicitly: pandas >= 2.0 no
# longer drops non-numeric columns silently in DataFrame.corr() and raises if
# `train` contains any column that cannot be converted to float.
corr = train.select_dtypes(include=['number', 'bool']).corr()

# The ten columns with the highest correlation to SalePrice (SalePrice itself
# is included, since it is a column of the matrix).
corr['SalePrice'].sort_values(ascending=False).head(10)

# Mean SalePrice for each distinct OverallQual value.
# The string alias 'mean' is used instead of `np.mean`, which newer pandas
# versions flag with a FutureWarning when passed as `aggfunc`.
table = pd.pivot_table(train,index='OverallQual',values='SalePrice',aggfunc='mean')
table
# Scatter plot of the GrLivArea column (x axis) against SalePrice (y axis).
plt.scatter(train['GrLivArea'], train['SalePrice'])
plt.xlabel('GrLivArea')
plt.ylabel('Sale Price')
plt.show();
# Drop outliers: keep only the rows whose GrLivArea is below 4500.
train = train[train['GrLivArea']<4500]

# Number of training rows remaining after the outlier filter.
ntrain = train.shape[0]

# Save the natural-log transform of the target; this must happen before the
# SalePrice column is removed below.
target = np.log(train['SalePrice'])

# Drop Id and SalePrice from the train dataframe.
# The result is rebound instead of using `inplace=True`: `train` is the result
# of a boolean filter, and mutating such a frame in place can make pandas emit
# SettingWithCopyWarning.
train = train.drop(['Id','SalePrice'], axis=1)
# store test Id
# NOTE(review): no statement follows the comment above in this excerpt, so the
# test Id is not actually stored here - confirm it is handled elsewhere.

# Missing values: count the nulls in each column of `train` and show the 20
# columns with the highest counts, largest first.
(
    train.isnull()
    .sum()
    .sort_values(ascending=False)
    .iloc[:20]
)