# Snippets from GitHub Gists by Aniruddha Bhandari (aniruddha27).
# Ordinal features: encode quality labels as integer ranks (higher = better).
# A missing value means the house lacks the feature, so it is ranked 0.
#
# The result is assigned back to the column instead of calling
# `train[col].replace(..., inplace=True)`: with pandas Copy-on-Write the
# chained inplace call only modifies a temporary Series and `train` stays
# unchanged. `infer_objects()` turns the resulting object column into a
# numeric one, since `replace` no longer downcasts silently.

# NA means no pool
train['PoolQC'] = train['PoolQC'].replace(
    {'Ex': 4, 'Gd': 3, 'TA': 2, 'Fa': 1, np.nan: 0}
).infer_objects()
# NA means no fence
train['Fence'] = train['Fence'].replace(
    {'GdPrv': 4, 'MnPrv': 3, 'GdWo': 2, 'MnWw': 1, np.nan: 0}
).infer_objects()
# NA means no fireplace
train['FireplaceQu'] = train['FireplaceQu'].replace(
    {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, np.nan: 0}
).infer_objects()
# Garage features. Results are assigned back to the column because a chained
# `train[col].method(..., inplace=True)` acts on a temporary Series under
# pandas Copy-on-Write and would leave `train` untouched.

# Ordinal features: quality rank 5 (Ex) .. 1 (Po); a missing value is ranked 0.
for col in ['GarageCond', 'GarageQual']:
    train[col] = train[col].replace(
        {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, np.nan: 0}
    ).infer_objects()
# Nominal features: missing values become their own 'None' category.
for col in ['GarageFinish', 'GarageType']:
    train[col] = train[col].fillna('None')
# Numerical features
# NOTE(review): the original loop had no body. Filling with 0 is assumed
# here (a missing value taken to mean "no garage") - confirm.
for col in ['GarageYrBlt', 'GarageCars', 'GarageArea']:
    train[col] = train[col].fillna(0)
# Basement features. Results are assigned back to the column because a
# chained `train[col].replace(..., inplace=True)` acts on a temporary Series
# under pandas Copy-on-Write and would leave `train` untouched.

# Ordinal features: quality rank 5 (Ex) .. 1 (Po); a missing value is ranked 0.
for col in ['BsmtCond', 'BsmtQual']:
    train[col] = train[col].replace(
        {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, np.nan: 0}
    ).infer_objects()
# Exposure rank 4 (Gd) .. 1 (No); a missing value is ranked 0.
train['BsmtExposure'] = train['BsmtExposure'].replace(
    {'Gd': 4, 'Av': 3, 'Mn': 2, 'No': 1, np.nan: 0}
).infer_objects()
# Finish-type rank 6 (GLQ) .. 1 (Unf); a missing value is ranked 0.
for col in ['BsmtFinType1', 'BsmtFinType2']:
    train[col] = train[col].replace(
        {'GLQ': 6, 'ALQ': 5, 'BLQ': 4, 'Rec': 3, 'LwQ': 2, 'Unf': 1, np.nan: 0}
    ).infer_objects()
# Numerical features
# NOTE(review): no code follows this heading in the original. The numeric
# basement columns used later (TotalBsmtSF, BsmtFullBath, BsmtHalfBath) are
# not imputed here - confirm they are handled elsewhere.
# Missing-value imputation. Each result is assigned back to the column
# because a chained `train[col].fillna(..., inplace=True)` acts on a
# temporary Series under pandas Copy-on-Write and would leave `train`
# untouched.

# NA means no masonry work
train['MasVnrType'] = train['MasVnrType'].fillna('None')
# If there is no masonry work, then the area is 0
train['MasVnrArea'] = train['MasVnrArea'].fillna(0)
# Replace missing values with the most common value of the column
for col in ['MSZoning', 'Utilities']:
    train[col] = train[col].fillna(train[col].mode()[0])
# Results are assigned back to the column because a chained
# `train[col].replace(..., inplace=True)` acts on a temporary Series under
# pandas Copy-on-Write and would leave `train` untouched. `infer_objects()`
# converts the replaced object column to a numeric dtype.

# Binary feature: N -> 0, Y -> 1
train['CentralAir'] = train['CentralAir'].replace({'N': 0, 'Y': 1}).infer_objects()
# Ordinal features: quality rank 4 (Ex) .. 0 (Po). No missing-value code is
# mapped for these columns, so the scale starts at 0 for 'Po'.
for col in ['HeatingQC', 'ExterCond', 'ExterQual']:
    train[col] = train[col].replace(
        {'Ex': 4, 'Gd': 3, 'TA': 2, 'Fa': 1, 'Po': 0}
    ).infer_objects()
# Binary presence flags: 1 when the source column is greater than 0, else 0.
# The comparison is vectorized over the whole column rather than applying a
# Python lambda to every element; missing values compare False and map to 0,
# exactly as they did with the lambda.
# 'FireplaceQu' and 'Fence' must already hold their numeric ranks here.
# The 'HasMsonary' spelling is kept as-is because it is a column name that
# other code may look up.
presence_flags = {
    'HasPool': 'PoolArea',
    'HasFirePlace': 'FireplaceQu',
    'HasFence': 'Fence',
    'HasMsonary': 'MasVnrArea',
    'HasGarage': 'GarageArea',
    'HasBsmt': 'TotalBsmtSF',
}
for flag_col, source_col in presence_flags.items():
    train[flag_col] = (train[source_col] > 0).astype(int)
# Aggregate features, computed with column arithmetic instead of a row-wise
# `DataFrame.apply(axis=1)`, which builds a Python-level Series per row.

# Total surface area of house: both floors plus the basement
train['TotalSF'] = train['1stFlrSF'] + train['2ndFlrSF'] + train['TotalBsmtSF']
# Total number of bathrooms in the house; a half bath counts as 0.5
train['TotalBath'] = (
    train['FullBath']
    + 0.5 * train['HalfBath']
    + train['BsmtFullBath']
    + 0.5 * train['BsmtHalfBath']
)
# One-hot encode the remaining categorical columns; drop_first=True drops
# the first level of each encoded column.
train = pd.get_dummies(data=train, drop_first=True)
train.head()
# Split by row position: the first `ntrain` rows are the train dataset ...
df = train.iloc[:ntrain]
# ... and the rows from position `ntrain` onwards are the test dataset.
test = train.iloc[ntrain:]