Created
February 25, 2019 20:10
-
-
Save ravishchawla/2c7137db660408eaf7c6646593c651e3 to your computer and use it in GitHub Desktop.
AirBnB post: Data exploration
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def df_stats(df):
    """Print the shape of ``df`` and the share of missing values per column.

    Only columns that contain at least one missing value are reported, as
    ``(column, fraction_missing)`` pairs ordered from the most to the least
    incomplete. A blank line is printed afterwards so that consecutive
    reports stay visually separated.

    Args:
        df: The pandas DataFrame to summarise.
    """
    print('Shape: ', df.shape)

    # The mean of the boolean null-mask is the fraction of missing rows.
    missing_share = df.isnull().mean()
    incomplete = missing_share[missing_share > 0]

    # sorted() is stable, so equally incomplete columns keep the frame's
    # original column order.
    ranked = sorted(incomplete.items(), key=lambda pair: pair[1], reverse=True)
    print('# Columns with any missing elements : ', ranked)
    print()
# Report shape and missing-value shares for each of the three data sets.
print('Listings: ')
df_stats(listings)

print('Reviews: ')
df_stats(reviews)

# NOTE: the variable is spelled `calandar` where it is defined (outside this
# section), so only the printed label is corrected here.
print('Calendar: ')
df_stats(calandar)
# Let's look at the distribution of the different values and at how the
# columns of Listings relate to each other pairwise.
sns.set(style='ticks')
sns.pairplot(listings)
# The price attribute looks very skewed, and it is important, so let's look
# at it in detail. The x-axis is switched to a log scale after plotting.
fig, ax = plt.subplots()
listings['price'].hist(ax=ax, bins=500)
ax.set_xscale('log')
# Let's look at the correlation between the attributes in Listings.
plt.figure(figsize=(14, 10))
# Restrict the frame to numeric/bool columns explicitly: since pandas 2.0
# DataFrame.corr() no longer drops non-numeric columns on its own and raises
# on text columns such as 'neighbourhood_group' or 'room_type'.
corrs = listings.select_dtypes(include=['number', 'bool']).corr()
sns.heatmap(corrs)
# Count how many listings fall into each category, shown side by side.
listings_cats = listings[['neighbourhood_group', 'neighbourhood', 'room_type']]

plt.figure(figsize=(12, 4))

# Left panel: listings per neighbourhood group (labels rotated to vertical).
plt.subplot(1, 2, 1)
g = sns.countplot(x='neighbourhood_group', data=listings_cats)
g.set_xticklabels(g.get_xticklabels(), rotation=90)

# Right panel: listings per room type (labels tilted by 45 degrees).
plt.subplot(1, 2, 2)
g = sns.countplot(x='room_type', data=listings_cats)
g.set_xticklabels(g.get_xticklabels(), rotation=45)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment