ravishchawla · February 25, 2019 20:10
diff --git a/airbnb_post_2.py b/airbnb_post_2.py
 def df_stats(df):
     """Print the shape of ``df`` and its per-column share of missing values.

     Only columns holding at least one missing value are reported, as
     ``(column, fraction_missing)`` pairs ordered from the largest fraction
     to the smallest. A blank line is printed last so that consecutive
     reports are visually separated.
     """
     print('Shape: ', df.shape)

     # Fraction of null entries in each column (null count / number of rows).
     null_fraction = df.isnull().sum() / len(df)
     has_nulls = null_fraction > 0
     fraction_by_column = dict(zip(df.columns[has_nulls], null_fraction[has_nulls]))

     # Column names ranked by how much of the column is missing, worst first.
     ranked_columns = sorted(fraction_by_column, key=fraction_by_column.get, reverse=True)
     print('# Columns with any missing elements : ',
           [(column, fraction_by_column[column]) for column in ranked_columns])

     print()
    
 # Print a labelled df_stats summary for each of the three data frames.
 for heading, frame in (
     ('Listings: ', listings),
     ('Reviews: ', reviews),
     ('Calandar: ', calandar),
 ):
     print(heading)
     df_stats(frame)

 # Plot pairwise relationships between the columns of listings.
 sns.set(style='ticks')
 sns.pairplot(listings)

 # The price attribute looks very skewed, and is important. Let's look at it
 # in detail: a 500-bin histogram drawn on a log-scaled x axis.
 fig, ax = plt.subplots()
 listings['price'].hist(ax=ax, bins=500)
 ax.set_xscale('log')

 # Let's look at the correlation between the attributes in listings.
 # listings also carries the category columns selected below (presumably
 # text); DataFrame.corr() raises on non-numeric columns since pandas 2.0,
 # so restrict the frame to numeric/bool columns explicitly.
 plt.figure(figsize=(14, 10))
 corrs = listings.select_dtypes(include=['number', 'bool']).corr()
 sns.heatmap(corrs)

 # Category columns used for the count plots.
 listings_cats = listings[['neighbourhood_group', 'neighbourhood', 'room_type']]

 # Two side-by-side count plots: neighbourhood_group (left), room_type (right).
 plt.figure(figsize=(12, 4))
 plt.subplot(1, 2, 1)
 g = sns.countplot(x='neighbourhood_group', data=listings_cats)
 # Rotate the tick labels so the category names do not overlap.
 g.set_xticklabels(g.get_xticklabels(), rotation=90)

 plt.subplot(1, 2, 2)
 g = sns.countplot(x='room_type', data=listings_cats)
 g.set_xticklabels(g.get_xticklabels(), rotation=45)
	def df_stats(df):
		"""Print the shape of ``df`` and its per-column share of missing values.

		Only columns holding at least one missing value are reported, as
		``(column, fraction_missing)`` pairs ordered from the largest fraction
		to the smallest. A blank line is printed last so that consecutive
		reports are visually separated.
		"""
		print('Shape: ', df.shape)

		# Fraction of null entries in each column (null count / number of rows).
		missings = df.isnull().sum() / len(df)
		missing_vals = dict(zip(df.columns[missings > 0], missings[missings > 0]))
		print('# Columns with any missing elements : ',
			[(w, missing_vals[w]) for w in sorted(missing_vals, key=missing_vals.get, reverse=True)])

		print()

	# Print a labelled df_stats summary for each of the three data frames.
	for heading, frame in (
		('Listings: ', listings),
		('Reviews: ', reviews),
		('Calandar: ', calandar),
	):
		print(heading)
		df_stats(frame)

	# Plot pairwise relationships between the columns of listings.
	sns.set(style='ticks')
	sns.pairplot(listings)

	# The price attribute looks very skewed, and is important. Let's look at it
	# in detail: a 500-bin histogram drawn on a log-scaled x axis.
	fig, ax = plt.subplots()
	listings['price'].hist(ax=ax, bins=500)
	ax.set_xscale('log')

	# Let's look at the correlation between the attributes in listings.
	# listings also carries the category columns selected below (presumably
	# text); DataFrame.corr() raises on non-numeric columns since pandas 2.0,
	# so restrict the frame to numeric/bool columns explicitly.
	plt.figure(figsize=(14, 10))
	corrs = listings.select_dtypes(include=['number', 'bool']).corr()
	sns.heatmap(corrs)

	# Category columns used for the count plots.
	listings_cats = listings[['neighbourhood_group', 'neighbourhood', 'room_type']]

	# Two side-by-side count plots: neighbourhood_group (left), room_type (right).
	plt.figure(figsize=(12, 4))
	plt.subplot(1, 2, 1)
	g = sns.countplot(x='neighbourhood_group', data=listings_cats)
	# Rotate the tick labels so the category names do not overlap.
	g.set_xticklabels(g.get_xticklabels(), rotation=90)

	plt.subplot(1, 2, 2)
	g = sns.countplot(x='room_type', data=listings_cats)
	g.set_xticklabels(g.get_xticklabels(), rotation=45)