Skip to content

Instantly share code, notes, and snippets.

@adriantorrie
Last active August 29, 2017 01:46
Show Gist options
  • Select an option

  • Save adriantorrie/9a1dcdb939ba12b817f93117968a1426 to your computer and use it in GitHub Desktop.

Select an option

Save adriantorrie/9a1dcdb939ba12b817f93117968a1426 to your computer and use it in GitHub Desktop.
Plot the distribution of each series (column) in a pandas DataFrame
# plot the distribution of each feature
def plot_distributions(df, cols=3, width=20, height=20, hspace=0.45, wspace=0.5):
    """Draw one distribution subplot per column of ``df`` and show the figure.

    Columns whose dtype is ``object`` are drawn as a horizontal count plot
    with their y tick labels truncated to 18 characters. Every other column
    is drawn as a density histogram with a KDE overlay of its non-null values.

    Args:
        df: DataFrame whose columns are plotted, one subplot per column.
        cols: Number of subplot columns in the grid; the number of rows is
            derived from ``df.shape[1]``.
        width: Figure width, passed as the first element of ``figsize``.
        height: Figure height, passed as the second element of ``figsize``.
        hspace: Vertical spacing passed to ``fig.subplots_adjust``.
        wspace: Horizontal spacing passed to ``fig.subplots_adjust``.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: If ``cols`` is smaller than 1.
    """
    if cols < 1:
        raise ValueError("cols must be a positive integer")

    # Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'
    # and later dropped the old aliases, so fall back to the new name when
    # the historical one is no longer registered.
    style = 'seaborn-whitegrid'
    if style not in plt.style.available:
        style = 'seaborn-v0_8-whitegrid'
    plt.style.use(style)

    # generate the figure to draw on
    fig = plt.figure(figsize=(width, height))
    fig.subplots_adjust(left=None, bottom=None, right=None, top=None,
                        wspace=wspace, hspace=hspace)
    # int() because math.ceil returns a float on Python 2 and add_subplot
    # expects integer grid dimensions.
    rows = int(math.ceil(float(df.shape[1]) / cols))

    # add subplot for each series with appropriate formatting
    for i, column in enumerate(df.columns):
        ax = fig.add_subplot(rows, cols, i + 1)
        ax.set_title(column)
        # Compare against the builtin ``object``: the ``np.object`` alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        if df.dtypes[column] == object:
            sns.countplot(y=column, data=df, ax=ax)
            # Keep long category names from squeezing the plot area.
            substrings = [s.get_text()[:18] for s in ax.get_yticklabels()]
            ax.set_yticklabels(substrings)
        else:
            values = df[column].dropna()
            if hasattr(sns, 'histplot'):
                # ``distplot`` is deprecated; this is the documented
                # replacement for its default histogram + KDE output.
                sns.histplot(values, kde=True, stat='density', ax=ax)
            else:
                # Older seaborn releases without ``histplot``.
                sns.distplot(values, ax=ax)
        # Rotate on this axes explicitly instead of relying on pyplot's
        # implicit "current axes" state.
        plt.setp(ax.get_xticklabels(), rotation=25)

    # format the figure
    # NOTE(review): tight_layout recomputes subplot spacing, so it may
    # override the hspace/wspace applied above -- confirm this is intended.
    plt.tight_layout()
    plt.show()
# Example usage: the function defined above is `plot_distributions` (plural);
# the singular spelling is undefined and raises NameError.
plot_distributions(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment