Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save maria-aguilera/e10cdea0fd468e9d4fcc28045ba44601 to your computer and use it in GitHub Desktop.
Save maria-aguilera/e10cdea0fd468e9d4fcc28045ba44601 to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Show figures inline when running under IPython/Jupyter. The `%matplotlib inline`
# magic is IPython-only syntax, so it is invoked through the IPython API to keep
# this file valid plain Python as well.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # not running under IPython: nothing to configure

# adjust seaborn plot size
# Axes-level plots (barplot, countplot, stripplot, ...) draw on the current figure,
# so call plt.figure(figsize=(20, 12)) right before them.
# Figure-level plots such as pairplot build their own figure and ignore a preceding
# plt.figure(); size them with height (inches per facet) and aspect instead.

# pairplot (my favorite) - Draw scatterplots for joint relationships and histograms for univariate distributions
# hue is optional, like on all other seaborn plots
sns.pairplot(MyDataFrame, hue='some_categorical_column', height=2.5, aspect=1)
# barplot - aggregate a numeric column for each value of a categorical column.
# The estimator argument is the aggregation function applied to the y values of
# each x category; np.sum gives the per-category total.
plt.figure(figsize=(20, 12))  # fresh figure so the bars are not drawn into a previous plot's axes
sns.barplot(x='categorical_column_name', y='column_name_to_aggregate', data=MyDataFrame, estimator=np.sum)

# Count plot - count the rows in each category of a categorical variable
plt.figure(figsize=(20, 12))  # fresh figure, separate from the bar plot
sns.countplot(x='categorical_column_to_count', data=MyDataFrame)
# stripplot - Draw a scatterplot where one variable is categorical.
# Categorical variables go on the x axis, the continuous variable on the y axis.
# The jitter argument staggers the points so you can see them better.
# The hue argument takes a categorical variable to add another level of detail.
plt.figure(figsize=(20, 12))  # fresh figure so the points are not drawn into a previous plot's axes
sns.stripplot(x='sex', y='goals_scored', data=MyDataFrame, jitter=True, hue='occupation')
# histplot - Flexibly plot a univariate distribution of observations.
# Replaces the deprecated sns.distplot: kde=True overlays the density curve and
# stat='density' normalises the bars the way distplot did.
plt.figure(figsize=(20, 12))  # fresh figure so the histogram is not drawn into a previous plot's axes
sns.histplot(data=MyDataFrame, x='column_name', bins=30, kde=True, stat='density')
# jointplot - two continuous variables with their marginal distributions
# (figure-level: it creates its own figure)
sns.jointplot(x='continuous_column1', y='continuous_column2', data=MyDataFrame, kind='scatter')

# heatmaps
# heatmap needs a rectangular table of numbers, so it is nifty in conjunction with
# the Pandas .corr() method for a heatmap of Pearson Correlation Coefficients.
# Only the numeric columns are kept because .corr() is undefined for text/categorical ones.
plt.figure(figsize=(20, 12))  # fresh figure so the heatmap is not drawn into a previous plot's axes
sns.heatmap(MyDataFrame.select_dtypes(include='number').corr())

# regplot - regression plot for 2 variables
plt.figure(figsize=(20, 12))  # fresh figure, separate from the heatmap
sns.regplot(x='column1', y='column2', data=MyDataFrame)
# Scatterplot - built in to pandas
# The c argument is optional. You can pass the name of a column with discrete values to add more detail.
MyDataFrame.plot.scatter(x='blah', y='blahblah', c='discrete_value')

# line plot
MyDataFrame.plot.line(x='blah', y='blahblah')

# histogram - I like this one, numeric data only
# bins=10 is the default; it is spelled out here so it is easy to change
plt.figure(figsize=(20, 12))  # fresh figure so the histogram does not overlay the line plot above
MyDataFrame['column'].plot.hist(bins=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment