-
-
Save maria-aguilera/e10cdea0fd468e9d4fcc28045ba44601 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn as sns | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
%matplotlib inline | |
#adjust seaborn plot size | |
plt.figure(figsize=(20,12)) | |
#pairplot (my favorite) - Draw scatterplots for joint relationships and histograms for univariate distributions | |
#hue optional like on all other seaborn plots | |
sns.pairplot(MyDataFrame,hue='some_categorical_column') | |
""" | |
barplot for aggregation of categorical variables. - the estimator argument is what determines the aggregation function(done with numpy) | |
""" | |
sns.barplot(x='catgegorical_column_name',y='column_name_to_aggregate',data=MyDataFrame,estimator=np.sum) #np.sum should sum the y for x | |
#Count plot - count a categorical variable | |
sns.countplot(x='categorical_column_to_count',data=MyDataFrame) | |
""" | |
stripplot - Draw a scatterplot where one variable is categorical | |
Categorical variables go on the x axis. Continuous variable on the y axis. | |
jitter argument staggers the points so you can see them better | |
hue argument takes a categorical variable to add another level of detail | |
""" | |
sns.stripplot(x='sex',y='goals_scored',data=MyDataFrame,jitter=True,hue='occupation') | |
""" | |
distplot - Flexibly plot a univariate distribution of observations. | |
""" | |
sns.distplot(df['column_name'],bins=30) | |
#jointplot | |
sns.jointplot(x='continuous_column1',y='continuous_column2',data=MyDataFrame,kind='scatter') | |
#heatmaps | |
#this is nifty in conjunction with the Pandas .corr() method for a heatmap of Pearson Correlation Coefficients | |
sns.heatmap(MyDataFrame) | |
#regplot - regression plot for 2 variables | |
sns.regplot(x='column1',y='column2',data=MyDataFrame) | |
#Scatterplot - built in to pandas | |
#c argument is optional. You can put a column name with discrete values as an argument to add more details | |
df.plot.scatter(x='blah',y='blahblah',c='discrete_value') | |
#line plot | |
df.plot.line(x='blah',y='blahblah') | |
#histogram - I like this one, numeric data only | |
#bin=10 is a default argument | |
df['column'].plot.hist() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment