maria-aguilera · November 16, 2022 15:55
diff --git a/Visualization Cheat Sheet.py b/Visualization Cheat Sheet.py
 import seaborn as sns
 import pandas as pd
 import matplotlib.pyplot as plt
 %matplotlib inline

 #adjust seaborn plot size
 plt.figure(figsize=(20,12))

 #pairplot (my favorite) - Draw scatterplots for joint relationships and histograms for univariate distributions
 #hue optional like on all other seaborn plots
 sns.pairplot(MyDataFrame,hue='some_categorical_column')

 """
 barplot for aggregation of categorical variables. - the estimator argument is what determines the aggregation function(done with numpy)
 """
 sns.barplot(x='catgegorical_column_name',y='column_name_to_aggregate',data=MyDataFrame,estimator=np.sum) #np.sum should sum the y for x

 #Count plot - count a categorical variable
 sns.countplot(x='categorical_column_to_count',data=MyDataFrame)

 """
 stripplot - Draw a scatterplot where one variable is categorical
 Categorical variables go on the x axis. Continuous variable on the y axis.
 jitter argument staggers the points so you can see them better
 hue argument takes a categorical variable to add another level of detail
 """
 sns.stripplot(x='sex',y='goals_scored',data=MyDataFrame,jitter=True,hue='occupation')

 """
 distplot - Flexibly plot a univariate distribution of observations.
 """
 sns.distplot(df['column_name'],bins=30)

 #jointplot
 sns.jointplot(x='continuous_column1',y='continuous_column2',data=MyDataFrame,kind='scatter')

 #heatmaps
 #this is nifty in conjunction with the Pandas .corr() method for a heatmap of Pearson Correlation Coefficients
 sns.heatmap(MyDataFrame)

 #regplot - regression plot for 2 variables
 sns.regplot(x='column1',y='column2',data=MyDataFrame)

 #Scatterplot - built in to pandas
 #c argument is optional. You can put a column name with discrete values as an argument to add more details
 df.plot.scatter(x='blah',y='blahblah',c='discrete_value')

 #line plot
 df.plot.line(x='blah',y='blahblah')

 #histogram - I like this one, numeric data only
 #bin=10 is a default argument
 df['column'].plot.hist()
	import seaborn as sns
	import pandas as pd
	import matplotlib.pyplot as plt
	%matplotlib inline

	#adjust seaborn plot size
	plt.figure(figsize=(20,12))

	#pairplot (my favorite) - Draw scatterplots for joint relationships and histograms for univariate distributions
	#hue optional like on all other seaborn plots
	sns.pairplot(MyDataFrame,hue='some_categorical_column')

	"""
	barplot for aggregation of categorical variables. - the estimator argument is what determines the aggregation function(done with numpy)
	"""
	sns.barplot(x='catgegorical_column_name',y='column_name_to_aggregate',data=MyDataFrame,estimator=np.sum) #np.sum should sum the y for x

	#Count plot - count a categorical variable
	sns.countplot(x='categorical_column_to_count',data=MyDataFrame)

	"""
	stripplot - Draw a scatterplot where one variable is categorical
	Categorical variables go on the x axis. Continuous variable on the y axis.
	jitter argument staggers the points so you can see them better
	hue argument takes a categorical variable to add another level of detail
	"""
	sns.stripplot(x='sex',y='goals_scored',data=MyDataFrame,jitter=True,hue='occupation')

	"""
	distplot - Flexibly plot a univariate distribution of observations.
	"""
	sns.distplot(df['column_name'],bins=30)

	#jointplot
	sns.jointplot(x='continuous_column1',y='continuous_column2',data=MyDataFrame,kind='scatter')

	#heatmaps
	#this is nifty in conjunction with the Pandas .corr() method for a heatmap of Pearson Correlation Coefficients
	sns.heatmap(MyDataFrame)

	#regplot - regression plot for 2 variables
	sns.regplot(x='column1',y='column2',data=MyDataFrame)

	#Scatterplot - built in to pandas
	#c argument is optional. You can put a column name with discrete values as an argument to add more details
	df.plot.scatter(x='blah',y='blahblah',c='discrete_value')

	#line plot
	df.plot.line(x='blah',y='blahblah')

	#histogram - I like this one, numeric data only
	#bin=10 is a default argument
	df['column'].plot.hist()