Skip to content

Instantly share code, notes, and snippets.

@korkridake
Created November 26, 2018 16:28
Show Gist options
  • Save korkridake/0485fcadb715ade7d639cb5737a916e8 to your computer and use it in GitHub Desktop.
Save korkridake/0485fcadb715ade7d639cb5737a916e8 to your computer and use it in GitHub Desktop.
#-------------------------------------------------------------------------------------------------------------------
# Load standard libraries
# ------------------------------------------------------------------------------------------------------------------
import os
import pandas as pd
import numpy as np
import tqdm
import warnings
# Ignore every warning category for the rest of the session. This keeps the
# notebook output clean, but it also hides deprecation notices.
warnings.filterwarnings(action='ignore', category=Warning)
from datetime import timedelta
import monthdelta
from glob import glob
from functools import reduce
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
%config Completer.use_jedi = False
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
%matplotlib inline
from IPython.display import display, HTML
pd.options.display.float_format = '{:20,.2f}'.format
# ------------------------------------------------------------------------------------------------------------------
# Load data visualizations
# ------------------------------------------------------------------------------------------------------------------
# matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
# seaborn
import seaborn as sns;
sns.set(style="whitegrid", color_codes=True)
# ------------------------------------------------------------------------------------------------------------------
# Load sample data
# ------------------------------------------------------------------------------------------------------------------
# Download the mpg sample CSV over HTTP into a DataFrame, then preview the
# first five rows (the expected preview is reproduced in the comment below).
df = pd.read_csv(
    filepath_or_buffer='https://github.com/prasertcbs/tutorial/raw/master/mpg.csv',
)
df.head(n=5)
# manufacturer model displ year cyl trans drv cty hwy fl class
# 0 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
# 1 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
# 2 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
# 3 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
# 4 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
# Histogram of every numeric column. DataFrame.hist() builds its own figure
# with one subplot per column (pandas picks the numeric columns automatically).
df.hist()

# Histogram of one numeric column.
# Series.hist() draws on the *current* figure when one already exists, so each
# example below gets its own axes via `ax=`; otherwise, when the examples are
# run together, the bars would pile onto the last subplot of the grid above.
fig, ax = plt.subplots()
df['cty'].hist(ax=ax)

# Same plot; the trailing `;` suppresses the echoed return value (the Axes
# repr) in the notebook output.
fig, ax = plt.subplots()
df['cty'].hist(ax=ax);

# Same plot with 20 bins instead of the default number.
fig, ax = plt.subplots()
df['cty'].hist(ax=ax, bins=20);

# Side-by-side histograms of two columns in one figure, with a custom color
# and shared x/y axes so the two panels are directly comparable.
df[['cty', 'hwy']].hist(grid=True, color='orange', sharex=True, sharey=True)
plt.tight_layout()  # avoid overlapping titles/tick labels between the panels
plt.show();
# Overlay the histograms of two columns on a single set of axes.
# DataFrame.plot.* opens a new figure on each call when no `ax` is given.
df[['cty', 'hwy']].plot.hist()
# Same overlay, semi-transparent so the overlapping bars stay visible.
df[['cty', 'hwy']].plot.hist(alpha=.2)

# Kernel density estimate of one column; density() and kde() are aliases.
# Series.plot.* reuses the current axes when a figure is already open, so each
# curve is given its own axes instead of being drawn over the histogram above.
fig, ax = plt.subplots()
df['cty'].plot.density(ax=ax)
fig, ax = plt.subplots()
df['cty'].plot.kde(ax=ax)
# Number of rows per vehicle class, computed once and reused by every chart.
class_counts = df['class'].value_counts()
class_counts

# Each chart gets its own axes: Series.plot.* otherwise reuses the current
# axes, which would stack the vertical and horizontal bars on top of each other
# when these examples are run together.

# Vertical bars.
fig, ax = plt.subplots()
class_counts.plot.bar(ax=ax)

# Horizontal bars with a named color.
fig, ax = plt.subplots()
class_counts.plot.barh(ax=ax, color='orange')

# Horizontal bars in gray ('.7' is matplotlib's grayscale shorthand).
fig, ax = plt.subplots()
class_counts.plot.barh(ax=ax, color='.7')

# Same, sorted ascending by count so the largest class ends up at the top.
fig, ax = plt.subplots()
class_counts.sort_values().plot.barh(ax=ax, color='.7')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment