# Created November 26, 2018 16:28
# Save korkridake/0485fcadb715ade7d639cb5737a916e8 to your computer and use it in GitHub Desktop.
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# ------------------------------------------------------------------------------------------------------------------
# Load standard libraries
# ------------------------------------------------------------------------------------------------------------------
import os
import pandas as pd
import numpy as np
import tqdm
import warnings
# Install a blanket "ignore" filter: no warnings are shown from here on.
warnings.filterwarnings('ignore')
from datetime import timedelta
import monthdelta
from glob import glob
from functools import reduce
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Same effect as the `%config Completer.use_jedi = False` line magic, written as a
# call so the file parses as plain Python. `get_ipython` only exists inside an
# IPython/Jupyter session.
get_ipython().run_line_magic('config', 'Completer.use_jedi = False')
# Show up to 999 rows / columns before pandas truncates a displayed frame.
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
# Import from the public IPython.display module (importing `display` from
# IPython.core.display is a deprecated path) and do it before first use.
from IPython.display import display, HTML
# Widen the notebook container to 90% of the browser window.
display(HTML("<style>.container { width:90% !important; }</style>"))
# Same effect as the `%matplotlib inline` line magic.
get_ipython().run_line_magic('matplotlib', 'inline')
# Render floats with thousands separators and two decimals, padded to width 20.
pd.options.display.float_format = '{:20,.2f}'.format
# ------------------------------------------------------------------------------------------------------------------
# Load data visualizations
# ------------------------------------------------------------------------------------------------------------------
# matplotlib
import matplotlib.pyplot as plt
# Same effect as the `%matplotlib inline` line magic, written as a call so the
# file parses as plain Python (requires an IPython/Jupyter session).
get_ipython().run_line_magic('matplotlib', 'inline')
# seaborn
import seaborn as sns
# Apply the "whitegrid" seaborn theme and enable seaborn's short color codes.
sns.set(style="whitegrid", color_codes=True)
# ------------------------------------------------------------------------------------------------------------------
# Load sample data
# ------------------------------------------------------------------------------------------------------------------
# Read the mpg sample dataset straight from GitHub (needs network access).
df = pd.read_csv('https://github.com/prasertcbs/tutorial/raw/master/mpg.csv')
# Preview the first five rows.
df.head()
#   manufacturer model  displ  year  cyl       trans drv  cty  hwy fl    class
# 0         audi    a4    1.8  1999    4    auto(l5)   f   18   29  p  compact
# 1         audi    a4    1.8  1999    4  manual(m5)   f   21   29  p  compact
# 2         audi    a4    2.0  2008    4  manual(m6)   f   20   31  p  compact
# 3         audi    a4    2.0  2008    4    auto(av)   f   21   30  p  compact
# 4         audi    a4    2.8  1999    6    auto(l5)   f   16   26  p  compact
# Plot a histogram for every numerical column (pandas picks the columns automatically)
df.hist()
# Plot the histogram for one specific numerical column
df.cty.hist()
# Same plot; the trailing semicolon asks IPython not to echo the call's return value
df.cty.hist();
# Plot the histogram for the specific numerical column with 20 bins
df.cty.hist(bins=20);
# Plot histograms for two columns, setting the bar color and sharing both axes between the subplots
df[['cty', 'hwy']].hist(grid=True, color='orange', sharex=True, sharey=True)
# Adjust subplot spacing before rendering the figure
plt.tight_layout()
plt.show()
# Plot the histograms of two columns together on the same axes
df[['cty', 'hwy']].plot.hist()
# Same plot with translucent bars (alpha=.2) so overlapping regions stay visible
df[['cty', 'hwy']].plot.hist(alpha=.2)
# Plot the density (either density() or kde() works)
df.cty.plot.density()
df.cty.plot.kde()
# Count the rows in each class, then plot those counts as bar graphs
df['class'].value_counts()
# Vertical bars
df['class'].value_counts().plot.bar()
# Horizontal bars in orange
df['class'].value_counts().plot.barh(color='orange')
# Horizontal bars in gray ('.7' is a matplotlib grayscale shade)
df['class'].value_counts().plot.barh(color='.7')
# Horizontal gray bars with the counts sorted in ascending order first
df['class'].value_counts().sort_values().plot.barh(color='.7')
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment