Skip to content

Instantly share code, notes, and snippets.

@korkridake
Created November 26, 2018 16:42
Show Gist options
  • Save korkridake/f4f45ddaaa650975cd4df9e77f13cd1a to your computer and use it in GitHub Desktop.
Save korkridake/f4f45ddaaa650975cd4df9e77f13cd1a to your computer and use it in GitHub Desktop.
#-------------------------------------------------------------------------------------------------------------------
# Load standard libraries
# ------------------------------------------------------------------------------------------------------------------
# Notebook setup: imports plus IPython / pandas display configuration.
# The lines starting with '%' are IPython magics, so this section only runs
# inside IPython / Jupyter, not as a plain Python script.
# NOTE(review): os, np, tqdm, timedelta, monthdelta, glob and reduce are not
# referenced in the visible part of this file - confirm before pruning.
import os
import pandas as pd
import numpy as np
import tqdm
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')
from datetime import timedelta
import monthdelta
from glob import glob
from functools import reduce
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Turn off the Jedi-based tab completer.
%config Completer.use_jedi = False
# Let pandas print up to 999 rows / 999 columns before truncating the output.
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
# NOTE(review): display/HTML are imported again from IPython.display a few
# lines below; the IPython.core.display path looks like the legacy location -
# confirm against the installed IPython version.
from IPython.core.display import display, HTML
# Inject a CSS rule that sets elements of class "container" to 90% width
# (presumably to widen the notebook page - verify for the front end in use).
display(HTML("<style>.container { width:90% !important; }</style>"))
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Rebinds display and HTML; this later import is the binding in effect afterwards.
from IPython.display import display, HTML
# Format floats with a thousands separator and two decimals, padded to width 20.
pd.options.display.float_format = '{:20,.2f}'.format
# ------------------------------------------------------------------------------------------------------------------
# Load data visualizations
# ------------------------------------------------------------------------------------------------------------------
# matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # Set as High-Definition (HD) Image
# seaborn
import seaborn as sns;
sns.set(style="whitegrid", color_codes=True)
# ------------------------------------------------------------------------------------------------------------------
# Load sample data
# ------------------------------------------------------------------------------------------------------------------
# Read the mpg sample CSV straight from GitHub (needs network access).
df = pd.read_csv(
    'https://github.com/prasertcbs/tutorial/raw/master/mpg.csv'
)
# Peek at the data: five randomly sampled rows, then the first five rows.
df.sample(5)
df.head()
# Example output of df.head():
#   manufacturer model  displ  year  cyl       trans drv  cty  hwy fl    class
# 0         audi    a4    1.8  1999    4    auto(l5)   f   18   29  p  compact
# 1         audi    a4    1.8  1999    4  manual(m5)   f   21   29  p  compact
# 2         audi    a4    2.0  2008    4  manual(m6)   f   20   31  p  compact
# 3         audi    a4    2.0  2008    4    auto(av)   f   21   30  p  compact
# 4         audi    a4    2.8  1999    6    auto(l5)   f   16   26  p  compact
# Plot the basic scatterplot
df.plot.scatter(x='displ', y='cty')
# Plot the basic scatterplot with
%config InlineBackend.figure_format = 'retina' # Set as High-Definition (HD) Image
df.plot.scatter(x='displ', y='cty')
# Plot the basic scatterplot with grid and title
df.plot.scatter(x='displ', y='cty', grid=True, title='City miles per gallon vs. engine size (litres)');
# More advanced one...
# Plot the scatterplot based on the categorical variable (in this case, it's df['drv'])
df.plot.scatter(x='displ', y='cty',
grid=True,
c=df.drv.map({'4':'green', 'f':'blue', 'r':'red'}),
title='City miles per gallon vs. engine size (litres)');
df.plot.scatter(x='displ', y='cty',
grid=True,
c=df.drv.map({'4':'green', 'f':'blue', 'r':'red'}),
s=df.displ*10,
title='City miles per gallon vs. engine size (litres)');
df.plot.scatter(x='displ', y='cty',
grid=True,
c=df.drv.map({'4':'green', 'f':'blue', 'r':'red'}),
s=df.cyl*10,
title='City miles per gallon vs. engine size (litres)');
df.plot.scatter(x='displ', y='cty',
grid=True,
c=df.drv.map({'4':'green', 'f':'blue', 'r':'red'}),
s=df.cyl*10,
alpha=.5,
title='City miles per gallon vs. engine size (litres)');
df.plot.scatter(x='displ', y='cty',
grid=True,
c=df.drv.map({'4':'green', 'f':'blue', 'r':'red'}),
s=30,
alpha=.5,
title='City miles per gallon vs. engine size (litres)');
df.plot.scatter(x='displ', y='cty',
grid=True,
c=df.drv.map({'4':'green', 'f':'blue', 'r':'red'}),
s=df.cyl*10,
alpha=.5,
ylim=(0, 40),
xlim=(0, 10),
title='City miles per gallon vs. engine size (litres)');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment