NaN Percent

# Summarise missing values per column of df: absolute count and share of rows.
null_mask = df.isnull()
null_counts = null_mask.sum()
total = null_counts.sort_values(ascending=False)
# NOTE: despite the name, this is a fraction between 0 and 1, not a 0-100 value.
percent = (null_counts / null_mask.count()).sort_values(ascending=False)
# Side-by-side table with one row per column of df.
missing_data = pd.concat(
    [total, percent],
    axis=1,
    keys=['Total', 'Percent'],
)

Heat map

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Annotated correlation heatmap of the k columns ranked highest by their
# 'SalePrice' entry in corrmat.
# NOTE(review): `corrmat` and `np` are not defined in this snippet; presumably
# corrmat is df.corr() and numpy is imported as np beforehand -- confirm.
k = 10  # number of variables for heatmap
cols = corrmat.nlargest(k, 'SalePrice')['SalePrice'].index
# np.corrcoef treats rows as variables, hence the transpose.
cm = np.corrcoef(df[cols].values.T)
tick_labels = cols.values
hm = sns.heatmap(
    cm,
    cbar=True,
    annot=True,
    square=True,
    fmt='.2f',
    annot_kws={'size': 10},
    yticklabels=tick_labels,
    xticklabels=tick_labels,
)

Show Relationship

def show_relationship(name, ylim=(0, 8*(10**5))):
    """Draw a scatter plot of the module-level ``df``'s 'SalePrice' against ``name``.

    Args:
        name: Label of the ``df`` column to place on the x-axis.
        ylim: ``(lower, upper)`` limits passed to the plot for the
            'SalePrice' (y) axis.
    """
    target = df['SalePrice']
    feature = df[name]
    # Pair the two columns in their own frame so the plot only sees them.
    pair = pd.concat([target, feature], axis=1)
    pair.plot.scatter(x=name, y='SalePrice', ylim=ylim)

Selecting pandas DataFrame Rows Based On Conditions

Testing data

# Import modules
import pandas as pd
import numpy as np

# Build a small example frame; three of the first names are missing (NaN).
raw_data = dict(
    first_name=['Jason', 'Molly', np.nan, np.nan, np.nan],
    nationality=['USA', 'USA', 'France', 'UK', 'UK'],
    age=[42, 52, 36, 24, 70],
)
# Column order is given explicitly and follows the key order of raw_data.
df = pd.DataFrame(raw_data, columns=list(raw_data))

Method 1: Using boolean variables

# Boolean mask: True for rows whose nationality is USA
american = df['nationality'].eq("USA")

# Boolean mask: True for rows whose age is greater than 50
elderly = df['age'].gt(50)

# Keep only the rows where both masks are True
df.loc[american & elderly]

Method 2: Using variable attributes

# Rows that have a first name (not NaN) and whose nationality is USA
df.loc[df['first_name'].notna() & df['nationality'].eq("USA")]

Seaborn Type

import seaborn as sns
import matplotlib.pyplot as plt
# Kernel density estimate of the 'Age' column over the whole `titanic` frame.
sns.kdeplot(titanic['Age'], shade=True)
sns.despine(left=True, bottom=True)

# One 'Age' density panel per 'Pclass' value.
# The original lines were syntactically broken (the `.map(sns.kdeplot` part
# was missing); FacetGrid.map draws the given plot function on every facet.
# NOTE(review): newer seaborn releases spell these arguments `height=`
# (FacetGrid) and `fill=` (kdeplot) -- confirm against the installed version.
g = sns.FacetGrid(titanic, col='Pclass', size=6).map(sns.kdeplot, 'Age', shade=True)
sns.despine(left=True, bottom=True)

# Grid of panels: 'Survived' across the columns, 'Pclass' down the rows.
g = sns.FacetGrid(titanic, col="Survived", row="Pclass").map(sns.kdeplot, "Age", shade=True)
sns.despine(left=True, bottom=True)

# Same grid, with one curve per 'Sex' value and a legend.
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', hue='Sex', size=3, legend_out=True)
g = g.map(sns.kdeplot, 'Age', shade=True).add_legend()
sns.despine(left=True, bottom=True)
