Skip to content

Instantly share code, notes, and snippets.

@audhiaprilliant
Created November 11, 2022 16:55
Show Gist options
  • Select an option

  • Save audhiaprilliant/24342f8bf6b1ae0f7233a1074a6edf2d to your computer and use it in GitHub Desktop.

Select an option

Save audhiaprilliant/24342f8bf6b1ae0f7233a1074a6edf2d to your computer and use it in GitHub Desktop.
Matplotlib 102 - Basic Introduction to Multiplot, Subplot and Gridspec
# ---------- IMPORT PACKAGES ----------
# Dataframe manipulation
import pandas as pd
# Matrices operation
import numpy as np
# Data viz with matplotlib
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style
# Check packages' version
# Only libraries that this script imports are reported. The former seaborn
# line referenced `sns`, which is never imported in this file, so it raised
# NameError before any data was loaded; it has been removed.
print('pandas ', pd.__version__)
print('numpy ', np.__version__)
print('matplotlib ', matplotlib.__version__)
# ---------- LOAD DATA SET ----------
# Read the semicolon-delimited customer churn file into a data frame
df = pd.read_csv('data/WA_Fn-UseC_-Telco-Customer-Churn.csv', sep=';')
# Preview the first five rows (the result is only shown in an interactive session)
df.head(5)
# Print column names, dtypes and non-null counts
df.info()
# Cast SeniorCitizen to the object dtype
df = df.astype({'SeniorCitizen': 'object'})
# ---------- DATA PREPARATION ----------
# Number of missing values per column
# (a bare expression: the result is only shown in an interactive session)
df.isnull().sum()
# Summary statistics
df.describe()
# Check the unique values of categorical columns: print the frequency table
# of every object-dtype column, followed by a blank separator line.
# The print call has to be indented to form the loop body; left flush with
# the `for` statement the script fails with an IndentationError.
for col in df.select_dtypes('object').columns:
    print(df[col].value_counts(), '\n')
# ---------- AGGREGATE DATA FOR SAMPLES ----------
# 1 Number of customer by payment method
# One row per payment method; customerID holds the count of non-null IDs.
# (The aggregation used to be computed twice in a row; once is enough.)
df_group_1 = df.groupby('PaymentMethod')[['customerID']].count().reset_index()
# Largest group first, so the cumulative columns below accumulate in
# descending order of group size. The index is left as produced by the sort.
df_group_1 = df_group_1.sort_values(by='customerID', ascending=False)
# Running share of all customers (a fraction, not multiplied by 100) and the
# running customer count. The column names keep their original spelling
# because they are runtime keys that other code may look up.
df_group_1['CummulativePerc'] = (df_group_1['customerID'].cumsum() / df_group_1['customerID'].sum())
df_group_1['CummulativeSum'] = df_group_1['customerID'].cumsum()
# Customer counts per category, one data frame per grouping column:
#   2 df_group_2 - by gender
#   3 df_group_3 - by contract type (grouped on the 'Contract' column)
#   4 df_group_4 - by paperless billing status
#   5 df_group_5 - by churn status
# Each frame has one row per category and a customerID column holding the
# count of non-null customer IDs in that category.
df_group_2, df_group_3, df_group_4, df_group_5 = (
    df.groupby(column)[['customerID']].count().reset_index()
    for column in ('gender', 'Contract', 'PaperlessBilling', 'Churn')
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment