Created
November 11, 2022 16:55
-
-
Save audhiaprilliant/24342f8bf6b1ae0f7233a1074a6edf2d to your computer and use it in GitHub Desktop.
Matplotlib 102 - Basic Introduction to Multiplot, Subplot and Gridspec
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------- IMPORT PACKAGES ----------
# Dataframe manipulation
import pandas as pd
# Matrices operation
import numpy as np
# Data viz with matplotlib
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style

# Report the version of every package this script imports, so a run can be
# reproduced later. seaborn is deliberately not reported: it is never imported
# in this script, so referencing `sns` here raised a NameError.
print('pandas ', pd.__version__)
print('numpy ', np.__version__)
print('matplotlib ', matplotlib.__version__)
# ---------- LOAD DATA SET ----------
# Read the semicolon-delimited customer churn CSV into a data frame
df = pd.read_csv('data/WA_Fn-UseC_-Telco-Customer-Churn.csv', sep=';')

# Preview the first five rows (only displayed when run interactively)
df.head()

# Print column names, non-null counts and dtypes
df.info()

# Store SeniorCitizen as a generic object column instead of its parsed dtype
# NOTE(review): presumably so it is treated as a category rather than a
# number in later steps -- confirm
df['SeniorCitizen'] = df['SeniorCitizen'].astype(object)
# ---------- DATA PREPARATION ----------
# Number of missing values per column (only displayed when run interactively)
df.isnull().sum()

# Summary statistics (only displayed when run interactively)
df.describe()

# Print the frequency table of every object-typed column, one table per
# column, each followed by a blank line
for col in df.select_dtypes('object').columns:
    print(df[col].value_counts(), '\n')

# The payment-method aggregation that used to be computed here was an exact
# duplicate of the one in the AGGREGATE DATA FOR SAMPLES section, which
# overwrote it immediately; it is computed only there.
# ---------- AGGREGATE DATA FOR SAMPLES ----------
def _count_customers_by(column):
    """Return the number of customers for each distinct value of *column*.

    The result is a data frame with a fresh integer index and two columns:
    *column* itself and ``customerID``, which holds the count of non-null
    ``customerID`` entries in each group of the module-level ``df``.
    """
    return df.groupby(column)[['customerID']].count().reset_index()


# 1 Number of customer by payment method, largest group first
df_group_1 = _count_customers_by('PaymentMethod').sort_values(
    by='customerID', ascending=False
)
# Running total over the sorted groups, computed once and reused below
_running_total = df_group_1['customerID'].cumsum()
# Share of all customers covered so far, as a fraction between 0 and 1
df_group_1['CummulativePerc'] = _running_total / df_group_1['customerID'].sum()
df_group_1['CummulativeSum'] = _running_total

# 2 Number of customer by gender
df_group_2 = _count_customers_by('gender')

# 3 Number of customer by contract type
#   (the grouping column is 'Contract'; an earlier comment mislabelled this
#   aggregate as "senior citizen status")
df_group_3 = _count_customers_by('Contract')

# 4 Number of customer by paperless billing status
df_group_4 = _count_customers_by('PaperlessBilling')

# 5 Number of customer by churn status
df_group_5 = _count_customers_by('Churn')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment