This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import unicodedata | |
import re | |
from typing import Hashable, List, Collection, Union | |
# Precompiled module-level patterns (compiled once at import time).
# NOTE(review): presumably consumed by `_camel2snake` below to insert
# underscores at case boundaries -- its body is not visible here; confirm.

# Group 1: any single character; group 2: an uppercase letter followed by one
# or more lowercase letters (e.g. the "PResponse" in "HTTPResponse").
_underscorer1 = re.compile(r"(.)([A-Z][a-z]+)")

# Group 1: a lowercase letter or digit; group 2: the uppercase letter that
# immediately follows it (e.g. the "tH" in "getHTTP").
_underscorer2 = re.compile(r"([a-z0-9])([A-Z])")
def _camel2snake(col_name: str) -> str: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def reduce_mem_usage(df, verbose=True): | |
"""Converts numeric columns to properly sized bytes to reduce overall dataframe size in memory""" | |
numerics = ["int16", "int32", "int64", "float16", "float32", "float64"] | |
start_mem = df.memory_usage().sum() / 1024 ** 2 | |
for col in df.columns: | |
col_type = df[col].dtypes | |
if col_type in numerics: | |
c_min = df[col].min() | |
c_max = df[col].max() | |
if str(col_type)[:3] == "int": |