Skip to content

Instantly share code, notes, and snippets.

@amrakm
Created October 27, 2018 11:02
Show Gist options
  • Save amrakm/3328fe9d9a859126f28aee0bb4f9ccf2 to your computer and use it in GitHub Desktop.
Save amrakm/3328fe9d9a859126f28aee0bb4f9ccf2 to your computer and use it in GitHub Desktop.
Automatically downcast numerical columns to the smallest dtype that can hold their values
## source: https://www.kaggle.com/jeru666/did-you-think-of-these-features
def change_datatype(df):
    """Downcast integer columns of ``df`` to the narrowest signed int dtype.

    Each column returned by ``df.select_dtypes(include=['int'])`` is cast to
    the first of int8, int16, int32, int64 whose range contains both the
    column's minimum and maximum value.  The frame is modified in place and
    nothing is returned.

    NOTE(review): which integer widths ``include=['int']`` matches depends on
    the pandas version/platform -- confirm if already-narrow columns matter.

    Args:
        df: pandas DataFrame whose integer columns should be shrunk.
    """
    int_cols = list(df.select_dtypes(include=['int']).columns)
    for col in int_cols:
        # An empty column has no min/max to compare; leave its dtype alone.
        if df[col].empty:
            continue

        # Compute the extremes once instead of once per candidate dtype.
        col_min = df[col].min()
        col_max = df[col].max()

        # Candidates are ordered narrowest first, so the first fit wins.
        for candidate in (np.int8, np.int16, np.int32, np.int64):
            bounds = np.iinfo(candidate)
            if bounds.min <= col_min and col_max <= bounds.max:
                df[col] = df[col].astype(candidate)
                break
# Shrink the integer columns of df_transactions in place.
change_datatype(df_transactions)
def change_datatype_float(df):
    """Cast the float columns of ``df`` to float32, in place.

    Every column returned by ``df.select_dtypes(include=['float'])`` is
    converted unconditionally; no range or precision check is made, so values
    are rounded to float32 precision.  Nothing is returned.

    Args:
        df: pandas DataFrame whose float columns should be narrowed.
    """
    float_cols = list(df.select_dtypes(include=['float']).columns)
    for col in float_cols:
        df[col] = df[col].astype(np.float32)
# Narrow the float columns of df_transactions to float32 in place.
change_datatype_float(df_transactions)
# Sum the per-column byte counts (index included) and print the total
# divided by 1024**2, i.e. in mebibytes.
mem = df_transactions.memory_usage(index=True).sum()
print(mem/ 1024**2," MB")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment