Created
September 8, 2018 10:56
-
-
Save Diyago/c2a67ff01897685b08e75c5be5b6debb to your computer and use it in GitHub Desktop.
Reducing memory usage by pandas dataframe
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def reduce_mem_usage(data, verbose=True):
    """Downcast numeric columns of a DataFrame to the narrowest dtype that fits.

    For every column backed by a plain NumPy signed-integer, unsigned-integer
    or floating dtype, the column's min and max are compared against the
    representable range of progressively wider dtypes of the same family, and
    the column is cast to the first one that can hold both extremes.

    Columns of any other dtype (object/string, bool, datetime, timedelta,
    complex, categorical and other pandas extension dtypes) are left untouched.

    Note:
        Float columns are chosen by *range* only. Casting to ``float16`` or
        ``float32`` can therefore lose precision even though every value is
        within range.

    Args:
        data: pandas DataFrame to shrink. It is modified in place.
        verbose: When true, print the memory footprint before and after, and
            the relative reduction.

    Returns:
        The same ``data`` object, with downcast columns.
    """
    # (abstract NumPy kind, candidate dtypes narrowest-first, limits lookup)
    families = (
        (np.signedinteger, (np.int8, np.int16, np.int32, np.int64), np.iinfo),
        (np.unsignedinteger,
         (np.uint8, np.uint16, np.uint32, np.uint64), np.iinfo),
        (np.floating, (np.float16, np.float32, np.float64), np.finfo),
    )

    start_mem = data.memory_usage().sum() / 1024**2
    if verbose:
        print('Memory usage of dataframe: {:.2f} MB'.format(start_mem))

    for col in data.columns:
        col_type = data[col].dtype
        # Extension dtypes (category, nullable Int64, string, ...) are not
        # np.dtype instances and np.issubdtype cannot classify them; skip.
        if not isinstance(col_type, np.dtype):
            continue

        for abstract_kind, candidates, limits_of in families:
            if np.issubdtype(col_type, abstract_kind):
                break
        else:
            # bool, datetime, object, complex, ...: nothing to downcast.
            continue

        c_min = data[col].min()
        c_max = data[col].max()
        for candidate in candidates:
            limits = limits_of(candidate)
            # Inclusive bounds: the extreme values themselves are representable.
            if limits.min <= c_min and c_max <= limits.max:
                data[col] = data[col].astype(candidate)
                break
        else:
            # No candidate matched: min/max is NaN or infinite (or the column
            # is empty). Floats fall back to float64; integer columns are
            # left as they are.
            if abstract_kind is np.floating:
                data[col] = data[col].astype(np.float64)

    end_mem = data.memory_usage().sum() / 1024**2
    if verbose:
        print('Memory usage after optimization: {:.2f} MB'.format(end_mem))
        # Guard against a zero-byte frame to avoid dividing by zero.
        decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Decreased by {:.1f}%'.format(decrease))
    return data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment