Last active
June 24, 2020 18:58
-
-
Save simicd/d700b557be69127fb57d7d81dd405c75 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark: compare how much resident memory (RSS) the process gains when the
# "Flipper Length (mm)" column of the penguin dataset is loaded from CSV,
# Parquet, an Arrow file read through OSFile, and an Arrow file read through a
# memory map (with and without missing values).
#
# NOTE(review): relies on `os`, `psutil`, `pandas as pd` and `pyarrow as pa`
# being imported earlier in the file -- the imports are not visible here, confirm.

FLIPPER_COLUMN = "Flipper Length (mm)"


def _rss_mib():
    """Return the current process's resident set size in whole MiB.

    ``rss`` is reported in bytes; shifting right by 20 bits floor-divides by
    2**20.
    """
    return psutil.Process(os.getpid()).memory_info().rss >> 20


# Measure initial memory consumption
memory_init = _rss_mib()

# Read csv (the whole file is parsed, then a single column is selected)
col_csv = pd.read_csv("penguin-dataset.csv")[FLIPPER_COLUMN]
memory_post_csv = _rss_mib()

# Read parquet (only the requested column is loaded; result is a DataFrame)
col_parquet = pd.read_parquet("penguin-dataset.parquet", columns=[FLIPPER_COLUMN])
memory_post_parquet = _rss_mib()

# Read Arrow using file API
with pa.OSFile('penguin-dataset.arrow', 'rb') as source:
    col_arrow_file = pa.ipc.open_file(source).read_all().column(FLIPPER_COLUMN).to_pandas()
memory_post_arrowos = _rss_mib()

# Read Arrow with memory-mapped API with missing values
# The measurement is taken while the map is still open, as in the original
# script; the `with` block only guarantees the handle is closed afterwards.
with pa.memory_map('penguin-dataset.arrow', 'r') as source:
    table_mmap = pa.ipc.open_file(source).read_all().column(FLIPPER_COLUMN)
    col_arrow_mapped = table_mmap.to_pandas()
    memory_post_arrowmmap = _rss_mib()

# Read Arrow with memory-mapped API without missing values (zero-copy)
with pa.memory_map('penguin-dataset-nonan.arrow', 'r') as source:
    table_mmap_zc = pa.ipc.open_file(source).read_all().column(FLIPPER_COLUMN)
    col_arrow_mapped_zc = table_mmap_zc.to_pandas()
    memory_post_arrowmmap_zc = _rss_mib()

# Display memory consumption
# Each figure is the RSS delta (MiB) relative to the previous measurement, so
# the numbers depend on the order in which the formats are loaded.
print(f"csv memory consumption: {memory_post_csv - memory_init}\n"
      f"Parquet memory consumption: {memory_post_parquet - memory_post_csv}\n"
      f"Arrow file memory consumption: {memory_post_arrowos - memory_post_parquet}\n"
      f"Arrow mapped (no zero-copy) memory consumption: {memory_post_arrowmmap - memory_post_arrowos}\n"
      f"Arrow mapped (zero-copy) memory consumption: {memory_post_arrowmmap_zc - memory_post_arrowmmap}\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment