Skip to content

Instantly share code, notes, and snippets.

@DeflateAwning
Last active March 25, 2025 20:56
Show Gist options
  • Save DeflateAwning/2751ffa05bc74bad8e19d4a76c6ef8c5 to your computer and use it in GitHub Desktop.
Save DeflateAwning/2751ffa05bc74bad8e19d4a76c6ef8c5 to your computer and use it in GitHub Desktop.
Benchmark NumPy to Polars
import numpy as np
import pandas as pd
import polars as pl
import time
from memory_profiler import memory_usage
# Array shapes to test, as (rows, columns) pairs.
shapes = [
    (n_rows, n_cols)
    for n_rows in (10_000, 100_000, 1_000_000)
    for n_cols in (10, 200)
]
# The largest row count is only benchmarked with the 10-column layout.
shapes.append((10_000_000, 10))

# How many times each conversion is run per measurement.
REPEATS = 5
def time_and_memory(fn) -> tuple[float, float]:
    """Run ``fn`` ``REPEATS`` times; report its mean run time and peak memory.

    Args:
        fn: Zero-argument callable to benchmark.

    Returns:
        A ``(mean_seconds, peak_memory)`` tuple. ``mean_seconds`` is the
        average duration of ``fn`` itself over the runs; ``peak_memory`` is
        the largest value ``memory_usage(..., max_usage=True)`` reported for
        any single run (printed as MiB by the benchmark table).
    """
    durations = []
    peaks = []

    def timed_fn():
        # Time only the call to ``fn``. Wrapping the whole ``memory_usage``
        # call would also count the profiler's own start-up and teardown,
        # which swamps the conversion time for small inputs.
        started = time.perf_counter()  # monotonic, high-resolution clock
        result = fn()
        durations.append(time.perf_counter() - started)
        # Hand the result back so ``memory_usage(retval=True)`` still
        # receives the return value, exactly as before.
        return result

    for _ in range(REPEATS):
        peak, result = memory_usage(
            timed_fn,
            retval=True,
            max_usage=True,
            interval=0.01,
            include_children=True,
            multiprocess=True,
            timestamps=False,
        )
        peaks.append(peak)
        # Drop the returned object before the next run so it does not
        # inflate that run's memory reading.
        del result

    return sum(durations) / REPEATS, max(peaks)
def time_numpy_to_polars(arr):
    """Benchmark converting ``arr`` straight to a Polars DataFrame.

    The conversion (``pl.from_numpy`` with generated ``col<i>`` column names)
    is wrapped in a closure and handed to ``time_and_memory``; that helper's
    result is returned unchanged.
    """

    def convert():
        column_names = [f"col{idx}" for idx in range(arr.shape[1])]
        frame = pl.from_numpy(arr, schema=column_names)
        # Sanity checks tied to the benchmark's array shapes.
        assert frame.height > 1000
        assert len(frame.columns) in (10, 200)
        return frame

    return time_and_memory(convert)
def time_numpy_to_pandas_to_polars(arr):
    """Benchmark converting ``arr`` to Polars by way of a pandas DataFrame.

    The closure builds a pandas DataFrame with generated ``col<i>`` column
    names, converts it with ``pl.from_pandas(..., rechunk=True)``, and is
    measured by ``time_and_memory``; that helper's result is returned as is.
    """

    def convert():
        column_names = [f"col{idx}" for idx in range(arr.shape[1])]
        pandas_frame = pd.DataFrame(arr, columns=column_names)
        polars_frame = pl.from_pandas(pandas_frame, rechunk=True)
        # Sanity checks tied to the benchmark's array shapes.
        assert polars_frame.height > 1000
        assert len(polars_frame.columns) in (10, 200)
        # Release the intermediate pandas frame before returning.
        del pandas_frame
        return polars_frame

    return time_and_memory(convert)
def benchmark():
    """Print one table of time and peak memory for both conversion routes.

    For every entry in ``shapes`` a random array is generated, then measured
    with ``time_numpy_to_polars`` and ``time_numpy_to_pandas_to_polars``;
    one row per shape is printed.
    """
    # Each time cell in a row is followed by " s", making it two characters
    # wider than its numeric field (20 -> 22, 28 -> 30). The header cells use
    # those full widths so the column separators line up with the rows.
    header = (
        f"{'Shape':>15} | {'Time NumPy→Polars':>22} | {'Mem (MiB)':>10} | "
        f"{'Time NumPy→Pandas→Polars':>30} | {'Mem (MiB)':>10}"
    )
    print(header)
    # Derive the rule length from the header so the two cannot drift apart.
    print("-" * len(header))

    for shape in shapes:
        arr = np.random.rand(*shape)
        t_np_polars, m_np_polars = time_numpy_to_polars(arr)
        t_np_pd_polars, m_np_pd_polars = time_numpy_to_pandas_to_polars(arr)
        print(
            f"{str(shape):>15} | {t_np_polars:>20.6f} s | {m_np_polars:>10.2f} | {t_np_pd_polars:>28.6f} s | {m_np_pd_polars:>10.2f}"
        )
# Print the full benchmark table five times in a row.
for _run in range(5):
    benchmark()
import numpy as np
import pandas as pd
import polars as pl
import timeit
# Array shapes to test: every (rows, columns) combination below, plus one
# extra-tall, 10-column case appended at the end.
shapes = [
    (row_count, col_count)
    for row_count in (10_000, 100_000, 1_000_000)
    for col_count in (10, 200)
]
shapes.append((10_000_000, 10))

# Number of calls timeit makes per measurement (results are averaged).
REPEATS = 5
def time_numpy_to_polars(arr):
    """Return the mean seconds per direct NumPy -> Polars conversion.

    The conversion (``pl.from_numpy`` with generated ``col<i>`` column names)
    is executed ``REPEATS`` times by ``timeit.timeit`` and the total is
    divided by ``REPEATS``.
    """

    def convert():
        column_names = [f"col{idx}" for idx in range(arr.shape[1])]
        frame = pl.from_numpy(arr, schema=column_names)
        # Sanity checks tied to the benchmark's array shapes.
        assert frame.height > 1000
        assert len(frame.columns) in (10, 200)
        return frame

    total_seconds = timeit.timeit(convert, number=REPEATS)
    return total_seconds / REPEATS
def time_numpy_to_pandas_to_polars(arr):
    """Return the mean seconds per NumPy -> pandas -> Polars conversion.

    Each call builds a pandas DataFrame with generated ``col<i>`` column
    names and converts it with ``pl.from_pandas(..., rechunk=True)``. The
    closure is executed ``REPEATS`` times by ``timeit.timeit`` and the total
    is divided by ``REPEATS``.
    """

    def convert():
        column_names = [f"col{idx}" for idx in range(arr.shape[1])]
        pandas_frame = pd.DataFrame(arr, columns=column_names)
        polars_frame = pl.from_pandas(pandas_frame, rechunk=True)
        # Sanity checks tied to the benchmark's array shapes.
        assert polars_frame.height > 1000
        assert len(polars_frame.columns) in (10, 200)
        # Release the intermediate pandas frame before returning.
        del pandas_frame
        return polars_frame

    total_seconds = timeit.timeit(convert, number=REPEATS)
    return total_seconds / REPEATS
def benchmark():
    """Print one table of mean conversion times for both routes.

    For every entry in ``shapes`` the pandas route is timed first and the
    direct route second, each on its own freshly generated random array that
    is deleted right after its measurement.
    """
    # Each time cell in a row is followed by " s", making it two characters
    # wider than its numeric field (18 -> 20, 26 -> 28). The header cells use
    # those full widths so the column separators line up with the rows.
    header = f"{'Shape':>15} | {'NumPy → Polars':>20} | {'NumPy → Pandas → Polars':>28}"
    print(header)
    # Derive the rule length from the header so the two cannot drift apart.
    print("-" * len(header))

    for shape in shapes:
        arr1 = np.random.rand(*shape)
        t_np_pd_polars = time_numpy_to_pandas_to_polars(arr1)
        del arr1

        arr2 = np.random.rand(*shape)
        t_np_polars = time_numpy_to_polars(arr2)
        del arr2

        print(f"{str(shape):>15} | {t_np_polars:>18.6f} s | {t_np_pd_polars:>26.6f} s")
# Print the full benchmark table five times in a row.
for _run in range(5):
    benchmark()

Results when run under the VS Code debugger:

          Shape |    Time NumPy→Polars |  Mem (MiB) |     Time NumPy→Pandas→Polars |  Mem (MiB)
-----------------------------------------------------------------------------------------------
    (10000, 10) |             2.164523 s |     352.66 |                     1.091811 s |     381.28
   (10000, 200) |             2.200952 s |     497.80 |                     0.456082 s |     480.81
   (100000, 10) |             2.150014 s |     656.51 |                     0.800656 s |     553.36
  (100000, 200) |             0.555569 s |    1112.50 |                     0.547034 s |    1415.78
  (1000000, 10) |             0.478347 s |    1278.23 |                     0.502521 s |    1657.62
 (1000000, 200) |             1.710611 s |    6163.38 |                     1.133678 s |   10807.83
 (10000000, 10) |             0.942061 s |    9053.64 |                     0.726955 s |    9233.34
          Shape |     NumPy → Polars |    NumPy → Pandas → Polars
-----------------------------------------------------------------
   (10000, 10) |           0.000548 s |                   0.023213 s
  (10000, 200) |           0.005222 s |                   0.053185 s
  (100000, 10) |           0.003375 s |                   0.024302 s
 (100000, 200) |           0.165554 s |                   0.148855 s
 (1000000, 10) |           0.058200 s |                   0.061068 s
(1000000, 200) |           1.455404 s |                   0.627263 s
(10000000, 10) |           0.560799 s |                   0.338719 s

Results when run without the VS Code debugger:

          Shape |     NumPy → Polars |    NumPy → Pandas → Polars
-----------------------------------------------------------------
   (10000, 10) |           0.000369 s |                   0.005119 s
  (10000, 200) |           0.004214 s |                   0.027848 s
  (100000, 10) |           0.003762 s |                   0.006106 s
 (100000, 200) |           0.106361 s |                   0.084003 s
 (1000000, 10) |           0.048475 s |                   0.028471 s
            Shape |    Time NumPy→Polars |  Mem (MiB) |     Time NumPy→Pandas→Polars |  Mem (MiB)
-----------------------------------------------------------------------------------------------
    (10000, 10) |             0.342732 s |    2563.91 |                     0.342225 s |    2566.27
   (10000, 200) |             0.299278 s |    2653.55 |                     0.155794 s |    2695.52
   (100000, 10) |             0.368094 s |    2697.49 |                     0.380731 s |    2707.75
  (100000, 200) |             0.183563 s |    3075.52 |                     0.192179 s |    3176.64
  (1000000, 10) |             0.144099 s |    3103.30 |                     0.138533 s |    3140.55
 (1000000, 200) |             1.392193 s |    7565.50 |                     0.788214 s |    7441.28
 (10000000, 10) |             0.585864 s |    6683.27 |                     0.339979 s |    7079.25
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment