Created
January 15, 2025 05:51
-
-
Save guidocaru/b553470004dd3e70bdfa1ae6bb02fc6e to your computer and use it in GitHub Desktop.
Methods to grow a DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from methods import list_of_dicts, concat, loc_without_alloc, loc_with_alloc | |
import time | |
# Number of rows each method is asked to build.
num_rows = 1000

# Each entry pairs the label shown in the report with the function to time.
# The methods run, and are reported, in this order.
_timed_methods = [
    ("List of dicts", list_of_dicts),
    ("Concat", concat),  # grows the frame with pd.concat()
    (".loc without alloc", loc_without_alloc),
    (".loc with alloc", loc_with_alloc),
]

for _label, _method in _timed_methods:
    start_time = time.perf_counter()
    _method(num_rows)
    end_time = time.perf_counter()
    # The f prefix is required: without it the braces are printed verbatim
    # instead of being replaced by the elapsed time.
    print(f"- {_label}: {end_time - start_time:.3f} seconds")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from methods import list_of_dicts, concat, loc_without_alloc, loc_with_alloc | |
import perfplot | |
# Strategies under comparison; the labels below follow this same order.
kernels = [list_of_dicts, concat, loc_without_alloc, loc_with_alloc]

# Row counts to benchmark: 1000 doubled four times (1000, 2000, ..., 16000).
_row_counts = [1000 * 2**step for step in range(5)]

# Plot labels are taken from the function names.
_kernel_names = [kernel.__name__ for kernel in kernels]


def _pass_through(n):
    """Return the row count unchanged; used as the benchmark's setup step."""
    return n


out = perfplot.bench(
    kernels=kernels,
    labels=_kernel_names,
    n_range=_row_counts,
    setup=_pass_through,
    xlabel="Number of rows",
    title="Methods to grow a DataFrame",
    # Per the perfplot README, None turns off the comparison of kernel
    # outputs against each other.
    equality_check=None,
)
out.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def function_that_generates_data(n):
    """Yield the same three-field sample record ``n`` times.

    Each record is the tuple ``(22, 12.3, "abc")``: an int, a float and a
    string.
    """
    sample_record = (22, 12.3, "abc")
    yield from (sample_record for _ in range(n))
def list_of_dicts(n):
    """Collect every generated record in a list, then build the frame once.

    Despite the name, the records are stored exactly as
    ``function_that_generates_data`` produces them (tuples in this file),
    not as dicts; the column names are supplied when the DataFrame is
    constructed.

    Returns a DataFrame with columns ``column1``, ``column2``, ``column3``.
    """
    column_names = ["column1", "column2", "column3"]
    records = list(function_that_generates_data(n))
    return pd.DataFrame(records, columns=column_names)
def concat(n):
    """Grow a DataFrame by concatenating one single-row frame per record.

    Every generated record is wrapped in its own one-row DataFrame and
    appended to the accumulated frame with ``pd.concat(..., ignore_index=True)``.

    Returns a DataFrame with columns ``column1``, ``column2``, ``column3``.
    When no records are generated the result is an empty frame that still
    carries those three columns.
    """
    column_names = ["column1", "column2", "column3"]
    df = None
    for row in function_that_generates_data(n):
        row_frame = pd.DataFrame([row], columns=column_names)
        if df is None:
            # Start from the first row instead of concatenating onto an empty
            # frame: pandas has deprecated how empty entries influence the
            # result dtypes in concat, so an empty seed is best avoided.
            df = row_frame
        else:
            df = pd.concat([df, row_frame], ignore_index=True)
    if df is None:
        # Nothing was generated; keep the column layout anyway.
        return pd.DataFrame(columns=column_names)
    return df
def loc_without_alloc(n):
    """Grow a DataFrame one row at a time via ``.loc`` enlargement.

    The frame starts empty (columns only) and each generated record is
    assigned to the next unused integer label, which makes pandas append a
    new row.

    Returns a DataFrame with columns ``column1``, ``column2``, ``column3``
    and one row per generated record, labelled ``0``, ``1``, ...
    """
    df = pd.DataFrame(columns=["column1", "column2", "column3"])
    for row in function_that_generates_data(n):
        # len(df) is the next free label. The maximum of an empty index is
        # NaN, and NaN + 1 is still NaN, so deriving the label from
        # df.index.max() + 1 writes every record to the same NaN label and
        # the frame never grows past one row.
        df.loc[len(df)] = row
    return df
def loc_with_alloc(n):
    """Fill a pre-sized DataFrame row by row.

    The frame is created up front with the index ``range(n)`` and the three
    columns; each generated record is then assigned to the label matching
    its position in the generated sequence via ``.loc``.

    Returns the filled DataFrame.
    """
    column_names = ["column1", "column2", "column3"]
    frame = pd.DataFrame(index=range(n), columns=column_names)
    for label, record in enumerate(function_that_generates_data(n)):
        frame.loc[label] = record
    return frame
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.