Skip to content

Instantly share code, notes, and snippets.

@secemp9
Created December 20, 2023 19:11
Show Gist options
  • Save secemp9/16a65c98d8c15be858ed2276015047f1 to your computer and use it in GitHub Desktop.
Save secemp9/16a65c98d8c15be858ed2276015047f1 to your computer and use it in GitHub Desktop.
# Timing several ways of applying a function to DataFrame rows with plain Python, pandas, and NumPy
import pandas as pd
import numpy as np
import timeit
import platform
# Report the interpreter and library versions the timings were taken with
python_version = platform.python_version()
numpy_version, pandas_version = np.__version__, pd.__version__
print(
    "Python:", python_version,
    "numpy:", numpy_version,
    "pandas:", pandas_version,
)
# Build the benchmark input: two columns of seeded (reproducible) random floats
np.random.seed(0)
sample_size = 100000  # 100k rows
df = pd.DataFrame(
    # Columns are drawn in order, col1 first and then col2, from the seeded generator
    {column: np.random.rand(sample_size) for column in ('col1', 'col2')}
)
# Define a simple function to apply
def some_function(x, y):
    """Return ``x * y + x - y``.

    This is the toy computation that every benchmarked method applies to a
    pair of values. It only uses ``*``, ``+`` and ``-``, so it accepts any
    operands that support those operators.

    Args:
        x: Left operand.
        y: Right operand.

    Returns:
        The value of ``x * y + x - y``.
    """
    return x * y + x - y
# 1. Basic Python Loop
def method_loop(df):
    """Apply ``some_function`` row by row with an explicit ``iterrows`` loop.

    Args:
        df: DataFrame providing ``'col1'`` and ``'col2'`` columns.

    Returns:
        A list with one ``some_function(col1, col2)`` result per row, in
        row order.
    """
    result = []
    # The explicit append loop is intentional: this is the baseline the
    # other methods are timed against. The row label is not needed.
    for _, row in df.iterrows():
        result.append(some_function(row['col1'], row['col2']))
    return result
# 2. Pandas apply with axis=1
def method_apply(df):
    """Apply ``some_function`` to every row via ``DataFrame.apply(axis=1)``.

    Args:
        df: DataFrame providing ``'col1'`` and ``'col2'`` columns.

    Returns:
        Whatever ``df.apply`` produces for the row-wise callable: one
        ``some_function(col1, col2)`` value per row.
    """
    # axis=1 hands each row (not each column) to the callable.
    return df.apply(
        lambda row: some_function(row['col1'], row['col2']),
        axis=1,
    )
# 3. Pandas itertuples
def method_itertuples(df):
    """Apply ``some_function`` to every row by iterating ``df.itertuples()``.

    Args:
        df: DataFrame providing ``col1`` and ``col2`` columns.

    Returns:
        A list with one ``some_function(col1, col2)`` result per row, in
        row order.
    """
    # Each row is read by attribute (row.col1 / row.col2), not by key.
    return [some_function(row.col1, row.col2) for row in df.itertuples()]
# 4. List Comprehension
def method_list_comprehension(df):
    """Apply ``some_function`` pairwise over the two columns with ``zip``.

    Args:
        df: DataFrame providing ``'col1'`` and ``'col2'`` columns.

    Returns:
        A list with one ``some_function(x, y)`` result per pair of column
        values, in order.
    """
    # Iterate the two columns in lockstep instead of materialising rows.
    return [some_function(x, y) for x, y in zip(df['col1'], df['col2'])]
# 5. numpy.vectorize
# Wrap once at import time so the wrapper is not rebuilt on every timed call.
vec_function = np.vectorize(some_function)


def method_numpy_vectorize(df):
    """Apply ``some_function`` through the ``np.vectorize`` wrapper.

    Args:
        df: DataFrame providing ``'col1'`` and ``'col2'`` columns.

    Returns:
        The result of calling ``vec_function`` on the two columns.
    """
    return vec_function(df['col1'], df['col2'])
# 6. Direct vectorized arithmetic on the DataFrame columns
def method_direct_vectorize(df):
    """Compute ``col1 * col2 + col1 - col2`` with whole-column arithmetic.

    The same formula as ``some_function``, written directly on the columns
    so no Python-level per-row call is made.

    Args:
        df: DataFrame providing ``'col1'`` and ``'col2'`` columns.

    Returns:
        The element-wise result of the column arithmetic.
    """
    # Look each column up once instead of twice.
    x, y = df['col1'], df['col2']
    return x * y + x - y
# Record the total time of 10 calls for every method, keyed by method label
times = {
    label: timeit.timeit(lambda: method(df), number=10)
    for label, method in (
        ('loop', method_loop),
        ('apply', method_apply),
        ('itertuples', method_itertuples),
        ('list_comprehension', method_list_comprehension),
        ('numpy_vectorize', method_numpy_vectorize),
        ('direct_vectorize', method_direct_vectorize),
    )
}
print(times)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment