Created
December 20, 2023 19:11
-
-
Save secemp9/16a65c98d8c15be858ed2276015047f1 to your computer and use it in GitHub Desktop.
Timing some functions from Python, pandas and numpy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import platform
import timeit

import numpy as np
import pandas as pd
# Print the interpreter and library versions alongside the timings.
python_version = platform.python_version()
numpy_version = np.__version__
pandas_version = pd.__version__
print("Python:", python_version, "numpy:", numpy_version, "pandas:", pandas_version)

# Generate a sample DataFrame; the fixed seed makes the data reproducible.
np.random.seed(0)
sample_size = 100000  # 100k rows
df = pd.DataFrame({
    'col1': np.random.rand(sample_size),
    'col2': np.random.rand(sample_size),
})
| # Define a simple function to apply | |
def some_function(x, y):
    """Return ``x * y + x - y``.

    Only the ``*``, ``+`` and ``-`` operators are used, so the same function
    serves both the per-row callers (scalars) and ``np.vectorize``.
    """
    return x * y + x - y
| # 1. Basic Python Loop | |
def method_loop(df):
    """Apply ``some_function`` to every row using ``DataFrame.iterrows``.

    Args:
        df: DataFrame providing ``col1`` and ``col2`` columns.

    Returns:
        A list holding one result per row, in iteration order.
    """
    result = []
    # Kept as an explicit loop: this is the 'loop' entry of the timing
    # comparison. The row label yielded by iterrows() is not needed.
    for _, row in df.iterrows():
        result.append(some_function(row['col1'], row['col2']))
    return result
| # 2. Pandas apply with axis=1 | |
def method_apply(df):
    """Apply ``some_function`` to every row via ``DataFrame.apply(axis=1)``.

    Args:
        df: DataFrame providing ``col1`` and ``col2`` columns.

    Returns:
        Whatever ``DataFrame.apply`` produces for the per-row results
        (for a non-empty frame, a Series aligned with ``df``'s index).
    """
    return df.apply(lambda row: some_function(row['col1'], row['col2']), axis=1)
| # 3. Pandas itertuples | |
def method_itertuples(df):
    """Apply ``some_function`` to every row using ``DataFrame.itertuples``.

    Args:
        df: DataFrame providing ``col1`` and ``col2`` columns; they are read
            as attributes of each row tuple.

    Returns:
        A list holding one result per row, in iteration order.
    """
    return [some_function(row.col1, row.col2) for row in df.itertuples()]
| # 4. List Comprehension | |
def method_list_comprehension(df):
    """Apply ``some_function`` pairwise over the two columns with ``zip``.

    Args:
        df: DataFrame providing ``col1`` and ``col2`` columns.

    Returns:
        A list holding one result per (col1, col2) pair, in column order.
    """
    return [some_function(x, y) for x, y in zip(df['col1'], df['col2'])]
| # 5. numpy.vectorize | |
# Wrap some_function once at module level so the wrapper is not rebuilt on
# every timed call.
vec_function = np.vectorize(some_function)


def method_numpy_vectorize(df):
    """Apply the ``np.vectorize``-wrapped ``some_function`` to both columns.

    Args:
        df: DataFrame providing ``col1`` and ``col2`` columns.

    Returns:
        The result of calling ``vec_function`` on the two columns.
    """
    return vec_function(df['col1'], df['col2'])
| # 6. Direct vectorized arithmetic on the pandas columns (no per-row Python calls) | |
def method_direct_vectorize(df):
    """Compute ``col1 * col2 + col1 - col2`` with whole-column arithmetic.

    This is the same formula as ``some_function``, written directly on the
    columns instead of being called once per row.

    Args:
        df: DataFrame providing ``col1`` and ``col2`` columns.

    Returns:
        A Series with one result per row.
    """
    return df['col1'] * df['col2'] + df['col1'] - df['col2']
| # Timing each method | |
# Number of calls timed per method; timeit.timeit reports the total time
# for all of them, not a per-call average.
REPEATS = 10

# Label -> implementation, in the order the results are reported.
methods = {
    'loop': method_loop,
    'apply': method_apply,
    'itertuples': method_itertuples,
    'list_comprehension': method_list_comprehension,
    'numpy_vectorize': method_numpy_vectorize,
    'direct_vectorize': method_direct_vectorize,
}

times = {}
for label, method in methods.items():
    # The lambda is invoked by timeit within this same iteration, so it
    # always sees the current `method`.
    times[label] = timeit.timeit(lambda: method(df), number=REPEATS)
print(times)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment