Last active
March 28, 2024 14:08
-
-
Save szmeku/149173860d84a75a124d1c072623926c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import time | |
def using_for(df):
    """Add every pairwise column ratio to ``df`` using nested Python loops.

    For each ordered pair of the columns that exist when the function is
    called, a column named ``"{numerator}_div_{denominator}"`` holding
    ``df[numerator] / df[denominator]`` is appended. The elapsed time is
    printed to stdout.

    Args:
        df: DataFrame whose columns support ``/``. It is modified in place.

    Returns:
        The same ``df`` object, now carrying the additional ratio columns.
    """
    start_time = time.perf_counter()
    # Snapshot the labels once. Re-reading ``df.columns`` inside the loops
    # would pick up the ratio columns created by earlier iterations, so the
    # frame would double on every outer pass (ratios of ratios).
    base_cols = list(df.columns)
    for original_col in base_cols:
        for divisor_col in base_cols:
            new_col_name = f"{original_col}_div_{divisor_col}"
            df[new_col_name] = df[original_col] / df[divisor_col]
    end_time = time.perf_counter()
    print("for - time", end_time - start_time)
    return df
def using_vectorized(df, columns=None):
    """Build all pairwise column ratios with one broadcast NumPy division.

    The result contains the selected base columns followed by one column
    per ordered pair, named ``"{numerator}_div_{denominator}"``, in the
    same order the nested-loop variant produces them. The elapsed time is
    printed to stdout.

    Args:
        df: Source DataFrame. It is not modified.
        columns: Optional iterable of column labels to combine. When
            omitted, every column of ``df`` is used.

    Returns:
        A new DataFrame built from a NumPy array, so the index of ``df``
        is not carried over.
    """
    start_time = time.perf_counter()
    # Use one label list for both selecting the data and naming the output
    # so the values and their column names can never get out of step.
    base_cols = list(df.columns) if columns is None else list(columns)
    data = df[base_cols].to_numpy()
    n_rows, n_cols = data.shape
    # ratios[r, i, j] == data[r, i] / data[r, j]: the numerator varies along
    # axis 1 and the denominator along axis 2.
    ratios = data[:, :, None] / data[:, None, :]
    # Spell out the target shape; ``-1`` cannot be inferred for zero rows.
    ratios_flat = ratios.reshape(n_rows, n_cols * n_cols)
    new_col_names = [f"{num1}_div_{num2}" for num1 in base_cols for num2 in base_cols]
    df_fully_vec = pd.DataFrame(
        np.hstack([data, ratios_flat]),
        columns=base_cols + new_col_names,
    )
    end_time = time.perf_counter()
    print("vectorized - time", end_time - start_time)
    return df_fully_vec
# Benchmark input: five million rows of uniform random floats in columns A-D.
df = pd.DataFrame(np.random.rand(5_000_000, 4), columns=list("ABCD"))

# Hand each implementation its own copy so neither sees the other's changes.
for_result = using_for(df.copy())
vectorized_result = using_vectorized(df.copy())

# Spot-check a single cell (row 20000, first non-trivial ratio column) to
# confirm both implementations agree to four decimal places.
assert round(for_result.iloc[20000, 5], 4) == round(vectorized_result.iloc[20000, 5], 4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment