Skip to content

Instantly share code, notes, and snippets.

@szmeku
Last active March 28, 2024 14:08
Show Gist options
  • Save szmeku/149173860d84a75a124d1c072623926c to your computer and use it in GitHub Desktop.
Save szmeku/149173860d84a75a124d1c072623926c to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import time
def using_for(df):
    """Add every pairwise column ratio to ``df`` using nested Python loops.

    For each ordered pair of the columns present when the function is
    called, a column named ``"<numerator>_div_<denominator>"`` holding
    ``df[numerator] / df[denominator]`` is appended. The elapsed time is
    printed to stdout.

    Args:
        df: Frame of numeric columns. It is modified in place, so pass a
            copy if the caller's frame must stay untouched.

    Returns:
        The same ``df`` object, now carrying the extra ratio columns.
    """
    start_time = time.perf_counter()
    # Snapshot the labels up front: ``df.columns`` grows as ratio columns are
    # added, and re-reading it inside the loop would also divide by the
    # freshly created ratio columns (64 columns instead of 20 for 4 inputs).
    base_cols = list(df.columns)
    for original_col in base_cols:
        for divisor_col in base_cols:
            new_col_name = f"{original_col}_div_{divisor_col}"
            df[new_col_name] = df[original_col] / df[divisor_col]
    end_time = time.perf_counter()
    print("for - time", end_time - start_time)
    return df
def using_vectorized(df):
    """Build all pairwise column ratios of ``df`` with one broadcast division.

    The result has the original columns followed by one
    ``"<numerator>_div_<denominator>"`` column per ordered pair of input
    columns, in row-major (numerator-first) order. The elapsed time is
    printed to stdout.

    Args:
        df: Frame of numeric columns. It is not modified.

    Returns:
        A new DataFrame with the input's index, the input columns, and the
        ratio columns.
    """
    start_time = time.perf_counter()
    base_cols = df.columns.tolist()
    n_cols = len(base_cols)
    data = df.to_numpy()
    # Shapes (rows, n, 1) / (rows, 1, n) broadcast to (rows, n, n) with
    # ratios[r, i, j] == data[r, i] / data[r, j]. Dividing directly avoids
    # the extra reciprocal (and its second rounding) of 1 / (b / a).
    ratios = data[:, :, None] / data[:, None, :]
    # Explicit width instead of -1 so a zero-row frame still reshapes.
    ratios_flat = ratios.reshape(data.shape[0], n_cols * n_cols)
    new_col_names = [f"{num1}_div_{num2}" for num1 in base_cols for num2 in base_cols]
    df_fully_vec = pd.DataFrame(
        np.hstack([data, ratios_flat]),
        columns=base_cols + new_col_names,
        index=df.index,
    )
    end_time = time.perf_counter()
    print("vectorized - time", end_time - start_time)
    return df_fully_vec
# Benchmark driver: time both implementations on the same random input.
# Each function receives its own copy so neither sees the other's columns.
df = pd.DataFrame(
    np.random.rand(5_000_000, 4),
    columns=['A', 'B', 'C', 'D'],
)
for_result = using_for(df.copy())
vectorized_result = using_vectorized(df.copy())

# Spot-check one cell (row 20000, column position 5) to 4 decimal places.
assert (
    for_result.iloc[20000, 5].round(4)
    == vectorized_result.iloc[20000, 5].round(4)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment