Skip to content

Instantly share code, notes, and snippets.

@m4r00p
Created July 24, 2024 11:39
Show Gist options
  • Save m4r00p/e705c22496eb805686fe31775d9d78ba to your computer and use it in GitHub Desktop.
Save m4r00p/e705c22496eb805686fe31775d9d78ba to your computer and use it in GitHub Desktop.
Dedup statistical analysis (Micro optimizations)
import cProfile
import io
import pstats
import statistics
import time

import pandas as pd
# Define the deduplication functions
def deduplicate_set(arr):
    """Return the distinct items of ``arr`` as a list, using a ``set``.

    The original order is NOT preserved: the result follows the set's own
    iteration order. Every item must be hashable.
    """
    return list(set(arr))
def deduplicate_dict(arr):
    """Return the distinct items of ``arr`` as a list, in first-seen order.

    ``dict.fromkeys`` keeps a single key per distinct item, and dicts iterate
    in insertion order, so the first occurrence of each item fixes its
    position in the result. Every item must be hashable.
    """
    return list(dict.fromkeys(arr))
def deduplicate_loop(arr):
    """Return the distinct items of ``arr`` as a list, in first-seen order.

    Membership is tested against the result list itself, so each test is a
    linear scan over the distinct items collected so far. This is kept as the
    naive baseline for the benchmark. Items only need to support ``==``; they
    do not have to be hashable.
    """
    result = []
    for item in arr:
        if item not in result:
            result.append(item)
    return result
def deduplicate_comprehension(arr):
    """Return the distinct items of ``arr`` as a list, in first-seen order.

    Uses a list comprehension with a companion ``seen`` set for constant-time
    membership tests. Every item must be hashable.
    """
    seen = set()
    # ``set.add`` returns None, so for an unseen item the ``or`` both records
    # it and evaluates falsy (the item is kept); for an item already in
    # ``seen`` the test short-circuits to True (the item is dropped).
    return [item for item in arr if not (item in seen or seen.add(item))]
# Benchmark input: a 15-item pattern covering the values 1-10 (five of them
# doubled), repeated 100000 times.
sample_list = 100_000 * [1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10]
def profile_function(func, arr, runs=30):
    """Time ``func(arr)`` over ``runs`` calls and summarise the timings.

    Each call is timed individually with ``time.perf_counter``. Summing
    ``totaltime`` over every cProfile entry is avoided on purpose: that field
    is inclusive of sub-calls, so any nested call (``list.append``,
    ``set.add``, a comprehension frame, ...) would be counted more than once
    and skew the comparison against call-heavy implementations.

    Args:
        func: Callable invoked as ``func(arr)``; its return value is ignored.
        arr: Argument passed unchanged to every call.
        runs: Number of timed calls; must be at least 1.

    Returns:
        A ``(mean, median, stdev)`` tuple of per-call wall-clock times in
        seconds. ``stdev`` is ``0.0`` when only one run was made.

    Raises:
        statistics.StatisticsError: If ``runs`` is less than 1, because there
            are no samples to average.
    """
    times = []
    for _ in range(runs):
        start = time.perf_counter()
        func(arr)
        times.append(time.perf_counter() - start)
    mean_time = statistics.mean(times)
    median_time = statistics.median(times)
    # A sample standard deviation needs at least two data points.
    stdev_time = statistics.stdev(times) if len(times) > 1 else 0.0
    return mean_time, median_time, stdev_time
# Benchmark every deduplication strategy on the shared sample list.
# NOTE: 3000 runs per function over the full sample list can take a long
# time; lower ``runs`` for a quick check.
runs = 3000
results = []
# (display name, callable) pairs, profiled in this order.
functions = [
    ("deduplicate_set", deduplicate_set),
    ("deduplicate_dict", deduplicate_dict),
    ("deduplicate_loop", deduplicate_loop),
    ("deduplicate_comprehension", deduplicate_comprehension),
]
for func_name, func in functions:
    mean_time, median_time, stdev_time = profile_function(func, sample_list, runs)
    # One row per function; the keys become the DataFrame column headers.
    results.append(
        {
            "Function": func_name,
            "Mean Time (s)": mean_time,
            "Median Time (s)": median_time,
            "Standard Deviation (s)": stdev_time,
        }
    )
# Tabulate the per-function statistics.
df = pd.DataFrame(results)
# Print the summary table.
print(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment