Created
November 7, 2021 14:11
-
-
Save mesejo/8969e069f7077b7a14d8eeba8d0e5ff3 to your computer and use it in GitHub Desktop.
defaultdict performance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from itertools import groupby, cycle | |
from operator import itemgetter as ig | |
import random | |
import perfplot | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
sns.set_style("dark") | |
def setup(unique): | |
def fun(length): | |
choices = cycle(range(unique)) | |
result = [ | |
(i, random.randint(0, 10), random.randint(0, 10_000)) | |
for _, i in zip(range(length), choices) | |
] | |
random.shuffle(result) | |
return result | |
return fun | |
def defaultdict_max_approach(lst): | |
d = defaultdict(lambda: (0, 0, float("-inf"))) | |
for e in lst: | |
first, _, last = e | |
if d[first][2] < last: | |
d[first] = e | |
return [*d.values()] | |
def dict_max_approach(lst): | |
# https://stackoverflow.com/a/69025193/4001592 | |
d = {} | |
for tpl in lst: | |
first, *_, last = tpl | |
if first not in d or last > d[first][-1]: | |
d[first] = tpl | |
return [*d.values()] | |
def groupby_max_approach(lst): | |
# https://stackoverflow.com/a/69025193/4001592 | |
return [max(g, key=ig(-1)) for _, g in groupby(sorted(lst), key=ig(0))] | |
def equality(a, b): | |
da = {k: v for k, _, v in a} | |
db = {k: v for k, _, v in b} | |
return da == db | |
if __name__ == "__main__": | |
res = [] | |
for unique in [500, 1000, 5000, 10_000]: | |
b = perfplot.bench( | |
setup=setup(unique), | |
kernels=[ | |
defaultdict_max_approach, | |
dict_max_approach, | |
groupby_max_approach, | |
], | |
labels=["defaultdict", "dict", "groupby"], | |
n_range=[k for k in range(100_000, 1000_001, 100_000)], | |
xlabel="len(lst)", | |
equality_check=equality, | |
) | |
df = pd.DataFrame( | |
data=b.timings_s.T, | |
index=pd.MultiIndex.from_product( | |
[[str(unique)], b.n_range], names=["unique", "range"] | |
), | |
columns=[*b.labels], | |
) | |
res.append(df) | |
data = pd.concat(res).reset_index() | |
data = data.melt( | |
id_vars=["range", "unique"], value_vars=["defaultdict", "dict", "groupby"] | |
) | |
mapping = { | |
"unique": "Unique Keys [count]", | |
"variable": "Algorithm", | |
"range": "len(lst) [Ks]", | |
"value": "Runtime [s]", | |
} | |
data = data.rename(columns=mapping) | |
data["len(lst) [Ks]"] = data["len(lst) [Ks]"] // 1000 | |
g = sns.FacetGrid(data, sharey=False, col="Unique Keys [count]", col_wrap=2, hue="Algorithm", legend_out=False) | |
g.map(sns.lineplot, "len(lst) [Ks]", "Runtime [s]") | |
g.tight_layout() | |
g.add_legend() | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment