thomasahle · April 13, 2023 19:07
diff --git a/convex.py b/convex.py
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import sklearn.metrics


 def upper(xs, ys, convex=True):
     """Return the upper frontier of the points ``(xs, ys)`` right of the peak.

     The points are sorted by x. The frontier starts at the first point (in
     sorted order) that attains the maximal y and is extended towards larger x.

     Args:
         xs: 1-D sequence of x-coordinates (anything ``np.asarray`` accepts).
         ys: 1-D sequence of y-coordinates, same length as ``xs``.
         convex: If true, keep only the points on the upper convex hull.
             Otherwise keep the strictly decreasing staircase, i.e. every
             point whose y is larger than the y of all points after it.

     Returns:
         A pair ``(xs1, ys1)`` of lists holding the frontier coordinates in
         order of non-decreasing x. Both lists are empty for empty input.
     """
     xs = np.asarray(xs)
     ys = np.asarray(ys)
     if xs.size == 0:
         # np.argmax raises on empty input; an empty frontier is the natural answer.
         return [], []

     order = np.argsort(xs)
     xs = xs[order]
     ys = ys[order]
     i0 = np.argmax(ys)
     xs1 = [xs[i0]]
     ys1 = [ys[i0]]
     for x, y in zip(xs[i0 + 1:], ys[i0 + 1:]):
         if convex:
             # Drop the last kept point while it lies on or below the chord
             # from its predecessor to (x, y). The starting point is never
             # dropped because at least two kept points are required.
             while len(ys1) >= 2 and (ys1[-2] - y) * (x - xs1[-1]) >= (ys1[-1] - y) * (x - xs1[-2]):
                 ys1.pop()
                 xs1.pop()
         else:
             # Drop every kept point that (x, y) dominates. The emptiness
             # check is required: a later point that ties the maximum pops the
             # starting point as well, and indexing the then-empty list would
             # raise IndexError.
             while ys1 and y >= ys1[-1]:
                 ys1.pop()
                 xs1.pop()
         xs1.append(x)
         ys1.append(y)
     return xs1, ys1


 def auc(xs, ys, cutoff=1 / 2):
     """Return the area under the curve ``(xs, ys)`` from ``cutoff`` onwards.

     The curve is closed with a final point ``(1, 0)``, its value at
     ``cutoff`` is obtained by linear interpolation, and the area of the part
     starting at ``cutoff`` is computed with ``sklearn.metrics.auc``.

     Args:
         xs: x-coordinates in increasing order (list or array).
         ys: y-coordinates matching ``xs``.
         cutoff: x-value at which the integration starts.

     Returns:
         The area as returned by ``sklearn.metrics.auc``.
     """
     # Work on copies: appending to the arguments directly would silently
     # modify the caller's lists (and fails outright for NumPy arrays).
     xs = list(xs) + [1]
     ys = list(ys) + [0]
     y0 = np.interp(cutoff, xs, ys)
     # First index whose x is >= cutoff; everything before it is cut away.
     i = np.searchsorted(xs, cutoff)
     return sklearn.metrics.auc([cutoff] + xs[i:], [y0] + ys[i:])


 # Load the results table and collect the distinct datasets and algorithms.
 df = pd.read_csv("out.csv")
 datasets = np.unique(df["dataset"])
 algs = np.unique(df["algorithm"])

 # scores[alg] receives one entry per dataset: the area under the algorithm's
 # upper hull of ("k-nn", "qps") points, or None when it has no rows there.
 scores = {alg: [] for alg in algs}
 for ds in datasets:
     in_dataset = df["dataset"] == ds
     for alg in algs:
         x = df[(df["algorithm"] == alg) & in_dataset]
         xs = x["k-nn"].to_numpy()
         ys = x["qps"].to_numpy()
         if xs.size > 0:
             xs, ys = upper(xs, ys)
             scores[alg].append(auc(xs, ys))
         else:
             print(f"No data for {alg}, {ds}")
             scores[alg].append(None)

     # Rescale this dataset's scores so that the best algorithm gets 1.
     latest = [lst[-1] for lst in scores.values() if lst[-1] is not None]
     if not latest:
         continue
     best = max(latest)
     for alg in algs:
         if scores[alg][-1] is not None:
             scores[alg][-1] /= best

 # Drop the missing entries, then order the algorithms by decreasing mean score.
 values = []
 for lst in scores.values():
     values.append([v for v in lst if v is not None])
 mean_algs_values = [(-np.mean(vals), alg, vals) for alg, vals in zip(algs, values)]
 mean_algs_values.sort()
 _, algs, values = zip(*mean_algs_values)

 # One box per algorithm, labelled with the algorithm name.
 fig, ax = plt.subplots()
 print(values)
 ax.boxplot(values)
 ax.set_xticklabels(algs, rotation=90)

 ax.set_title("Box Plot Summary Chart Comparing Algorithms")
 ax.set_xlabel("Algorithms")
 ax.set_ylabel("Scores")

 plt.show()
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import sklearn.metrics


	def upper(xs, ys, convex=True):
	    """Return the upper frontier of the points ``(xs, ys)`` right of the peak.

	    The points are sorted by x. The frontier starts at the first point (in
	    sorted order) that attains the maximal y and is extended towards larger x.

	    Args:
	        xs: 1-D sequence of x-coordinates (anything ``np.asarray`` accepts).
	        ys: 1-D sequence of y-coordinates, same length as ``xs``.
	        convex: If true, keep only the points on the upper convex hull.
	            Otherwise keep the strictly decreasing staircase, i.e. every
	            point whose y is larger than the y of all points after it.

	    Returns:
	        A pair ``(xs1, ys1)`` of lists holding the frontier coordinates in
	        order of non-decreasing x. Both lists are empty for empty input.
	    """
	    xs = np.asarray(xs)
	    ys = np.asarray(ys)
	    if xs.size == 0:
	        # np.argmax raises on empty input; an empty frontier is the natural answer.
	        return [], []

	    order = np.argsort(xs)
	    xs = xs[order]
	    ys = ys[order]
	    i0 = np.argmax(ys)
	    xs1 = [xs[i0]]
	    ys1 = [ys[i0]]
	    for x, y in zip(xs[i0 + 1:], ys[i0 + 1:]):
	        if convex:
	            # Drop the last kept point while it lies on or below the chord
	            # from its predecessor to (x, y). The starting point is never
	            # dropped because at least two kept points are required.
	            while len(ys1) >= 2 and (ys1[-2] - y) * (x - xs1[-1]) >= (ys1[-1] - y) * (x - xs1[-2]):
	                ys1.pop()
	                xs1.pop()
	        else:
	            # Drop every kept point that (x, y) dominates. The emptiness
	            # check is required: a later point that ties the maximum pops the
	            # starting point as well, and indexing the then-empty list would
	            # raise IndexError.
	            while ys1 and y >= ys1[-1]:
	                ys1.pop()
	                xs1.pop()
	        xs1.append(x)
	        ys1.append(y)
	    return xs1, ys1


	def auc(xs, ys, cutoff=1 / 2):
	    """Return the area under the curve ``(xs, ys)`` from ``cutoff`` onwards.

	    The curve is closed with a final point ``(1, 0)``, its value at
	    ``cutoff`` is obtained by linear interpolation, and the area of the part
	    starting at ``cutoff`` is computed with ``sklearn.metrics.auc``.

	    Args:
	        xs: x-coordinates in increasing order (list or array).
	        ys: y-coordinates matching ``xs``.
	        cutoff: x-value at which the integration starts.

	    Returns:
	        The area as returned by ``sklearn.metrics.auc``.
	    """
	    # Work on copies: appending to the arguments directly would silently
	    # modify the caller's lists (and fails outright for NumPy arrays).
	    xs = list(xs) + [1]
	    ys = list(ys) + [0]
	    y0 = np.interp(cutoff, xs, ys)
	    # First index whose x is >= cutoff; everything before it is cut away.
	    i = np.searchsorted(xs, cutoff)
	    return sklearn.metrics.auc([cutoff] + xs[i:], [y0] + ys[i:])


	# Load the results table and collect the distinct datasets and algorithms.
	df = pd.read_csv("out.csv")
	datasets = np.unique(df["dataset"])
	algs = np.unique(df["algorithm"])

	# scores[alg] receives one entry per dataset: the area under the algorithm's
	# upper hull of ("k-nn", "qps") points, or None when it has no rows there.
	scores = {alg: [] for alg in algs}
	for ds in datasets:
	    for alg in algs:
	        x = df[(df["algorithm"] == alg) & (df["dataset"] == ds)]
	        xs, ys = x["k-nn"].to_numpy(), x["qps"].to_numpy()
	        if xs.size == 0:
	            print(f"No data for {alg}, {ds}")
	            scores[alg].append(None)
	            continue
	        xs, ys = upper(xs, ys)
	        scores[alg].append(auc(xs, ys))
	    # Normalize by best score; skip datasets for which nothing was scored.
	    if all(lst[-1] is None for lst in scores.values()):
	        continue
	    best = max(lst[-1] for lst in scores.values() if lst[-1] is not None)
	    for alg in algs:
	        if scores[alg][-1] is not None:
	            scores[alg][-1] /= best

	# Drop the missing entries, then order the algorithms by decreasing mean score.
	values = [[v for v in lst if v is not None] for lst in scores.values()]
	mean_algs_values = sorted((-np.mean(vals), alg, vals) for alg, vals in zip(algs, values))
	_, algs, values = zip(*mean_algs_values)

	# Create a box plot
	fig, ax = plt.subplots()
	print(values)
	ax.boxplot(values)

	# Set the x-axis tick labels to the algorithm names
	ax.set_xticklabels(algs, rotation=90)

	# Add labels and title
	ax.set_xlabel("Algorithms")
	ax.set_ylabel("Scores")
	ax.set_title("Box Plot Summary Chart Comparing Algorithms")

	# Show the plot
	plt.show()