irmin: bench/tree.exe bootstrap trace plot

(env
 (dev (flags (-w +1..3+5..28+30..39+43+46..47+49..57+61..62-40-26 -strict-sequence -strict-formats -short-paths -keep-locs -g)))
 (release (flags (-w +1..3+5..28+30..39+43+46..47+49..57+61..62-40-26 -strict-sequence -strict-formats -short-paths -keep-locs))))

(executable
 (name json_to_repr)
 (libraries repr ppx_repr fmt ppx_deriving_yojson mtime mtime.clock.os unix re)
 (preprocess
  (pps ppx_repr ppx_deriving_yojson)))

(lang dune 2.8)
"""Plot distribution of timings out of bench/tree.exe on boostrap trace | |
Formatted using YAPF | |
""" | |
import argparse
import json
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

sns.set_theme(style="ticks", color_codes=True)
plt.rcParams["legend.labelspacing"] = 1.7
p = argparse.ArgumentParser()
p.add_argument('--in',
               help="""path to JSON file.
Pseudo schema:
{
  "revision" : string;
  "flatten": int;
  "inode_config": string;
  "max_durations": {string -> (int, float)};
  "moving_average_points": {string -> {"x": float array; "y": float array}};
  "histo_points": {string -> (int * float) array};
}
""")
p.add_argument('--out', help="Output path (e.g. a png or svg path)")
p.add_argument('-o',
               help="Overwrite --out if it already exists; fail otherwise",
               action="store_true")
args = p.parse_args()
print(args)

# 'in' is a Python keyword, so fetch it through vars() instead of args.in.
j = json.loads(open(vars(args)['in']).read())
if not args.o and os.path.exists(args.out):
    raise NameError("{} exists! Pass -o to overwrite it".format(args.out))
def time_to_txt(t):
    """Seconds -> "HH:MM:SS"."""
    h = np.floor(t / 3600)
    t -= h * 3600
    m = np.floor(t / 60)
    t -= m * 60
    return "{:02.0f}:{:02.0f}:{:02.0f}".format(h, m, t)


def time_to_txt2(t):
    """Seconds -> human-readable duration, no decimal."""
    if t < 1e-6:
        return '{:.0f} ns'.format(t / 1e-9)
    if t < 1e-3:
        return '{:.0f} \u03bcs'.format(t / 1e-6)
    if t < 1:
        return '{:.0f} ms'.format(t / 1e-3)
    else:
        return '{:.0f} s'.format(t)


def time_to_txt3(t):
    """Seconds -> human-readable duration, one decimal."""
    if t < 1e-6:
        return '{:.1f} ns'.format(t / 1e-9)
    if t < 1e-3:
        return '{:.1f} \u03bcs'.format(t / 1e-6)
    if t < 1:
        return '{:.1f} ms'.format(t / 1e-3)
    else:
        return '{:.1f} s'.format(t)


def count_to_txt(v):
    """Count -> human-readable count with K/M/G suffixes."""
    if v <= 9999:
        return '{:.0f}'.format(v)
    if v <= 9999999:
        return '{:.0f} K'.format(v / 1e3)
    if v <= 9999999999:
        return '{:.0f} M'.format(v / 1e6)
    return '{:.0f} G'.format(v / 1e9)


def logn_to_txt(v):
    """log10 of a count -> upper bound of its bucket, human readable."""
    if v == 0:
        return "1"
    v = 10**v
    if v >= 1e9:
        return '< {:.0f} G'.format(v / 1e9)
    if v >= 1e6:
        return '< {:.0f} M'.format(v / 1e6)
    if v >= 1e3:
        return '< {:.0f} K'.format(v / 1e3)
    return '< {:.0f}'.format(v)
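# Sanity-check examples for the formatters above (expected outputs worked out
# by hand, shown only as illustration):
#   time_to_txt(3725.0)      -> "01:02:05"
#   time_to_txt2(3e-5)       -> "30 μs"
#   count_to_txt(12_000_000) -> "12 M"
#   logn_to_txt(4.0)         -> "< 10 K"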
columns = []
for i, k in enumerate(sorted(j["histo_points"].keys())):
    df = pd.DataFrame(j["histo_points"][k], columns=['n', 'center'])
    n = df.n.sum()
    total = (df.center * df.n).sum()
    df["op"] = k
    df["label"] = k + '\n' + count_to_txt(n) + '\n' + time_to_txt(total)
    print(f"column {i:<2d}: k:{k:<15s} points:{len(df):<3d} "
          f"samples:{n:<10,d} total:{total:<.9f}")
    columns.append(df)
df = pd.concat(columns)

print("Stats per op -----------------")
df1 = df.copy()
df1['duration'] = df1.center * df1.n
df1 = df1.groupby("op").sum().drop(columns=['center']).reset_index()
df1['mean_duration'] = df1.duration / df1.n
df1['max_duration'] = df1.op.apply(lambda op: j["max_durations"][op][1])
df1['label'] = df.groupby("op").first().label.reset_index(drop=True)
print(df1)

print("All data points -----------------")
df["duration"] = df.center
df['logn'] = np.ceil(np.log10(df.n))
print(df)
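# The 'logn' column buckets each histogram bin by order of magnitude of its
# sample count; e.g. a bin holding 1,234 samples gets logn = ceil(log10(1234)) = 4,
# which logn_to_txt later renders as "< 10 K" in the legend.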
# Seaborn plot ----------------
# https://seaborn.pydata.org/examples/scatter_bubbles.html
g = sns.relplot(
    x="op",
    y="duration",
    size="logn",
    palette="muted",
    alpha=.5,
    sizes=(20, 750),
    height=6,
    data=df,
    legend="full",
    # facet_kws=dict(legend_out=True, )
)
g.set(yscale="log")
plt.grid(axis='y')
v = plt.plot(df1["op"],
             df1.max_duration,
             '_',
             color='black',
             alpha=0.5,
             ms=15,
             label="max")
w = plt.plot(df1["op"],
             df1.mean_duration,
             '_',
             color='red',
             alpha=0.5,
             ms=15,
             label="mean")
for _, row in df1.iterrows():
    plt.annotate(
        time_to_txt3(row.mean_duration),
        (row["op"], row.mean_duration),
        xytext=(10, 0),
        textcoords='offset pixels',
        ha='left',
        va='center',
        size=8,
        weight='bold',
    )
    plt.annotate(
        time_to_txt3(row.max_duration),
        (row["op"], row.max_duration),
        xytext=(10, 0),
        textcoords='offset pixels',
        ha='left',
        va='center',
        size=8,
        weight='bold',
    )
plt.legend(handles=v + w, loc="upper right")
plt.xlabel('op name / call count / total time spent in op', weight="bold")
plt.ylabel('duration of call (log scale)', weight="bold")

# Legend ----------------
g.legend.set_title("Number of calls")
for txt in g.legend.texts:
    print(float(txt.get_text()), logn_to_txt(float(txt.get_text())))
    txt.set_text(logn_to_txt(float(txt.get_text())))

# Title ----------------
el_wall_inop = time_to_txt(df1.duration.sum())
el_wall = time_to_txt(float(j["elapsed"]))
el_cpu = time_to_txt(float(j["elapsed_cpu"]))
flatten = {
    "0": False,
    "1": True,
    0: False,
    1: True,
}[j["flatten"]]
plt.title(
    ("bench/tree.exe on the bootstrap trace / commits:{} / flatten:{} / inode-config:{}\n"
     "wall in ops:{} / wall:{} / cpu:{} / revision:{}")
    .format(
        df[df["op"].str.contains("Commit")].n.sum(),
        flatten,
        j["inode_config"],
        el_wall_inop, el_wall, el_cpu,
        j["revision"],
    ), weight="bold")

# Tick label x ----------------
xticks, _ = plt.xticks()
plt.xticks(xticks, df1.label)

# Tick label y ----------------
a = min(df["duration"].min(), 1e-7)
b = max(pd.DataFrame(j["max_durations"]).T.iloc[:, 1].max(), 1e+3)
a, b = np.log10([a, b])
yticks = 10**np.arange(np.ceil(a), np.floor(b) + 1)
ymin = 10**(np.floor(a * 4) / 4)
ymax = 10**(np.ceil(b * 4) / 4)
plt.ylim(ymin, ymax)
plt.yticks(yticks, [time_to_txt2(v) for v in yticks])
plt.minorticks_off()

# Save ----------------
# https://stackoverflow.com/a/56970556
g.fig.set_figwidth(10)
g.fig.set_figheight(6)
g.tight_layout()
g.savefig(args.out)
"""Plot distribution of timings out of bench/tree.exe on boostrap trace | |
Formatted using YAPF | |
""" | |
import argparse
import json
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

sns.set_theme(style="ticks", color_codes=True)
# plt.rcParams["legend.labelspacing"] = 1.7
p = argparse.ArgumentParser()
p.add_argument('--in',
               help="""path to JSON file.
Pseudo schema:
{
  "revision" : string;
  "flatten": int;
  "inode_config": string;
  "max_durations": {string -> (int, float)};
  "moving_average_points": {string -> {"x": float array; "y": float array}};
  "histo_points": {string -> (int * float) array};
}
""")
p.add_argument('--out', help="Output path (e.g. a png or svg path)")
p.add_argument('-o',
               help="Overwrite --out if it already exists; fail otherwise",
               action="store_true")
args = p.parse_args()
print(args)

# 'in' is a Python keyword, so fetch it through vars() instead of args.in.
j = json.loads(open(vars(args)['in']).read())
if not args.o and os.path.exists(args.out):
    raise NameError("{} exists! Pass -o to overwrite it".format(args.out))
def time_to_txt(t):
    h = np.floor(t / 3600)
    t -= h * 3600
    m = np.floor(t / 60)
    t -= m * 60
    return "{:02.0f}:{:02.0f}:{:02.0f}".format(h, m, t)


def time_to_txt2(t):
    if t < 1e-6:
        return '{:.0f} ns'.format(t / 1e-9)
    if t < 1e-3:
        return '{:.0f} \u03bcs'.format(t / 1e-6)
    if t < 1:
        return '{:.0f} ms'.format(t / 1e-3)
    else:
        return '{:.0f} s'.format(t)


def time_to_txt3(t):
    if t < 1e-6:
        return '{:.1f}ns'.format(t / 1e-9)
    if t < 1e-3:
        return '{:.1f}\u03bcs'.format(t / 1e-6)
    if t < 1:
        return '{:.1f}ms'.format(t / 1e-3)
    else:
        return '{:.1f}s'.format(t)


def count_to_txt(v):
    if v <= 9999:
        return '{:.0f}'.format(v)
    if v <= 9999999:
        return '{:.0f} K'.format(v / 1e3)
    if v <= 9999999999:
        return '{:.0f} M'.format(v / 1e6)
    return '{:.0f} G'.format(v / 1e9)


def logn_to_txt(v):
    if v == 0:
        return "1"
    v = 10**v
    if v >= 1e9:
        return '< {:.0f} G'.format(v / 1e9)
    if v >= 1e6:
        return '< {:.0f} M'.format(v / 1e6)
    if v >= 1e3:
        return '< {:.0f} K'.format(v / 1e3)
    return '< {:.0f}'.format(v)
columns = []
for i, k in enumerate(sorted(j["histo_points"].keys())):
    df = pd.DataFrame(j["histo_points"][k], columns=['n', 'center'])
    n = df.n.sum()
    total = (df.center * df.n).sum()
    df["op"] = k
    df["label"] = k + '\n' + count_to_txt(n) + '\n' + time_to_txt(total)
    print(f"column {i:<2d}: k:{k:<15s} points:{len(df):<3d} "
          f"samples:{n:<10,d} total:{total:<.9f}")
    columns.append(df)
df = pd.concat(columns)

print("Stats per op -----------------")
df1 = df.copy()
df1['duration'] = df1.center * df1.n
df1 = df1.groupby("op").sum().drop(columns=['center']).reset_index()
df1['mean_duration'] = df1.duration / df1.n
df1['max_duration'] = df1.op.apply(lambda op: j["max_durations"][op][1])
df1['label'] = df.groupby("op").first().label.reset_index(drop=True)
print(df1)
colors = dict(
    Add="brown",
    Checkout="grey",
    Commit="red",
    Copy="pink",
    Find="orange",
    Mem="blue",
    Mem_tree="yellow",
    Remove="purple",
)
plt.figure(figsize=(10, 6))
plt.yscale("log")
ncommits = ...
columns = []
for i, k in enumerate(sorted(j["moving_average_points"].keys())):
    df = pd.DataFrame(j["histo_points"][k], columns=['n', 'center'])
    n = df.n.sum()
    if k == "Commit": ncommits = n
    df = pd.DataFrame(j["moving_average_points"][k])
    if len(df) == 0: continue
    c = colors.get(k, "black")
    plt.plot(df["xs"], df["ys"], c=c)
    maxi, maxd = j["max_durations"][k]
    # maxi is the index of the slowest call; divide by (n - 1) to place the
    # marker on the [0, 1] replay-progress axis.
    maxx = maxi / (n - 1)
    print(k, maxi, n, maxx, maxd)
    plt.plot([maxx], [maxd],
             'X',
             color=c,
             alpha=0.5,
             ms=10,
             label="{}\n• {}\n• {}".format(k, time_to_txt3(maxd), maxi))
    # label="{}, max at idx {}".format(k, maxi))
    # plt.annotate(
    #     time_to_txt3(maxd),
    #     (maxx, maxd),
    #     xytext=(10, 0),
    #     textcoords='offset pixels',
    #     ha='left',
    #     va='center',
    #     size=8,
    #     weight='bold',
    # )
    columns.append(df)
df = pd.concat(columns)
print(df)
plt.xlabel('trace replay progress', weight="bold")
plt.ylabel('duration of call (log scale) (smoothed curve)', weight="bold")
plt.grid(axis='y')

# Title ----------------
el_wall_inop = time_to_txt(df1.duration.sum())
el_wall = time_to_txt(float(j["elapsed"]))
el_cpu = time_to_txt(float(j["elapsed_cpu"]))
flatten = {
    "0": False,
    "1": True,
    0: False,
    1: True,
}[j["flatten"]]
plt.title(
    ("bench/tree.exe on the bootstrap trace / commits:{} / flatten:{} / inode-config:{}\n"
     "wall in ops:{} / wall:{} / cpu:{} / revision:{}")
    .format(
        ncommits,
        flatten,
        j["inode_config"],
        el_wall_inop, el_wall, el_cpu,
        j["revision"],
    ), weight="bold")

# Tick label x ----------------
xticks = np.linspace(0, 1, 10)
plt.xticks(xticks, [
    time_to_txt(x * float(j["elapsed"])) for x in xticks
])
plt.xlim(0, 1)

# Tick label y ----------------
a = min(df["ys"].min(), 1e-7)
b = max(pd.DataFrame(j["max_durations"]).T.iloc[:, 1].max(), 1e+3)
print(a, b)
a, b = np.log10([a, b])
print(a, b)
a = np.floor(a)
b = np.ceil(b)
print(a, b)
print(10**a, 10**b)
yticks = 10**np.arange(a, b + 1)
ymin = 10**a
ymax = 10**b
plt.ylim(ymin, ymax)
plt.yticks(yticks, [time_to_txt2(v) for v in yticks])
plt.minorticks_off()
plt.legend(fontsize="small", bbox_to_anchor=(1.0, 1), loc='upper left',
           title="Operation\n• len of max\n• idx of max")

# Save ----------------
# https://stackoverflow.com/a/56970556
# g.fig.set_figheight(6)
plt.tight_layout()
plt.savefig(args.out)

import datetime
import argparse
import json
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
# import plotly.express as px
# import plotly.graph_objects as go
class BubbleChart:
    def __init__(self, area, bubble_spacing=0):
        """
        Setup for bubble collapse.

        Parameters
        ----------
        area : array-like
            Area of the bubbles.
        bubble_spacing : float, default: 0
            Minimal spacing between bubbles after collapsing.

        Notes
        -----
        If "area" is sorted, the results might look weird.
        """
        area = np.asarray(area)
        r = np.sqrt(area / np.pi)

        self.bubble_spacing = bubble_spacing
        self.bubbles = np.ones((len(area), 4))
        self.bubbles[:, 2] = r
        self.bubbles[:, 3] = area
        self.maxstep = 2 * self.bubbles[:, 2].max() + self.bubble_spacing
        self.step_dist = self.maxstep / 2

        # calculate initial grid layout for bubbles
        length = np.ceil(np.sqrt(len(self.bubbles)))
        grid = np.arange(length) * self.maxstep
        gx, gy = np.meshgrid(grid, grid)
        self.bubbles[:, 0] = gx.flatten()[:len(self.bubbles)]
        self.bubbles[:, 1] = gy.flatten()[:len(self.bubbles)]

        self.com = self.center_of_mass()

    def center_of_mass(self):
        return np.average(
            self.bubbles[:, :2], axis=0, weights=self.bubbles[:, 3]
        )

    def center_distance(self, bubble, bubbles):
        return np.hypot(bubble[0] - bubbles[:, 0],
                        bubble[1] - bubbles[:, 1])

    def outline_distance(self, bubble, bubbles):
        center_distance = self.center_distance(bubble, bubbles)
        return center_distance - bubble[2] - \
            bubbles[:, 2] - self.bubble_spacing

    def check_collisions(self, bubble, bubbles):
        distance = self.outline_distance(bubble, bubbles)
        return len(distance[distance < 0])

    def collides_with(self, bubble, bubbles):
        distance = self.outline_distance(bubble, bubbles)
        idx_min = np.argmin(distance)
        return idx_min if type(idx_min) == np.ndarray else [idx_min]

    def collapse(self, n_iterations=50):
        """
        Move bubbles to the center of mass.

        Parameters
        ----------
        n_iterations : int, default: 50
            Number of moves to perform.
        """
        for _i in range(n_iterations):
            moves = 0
            for i in range(len(self.bubbles)):
                rest_bub = np.delete(self.bubbles, i, 0)
                # try to move directly towards the center of mass
                # direction vector from bubble to the center of mass
                dir_vec = self.com - self.bubbles[i, :2]
                # shorten direction vector to have length of 1
                dir_vec = dir_vec / np.sqrt(dir_vec.dot(dir_vec))
                # calculate new bubble position
                new_point = self.bubbles[i, :2] + dir_vec * self.step_dist
                new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                # check whether new bubble collides with other bubbles
                if not self.check_collisions(new_bubble, rest_bub):
                    self.bubbles[i, :] = new_bubble
                    self.com = self.center_of_mass()
                    moves += 1
                else:
                    # try to move around a bubble that you collide with
                    # find colliding bubble
                    for colliding in self.collides_with(new_bubble, rest_bub):
                        # calculate direction vector
                        dir_vec = rest_bub[colliding, :2] - self.bubbles[i, :2]
                        dir_vec = dir_vec / np.sqrt(dir_vec.dot(dir_vec))
                        # calculate orthogonal vector
                        orth = np.array([dir_vec[1], -dir_vec[0]])
                        # test which direction to go
                        new_point1 = (self.bubbles[i, :2] + orth *
                                      self.step_dist)
                        new_point2 = (self.bubbles[i, :2] - orth *
                                      self.step_dist)
                        dist1 = self.center_distance(
                            self.com, np.array([new_point1]))
                        dist2 = self.center_distance(
                            self.com, np.array([new_point2]))
                        new_point = new_point1 if dist1 < dist2 else new_point2
                        new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                        if not self.check_collisions(new_bubble, rest_bub):
                            self.bubbles[i, :] = new_bubble
                            self.com = self.center_of_mass()

            if moves / len(self.bubbles) < 0.1:
                self.step_dist = self.step_dist / 2
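# A minimal sketch of how this class is meant to be driven (the real call
# sites are further down, around the "counts.png" figure):
#   bc = BubbleChart(area=[30.0, 10.0, 5.0], bubble_spacing=4)
#   bc.collapse(5)        # pack the circles around their center of mass
#   bc.bubbles[:, :2]     # packed (x, y) centers; bc.bubbles[:, 2] holds the radii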
def print_time_elapsed(s):
    rhs = str(datetime.timedelta(seconds=s % 3600))[:-7]
    lhs = s // 3600
    if lhs == 0:
        return rhs
    else:
        return f'{lhs:.0f}{rhs[1:]}'
p = argparse.ArgumentParser()
p.add_argument('--in', help="""path to JSON file""", required=True)
p.add_argument('--out', help="output path (e.g. a png or svg path)", required=True)
p.add_argument('-o',
               help="Overwrite --out if it already exists; fail otherwise",
               action="store_true")
args = p.parse_args()
print(args)
if not args.o and os.path.exists(args.out):
    raise NameError("{} exists! Pass -o to overwrite it".format(args.out))

# 'in' is a Python keyword, so fetch it through vars() instead of args.in.
j = json.loads(open(vars(args)['in']).read())
print(j.keys())
print(j['config'])
print(j['hostname'])
print()
print('> SPAN:')
print(j['span'].keys())
print(j['span']['mem'].keys())
print(j['span']['mem']['count'].keys())
print()
print('> GC:')
print(j['gc'].keys())
print(j['gc']['major_collections'].keys())
print(j['gc']['major_collections']['diff_per_block'].keys())
print()
print(j['span']['mem']['count']['min_value'])
print(j['span']['mem']['count']['max_value'])
print(j['span']['mem']['count']['mean'])
print(j['span']['mem']['cumu_count']['diff'])

block_count = j['block_count']
total_op_count = j['op_count']
total_wall = j['elapsed_wall']
print('total_op_count', total_op_count)
print('block_count', block_count)
print('total_wall', total_wall, total_wall / 3600)
colors = {
    # https://coolors.co/palettes/trending
    # # https://coolors.co/d9ed92-b5e48c-99d98c-76c893-52b69a-34a0a4-168aad-1a759f-1e6091-184e77
    # 'checkout': '#d9ed92',
    # 'find': '#b5e48c',
    # 'mem': '#99d98c',
    # 'mem_tree': '#76c893',
    # 'add': '#52b69a',
    # 'remove': '#34a0a4',
    # 'copy': '#168aad',
    # 'commit': '#1a759f',
    # 'unseen': '#1e6091',
    # 'buildup': '#184e77',
    # https://coolors.co/03045e-023e8a-0077b6-0096c7-00b4d8-48cae4-90e0ef-ade8f4-caf0f8
    '': '#03045e',
    'copy': '#023e8a',
    'mem': '#0077b6',
    'commit': '#0096c7',
    'add': '#00b4d8', 'checkout': '#00b4d8',
    'remove': '#48cae4',
    'find': '#90e0ef',
    'mem_tree': '#ade8f4',
    'unseen': '#caf0f8',
}
labels = dict(
    unseen='in-between ops',
    buildup='others',
)
# https://plotly.com/python/pie-charts/
# https://matplotlib.org/stable/gallery/misc/packed_bubbles.html#sphx-glr-gallery-misc-packed-bubbles-py
rows = []
for opk in sorted(j['span'].keys()):
    rows.append(dict(label=labels.get(opk, opk),
                     key=opk,
                     average_duration=j['span'][opk]['duration']['mean'],
                     count_per_block=j['span'][opk]['count']['mean'],
                     explode=0.2 if opk == 'buildup' else 0,
                     color=colors.get(opk, 'grey')))
df0 = pd.DataFrame(rows)
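# What follows writes four standalone figures (the filenames are hard-coded
# below rather than derived from --out): write_amplif.png, memory.png,
# counts.png and durations.png.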
# **************************************************************************************************
# **************************************************************************************************
# **************************************************************************************************
plt.close('all')
fig, ax = plt.subplots(figsize=np.asarray([10, 5]) * 0.6666)
ax2 = ax.twinx()
ax.set_yscale('log')
# ax2.set_yscale('linear')

y0 = np.asarray(j['index']['bytes_written']['value_after_commit']['evolution']).astype('float')
x = np.linspace(0, block_count - 1, y0.size)
ax.plot(x, y0, label='written so far', color=colors['mem'])
ax.annotate(
    f"{y0[-1] / 1e12:.0f}TB",
    [(block_count - 1) * 1.01, y0[-1]], verticalalignment="center", horizontalalignment='left',
)

y1 = np.asarray([d['value_after_commit']['evolution'] for d in j['disk'].values()]).astype(float).sum(axis=0)
ax.plot(x, y1, label='irmin-pack', color=colors['find'])
ax.annotate(
    f"{y1[-1] / 1e9:.0f}GB",
    [(block_count - 1) * 1.01, y1[-1]], verticalalignment="center", horizontalalignment='left',
)

# Write amplification: total bytes written so far divided by the current
# irmin-pack size on disk.
y2 = y0 / y1
ax2.plot(x, y2, '--', label='write amplification', color='grey')
ax2.annotate(
    f"x{y2[-1]:.0f}",
    [(block_count - 1) * 1.01, y2[-1]], verticalalignment="center", horizontalalignment='left',
)

ax.set_ylim(100_000_000, 10 ** (np.log10(y0.max()) * 1.025))
ax.set_yticks([
    1e9,
    1e10,
    1e11,
    1e12,
    1e13,
    # 1e14,
])
ax.minorticks_off()
ax2.set_ylim(1, np.nanmax(y2) * 1.2)
ax2.set_yticks([1, 100, 200, 300])
ax2.set_yticklabels(["x1", "x100", "x200", "x300"])
ax.set_yticklabels([
    "1GB",
    "10GB",
    "100GB",
    "1TB",
    "10TB",
    # "100TB",
])
ax.set_xticks(
    [
        0,
        # 28_083,
        204_762,
        458_753,
        655_361,
        851_969,
        1_212_417,
        1_343_489,
        # 1_466_367,
    ])
ax.set_xticklabels(
    [
        "genesis",
        # "alpha II",
        "alpha III",
        "athens A",
        "babylon",
        "carthage",
        "delphi",
        'edo',
        # "florence",
    ])
ax.set_xlim(0 - block_count * 0.05, block_count * 1.12)
ax.xaxis.grid()
# ax.grid(which='x')
ax.legend(loc='upper left')
ax2.legend(loc='lower right')
fig.tight_layout()
plt.savefig('write_amplif.png', dpi=150)
# **************************************************************************************************
# **************************************************************************************************
# **************************************************************************************************
plt.close('all')
fig, ax = plt.subplots(figsize=np.asarray([10, 4]) * 0.6666)

y = np.asarray(j['gc']['major_heap_bytes']['value_after_commit']['evolution']).astype('float') / 1e9
x = np.linspace(0, block_count - 1, y.size)
ax.plot(x, y, label='smoothed average', color=colors['copy'])
ax.annotate(
    f"{y[-1]:.2f}GB",
    [(block_count - 1) * 1.01, y[-1]], verticalalignment="center", horizontalalignment='left',
)

y = np.asarray(j['gc']['major_heap_top_bytes']).astype('float') / 1e9
# True wherever the recorded top-of-heap value changes, i.e. where a new peak
# was reached; the 'x' marker is placed on the last such block.
mask = np.r_[True, y[1:] != y[:-1]]
ax.plot([0, block_count - 1], [y.max(), y.max()], color=colors['remove'])
ax.plot([x[mask][-1]], [y[mask][-1]], 'x', label='highest recorded', color=colors['remove'])
ax.annotate(
    f"{y.max():.2f}GB",
    [(block_count - 1) * 1.01, y.max()], verticalalignment="center", horizontalalignment='left',
)

ax.set_yticks([0, 1, 2, 3])
ax.set_yticklabels(
    [
        '0',
        '1GB',
        '2GB',
        '3GB',
    ])
ax.set_ylim(0, y.max() * 1.08)
ax.set_xticks(
    [
        0,
        # 28_083,
        204_762,
        458_753,
        655_361,
        851_969,
        1_212_417,
        1_343_489,
        # 1_466_367,
    ])
ax.set_xticklabels(
    [
        "genesis",
        # "alpha II",
        "alpha III",
        "athens A",
        "babylon",
        "carthage",
        "delphi",
        'edo',
        # "florence",
    ])
ax.set_xlim(0 - (block_count / 40), block_count * 1.12)
ax.grid()
ax.legend(loc='lower right')
fig.tight_layout()
plt.savefig('memory.png', dpi=150)
# **************************************************************************************************
# **************************************************************************************************
# **************************************************************************************************
plt.close('all')
fig, ax = plt.subplots(subplot_kw=dict(aspect="equal"), figsize=(5, 5))
df = df0.set_index('key').loc['checkout find mem mem_tree add remove copy commit'.split(' ')].reset_index()
index = list(df.index)
seed = int(np.random.rand() * 1000)
seed = 642
np.random.RandomState(seed).shuffle(index)
print("seed!", seed)
df = df.loc[index]
print(df)

bubble_chart = BubbleChart(area=df['count_per_block'], bubble_spacing=4)
bubble_chart.collapse(5)
for i in range(len(bubble_chart.bubbles)):
    row = df.iloc[i]
    circ = plt.Circle(
        bubble_chart.bubbles[i, :2], bubble_chart.bubbles[i, 2], color=row['color'])
    ax.add_patch(circ)
    text = row['label'] + '\n{:.1f}'.format(row['count_per_block'])
    if row['label'] in 'commit remove checkout copy mem_tree'.split(' '):
        ax.text(bubble_chart.bubbles[i, 0] + bubble_chart.bubbles[i, 2] / 2 * 1.8 + 1,
                bubble_chart.bubbles[i, 1],
                text,
                horizontalalignment='left', verticalalignment='center',
                )
    else:
        ax.text(*bubble_chart.bubbles[i, :2], text,
                horizontalalignment='center', verticalalignment='center')
ax.axis("off")
ax.relim()
ax.autoscale_view()
# ax.set_title('Average Operation Count per Block')
plt.tight_layout()
plt.savefig('counts.png', dpi=100)
# **************************************************************************************************
# **************************************************************************************************
# **************************************************************************************************
plt.close('all')
fig, [ax0, ax1] = plt.subplots(ncols=2, subplot_kw=dict(aspect="equal"), figsize=np.asarray([10, 5]) * 0.75)
df = df0.set_index('key').loc['buildup commit unseen'.split(' ')].reset_index()
print(df)


def mypct(v):
    return f'{v:.1f}%'


ax0.set_title('All')
wedges, labs, pcts = ax0.pie(df.average_duration,
                             labels=df.label,
                             explode=df['explode'],
                             autopct=mypct,
                             pctdistance=0.76,
                             # shadow=True,
                             # textprops=dict(fontsize=10),
                             rotatelabels=True,
                             labeldistance=1.03,
                             startangle=360 / df['average_duration'].sum() * df.set_index('key').loc['buildup', 'average_duration'] / -2,
                             colors=df['color'],
                             normalize=True,
                             )
for t in pcts:
    t.set_fontsize(9)

df = df0.set_index('key').loc['copy find mem checkout mem_tree add remove'.split(' ')].reset_index()
print(df)


def mypct(v):
    v *= df0.set_index('key').loc['buildup'].average_duration / df0.set_index('key').loc['block'].average_duration
    return f'{v:.2f}%'
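# The ops in this second pie presumably add up to the 'buildup' span of the
# first pie, so rescaling each percentage by buildup/block expresses it
# relative to a whole block rather than to this sub-pie.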
ax1.set_title('Others')
wedges, labs, pcts = ax1.pie(df.average_duration,
                             labels=df.label,
                             explode=df['explode'],
                             autopct=mypct,
                             pctdistance=0.76,
                             # shadow=True,
                             rotatelabels=True,
                             labeldistance=1.03,
                             startangle=360 / df['average_duration'].sum() * df.set_index('key').loc['copy', 'average_duration'] / -2 + 180,
                             colors=df['color'],
                             normalize=True,
                             )
for t in pcts:
    t.set_fontsize(9)
plt.tight_layout()
plt.savefig('durations.png', dpi=150)