irmin: bench/tree.exe bootstrap trace plot

(env
 (dev (flags (-w +1..3+5..28+30..39+43+46..47+49..57+61..62-40-26 -strict-sequence -strict-formats -short-paths -keep-locs -g)))
 (release (flags (-w +1..3+5..28+30..39+43+46..47+49..57+61..62-40-26 -strict-sequence -strict-formats -short-paths -keep-locs))))

(executable
 (name json_to_repr)
 (libraries repr ppx_repr fmt ppx_deriving_yojson mtime mtime.clock.os unix re)
 (preprocess
  (pps ppx_repr ppx_deriving_yojson)))

(lang dune 2.8)
"""Plot distribution of timings out of bench/tree.exe on boostrap trace | |
Formatted using YAPF | |
""" | |
import argparse
import json
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

sns.set_theme(style="ticks", color_codes=True)
plt.rcParams["legend.labelspacing"] = 1.7
p = argparse.ArgumentParser()
p.add_argument('--in',
               help="""path to JSON file.
Pseudo schema:
{
  "revision" : string;
  "flatten": int;
  "inode_config": string;
  "max_durations": {string -> (int, float)};
  "moving_average_points": {string -> {"x": float array; "y": float array}};
  "histo_points": {string -> (int * float) array};
}
""")
p.add_argument('--out', help="Output path (e.g. a png or svg path)")
p.add_argument('-o',
               help="Overwrite --out if it already exists; fail otherwise",
               action="store_true")
args = p.parse_args()
print(args)

# 'in' is a Python keyword, so fetch it through vars() instead of args.in.
j = json.loads(open(vars(args)['in']).read())
if not args.o and os.path.exists(args.out):
    raise NameError("{} exists! Pass -o to overwrite it".format(args.out))
def time_to_txt(t):
    """Seconds -> "HH:MM:SS"."""
    h = np.floor(t / 3600)
    t -= h * 3600
    m = np.floor(t / 60)
    t -= m * 60
    return "{:02.0f}:{:02.0f}:{:02.0f}".format(h, m, t)


def time_to_txt2(t):
    """Seconds -> human-readable duration, no decimal."""
    if t < 1e-6:
        return '{:.0f} ns'.format(t / 1e-9)
    if t < 1e-3:
        return '{:.0f} \u03bcs'.format(t / 1e-6)
    if t < 1:
        return '{:.0f} ms'.format(t / 1e-3)
    else:
        return '{:.0f} s'.format(t)


def time_to_txt3(t):
    """Seconds -> human-readable duration, one decimal."""
    if t < 1e-6:
        return '{:.1f} ns'.format(t / 1e-9)
    if t < 1e-3:
        return '{:.1f} \u03bcs'.format(t / 1e-6)
    if t < 1:
        return '{:.1f} ms'.format(t / 1e-3)
    else:
        return '{:.1f} s'.format(t)


def count_to_txt(v):
    """Count -> human-readable count with K/M/G suffixes."""
    if v <= 9999:
        return '{:.0f}'.format(v)
    if v <= 9999999:
        return '{:.0f} K'.format(v / 1e3)
    if v <= 9999999999:
        return '{:.0f} M'.format(v / 1e6)
    return '{:.0f} G'.format(v / 1e9)


def logn_to_txt(v):
    """log10 of a count -> upper bound of its bucket, human readable."""
    if v == 0:
        return "1"
    v = 10**v
    if v >= 1e9:
        return '< {:.0f} G'.format(v / 1e9)
    if v >= 1e6:
        return '< {:.0f} M'.format(v / 1e6)
    if v >= 1e3:
        return '< {:.0f} K'.format(v / 1e3)
    return '< {:.0f}'.format(v)
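# Sanity-check examples for the formatters above (expected outputs worked out
# by hand, shown only as illustration):
#   time_to_txt(3725.0)      -> "01:02:05"
#   time_to_txt2(3e-5)       -> "30 μs"
#   count_to_txt(12_000_000) -> "12 M"
#   logn_to_txt(4.0)         -> "< 10 K"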
columns = []
for i, k in enumerate(sorted(j["histo_points"].keys())):
    df = pd.DataFrame(j["histo_points"][k], columns=['n', 'center'])
    n = df.n.sum()
    total = (df.center * df.n).sum()
    df["op"] = k
    df["label"] = k + '\n' + count_to_txt(n) + '\n' + time_to_txt(total)
    print(f"column {i:<2d}: k:{k:<15s} points:{len(df):<3d} "
          f"samples:{n:<10,d} total:{total:<.9f}")
    columns.append(df)
df = pd.concat(columns)

print("Stats per op -----------------")
df1 = df.copy()
df1['duration'] = df1.center * df1.n
df1 = df1.groupby("op").sum().drop(columns=['center']).reset_index()
df1['mean_duration'] = df1.duration / df1.n
df1['max_duration'] = df1.op.apply(lambda op: j["max_durations"][op][1])
df1['label'] = df.groupby("op").first().label.reset_index(drop=True)
print(df1)

print("All data points -----------------")
df["duration"] = df.center
df['logn'] = np.ceil(np.log10(df.n))
print(df)
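# The 'logn' column buckets each histogram bin by order of magnitude of its
# sample count; e.g. a bin holding 1,234 samples gets logn = ceil(log10(1234)) = 4,
# which logn_to_txt later renders as "< 10 K" in the legend.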
# Seaborn plot ----------------
# https://seaborn.pydata.org/examples/scatter_bubbles.html
g = sns.relplot(
    x="op",
    y="duration",
    size="logn",
    palette="muted",
    alpha=.5,
    sizes=(20, 750),
    height=6,
    data=df,
    legend="full",
    # facet_kws=dict(legend_out=True, )
)
g.set(yscale="log")
plt.grid(axis='y')
v = plt.plot(df1["op"],
             df1.max_duration,
             '_',
             color='black',
             alpha=0.5,
             ms=15,
             label="max")
w = plt.plot(df1["op"],
             df1.mean_duration,
             '_',
             color='red',
             alpha=0.5,
             ms=15,
             label="mean")
for _, row in df1.iterrows():
    plt.annotate(
        time_to_txt3(row.mean_duration),
        (row["op"], row.mean_duration),
        xytext=(10, 0),
        textcoords='offset pixels',
        ha='left',
        va='center',
        size=8,
        weight='bold',
    )
    plt.annotate(
        time_to_txt3(row.max_duration),
        (row["op"], row.max_duration),
        xytext=(10, 0),
        textcoords='offset pixels',
        ha='left',
        va='center',
        size=8,
        weight='bold',
    )
plt.legend(handles=v + w, loc="upper right")
plt.xlabel('op name / call count / total time spent in op', weight="bold")
plt.ylabel('duration of call (log scale)', weight="bold")

# Legend ----------------
g.legend.set_title("Number of calls")
for txt in g.legend.texts:
    print(float(txt.get_text()), logn_to_txt(float(txt.get_text())))
    txt.set_text(logn_to_txt(float(txt.get_text())))

# Title ----------------
el_wall_inop = time_to_txt(df1.duration.sum())
el_wall = time_to_txt(float(j["elapsed"]))
el_cpu = time_to_txt(float(j["elapsed_cpu"]))
flatten = {
    "0": False,
    "1": True,
    0: False,
    1: True,
}[j["flatten"]]
plt.title(
    ("bench/tree.exe on the bootstrap trace / commits:{} / flatten:{} / inode-config:{}\n"
     "wall in ops:{} / wall:{} / cpu:{} / revision:{}")
    .format(
        df[df["op"].str.contains("Commit")].n.sum(),
        flatten,
        j["inode_config"],
        el_wall_inop, el_wall, el_cpu,
        j["revision"],
    ), weight="bold")

# Tick label x ----------------
xticks, _ = plt.xticks()
plt.xticks(xticks, df1.label)

# Tick label y ----------------
a = min(df["duration"].min(), 1e-7)
b = max(pd.DataFrame(j["max_durations"]).T.iloc[:, 1].max(), 1e+3)
a, b = np.log10([a, b])
yticks = 10**np.arange(np.ceil(a), np.floor(b) + 1)
ymin = 10**(np.floor(a * 4) / 4)
ymax = 10**(np.ceil(b * 4) / 4)
plt.ylim(ymin, ymax)
plt.yticks(yticks, [time_to_txt2(v) for v in yticks])
plt.minorticks_off()

# Save ----------------
# https://stackoverflow.com/a/56970556
g.fig.set_figwidth(10)
g.fig.set_figheight(6)
g.tight_layout()
g.savefig(args.out)
"""Plot distribution of timings out of bench/tree.exe on boostrap trace | |
Formatted using YAPF | |
""" | |
import argparse
import json
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

sns.set_theme(style="ticks", color_codes=True)
# plt.rcParams["legend.labelspacing"] = 1.7
p = argparse.ArgumentParser()
p.add_argument('--in',
               help="""path to JSON file.
Pseudo schema:
{
  "revision" : string;
  "flatten": int;
  "inode_config": string;
  "max_durations": {string -> (int, float)};
  "moving_average_points": {string -> {"x": float array; "y": float array}};
  "histo_points": {string -> (int * float) array};
}
""")
p.add_argument('--out', help="Output path (e.g. a png or svg path)")
p.add_argument('-o',
               help="Overwrite --out if it already exists; fail otherwise",
               action="store_true")
args = p.parse_args()
print(args)

# 'in' is a Python keyword, so fetch it through vars() instead of args.in.
j = json.loads(open(vars(args)['in']).read())
if not args.o and os.path.exists(args.out):
    raise NameError("{} exists! Pass -o to overwrite it".format(args.out))
def time_to_txt(t):
    h = np.floor(t / 3600)
    t -= h * 3600
    m = np.floor(t / 60)
    t -= m * 60
    return "{:02.0f}:{:02.0f}:{:02.0f}".format(h, m, t)


def time_to_txt2(t):
    if t < 1e-6:
        return '{:.0f} ns'.format(t / 1e-9)
    if t < 1e-3:
        return '{:.0f} \u03bcs'.format(t / 1e-6)
    if t < 1:
        return '{:.0f} ms'.format(t / 1e-3)
    else:
        return '{:.0f} s'.format(t)


def time_to_txt3(t):
    if t < 1e-6:
        return '{:.1f}ns'.format(t / 1e-9)
    if t < 1e-3:
        return '{:.1f}\u03bcs'.format(t / 1e-6)
    if t < 1:
        return '{:.1f}ms'.format(t / 1e-3)
    else:
        return '{:.1f}s'.format(t)


def count_to_txt(v):
    if v <= 9999:
        return '{:.0f}'.format(v)
    if v <= 9999999:
        return '{:.0f} K'.format(v / 1e3)
    if v <= 9999999999:
        return '{:.0f} M'.format(v / 1e6)
    return '{:.0f} G'.format(v / 1e9)


def logn_to_txt(v):
    if v == 0:
        return "1"
    v = 10**v
    if v >= 1e9:
        return '< {:.0f} G'.format(v / 1e9)
    if v >= 1e6:
        return '< {:.0f} M'.format(v / 1e6)
    if v >= 1e3:
        return '< {:.0f} K'.format(v / 1e3)
    return '< {:.0f}'.format(v)
columns = []
for i, k in enumerate(sorted(j["histo_points"].keys())):
    df = pd.DataFrame(j["histo_points"][k], columns=['n', 'center'])
    n = df.n.sum()
    total = (df.center * df.n).sum()
    df["op"] = k
    df["label"] = k + '\n' + count_to_txt(n) + '\n' + time_to_txt(total)
    print(f"column {i:<2d}: k:{k:<15s} points:{len(df):<3d} "
          f"samples:{n:<10,d} total:{total:<.9f}")
    columns.append(df)
df = pd.concat(columns)

print("Stats per op -----------------")
df1 = df.copy()
df1['duration'] = df1.center * df1.n
df1 = df1.groupby("op").sum().drop(columns=['center']).reset_index()
df1['mean_duration'] = df1.duration / df1.n
df1['max_duration'] = df1.op.apply(lambda op: j["max_durations"][op][1])
df1['label'] = df.groupby("op").first().label.reset_index(drop=True)
print(df1)
colors = dict(
    Add="brown",
    Checkout="grey",
    Commit="red",
    Copy="pink",
    Find="orange",
    Mem="blue",
    Mem_tree="yellow",
    Remove="purple",
)
plt.figure(figsize=(10, 6))
plt.yscale("log")
ncommits = ...
columns = []
for i, k in enumerate(sorted(j["moving_average_points"].keys())):
    df = pd.DataFrame(j["histo_points"][k], columns=['n', 'center'])
    n = df.n.sum()
    if k == "Commit": ncommits = n
    df = pd.DataFrame(j["moving_average_points"][k])
    if len(df) == 0: continue
    c = colors.get(k, "black")
    plt.plot(df["xs"], df["ys"], c=c)
    maxi, maxd = j["max_durations"][k]
    # maxi is the index of the slowest call; divide by (n - 1) to place the
    # marker on the [0, 1] replay-progress axis.
    maxx = maxi / (n - 1)
    print(k, maxi, n, maxx, maxd)
    plt.plot([maxx], [maxd],
             'X',
             color=c,
             alpha=0.5,
             ms=10,
             label="{}\n• {}\n• {}".format(k, time_to_txt3(maxd), maxi))
    # label="{}, max at idx {}".format(k, maxi))
    # plt.annotate(
    #     time_to_txt3(maxd),
    #     (maxx, maxd),
    #     xytext=(10, 0),
    #     textcoords='offset pixels',
    #     ha='left',
    #     va='center',
    #     size=8,
    #     weight='bold',
    # )
    columns.append(df)
df = pd.concat(columns)
print(df)
plt.xlabel('trace replay progress', weight="bold")
plt.ylabel('duration of call (log scale) (smoothed curve)', weight="bold")
plt.grid(axis='y')

# Title ----------------
el_wall_inop = time_to_txt(df1.duration.sum())
el_wall = time_to_txt(float(j["elapsed"]))
el_cpu = time_to_txt(float(j["elapsed_cpu"]))
flatten = {
    "0": False,
    "1": True,
    0: False,
    1: True,
}[j["flatten"]]
plt.title(
    ("bench/tree.exe on the bootstrap trace / commits:{} / flatten:{} / inode-config:{}\n"
     "wall in ops:{} / wall:{} / cpu:{} / revision:{}")
    .format(
        ncommits,
        flatten,
        j["inode_config"],
        el_wall_inop, el_wall, el_cpu,
        j["revision"],
    ), weight="bold")

# Tick label x ----------------
xticks = np.linspace(0, 1, 10)
plt.xticks(xticks, [
    time_to_txt(x * float(j["elapsed"])) for x in xticks
])
plt.xlim(0, 1)

# Tick label y ----------------
a = min(df["ys"].min(), 1e-7)
b = max(pd.DataFrame(j["max_durations"]).T.iloc[:, 1].max(), 1e+3)
print(a, b)
a, b = np.log10([a, b])
print(a, b)
a = np.floor(a)
b = np.ceil(b)
print(a, b)
print(10**a, 10**b)
yticks = 10**np.arange(a, b + 1)
ymin = 10**a
ymax = 10**b
plt.ylim(ymin, ymax)
plt.yticks(yticks, [time_to_txt2(v) for v in yticks])
plt.minorticks_off()
plt.legend(fontsize="small", bbox_to_anchor=(1.0, 1), loc='upper left',
           title="Operation\n• len of max\n• idx of max")

# Save ----------------
# https://stackoverflow.com/a/56970556
# g.fig.set_figheight(6)
plt.tight_layout()
plt.savefig(args.out)

import datetime
import argparse
import json
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
# import plotly.express as px
# import plotly.graph_objects as go
class BubbleChart:
    def __init__(self, area, bubble_spacing=0):
        """
        Setup for bubble collapse.

        Parameters
        ----------
        area : array-like
            Area of the bubbles.
        bubble_spacing : float, default: 0
            Minimal spacing between bubbles after collapsing.

        Notes
        -----
        If "area" is sorted, the results might look weird.
        """
        area = np.asarray(area)
        r = np.sqrt(area / np.pi)

        self.bubble_spacing = bubble_spacing
        self.bubbles = np.ones((len(area), 4))
        self.bubbles[:, 2] = r
        self.bubbles[:, 3] = area
        self.maxstep = 2 * self.bubbles[:, 2].max() + self.bubble_spacing
        self.step_dist = self.maxstep / 2

        # calculate initial grid layout for bubbles
        length = np.ceil(np.sqrt(len(self.bubbles)))
        grid = np.arange(length) * self.maxstep
        gx, gy = np.meshgrid(grid, grid)
        self.bubbles[:, 0] = gx.flatten()[:len(self.bubbles)]
        self.bubbles[:, 1] = gy.flatten()[:len(self.bubbles)]

        self.com = self.center_of_mass()

    def center_of_mass(self):
        return np.average(
            self.bubbles[:, :2], axis=0, weights=self.bubbles[:, 3]
        )

    def center_distance(self, bubble, bubbles):
        return np.hypot(bubble[0] - bubbles[:, 0],
                        bubble[1] - bubbles[:, 1])

    def outline_distance(self, bubble, bubbles):
        center_distance = self.center_distance(bubble, bubbles)
        return center_distance - bubble[2] - \
            bubbles[:, 2] - self.bubble_spacing

    def check_collisions(self, bubble, bubbles):
        distance = self.outline_distance(bubble, bubbles)
        return len(distance[distance < 0])

    def collides_with(self, bubble, bubbles):
        distance = self.outline_distance(bubble, bubbles)
        idx_min = np.argmin(distance)
        return idx_min if type(idx_min) == np.ndarray else [idx_min]

    def collapse(self, n_iterations=50):
        """
        Move bubbles to the center of mass.

        Parameters
        ----------
        n_iterations : int, default: 50
            Number of moves to perform.
        """
        for _i in range(n_iterations):
            moves = 0
            for i in range(len(self.bubbles)):
                rest_bub = np.delete(self.bubbles, i, 0)
                # try to move directly towards the center of mass
                # direction vector from bubble to the center of mass
                dir_vec = self.com - self.bubbles[i, :2]
                # shorten direction vector to have length of 1
                dir_vec = dir_vec / np.sqrt(dir_vec.dot(dir_vec))
                # calculate new bubble position
                new_point = self.bubbles[i, :2] + dir_vec * self.step_dist
                new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                # check whether new bubble collides with other bubbles
                if not self.check_collisions(new_bubble, rest_bub):
                    self.bubbles[i, :] = new_bubble
                    self.com = self.center_of_mass()
                    moves += 1
                else:
                    # try to move around a bubble that you collide with
                    # find colliding bubble
                    for colliding in self.collides_with(new_bubble, rest_bub):
                        # calculate direction vector
                        dir_vec = rest_bub[colliding, :2] - self.bubbles[i, :2]
                        dir_vec = dir_vec / np.sqrt(dir_vec.dot(dir_vec))
                        # calculate orthogonal vector
                        orth = np.array([dir_vec[1], -dir_vec[0]])
                        # test which direction to go
                        new_point1 = (self.bubbles[i, :2] + orth *
                                      self.step_dist)
                        new_point2 = (self.bubbles[i, :2] - orth *
                                      self.step_dist)
                        dist1 = self.center_distance(
                            self.com, np.array([new_point1]))
                        dist2 = self.center_distance(
                            self.com, np.array([new_point2]))
                        new_point = new_point1 if dist1 < dist2 else new_point2
                        new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                        if not self.check_collisions(new_bubble, rest_bub):
                            self.bubbles[i, :] = new_bubble
                            self.com = self.center_of_mass()

            if moves / len(self.bubbles) < 0.1:
                self.step_dist = self.step_dist / 2
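# A minimal sketch of how this class is meant to be driven (the real call
# sites are further down, around the "counts.png" figure):
#   bc = BubbleChart(area=[30.0, 10.0, 5.0], bubble_spacing=4)
#   bc.collapse(5)        # pack the circles around their center of mass
#   bc.bubbles[:, :2]     # packed (x, y) centers; bc.bubbles[:, 2] holds the radii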
def print_time_elapsed(s):
    rhs = str(datetime.timedelta(seconds=s % 3600))[:-7]
    lhs = s // 3600
    if lhs == 0:
        return rhs
    else:
        return f'{lhs:.0f}{rhs[1:]}'
p = argparse.ArgumentParser()
p.add_argument('--in', help="""path to JSON file""", required=True)
p.add_argument('--out', help="output path (e.g. a png or svg path)", required=True)
p.add_argument('-o',
               help="Overwrite --out if it already exists; fail otherwise",
               action="store_true")
args = p.parse_args()
print(args)
if not args.o and os.path.exists(args.out):
    raise NameError("{} exists! Pass -o to overwrite it".format(args.out))

# 'in' is a Python keyword, so fetch it through vars() instead of args.in.
j = json.loads(open(vars(args)['in']).read())
print(j.keys())
print(j['config'])
print(j['hostname'])
print()
print('> SPAN:')
print(j['span'].keys())
print(j['span']['mem'].keys())
print(j['span']['mem']['count'].keys())
print()
print('> GC:')
print(j['gc'].keys())
print(j['gc']['major_collections'].keys())
print(j['gc']['major_collections']['diff_per_block'].keys())
print()
print(j['span']['mem']['count']['min_value'])
print(j['span']['mem']['count']['max_value'])
print(j['span']['mem']['count']['mean'])
print(j['span']['mem']['cumu_count']['diff'])

block_count = j['block_count']
total_op_count = j['op_count']
total_wall = j['elapsed_wall']
print('total_op_count', total_op_count)
print('block_count', block_count)
print('total_wall', total_wall, total_wall / 3600)
colors = {
    # https://coolors.co/palettes/trending
    # # https://coolors.co/d9ed92-b5e48c-99d98c-76c893-52b69a-34a0a4-168aad-1a759f-1e6091-184e77
    # 'checkout': '#d9ed92',
    # 'find': '#b5e48c',
    # 'mem': '#99d98c',
    # 'mem_tree': '#76c893',
    # 'add': '#52b69a',
    # 'remove': '#34a0a4',
    # 'copy': '#168aad',
    # 'commit': '#1a759f',
    # 'unseen': '#1e6091',
    # 'buildup': '#184e77',
    # https://coolors.co/03045e-023e8a-0077b6-0096c7-00b4d8-48cae4-90e0ef-ade8f4-caf0f8
    '': '#03045e',
    'copy': '#023e8a',
    'mem': '#0077b6',
    'commit': '#0096c7',
    'add': '#00b4d8', 'checkout': '#00b4d8',
    'remove': '#48cae4',
    'find': '#90e0ef',
    'mem_tree': '#ade8f4',
    'unseen': '#caf0f8',
}
labels = dict(
    unseen='in-between ops',
    buildup='others',
)
# https://plotly.com/python/pie-charts/
# https://matplotlib.org/stable/gallery/misc/packed_bubbles.html#sphx-glr-gallery-misc-packed-bubbles-py
rows = []
for opk in sorted(j['span'].keys()):
    rows.append(dict(label=labels.get(opk, opk),
                     key=opk,
                     average_duration=j['span'][opk]['duration']['mean'],
                     count_per_block=j['span'][opk]['count']['mean'],
                     explode=0.2 if opk == 'buildup' else 0,
                     color=colors.get(opk, 'grey')))
df0 = pd.DataFrame(rows)
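# What follows writes four standalone figures (the filenames are hard-coded
# below rather than derived from --out): write_amplif.png, memory.png,
# counts.png and durations.png.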
# **************************************************************************************************
# **************************************************************************************************
# **************************************************************************************************
plt.close('all')
fig, ax = plt.subplots(figsize=np.asarray([10, 5]) * 0.6666)
ax2 = ax.twinx()
ax.set_yscale('log')
# ax2.set_yscale('linear')

y0 = np.asarray(j['index']['bytes_written']['value_after_commit']['evolution']).astype('float')
x = np.linspace(0, block_count - 1, y0.size)
ax.plot(x, y0, label='written so far', color=colors['mem'])
ax.annotate(
    f"{y0[-1] / 1e12:.0f}TB",
    [(block_count - 1) * 1.01, y0[-1]], verticalalignment="center", horizontalalignment='left',
)

y1 = np.asarray([d['value_after_commit']['evolution'] for d in j['disk'].values()]).astype(float).sum(axis=0)
ax.plot(x, y1, label='irmin-pack', color=colors['find'])
ax.annotate(
    f"{y1[-1] / 1e9:.0f}GB",
    [(block_count - 1) * 1.01, y1[-1]], verticalalignment="center", horizontalalignment='left',
)

# Write amplification: total bytes written so far divided by the current
# irmin-pack size on disk.
y2 = y0 / y1
ax2.plot(x, y2, '--', label='write amplification', color='grey')
ax2.annotate(
    f"x{y2[-1]:.0f}",
    [(block_count - 1) * 1.01, y2[-1]], verticalalignment="center", horizontalalignment='left',
)

ax.set_ylim(100_000_000, 10 ** (np.log10(y0.max()) * 1.025))
ax.set_yticks([
    1e9,
    1e10,
    1e11,
    1e12,
    1e13,
    # 1e14,
])
ax.minorticks_off()
ax2.set_ylim(1, np.nanmax(y2) * 1.2)
ax2.set_yticks([1, 100, 200, 300])
ax2.set_yticklabels(["x1", "x100", "x200", "x300"])
ax.set_yticklabels([
    "1GB",
    "10GB",
    "100GB",
    "1TB",
    "10TB",
    # "100TB",
])
ax.set_xticks(
    [
        0,
        # 28_083,
        204_762,
        458_753,
        655_361,
        851_969,
        1_212_417,
        1_343_489,
        # 1_466_367,
    ])
ax.set_xticklabels(
    [
        "genesis",
        # "alpha II",
        "alpha III",
        "athens A",
        "babylon",
        "carthage",
        "delphi",
        'edo',
        # "florence",
    ])
ax.set_xlim(0 - block_count * 0.05, block_count * 1.12)
ax.xaxis.grid()
# ax.grid(which='x')
ax.legend(loc='upper left')
ax2.legend(loc='lower right')
fig.tight_layout()
plt.savefig('write_amplif.png', dpi=150)
# **************************************************************************************************
# **************************************************************************************************
# **************************************************************************************************
plt.close('all')
fig, ax = plt.subplots(figsize=np.asarray([10, 4]) * 0.6666)

y = np.asarray(j['gc']['major_heap_bytes']['value_after_commit']['evolution']).astype('float') / 1e9
x = np.linspace(0, block_count - 1, y.size)
ax.plot(x, y, label='smoothed average', color=colors['copy'])
ax.annotate(
    f"{y[-1]:.2f}GB",
    [(block_count - 1) * 1.01, y[-1]], verticalalignment="center", horizontalalignment='left',
)

y = np.asarray(j['gc']['major_heap_top_bytes']).astype('float') / 1e9
# True wherever the recorded top-of-heap value changes, i.e. where a new peak
# was reached; the 'x' marker is placed on the last such block.
mask = np.r_[True, y[1:] != y[:-1]]
ax.plot([0, block_count - 1], [y.max(), y.max()], color=colors['remove'])
ax.plot([x[mask][-1]], [y[mask][-1]], 'x', label='highest recorded', color=colors['remove'])
ax.annotate(
    f"{y.max():.2f}GB",
    [(block_count - 1) * 1.01, y.max()], verticalalignment="center", horizontalalignment='left',
)

ax.set_yticks([0, 1, 2, 3])
ax.set_yticklabels(
    [
        '0',
        '1GB',
        '2GB',
        '3GB',
    ])
ax.set_ylim(0, y.max() * 1.08)
ax.set_xticks(
    [
        0,
        # 28_083,
        204_762,
        458_753,
        655_361,
        851_969,
        1_212_417,
        1_343_489,
        # 1_466_367,
    ])
ax.set_xticklabels(
    [
        "genesis",
        # "alpha II",
        "alpha III",
        "athens A",
        "babylon",
        "carthage",
        "delphi",
        'edo',
        # "florence",
    ])
ax.set_xlim(0 - (block_count / 40), block_count * 1.12)
ax.grid()
ax.legend(loc='lower right')
fig.tight_layout()
plt.savefig('memory.png', dpi=150)
# **************************************************************************************************
# **************************************************************************************************
# **************************************************************************************************
plt.close('all')
fig, ax = plt.subplots(subplot_kw=dict(aspect="equal"), figsize=(5, 5))
df = df0.set_index('key').loc['checkout find mem mem_tree add remove copy commit'.split(' ')].reset_index()
index = list(df.index)
seed = int(np.random.rand() * 1000)
seed = 642
np.random.RandomState(seed).shuffle(index)
print("seed!", seed)
df = df.loc[index]
print(df)

bubble_chart = BubbleChart(area=df['count_per_block'], bubble_spacing=4)
bubble_chart.collapse(5)
for i in range(len(bubble_chart.bubbles)):
    row = df.iloc[i]
    circ = plt.Circle(
        bubble_chart.bubbles[i, :2], bubble_chart.bubbles[i, 2], color=row['color'])
    ax.add_patch(circ)
    text = row['label'] + '\n{:.1f}'.format(row['count_per_block'])
    if row['label'] in 'commit remove checkout copy mem_tree'.split(' '):
        ax.text(bubble_chart.bubbles[i, 0] + bubble_chart.bubbles[i, 2] / 2 * 1.8 + 1,
                bubble_chart.bubbles[i, 1],
                text,
                horizontalalignment='left', verticalalignment='center',
                )
    else:
        ax.text(*bubble_chart.bubbles[i, :2], text,
                horizontalalignment='center', verticalalignment='center')
ax.axis("off")
ax.relim()
ax.autoscale_view()
# ax.set_title('Average Operation Count per Block')
plt.tight_layout()
plt.savefig('counts.png', dpi=100)
# **************************************************************************************************
# **************************************************************************************************
# **************************************************************************************************
plt.close('all')
fig, [ax0, ax1] = plt.subplots(ncols=2, subplot_kw=dict(aspect="equal"), figsize=np.asarray([10, 5]) * 0.75)
df = df0.set_index('key').loc['buildup commit unseen'.split(' ')].reset_index()
print(df)


def mypct(v):
    return f'{v:.1f}%'


ax0.set_title('All')
wedges, labs, pcts = ax0.pie(df.average_duration,
                             labels=df.label,
                             explode=df['explode'],
                             autopct=mypct,
                             pctdistance=0.76,
                             # shadow=True,
                             # textprops=dict(fontsize=10),
                             rotatelabels=True,
                             labeldistance=1.03,
                             startangle=360 / df['average_duration'].sum() * df.set_index('key').loc['buildup', 'average_duration'] / -2,
                             colors=df['color'],
                             normalize=True,
                             )
for t in pcts:
    t.set_fontsize(9)

df = df0.set_index('key').loc['copy find mem checkout mem_tree add remove'.split(' ')].reset_index()
print(df)


def mypct(v):
    v *= df0.set_index('key').loc['buildup'].average_duration / df0.set_index('key').loc['block'].average_duration
    return f'{v:.2f}%'
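# The ops in this second pie presumably add up to the 'buildup' span of the
# first pie, so rescaling each percentage by buildup/block expresses it
# relative to a whole block rather than to this sub-pie.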
ax1.set_title('Others')
wedges, labs, pcts = ax1.pie(df.average_duration,
                             labels=df.label,
                             explode=df['explode'],
                             autopct=mypct,
                             pctdistance=0.76,
                             # shadow=True,
                             rotatelabels=True,
                             labeldistance=1.03,
                             startangle=360 / df['average_duration'].sum() * df.set_index('key').loc['copy', 'average_duration'] / -2 + 180,
                             colors=df['color'],
                             normalize=True,
                             )
for t in pcts:
    t.set_fontsize(9)
plt.tight_layout()
plt.savefig('durations.png', dpi=150)