numpde · November 9, 2020 11:32
diff --git a/fdr.py b/fdr.py
 # RA, 2020-10-16

 """
 Visualization of the FDR and estimates by Benjamini-Hochberg and Storey-Tibshirani.

 [1] Benjamini Y and Hochberg Y.
    Controlling the false discovery rate: a practical
    and powerful approach to multiple testing.
    J R Statist Soc B, 1995.

 [2] Storey JD and Tibshirani R.
    Statistical significance for genomewide studies.
    PNAS.
    https://dx.doi.org/10.1073%2Fpnas.1530509100

 Earlier version of this code
    https://github.com/numpde/cbb/tree/master/20200628-FDR
 was based on
    Tan Siew Li, L
    Understanding the Benjamini-Hochberg method
    https://cpb-us-w2.wpmucdn.com/blog.nus.edu.sg/dist/0/3425/files/2018/10/Understanding-Benjamini-Hochberg-method-2ijolq0.pdf

 This code has no pretence of being pretty, sorry.

 RA, 2020-10-17
 """

 from pathlib import Path

 import numpy as np
 from plox import Plox
 from numpy import mean, var, sqrt
 from scipy.stats import t as student_t
 import matplotlib.pyplot as plt

 figures = (Path(__file__).parent / "figures") / Path(__file__).stem
 figures.mkdir(exist_ok=True, parents=True)

 first = (lambda X: next(iter(X)))

 st = None
 M = 10000
 seed = 2

 mixture_coeff = 0.2

 bar_style = dict()
 color_h0 = "C2"
 color_H0 = "C0"
 color_bh = "brown"
 color_st = "mediumvioletred"


 class Hypotheses:
    # Size of groups
    n1 = 3
    n2 = 3

    # Null-hypothesis
    class H0:
        m = 0
        s = 1
        name = "H0"

    # Secret truth
    class h0:
        m = 3
        s = 1
        name = "h0"

    @classmethod
    def generate_dataset(cls, A, B, m):
        rgn = np.random.Generator(np.random.MT19937(seed=seed))
        dataset = [
            (
                rgn.normal(loc=A.m, scale=A.s, size=Hypotheses.n1),
                rgn.normal(loc=B.m, scale=B.s, size=Hypotheses.n2),
            )
            for __ in range(m)
        ]
        return dataset


 def pvalue_pooled(g1, g2):
    """
    t-test with pooled variance.
    H0: Avg(g1) = Avg(g2)

    https://en.wikipedia.org/wiki/Student%27s_t-test
    """

    (m1, v1, n1) = (mean(g1), var(g1, ddof=1), len(g1))
    (m2, v2, n2) = (mean(g2), var(g2, ddof=1), len(g2))

    vp = (v1 * (n1 - 1) + v2 * (n2 - 1)) / ((n1 - 1) + (n2 - 1))
    t = abs(m1 - m2) / (sqrt(vp) * sqrt(1 / n1 + 1 / n2))
    nu = (n1 - 1) + (n2 - 1)

    return student_t.cdf(-t, df=nu) * 2


 def pvalue_welch(g1, g2):
    """
    Welch's t-test.
    H0: Avg(g1) = Avg(g2)

    https://en.wikipedia.org/wiki/Welch%27s_t-test
    """

    (m1, v1, n1) = (mean(g1), var(g1, ddof=1), len(g1))
    (m2, v2, n2) = (mean(g2), var(g2, ddof=1), len(g2))

    t = abs(m1 - m2) / sqrt(v1 / n1 + v2 / n2)
    nu = ((v1 + v2) ** 2) / ((v1 ** 2) / (n1 - 1) + (v2 ** 2) / (n2 - 1))

    return student_t.cdf(-t, df=nu) * 2


 dataset_Hh = Hypotheses.generate_dataset(Hypotheses.H0, Hypotheses.h0, M)
 dataset_HH = Hypotheses.generate_dataset(Hypotheses.H0, Hypotheses.H0, M)
 pvalues_h0 = np.array([pvalue_pooled(g1, g2) for (g1, g2) in dataset_Hh])
 pvalues_H0 = np.array([pvalue_pooled(g1, g2) for (g1, g2) in dataset_HH])


 def compute_fdr_ex(a):
    nh = np.sum(pvalues_h0 <= a) * mixture_coeff
    nH = np.sum(pvalues_H0 <= a) * (1 - mixture_coeff)
    return nH / ((nh + nH) or 1)


 def compute_fdr_bh(a):
    nh = np.sum(pvalues_h0 <= a) * mixture_coeff
    nH = np.sum(pvalues_H0 <= a) * (1 - mixture_coeff)
    return M * a / ((nh + nH) or 1)


 def compute_fdr_st(a, st):
    nh = np.sum(pvalues_h0 <= a) * mixture_coeff
    nH = np.sum(pvalues_H0 <= a) * (1 - mixture_coeff)
    return M * a * st / ((nh + nH) or 1)


 def visualize_dataset(st):
    nh = 7
    nH = 13
    data = np.array([np.hstack(groups) for groups in dataset_Hh[0:nh] + dataset_HH[0:nH]])
    pval = np.array(list(pvalues_h0[0:nh]) + list(pvalues_H0[0:nH]))
    assert len(data) == len(pval)

    ii_random = np.arange(len(data))
    np.random.seed(seed)
    np.random.shuffle(ii_random)

    data = data[ii_random]
    pval = pval[ii_random]

    colnames = [
        F"$\\bf{gn + 1}$-${sn + 1}$"
        for (gn, g) in enumerate(dataset_HH[0])
        for (sn, s) in enumerate(g)
    ]

    rownames = [
        F"Gene {n}"
        for n in range(len(data))
    ]

    SPACER = "..."
    rownames[-2] = SPACER
    rownames[-1] = F"Gene {str(M)}"
    data[-2] = np.nan
    # pval[-2] = np.nan  # doesn't sort right

    # from matplotlib.colors import LinearSegmentedColormap

    # https://matplotlib.org/3.1.1/gallery/color/colormap_reference.html
    import matplotlib.pyplot as plt

    style = {
        'xtick.top': False, 'xtick.bottom': False, 'xtick.labelbottom': False,
        'xtick.labeltop': True,

        'ytick.left': False,

        # sudo apt install cm-super-minimal
        'text.usetex': True,
    }

    for sort in [False, True]:

        if sort:
            ii_pvalue = sorted(range(len(pval)), key=(lambda i: pval[i]))
            data = data[ii_pvalue]
            pval = pval[ii_pvalue]
            rownames = [rownames[i] for i in ii_pvalue]

        with Plox(style) as px:
            from matplotlib.colors import LinearSegmentedColormap
            cmap = LinearSegmentedColormap.from_list('rg', ["r", "w", "g"], N=256)

            px.a.imshow(data, cmap=cmap)
            px.a.set_aspect(0.3)

            px.a.spines['top'].set_visible(False)
            px.a.spines['left'].set_visible(False)
            px.a.spines['right'].set_visible(False)
            px.a.spines['bottom'].set_visible(False)

            px.a.set_xticks(range(len(colnames)))
            px.a.set_xticklabels(colnames)
            # px.a.xaxis.set_label_position('top')

            px.a.set_yticks(range(len(rownames)))
            px.a.set_yticklabels(rownames, fontsize="xx-small")

            px.f.savefig(figures / F"dataset_sort={sort}_expression.png", dpi=300)

        fdr_bh = [compute_fdr_bh(p) for p in pval]
        fdr_st = [compute_fdr_st(p, st) for p in pval]

        for (vals, what) in zip([pval, fdr_bh, fdr_st], ["pvalue", "fdr_bh", "fdr_st"]):
            with Plox(style) as px:
                if (what == "pvalue"):
                    cmap = [color_h0, color_H0]
                elif (what == "fdr_bh"):
                    cmap = [color_bh, "white"]
                elif (what == "fdr_st"):
                    cmap = [color_st, "white"]
                else:
                    raise NotImplementedError

                cmap = LinearSegmentedColormap.from_list('no-name', cmap, N=256)

                (vmin, vmax) = [f(np.log(vals)) for f in [min, max]]
                vals = np.log(np.array(vals).reshape([-1, 1]))
                vals[rownames.index(SPACER)] = np.nan
                px.a.imshow(vals, cmap=cmap, vmin=vmin, vmax=vmax)

                px.a.spines['top'].set_visible(False)
                px.a.spines['left'].set_visible(False)
                px.a.spines['right'].set_visible(False)
                px.a.spines['bottom'].set_visible(False)

                for (i, v) in enumerate(vals.squeeze()):
                    if (rownames[i] != SPACER):
                        text = px.a.text(0, i, F"{np.exp(v):.02}", ha="center", va="center", color="k", fontsize=6)

                px.a.set_aspect(0.3)

                px.a.set_xticks([0])
                label = {'pvalue': "p-values", 'fdr_bh': "BH", 'fdr_st': "ST"}
                px.a.set_xticklabels([label[what]])

                px.a.set_yticks([])

                #px.show()
                px.f.savefig(figures / F"dataset_sort={sort}_{what}.png", dpi=300)


 def bar(*, x, edges=None, **histogram_param):
    edges = edges or np.linspace(0, 1, 17)
    (h, __) = np.histogram(x, bins=edges, **histogram_param)

    r = dict(
        x=(edges[0:-1] + min(np.diff(edges)) * 0.1),
        height=h,
        align="edge",
        width=(np.diff(edges) - 2 * min(np.diff(edges)) * 0.1),
    )

    return r


 def plabel(a: plt.Axes, ticks=None):
    # ticks = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.7, 1]
    ticks = ticks or list(np.linspace(0, 1, 21))
    a.set_xlim(-0.01, 1.01)
    a.set_xticks(ticks)
    a.set_xticklabels([
        F"${int(x)}$" if (x in [0]) else F"${round(x * 100)}\%$"
        for x in a.get_xticks()
    ])
    return ticks


 def compute_st():
    # Remark B in [ST, 2003]
    lambdas = np.linspace(0, 0.95, 20)
    assert np.isclose(lambdas[1], 0.05)

    pis = [
        ((np.sum(pvalues_h0 > lam) * mixture_coeff) + (np.sum(pvalues_H0 > lam) * (1 - mixture_coeff))) / (
                M * (1 - lam))
        for lam in lambdas
    ]

    for smooth in [0.97, 0.95]:
        import csaps
        sp = csaps.CubicSmoothingSpline(lambdas, pis, smooth=smooth)
        with Plox() as px:
            ticks = plabel(px.a, ticks=list(np.linspace(0, 1, 11)))
            px.a.set_xlim([0, 1.05], auto=False)

            px.a.grid(False)

            px.a.spines['top'].set_visible(False)
            px.a.spines['left'].set_visible(False)
            px.a.spines['right'].set_visible(True)
            px.a.spines['bottom'].set_visible(True)

            px.a.set_xlabel("$p$")
            px.a.yaxis.tick_right()

            px.a.plot(
                [0, max(px.a.get_xlim())], [1 - mixture_coeff] * 2, "-",
                color=color_H0, alpha=0.7, lw=2, zorder=1,
                label="Secret $\\pi_0 = \mathrm{P}(H_{0})$"
            )

            # px.a.plot(lambdas, pis, "ok", color=color_st, label="Plateau on $[\lambda, 1]$")

            for (lam, pi) in zip(lambdas, pis):
                label = dict(label=("Plateau on $[p, 1]$" + (' ' * 10))) if (lam == first(lambdas)) else {}
                px.a.plot(lam, pi, "s-", ms=3, lw=3, color=color_st, zorder=10, **label)
                px.a.plot([lam, lam + 0.03], [pi, pi], "-", lw=3, color=color_st, zorder=10)
                px.a.plot([lam, 1], [pi, pi], "-", lw=3, color=color_st, alpha=0.1, zorder=5)

            px.a.legend(loc="upper right")
            px.f.savefig(figures / F"st_smooth={smooth}_1.png", dpi=300)

            xx = np.linspace(0, 1, 100)
            px.a.plot(xx, sp(xx), "--", label="Smoothing spline fit", zorder=15, lw=2, color="darkorange")

            px.a.legend(loc="upper right")
            px.f.savefig(figures / F"st_smooth={smooth}_2.png", dpi=300)

            px.a.plot(1, sp(1), "o--", lw=2, color=color_st, zorder=10, label="Estimated $\\pi_0$")
            px.a.plot([0, max(px.a.get_xlim())], [sp(1)] * 2, "-", color=color_st, alpha=0.7, lw=2, zorder=1)

            px.a.legend(loc="upper right")
            px.f.savefig(figures / F"st_smooth={smooth}_3.png", dpi=300)

    return sp(1)


 def fdr_intro(p, st):
    def save(count=[0]):
        px.f.savefig(figures / F"p={p}_{count[0]:02}.png", dpi=300)
        count[0] += 1

    with Plox() as px:
        ticks = plabel(px.a)
        px.a.set_xlim(0, 0.5, auto=False)
        px.a.set_yticks([])

        px.a.set_xlabel("$p$")

        px.a.spines['top'].set_visible(False)
        px.a.spines['right'].set_visible(False)
        px.a.spines['left'].set_visible(False)
        px.a.spines['bottom'].set_visible(True)

        save()

        edges = sorted(set(np.round(list(set(np.linspace(0, 1, 41))), 6)))

        #

        px.a.plot([0, 1], [1, 1], ls='--', color=color_bh, alpha=0.7, lw=2, label="1")

        bar_h0 = bar(x=pvalues_h0, edges=edges, density=True)
        lines_h0 = px.a.bar(**bar_h0, **bar_style, alpha=0.6, color=color_h0)
        px.a.bar(0, height=0, width=0, color=color_h0, label=("$H_{*}$" + (' ' * 25)))

        px.a.legend()
        save()

        #

        bar_H0 = bar(x=pvalues_H0, edges=edges, density=True)
        lines_H0 = px.a.bar(**bar_H0, **bar_style, alpha=0.6, color=color_H0)
        px.a.bar(0, height=0, width=0, color=color_H0, label="$H_0$")

        px.a.legend()
        save()

        #

        bar_h0['height'] *= mixture_coeff
        bar_H0['height'] *= (1 - mixture_coeff)
        bar_mix = {**bar_h0}
        bar_mix['height'] = bar_h0['height'] + bar_H0['height']

        my = max(bar_mix['height']) * 1.1
        mx = 0.3

        frame = px.a.plot((0, mx, mx, 0, 0), (my, my, 0, 0, my), ls='--', lw=2, color='k')

        save()

        #

        for x in frame:
            x.remove()

        px.a.set_ylim(0, my, auto=False)
        px.a.set_xlim(0, mx, auto=False)

        px.a.legend()
        save()

        #

        lines_mix = px.a.bar(
            **bar_mix, **bar_style,
            alpha=0.7, lw=2, color="none", edgecolor="black",
            label="$\pi_0 H_{0} + (1 - \pi_0) H_{*}$", zorder=100
        )

        px.a.legend()
        save()

        #

        lines_H0.remove()
        lines_h0.remove()

        lines_H0 = px.a.bar(**bar_H0, **bar_style, alpha=0.6, color=color_H0)
        lines_h0 = px.a.bar(**bar_h0, **bar_style, alpha=0.6, color=color_h0, bottom=bar_H0['height'])

        px.a.legend()
        save()

        def filt(b):
            b = {**b}
            b['height'] = b['height'][b['x'] <= p]
            b['width'] = b['width'][b['x'] <= p]
            b['x'] = b['x'][b['x'] <= p]
            return b

        lines_H0 = px.a.bar(**filt(bar_H0), **bar_style, color=color_H0)
        lines_h0 = px.a.bar(**filt(bar_h0), **bar_style, color=color_h0, bottom=filt(bar_H0)['height'])
        px.a.bar(
            x=[0], height=max(px.a.get_ylim()), width=(p), align="edge",
            edgecolor="none", color="gray", alpha=0.3, zorder=-10
        )

        text_discoveries = px.a.text(
            s="Discoveries", x=(p * 0.95), y=(0.95 * max(px.a.get_ylim())),
            va="top", ha="right", rotation=90,
            color="white",
            zorder=1000,
        )

        save()

        #

        p0 = 0.05
        (x1, y1) = (p0 * 1.5, 0.4 * max(px.a.get_ylim()))

        text1 = px.a.text(s="False discoveries", x=x1, y=(y1 * 1.05), color="k")
        fds = filt(bar_H0)
        arrows = []
        for (x, w, h) in zip(fds['x'], fds['width'], fds['height']):
            x0 = x + w / 2
            y0 = h / 2
            arrow = px.a.plot([x0, x0 + (x1 - x0) * 0.96], [y0, y0 + (y1 - y0) * 0.96], ls='-', lw=0.8, color="black",
                              zorder=120, alpha=0.9)
            arrows.append(arrow)

        save()

        #

        fds = filt(bar_H0)
        (dy1, dx1) = (np.mean(fds['width']), np.sum(fds['height']))
        fds = filt(bar_mix)
        (dy2, dx2) = (np.mean(fds['width']), np.sum(fds['height']))

        dx1_H0 = dx1

        x1 *= 1.1
        y1 *= 1.1

        dx1 = dx1 / np.diff(px.a.get_ylim()) * np.diff(px.a.get_xlim())
        dx2 = dx2 / np.diff(px.a.get_ylim()) * np.diff(px.a.get_xlim())
        scale_dx = 1 / (dx2) * (max(px.a.get_xlim()) - p0) * 0.7
        dx1 *= scale_dx
        dx2 *= scale_dx
        dy1 = y1 * 0.15
        dy2 = dy1
        dy = (dy1 + dy2) / 6

        px.a.bar(
            align="edge", x=(x1 + (-dx1 / 2 + dx2 / 2)),
            bottom=y1 + dy, height=+dy1, width=dx1, color=color_H0, edgecolor="none"
        )
        px.a.bar(
            align="edge", x=x1,
            bottom=y1 - dy, height=-dy2, width=dx1, color=color_H0, edgecolor="none"
        )
        px.a.bar(
            align="edge", x=(x1 + dx1),
            bottom=y1 - dy, height=-dy2, width=dx2 - dx1, color=color_h0, edgecolor="none"
        )
        px.a.bar(
            align="edge", x=x1,
            bottom=y1 - dy, height=-dy2, width=dx2,
            lw=2, color="none", edgecolor="black", alpha=0.7,
        )
        px.a.plot(
            [x1 - dx2 * 0.05, x1 + dx2 * 1.05], [y1, y1], 'k-', lw=1,
        )
        px.a.bar(
            x=(x1 - dx2 * 0.1), bottom=(y1 - 2 * dy2), height=(4 * dy2), width=(dx2 * 1.2),
            align="edge",
            edgecolor="none", color="gray", alpha=0.3, zorder=-10,
        )

        text1.remove()
        text1 = px.a.text(s="FDR:", x=(x1 - dx2 * 0.1), y=(y1 + dy1 * 2.3), ha="left", va="baseline")

        save()

        #

        for arrow in arrows:
            for line in arrow:
                line.remove()

        text_discoveries.remove()

        bar_H0_1 = filt(bar_H0)
        bar_H0_1['height'] = np.ones_like(bar_H0_1['height'])
        lines_BH_bar = px.a.bar(**bar_H0_1, **bar_style, edgecolor=color_bh, ls="--", lw=2, color="none", zorder=120)

        lines_BH_box = px.a.bar(
            align="edge", x=(x1 + (-dx1 / 2 + dx2 / 2)),
            bottom=y1 + dy, height=+dy1, width=(dx1 / dx1_H0 * sum(bar_H0_1['height'])),
            color="none", alpha=0.7, edgecolor=color_bh, lw=2, ls="--",
        )
        from matplotlib.pyplot import Text
        text1: Text
        text1.set_text("FDR estimate by Benjamini-Hochberg (1995):")

        save()

        #

        lines_BH_bar.remove()
        lines_BH_box.remove()
        px.a.get_legend().remove()

        # lam = 0.2
        # ii = (bar_H0['x'] >= lam)
        # st = np.dot(bar_H0['height'][ii], bar_H0['x'][ii]) / np.sum(bar_H0['x'][ii])

        lines_ST_box = px.a.bar(
            align="edge", x=(x1 + (-dx1 / 2 + dx2 / 2)),
            bottom=y1 + dy, height=+dy1, width=(dx1 / dx1_H0 * st * len(bar_H0_1['height'])),
            color="none", alpha=1, edgecolor=color_st, lw=2, ls="--",
        )

        (lh0, ll0) = px.a.get_legend_handles_labels()

        lam = 0.2
        px.a.plot([lam, 1], [st, st], ls="-", lw=2, alpha=0.95, color=color_st, zorder=200)
        px.a.plot(lam, st, "s-", ms=3, lw=3, color=color_st, zorder=200, label="Plateau $\\approx \\pi_0$")

        bar_H0_ST = filt(bar_H0)
        bar_H0_ST['height'] = np.ones_like(bar_H0_ST['height']) * st
        lines_ST_bar = px.a.bar(**bar_H0_ST, **bar_style, edgecolor=color_st, ls="--", lw=2, color="none", zorder=120)

        text1.set_text("FDR estimate by Storey-Tibshirani (2003):")

        (lh1, ll1) = zip(*tuple(
            (h, lab)
            for (h, lab) in zip(*px.a.get_legend_handles_labels())
            if (lab not in ll0)
        ))
        px.a.legend(list(lh0) + list(lh1), list(ll0) + list(ll1))

        save()


 def fdr_plot_by_alpha(st):
    def save(count=[0]):
        count[0] += 1
        px.f.savefig(figures / F"alpha_{count[0]:02}.png", dpi=300)

    # alpha = np.array(
    #     sorted(
    #         set(np.logspace(np.log10(min(min(pvalues_h0), min(pvalues_H0))), np.log10(0.055), 150)) |
    #         {0.025, 0.05}
    #     )
    # )

    alpha = np.sort(np.unique(np.clip(np.concatenate([pvalues_h0, pvalues_H0]), a_min=0, a_max=0.055)))

    fdr_ex = [compute_fdr_ex(a) for a in alpha]
    fdr_bh = [compute_fdr_bh(a) for a in alpha]
    fdr_st = [compute_fdr_st(a, st) for a in alpha]

    with Plox() as px:
        plabel(px.a, ticks=list(np.linspace(0, 0.05, 6)))

        px.a.set_yticks(np.linspace(0, 1, 21))
        px.a.set_yticklabels([
            F"${int(y)}$" if (y in [0]) else F"${round(y * 100)}\%$"
            for y in px.a.get_yticks()
        ])

        px.a.set_xlim(-max(alpha) * 0.03, max(alpha) * 1.03, auto=False)
        px.a.set_ylim(0, max(fdr_bh) * 1.05)
        px.a.set_xlabel("$p$ discovery cutoff")
        px.a.set_ylabel('FDR')
        px.a.grid(True)

        exact_fdr = px.a.plot(alpha, fdr_ex, color="black", label="Exact FDR")
        px.a.legend(loc="upper left")

        save()

        i = np.argmin(np.abs(alpha - 0.05))
        # https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/marker_fillstyle_reference.html
        # http://scipy-lectures.org/intro/matplotlib/auto_examples/options/plot_mew.html
        px.a.plot(alpha[i], fdr_bh[i], '--s', color=color_bh, fillstyle="none", lw=2, markeredgewidth=2, label="Benjamini-Hochberg")
        px.a.plot(alpha[i], fdr_st[i], '--o', color=color_st, fillstyle="none", lw=2, markeredgewidth=2, label='Storey-Tibshirani, "q-value"')
        px.a.plot(alpha[i], fdr_bh[i], 's', color=color_bh, fillstyle="none", markeredgewidth=2, zorder=100)
        px.a.plot(alpha[i], fdr_st[i], 'o', color=color_st, fillstyle="none", markeredgewidth=2, zorder=100)
        i = np.argmin(np.abs(alpha - 0.025))
        px.a.plot(alpha[i], fdr_bh[i], 's', color=color_bh, fillstyle="none", markeredgewidth=2, zorder=100)
        px.a.plot(alpha[i], fdr_st[i], 'o', color=color_st, fillstyle="none", markeredgewidth=2, zorder=100)

        px.a.legend(loc="upper left")

        save()

        wiggly_bh = px.a.plot(alpha, fdr_bh, '.', color=color_bh, lw=2, zorder=50)
        wiggly_st = px.a.plot(alpha, fdr_st, '.', color=color_st, lw=2, zorder=50)

        save()

        exact_fdr[0].set_alpha(0.2)

        save()

        px.a.set_xlim(min(alpha) / 2, max(alpha) * 2)
        # px.a.set_ylim(min(min(fdr_bh), min(fdr_st)) / 2, max(px.a.get_ylim()))
        px.a.set_xscale('log')
        # px.a.set_yscale('log')

        # px.a.plot(pvalues_h0, 0.0 * np.ones_like(pvalues_h0), 'o', color=color_h0, markersize=4, zorder=100)
        # px.a.plot(pvalues_H0, 0.0 * np.ones_like(pvalues_H0), 'o', color=color_H0, markersize=4, zorder=200)

        save()

        from matplotlib.colors import to_rgb

        wiggly_bh[0].set_color(1 - (1 - np.array(to_rgb(color_bh))) / 2)
        wiggly_st[0].set_color(1 - (1 - np.array(to_rgb(color_st))) / 2)

        fdr_bh = np.flip(np.minimum.accumulate(np.flip(fdr_bh)))
        fdr_st = np.flip(np.minimum.accumulate(np.flip(fdr_st)))
        px.a.plot(alpha, fdr_bh, '--', color=color_bh, lw=2, zorder=100)
        px.a.plot(alpha, fdr_st, '--', color=color_st, lw=2, zorder=100)

        save()

        wiggly_bh[0].remove()
        wiggly_st[0].remove()
        px.a.plot(alpha, fdr_bh, '.', color=color_bh, lw=2, zorder=70)
        px.a.plot(alpha, fdr_st, '.', color=color_st, lw=2, zorder=70)

        save()


 def main():
    st = compute_st()
    visualize_dataset(st)
    fdr_plot_by_alpha(st)
    fdr_intro(0.050, st)
    fdr_intro(0.025, st)


 if __name__ == '__main__':
    main()
	# RA, 2020-10-16

	"""
	Visualization of the FDR and estimates by Benjamini-Hochberg and Storey-Tibshirani.

	[1] Benjamini Y and Hochberg Y.
	Controlling the false discovery rate: a practical
	and powerful approach to multiple testing.
	J R Statist Soc B, 1995.

	[2] Storey JD and Tibshirani R.
	Statistical significance for genomewide studies.
	PNAS.
	https://dx.doi.org/10.1073%2Fpnas.1530509100

	Earlier version of this code
	https://github.com/numpde/cbb/tree/master/20200628-FDR
	was based on
	Tan Siew Li, L
	Understanding the Benjamini-Hochberg method
	https://cpb-us-w2.wpmucdn.com/blog.nus.edu.sg/dist/0/3425/files/2018/10/Understanding-Benjamini-Hochberg-method-2ijolq0.pdf

	This code has no pretence of being pretty, sorry.

	RA, 2020-10-17
	"""

	from pathlib import Path

	import numpy as np
	from plox import Plox
	from numpy import mean, var, sqrt
	from scipy.stats import t as student_t
	import matplotlib.pyplot as plt

	figures = (Path(__file__).parent / "figures") / Path(__file__).stem
	figures.mkdir(exist_ok=True, parents=True)

	first = (lambda X: next(iter(X)))

	st = None
	M = 10000
	seed = 2

	mixture_coeff = 0.2

	bar_style = dict()
	color_h0 = "C2"
	color_H0 = "C0"
	color_bh = "brown"
	color_st = "mediumvioletred"


	class Hypotheses:
	# Size of groups
	n1 = 3
	n2 = 3

	# Null-hypothesis
	class H0:
	m = 0
	s = 1
	name = "H0"

	# Secret truth
	class h0:
	m = 3
	s = 1
	name = "h0"

	@classmethod
	def generate_dataset(cls, A, B, m):
	rgn = np.random.Generator(np.random.MT19937(seed=seed))
	dataset = [
	(
	rgn.normal(loc=A.m, scale=A.s, size=Hypotheses.n1),
	rgn.normal(loc=B.m, scale=B.s, size=Hypotheses.n2),
	)
	for __ in range(m)
	]
	return dataset


	def pvalue_pooled(g1, g2):
	"""
	t-test with pooled variance.
	H0: Avg(g1) = Avg(g2)

	https://en.wikipedia.org/wiki/Student%27s_t-test
	"""

	(m1, v1, n1) = (mean(g1), var(g1, ddof=1), len(g1))
	(m2, v2, n2) = (mean(g2), var(g2, ddof=1), len(g2))

	vp = (v1 * (n1 - 1) + v2 * (n2 - 1)) / ((n1 - 1) + (n2 - 1))
	t = abs(m1 - m2) / (sqrt(vp) * sqrt(1 / n1 + 1 / n2))
	nu = (n1 - 1) + (n2 - 1)

	return student_t.cdf(-t, df=nu) * 2


	def pvalue_welch(g1, g2):
	"""
	Welch's t-test.
	H0: Avg(g1) = Avg(g2)

	https://en.wikipedia.org/wiki/Welch%27s_t-test
	"""

	(m1, v1, n1) = (mean(g1), var(g1, ddof=1), len(g1))
	(m2, v2, n2) = (mean(g2), var(g2, ddof=1), len(g2))

	t = abs(m1 - m2) / sqrt(v1 / n1 + v2 / n2)
	nu = ((v1 + v2) 2) / ((v1 2) / (n1 - 1) + (v2 ** 2) / (n2 - 1))

	return student_t.cdf(-t, df=nu) * 2


	dataset_Hh = Hypotheses.generate_dataset(Hypotheses.H0, Hypotheses.h0, M)
	dataset_HH = Hypotheses.generate_dataset(Hypotheses.H0, Hypotheses.H0, M)
	pvalues_h0 = np.array([pvalue_pooled(g1, g2) for (g1, g2) in dataset_Hh])
	pvalues_H0 = np.array([pvalue_pooled(g1, g2) for (g1, g2) in dataset_HH])


	def compute_fdr_ex(a):
	nh = np.sum(pvalues_h0 <= a) * mixture_coeff
	nH = np.sum(pvalues_H0 <= a) * (1 - mixture_coeff)
	return nH / ((nh + nH) or 1)


	def compute_fdr_bh(a):
	nh = np.sum(pvalues_h0 <= a) * mixture_coeff
	nH = np.sum(pvalues_H0 <= a) * (1 - mixture_coeff)
	return M * a / ((nh + nH) or 1)


	def compute_fdr_st(a, st):
	nh = np.sum(pvalues_h0 <= a) * mixture_coeff
	nH = np.sum(pvalues_H0 <= a) * (1 - mixture_coeff)
	return M * a * st / ((nh + nH) or 1)


	def visualize_dataset(st):
	nh = 7
	nH = 13
	data = np.array([np.hstack(groups) for groups in dataset_Hh[0:nh] + dataset_HH[0:nH]])
	pval = np.array(list(pvalues_h0[0:nh]) + list(pvalues_H0[0:nH]))
	assert len(data) == len(pval)

	ii_random = np.arange(len(data))
	np.random.seed(seed)
	np.random.shuffle(ii_random)

	data = data[ii_random]
	pval = pval[ii_random]

	colnames = [
	F"$\\bf{gn + 1}$-${sn + 1}$"
	for (gn, g) in enumerate(dataset_HH[0])
	for (sn, s) in enumerate(g)
	]

	rownames = [
	F"Gene {n}"
	for n in range(len(data))
	]

	SPACER = "..."
	rownames[-2] = SPACER
	rownames[-1] = F"Gene {str(M)}"
	data[-2] = np.nan
	# pval[-2] = np.nan # doesn't sort right

	# from matplotlib.colors import LinearSegmentedColormap

	# https://matplotlib.org/3.1.1/gallery/color/colormap_reference.html
	import matplotlib.pyplot as plt

	style = {
	'xtick.top': False, 'xtick.bottom': False, 'xtick.labelbottom': False,
	'xtick.labeltop': True,

	'ytick.left': False,

	# sudo apt install cm-super-minimal
	'text.usetex': True,
	}

	for sort in [False, True]:

	if sort:
	ii_pvalue = sorted(range(len(pval)), key=(lambda i: pval[i]))
	data = data[ii_pvalue]
	pval = pval[ii_pvalue]
	rownames = [rownames[i] for i in ii_pvalue]

	with Plox(style) as px:
	from matplotlib.colors import LinearSegmentedColormap
	cmap = LinearSegmentedColormap.from_list('rg', ["r", "w", "g"], N=256)

	px.a.imshow(data, cmap=cmap)
	px.a.set_aspect(0.3)

	px.a.spines['top'].set_visible(False)
	px.a.spines['left'].set_visible(False)
	px.a.spines['right'].set_visible(False)
	px.a.spines['bottom'].set_visible(False)

	px.a.set_xticks(range(len(colnames)))
	px.a.set_xticklabels(colnames)
	# px.a.xaxis.set_label_position('top')

	px.a.set_yticks(range(len(rownames)))
	px.a.set_yticklabels(rownames, fontsize="xx-small")

	px.f.savefig(figures / F"dataset_sort={sort}_expression.png", dpi=300)

	fdr_bh = [compute_fdr_bh(p) for p in pval]
	fdr_st = [compute_fdr_st(p, st) for p in pval]

	for (vals, what) in zip([pval, fdr_bh, fdr_st], ["pvalue", "fdr_bh", "fdr_st"]):
	with Plox(style) as px:
	if (what == "pvalue"):
	cmap = [color_h0, color_H0]
	elif (what == "fdr_bh"):
	cmap = [color_bh, "white"]
	elif (what == "fdr_st"):
	cmap = [color_st, "white"]
	else:
	raise NotImplementedError

	cmap = LinearSegmentedColormap.from_list('no-name', cmap, N=256)

	(vmin, vmax) = [f(np.log(vals)) for f in [min, max]]
	vals = np.log(np.array(vals).reshape([-1, 1]))
	vals[rownames.index(SPACER)] = np.nan
	px.a.imshow(vals, cmap=cmap, vmin=vmin, vmax=vmax)

	px.a.spines['top'].set_visible(False)
	px.a.spines['left'].set_visible(False)
	px.a.spines['right'].set_visible(False)
	px.a.spines['bottom'].set_visible(False)

	for (i, v) in enumerate(vals.squeeze()):
	if (rownames[i] != SPACER):
	text = px.a.text(0, i, F"{np.exp(v):.02}", ha="center", va="center", color="k", fontsize=6)

	px.a.set_aspect(0.3)

	px.a.set_xticks([0])
	label = {'pvalue': "p-values", 'fdr_bh': "BH", 'fdr_st': "ST"}
	px.a.set_xticklabels([label[what]])

	px.a.set_yticks([])

	#px.show()
	px.f.savefig(figures / F"dataset_sort={sort}_{what}.png", dpi=300)


	def bar(, x, edges=None, *histogram_param):
	edges = edges or np.linspace(0, 1, 17)
	(h, __) = np.histogram(x, bins=edges, **histogram_param)

	r = dict(
	x=(edges[0:-1] + min(np.diff(edges)) * 0.1),
	height=h,
	align="edge",
	width=(np.diff(edges) - 2 * min(np.diff(edges)) * 0.1),
	)

	return r


	def plabel(a: plt.Axes, ticks=None):
	# ticks = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.7, 1]
	ticks = ticks or list(np.linspace(0, 1, 21))
	a.set_xlim(-0.01, 1.01)
	a.set_xticks(ticks)
	a.set_xticklabels([
	F"${int(x)}$" if (x in [0]) else F"${round(x * 100)}\%$"
	for x in a.get_xticks()
	])
	return ticks


	def compute_st():
	# Remark B in [ST, 2003]
	lambdas = np.linspace(0, 0.95, 20)
	assert np.isclose(lambdas[1], 0.05)

	pis = [
	((np.sum(pvalues_h0 > lam) * mixture_coeff) + (np.sum(pvalues_H0 > lam) * (1 - mixture_coeff))) / (
	M * (1 - lam))
	for lam in lambdas
	]

	for smooth in [0.97, 0.95]:
	import csaps
	sp = csaps.CubicSmoothingSpline(lambdas, pis, smooth=smooth)
	with Plox() as px:
	ticks = plabel(px.a, ticks=list(np.linspace(0, 1, 11)))
	px.a.set_xlim([0, 1.05], auto=False)

	px.a.grid(False)

	px.a.spines['top'].set_visible(False)
	px.a.spines['left'].set_visible(False)
	px.a.spines['right'].set_visible(True)
	px.a.spines['bottom'].set_visible(True)

	px.a.set_xlabel("$p$")
	px.a.yaxis.tick_right()

	px.a.plot(
	[0, max(px.a.get_xlim())], [1 - mixture_coeff] * 2, "-",
	color=color_H0, alpha=0.7, lw=2, zorder=1,
	label="Secret $\\pi_0 = \mathrm{P}(H_{0})$"
	)

	# px.a.plot(lambdas, pis, "ok", color=color_st, label="Plateau on $[\lambda, 1]$")

	for (lam, pi) in zip(lambdas, pis):
	label = dict(label=("Plateau on $[p, 1]$" + (' ' * 10))) if (lam == first(lambdas)) else {}
	px.a.plot(lam, pi, "s-", ms=3, lw=3, color=color_st, zorder=10, **label)
	px.a.plot([lam, lam + 0.03], [pi, pi], "-", lw=3, color=color_st, zorder=10)
	px.a.plot([lam, 1], [pi, pi], "-", lw=3, color=color_st, alpha=0.1, zorder=5)

	px.a.legend(loc="upper right")
	px.f.savefig(figures / F"st_smooth={smooth}_1.png", dpi=300)

	xx = np.linspace(0, 1, 100)
	px.a.plot(xx, sp(xx), "--", label="Smoothing spline fit", zorder=15, lw=2, color="darkorange")

	px.a.legend(loc="upper right")
	px.f.savefig(figures / F"st_smooth={smooth}_2.png", dpi=300)

	px.a.plot(1, sp(1), "o--", lw=2, color=color_st, zorder=10, label="Estimated $\\pi_0$")
	px.a.plot([0, max(px.a.get_xlim())], [sp(1)] * 2, "-", color=color_st, alpha=0.7, lw=2, zorder=1)

	px.a.legend(loc="upper right")
	px.f.savefig(figures / F"st_smooth={smooth}_3.png", dpi=300)

	return sp(1)


	def fdr_intro(p, st):
	def save(count=[0]):
	px.f.savefig(figures / F"p={p}_{count[0]:02}.png", dpi=300)
	count[0] += 1

	with Plox() as px:
	ticks = plabel(px.a)
	px.a.set_xlim(0, 0.5, auto=False)
	px.a.set_yticks([])

	px.a.set_xlabel("$p$")

	px.a.spines['top'].set_visible(False)
	px.a.spines['right'].set_visible(False)
	px.a.spines['left'].set_visible(False)
	px.a.spines['bottom'].set_visible(True)

	save()

	edges = sorted(set(np.round(list(set(np.linspace(0, 1, 41))), 6)))

	#

	px.a.plot([0, 1], [1, 1], ls='--', color=color_bh, alpha=0.7, lw=2, label="1")

	bar_h0 = bar(x=pvalues_h0, edges=edges, density=True)
	lines_h0 = px.a.bar(bar_h0, bar_style, alpha=0.6, color=color_h0)
	px.a.bar(0, height=0, width=0, color=color_h0, label=("$H_{}$" + (' ' 25)))

	px.a.legend()
	save()

	#

	bar_H0 = bar(x=pvalues_H0, edges=edges, density=True)
	lines_H0 = px.a.bar(bar_H0, bar_style, alpha=0.6, color=color_H0)
	px.a.bar(0, height=0, width=0, color=color_H0, label="$H_0$")

	px.a.legend()
	save()

	#

	bar_h0['height'] *= mixture_coeff
	bar_H0['height'] *= (1 - mixture_coeff)
	bar_mix = {**bar_h0}
	bar_mix['height'] = bar_h0['height'] + bar_H0['height']

	my = max(bar_mix['height']) * 1.1
	mx = 0.3

	frame = px.a.plot((0, mx, mx, 0, 0), (my, my, 0, 0, my), ls='--', lw=2, color='k')

	save()

	#

	for x in frame:
	x.remove()

	px.a.set_ylim(0, my, auto=False)
	px.a.set_xlim(0, mx, auto=False)

	px.a.legend()
	save()

	#

	lines_mix = px.a.bar(
	bar_mix, bar_style,
	alpha=0.7, lw=2, color="none", edgecolor="black",
	label="$\pi_0 H_{0} + (1 - \pi_0) H_{*}$", zorder=100
	)

	px.a.legend()
	save()

	#

	lines_H0.remove()
	lines_h0.remove()

	lines_H0 = px.a.bar(bar_H0, bar_style, alpha=0.6, color=color_H0)
	lines_h0 = px.a.bar(bar_h0, bar_style, alpha=0.6, color=color_h0, bottom=bar_H0['height'])

	px.a.legend()
	save()

	def filt(b):
	b = {**b}
	b['height'] = b['height'][b['x'] <= p]
	b['width'] = b['width'][b['x'] <= p]
	b['x'] = b['x'][b['x'] <= p]
	return b

	lines_H0 = px.a.bar(filt(bar_H0), bar_style, color=color_H0)
	lines_h0 = px.a.bar(filt(bar_h0), bar_style, color=color_h0, bottom=filt(bar_H0)['height'])
	px.a.bar(
	x=[0], height=max(px.a.get_ylim()), width=(p), align="edge",
	edgecolor="none", color="gray", alpha=0.3, zorder=-10
	)

	text_discoveries = px.a.text(
	s="Discoveries", x=(p * 0.95), y=(0.95 * max(px.a.get_ylim())),
	va="top", ha="right", rotation=90,
	color="white",
	zorder=1000,
	)

	save()

	#

	p0 = 0.05
	(x1, y1) = (p0 * 1.5, 0.4 * max(px.a.get_ylim()))

	text1 = px.a.text(s="False discoveries", x=x1, y=(y1 * 1.05), color="k")
	fds = filt(bar_H0)
	arrows = []
	for (x, w, h) in zip(fds['x'], fds['width'], fds['height']):
	x0 = x + w / 2
	y0 = h / 2
	arrow = px.a.plot([x0, x0 + (x1 - x0) * 0.96], [y0, y0 + (y1 - y0) * 0.96], ls='-', lw=0.8, color="black",
	zorder=120, alpha=0.9)
	arrows.append(arrow)

	save()

	#

	fds = filt(bar_H0)
	(dy1, dx1) = (np.mean(fds['width']), np.sum(fds['height']))
	fds = filt(bar_mix)
	(dy2, dx2) = (np.mean(fds['width']), np.sum(fds['height']))

	dx1_H0 = dx1

	x1 *= 1.1
	y1 *= 1.1

	dx1 = dx1 / np.diff(px.a.get_ylim()) * np.diff(px.a.get_xlim())
	dx2 = dx2 / np.diff(px.a.get_ylim()) * np.diff(px.a.get_xlim())
	scale_dx = 1 / (dx2) * (max(px.a.get_xlim()) - p0) * 0.7
	dx1 *= scale_dx
	dx2 *= scale_dx
	dy1 = y1 * 0.15
	dy2 = dy1
	dy = (dy1 + dy2) / 6

	px.a.bar(
	align="edge", x=(x1 + (-dx1 / 2 + dx2 / 2)),
	bottom=y1 + dy, height=+dy1, width=dx1, color=color_H0, edgecolor="none"
	)
	px.a.bar(
	align="edge", x=x1,
	bottom=y1 - dy, height=-dy2, width=dx1, color=color_H0, edgecolor="none"
	)
	px.a.bar(
	align="edge", x=(x1 + dx1),
	bottom=y1 - dy, height=-dy2, width=dx2 - dx1, color=color_h0, edgecolor="none"
	)
	px.a.bar(
	align="edge", x=x1,
	bottom=y1 - dy, height=-dy2, width=dx2,
	lw=2, color="none", edgecolor="black", alpha=0.7,
	)
	px.a.plot(
	[x1 - dx2 * 0.05, x1 + dx2 * 1.05], [y1, y1], 'k-', lw=1,
	)
	px.a.bar(
	x=(x1 - dx2 * 0.1), bottom=(y1 - 2 * dy2), height=(4 * dy2), width=(dx2 * 1.2),
	align="edge",
	edgecolor="none", color="gray", alpha=0.3, zorder=-10,
	)

	text1.remove()
	text1 = px.a.text(s="FDR:", x=(x1 - dx2 * 0.1), y=(y1 + dy1 * 2.3), ha="left", va="baseline")

	save()

	#

	for arrow in arrows:
	for line in arrow:
	line.remove()

	text_discoveries.remove()

	bar_H0_1 = filt(bar_H0)
	bar_H0_1['height'] = np.ones_like(bar_H0_1['height'])
	lines_BH_bar = px.a.bar(bar_H0_1, bar_style, edgecolor=color_bh, ls="--", lw=2, color="none", zorder=120)

	lines_BH_box = px.a.bar(
	align="edge", x=(x1 + (-dx1 / 2 + dx2 / 2)),
	bottom=y1 + dy, height=+dy1, width=(dx1 / dx1_H0 * sum(bar_H0_1['height'])),
	color="none", alpha=0.7, edgecolor=color_bh, lw=2, ls="--",
	)
	from matplotlib.pyplot import Text
	text1: Text
	text1.set_text("FDR estimate by Benjamini-Hochberg (1995):")

	save()

	#

	lines_BH_bar.remove()
	lines_BH_box.remove()
	px.a.get_legend().remove()

	# lam = 0.2
	# ii = (bar_H0['x'] >= lam)
	# st = np.dot(bar_H0['height'][ii], bar_H0['x'][ii]) / np.sum(bar_H0['x'][ii])

	lines_ST_box = px.a.bar(
	align="edge", x=(x1 + (-dx1 / 2 + dx2 / 2)),
	bottom=y1 + dy, height=+dy1, width=(dx1 / dx1_H0 * st * len(bar_H0_1['height'])),
	color="none", alpha=1, edgecolor=color_st, lw=2, ls="--",
	)

	(lh0, ll0) = px.a.get_legend_handles_labels()

	lam = 0.2
	px.a.plot([lam, 1], [st, st], ls="-", lw=2, alpha=0.95, color=color_st, zorder=200)
	px.a.plot(lam, st, "s-", ms=3, lw=3, color=color_st, zorder=200, label="Plateau $\\approx \\pi_0$")

	bar_H0_ST = filt(bar_H0)
	bar_H0_ST['height'] = np.ones_like(bar_H0_ST['height']) * st
	lines_ST_bar = px.a.bar(bar_H0_ST, bar_style, edgecolor=color_st, ls="--", lw=2, color="none", zorder=120)

	text1.set_text("FDR estimate by Storey-Tibshirani (2003):")

	(lh1, ll1) = zip(*tuple(
	(h, lab)
	for (h, lab) in zip(*px.a.get_legend_handles_labels())
	if (lab not in ll0)
	))
	px.a.legend(list(lh0) + list(lh1), list(ll0) + list(ll1))

	save()


	def fdr_plot_by_alpha(st):
	def save(count=[0]):
	count[0] += 1
	px.f.savefig(figures / F"alpha_{count[0]:02}.png", dpi=300)

	# alpha = np.array(
	# sorted(
	# set(np.logspace(np.log10(min(min(pvalues_h0), min(pvalues_H0))), np.log10(0.055), 150)) \|
	# {0.025, 0.05}
	# )
	# )

	alpha = np.sort(np.unique(np.clip(np.concatenate([pvalues_h0, pvalues_H0]), a_min=0, a_max=0.055)))

	fdr_ex = [compute_fdr_ex(a) for a in alpha]
	fdr_bh = [compute_fdr_bh(a) for a in alpha]
	fdr_st = [compute_fdr_st(a, st) for a in alpha]

	with Plox() as px:
	plabel(px.a, ticks=list(np.linspace(0, 0.05, 6)))

	px.a.set_yticks(np.linspace(0, 1, 21))
	px.a.set_yticklabels([
	F"${int(y)}$" if (y in [0]) else F"${round(y * 100)}\%$"
	for y in px.a.get_yticks()
	])

	px.a.set_xlim(-max(alpha) * 0.03, max(alpha) * 1.03, auto=False)
	px.a.set_ylim(0, max(fdr_bh) * 1.05)
	px.a.set_xlabel("$p$ discovery cutoff")
	px.a.set_ylabel('FDR')
	px.a.grid(True)

	exact_fdr = px.a.plot(alpha, fdr_ex, color="black", label="Exact FDR")
	px.a.legend(loc="upper left")

	save()

	i = np.argmin(np.abs(alpha - 0.05))
	# https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/marker_fillstyle_reference.html
	# http://scipy-lectures.org/intro/matplotlib/auto_examples/options/plot_mew.html
	px.a.plot(alpha[i], fdr_bh[i], '--s', color=color_bh, fillstyle="none", lw=2, markeredgewidth=2, label="Benjamini-Hochberg")
	px.a.plot(alpha[i], fdr_st[i], '--o', color=color_st, fillstyle="none", lw=2, markeredgewidth=2, label='Storey-Tibshirani, "q-value"')
	px.a.plot(alpha[i], fdr_bh[i], 's', color=color_bh, fillstyle="none", markeredgewidth=2, zorder=100)
	px.a.plot(alpha[i], fdr_st[i], 'o', color=color_st, fillstyle="none", markeredgewidth=2, zorder=100)
	i = np.argmin(np.abs(alpha - 0.025))
	px.a.plot(alpha[i], fdr_bh[i], 's', color=color_bh, fillstyle="none", markeredgewidth=2, zorder=100)
	px.a.plot(alpha[i], fdr_st[i], 'o', color=color_st, fillstyle="none", markeredgewidth=2, zorder=100)

	px.a.legend(loc="upper left")

	save()

	wiggly_bh = px.a.plot(alpha, fdr_bh, '.', color=color_bh, lw=2, zorder=50)
	wiggly_st = px.a.plot(alpha, fdr_st, '.', color=color_st, lw=2, zorder=50)

	save()

	exact_fdr[0].set_alpha(0.2)

	save()

	px.a.set_xlim(min(alpha) / 2, max(alpha) * 2)
	# px.a.set_ylim(min(min(fdr_bh), min(fdr_st)) / 2, max(px.a.get_ylim()))
	px.a.set_xscale('log')
	# px.a.set_yscale('log')

	# px.a.plot(pvalues_h0, 0.0 * np.ones_like(pvalues_h0), 'o', color=color_h0, markersize=4, zorder=100)
	# px.a.plot(pvalues_H0, 0.0 * np.ones_like(pvalues_H0), 'o', color=color_H0, markersize=4, zorder=200)

	save()

	from matplotlib.colors import to_rgb

	wiggly_bh[0].set_color(1 - (1 - np.array(to_rgb(color_bh))) / 2)
	wiggly_st[0].set_color(1 - (1 - np.array(to_rgb(color_st))) / 2)

	fdr_bh = np.flip(np.minimum.accumulate(np.flip(fdr_bh)))
	fdr_st = np.flip(np.minimum.accumulate(np.flip(fdr_st)))
	px.a.plot(alpha, fdr_bh, '--', color=color_bh, lw=2, zorder=100)
	px.a.plot(alpha, fdr_st, '--', color=color_st, lw=2, zorder=100)

	save()

	wiggly_bh[0].remove()
	wiggly_st[0].remove()
	px.a.plot(alpha, fdr_bh, '.', color=color_bh, lw=2, zorder=70)
	px.a.plot(alpha, fdr_st, '.', color=color_st, lw=2, zorder=70)

	save()


	def main():
	st = compute_st()
	visualize_dataset(st)
	fdr_plot_by_alpha(st)
	fdr_intro(0.050, st)
	fdr_intro(0.025, st)


	if __name__ == '__main__':
	main()