Skip to content

Instantly share code, notes, and snippets.

@shaybensasson
Last active June 14, 2022 17:54
Show Gist options
  • Select an option

  • Save shaybensasson/6b11b3de18840bb73432c98fe1ec585f to your computer and use it in GitHub Desktop.

Select an option

Save shaybensasson/6b11b3de18840bb73432c98fe1ec585f to your computer and use it in GitHub Desktop.
My Jupyter nbextension "Snippets Menu" JavaScript configuration (a JS object literal, not strict JSON)
var MY_SNIPPETS = {
'name': 'Snippets',
'sub-menu': [
{
'name': 'config file',
'snippet': ['!cat ~/.local/share/jupyter/nbextensions/snippets_menu/my_snippets.js']
},
'---',
{
'name': 'Header',
'sub-menu': [
{
'name': 'numpy|pandas|matplotlib|seaborn',
"snippet": [
"%matplotlib inline",
"%config InlineBackend.figure_format = 'retina'",
"",
"import matplotlib",
"import matplotlib.pyplot as plt",
"import seaborn as sns",
"from pylab import rcParams",
"",
"sns.set(style='whitegrid', palette='muted', font_scale=1.33)",
"# plt.style.use('ggplot')",
"",
"HAPPY_COLORS_PALETTE = ['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']",
"",
"sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))",
"",
"rcParams['figure.figsize'] = 10, 8",
"",
"import pandas as pd",
"import numpy as np",
"",
"# x = np.arange(100)",
"y=np.random.randint(0, 5+1, size=(100))",
"#sns.scatterplot(x,y)",
"#sns.regplot(x,y)",
"",
"ax = plt.figure(figsize=(10,5)).gca() #w,h",
"sns.countplot(y, ax=ax)",
"ax.set_xticklabels(labels=HAPPY_COLORS_PALETTE)",
"ax.yaxis.set_major_locator(plt.MaxNLocator(10)) #see https://matplotlib.org/3.1.1/gallery/ticks_and_spines/tick-locators.html"
]
},
{
'name': 'Plot.ly 4.5',
'snippet': ['#online mode (just comment for offline mode)',
'# import chart_studio',
"# chart_studio.tools.set_credentials_file(username='bensshay', api_key='YuUeRFOAsKffHg3NpLbJ')",
'',
'from plotly.offline import iplot',
'import plotly.graph_objects as go',
'',
'import plotly.io as pio',
"pio.templates.default = 'none' #set theme",
'',
'# Cufflinks wrapper on plotly',
'import cufflinks',
'',
'cufflinks.go_offline()',
'',
'# Set global theme',
"cufflinks.set_config_file(world_readable=True, theme='pearl')",
'',
'#After importing cufflinks, plotly plots can be made using df.iplot() and then specifying parameters. ',
'# This is a great replacement for matplotlib!',
'',
'',
'#quick demo',
'fig = go.Figure(go.Scatter(x=[1, 2, 3, 4], y=[4, 3, 2, 1]))',
"fig.update_layout(title_text='hello world')",
'',
'iplot(fig)',
''
]
},
{
'name': 'versions of everything',
'snippet': ['%reload_ext watermark',
'import warnings',
'',
'import os',
"print('VirtualEnv: {}'.format(os.getenv('VIRTUAL_ENV').split('/')[-1]))",
"print('')",
'with warnings.catch_warnings():',
" warnings.simplefilter('ignore')",
' %watermark -v --packages numpy,scipy,sklearn,pandas,matplotlib,seaborn,tqdm,keras,tensorflow',
"print('')",
'!cat /usr/local/cuda/version.txt',
'',
'#import pandas as pd',
'#pd.show_versions()'
]
},
{
'name': 'ignore warnings',
'snippet': ["import warnings; warnings.simplefilter('ignore')"]
},
{
'name': 'reload an existing module',
'snippet': ['#import module',
'import importlib',
'importlib.reload(module)'
]
},
{
'name': 'add packages to python path',
'snippet': ['import sys, os',
"paths = ['~/Homer/', '~/Homer/lib/hyperopt/', '~/Homer/lib/PDPbox/', '~/Homer/lib/PyCEbox/', '~/Homer/lib/ALEPython/']",
'sys.path.extend([os.path.expanduser(p) for p in paths])'
]
},
{
"name": "Float formatting",
"snippet": [
"np.set_printoptions(formatter={'float_kind': '{:3f}'.format})",
"%precision 3 #ipython float formatter",
"pd.options.display.float_format='{:.3f}'.format"
]
},
'---',
{
'name': 'OLD Plot.ly<4.5',
'snippet': ['import plotly ',
'#online mode',
"plotly.tools.set_credentials_file(username='bensshay', api_key='YuUeRFOAsKffHg3NpLbJ')",
'',
'#offline mode',
'#from plotly.offline import init_notebook_mode, iplot',
'#Always run this the command before at the start of notebook',
'#init_notebook_mode(connected=False)',
'',
'# plotly standard imports',
'import plotly.graph_objs as go',
'import plotly.plotly as py',
'import plotly.figure_factory as ff',
'',
'# Cufflinks wrapper on plotly',
'import cufflinks',
'',
'from plotly.offline import iplot',
'cufflinks.go_offline()',
'',
'# Set global theme',
"cufflinks.set_config_file(world_readable=True, theme='pearl')",
'',
'#After importing cufflinks, plotly plots can be made using df.iplot() and then specifying parameters. ',
'# This is a great replacement for matplotlib!'
]
}
]
},
{
'name': 'Thesis',
'sub-menu': [
{
'name': 'Boilerplate',
'sub-menu': [
{
"name": "1. dirs",
"snippet": [
"import pandas as pd",
"import numpy as np",
"import pickle",
"",
"import sys, os",
"",
"HOMER_DIR = os.path.expanduser('~/Homer')",
"sys.path.extend([HOMER_DIR])",
"",
"from homer import options",
"RANDOM_SEED = options.RANDOM_SEED",
"",
"STYLES_DIR = os.path.join(HOMER_DIR, 'styles')",
"HOME_DIR = os.path.join(HOMER_DIR, 'Intelligence/v2') #project home_dir",
"DATA_DIR = os.path.join(HOME_DIR, 'data')",
"ENSEMBLES_DIR = os.path.join(HOME_DIR, 'out/ensembles')",
"TOP_FEATS_DIR = os.path.join(ENSEMBLES_DIR, 'top_feats')",
"",
"_ANALYSIS_DIR = os.path.join(HOME_DIR, 'out/analysis')",
"FIGURES_DIR = os.path.join(_ANALYSIS_DIR, 'figures')",
"INTERMEDIATE_DIR = os.path.join(_ANALYSIS_DIR, 'intermediate')"
]
},
{
"name": "2. styles",
"snippet": [
"# IMPORTANT: It is essential that the use.style will be on difference cell than the %matplotlib magic",
"import matplotlib.pyplot as plt",
"import seaborn as sns",
"",
"BASELINE_STYLE = os.path.join(STYLES_DIR, 'baseline.mplstyle')",
"THESIS_STYLE = os.path.join(STYLES_DIR, 'thesis.mplstyle')",
"THESIS_CB_STYLE = os.path.join(STYLES_DIR, 'thesis.colorblind.mplstyle')",
"THESIS_SHAP_STYLE = os.path.join(STYLES_DIR, 'thesis.shap.mplstyle')",
"# plt.style.use([BASELINE_STYLE])",
"plt.style.use([BASELINE_STYLE, THESIS_STYLE])",
"",
"#can be used inside a context manger:",
"#with plt.style.context([BASELINE_STYLE, THESIS_STYLE,THESIS_SHAP_STYLE]):",
"#with plt.rc_context({'axes.grid': False}):",
""
]
},
]
},
]
},
{
'name': 'Jupyter',
'sub-menu': [
{
'name': 'Magics',
'sub-menu': [
{
"name": "autoreload",
"snippet": [
"%load_ext autoreload",
"",
"%autoreload 2 # reloads all modules every time this cell is executed"
]
},
{
"name": "timeit (run multiple times)",
"snippet": [
"%%timeit -r2 -n3 # 2 runs x 3 iterations/loops",
"import time",
"time.sleep(1)"
]
}],
},
'---',
{
'name': 'print all pathes',
'snippet': ['!jupyter --path']
},
{
'name': 'auto save when executed',
'snippet': ['from IPython.display import Javascript',
'',
"script = ''",
'if (AUTO_SAVE_WHEN_COMPLETE):',
" script = '''",
' require(["base/js/namespace"],function(Jupyter) {',
' Jupyter.notebook.save_checkpoint();',
' });',
" '''",
'Javascript(script)'
]
},
{
'name': 'time notebook',
'snippet': ['#Start block',
'import time',
'start_time = time.time()',
'',
'#End block',
'import datetime',
'duration = str(datetime.timedelta(seconds=time.time()-start_time))',
"print(f'The whole notebook took: {duration}')"
]
}]
},
{
'name': 'Plotting',
'sub-menu': [
{
'name': 'Matplotlib',
'sub-menu': [
{
'name': 'subplotting (plt.subplots)',
'snippet': ['from tqdm.auto import tqdm, trange',
'',
'N = 6',
'NCOLS = min(5,N)',
'NROWS = int(np.ceil(N/NCOLS))',
'# print(N, NROWS, NCOLS)',
'f, axes = plt.subplots(nrows=NROWS, ncols=NCOLS, figsize=(24,3*NROWS), squeeze=False) #w,h',
'',
'for i in trange(N):',
' ax = axes[int(i/NCOLS),i%NCOLS] ',
' x = np.random.randint(10, size=(10,))',
' ax.scatter(x,x)',
" ax.set_title('#%s' % i)",
'',
'#delete leftovers',
'for i in range(N, N + NROWS*NCOLS-N):',
' ax: plt.Axes = axes[int(i/NCOLS),i%NCOLS]',
' f.delaxes(ax)',
' ',
'plt.tight_layout(w_pad=2.5, h_pad=2) #pads are specified in fraction of fontsize'
]
},
{
'name': 'subplotting (matlab style)',
'snippet': ['from tqdm.auto import tqdm, trange',
'',
'N = 6',
'NCOLS = min(5,N)',
'NROWS = int(np.ceil(N/NCOLS))',
'# print(N, NROWS, NCOLS)',
'',
'# Matlab style',
'plt.subplots(figsize=(24,3*NROWS))',
'# plt.subplots_adjust(wspace=0.2,hspace=0.5)',
'for i in trange(N):',
' ax = plt.subplot(NROWS,NCOLS,i+1)',
' ',
' x = np.random.randint(10, size=(10,))',
' ax.scatter(x,x)',
" ax.set_title('#%s' % i)",
' ',
'plt.tight_layout(w_pad=2.5, h_pad=2) #pads are specified in fraction of fontsize'
]
},
{
'name': 'set plot font_size',
'snippet': ["ax = plt.subplot(111, xlabel='x', ylabel='y', title='title')",
"ax.scatter([1,2,3], [1,0,3], label='123')",
'ax.legend()',
'for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +',
' ax.get_xticklabels() + ax.get_yticklabels() + ',
' ax.legend().get_texts()):',
' item.set_fontsize(14)',
'# more here: https://stackoverflow.com/questions/3899980/how-to-change-the-font-size-on-a-matplotlib-plot'
]
},
'---',
{
"name": "3d scatter",
"snippet": [
"%matplotlib notebook",
"# interactive plots",
"",
"from mpl_toolkits import mplot3d",
"plt.rcParams['figure.figsize'] = 15, 8",
"",
"import numpy as np",
"",
"def f(x, y):",
" return np.sin(np.sqrt(x ** 2 + y ** 2))",
"",
"N = 5000",
"theta = 2 * np.pi * np.random.random(N)",
"r = 6 * np.random.random(N)",
"x = np.ravel(r * np.sin(theta))",
"y = np.ravel(r * np.cos(theta))",
"z = f(x, y)",
"",
"ax = plt.axes(projection='3d')",
"ax.scatter(x, y, z, c=z, alpha=.4, s=50, cmap='viridis');",
"",
"# TIP: adding legend: https://stackoverflow.com/a/20505720/1640414",
"ax.view_init(elev=45, azim=45) #pan using left mouse button, zoom using right mouse button"
]
},
{
'name': '3d (static) plots examples &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;',
'external-link': 'https://www.kaggle.com/saurav9786/interactive-3-d-plots-for-data-visualization',
},
],
},
{
'name': 'Seaborn',
'sub-menu': [
{
"name": "Distinct seaborn color pallete",
"snippet": [
"%matplotlib inline",
"%config InlineBackend.figure_format = 'retina'",
"",
"import matplotlib.pyplot as plt",
"import seaborn as sns",
"import numpy as np",
"",
"#see https://mokole.com/palette.html",
"DISTINCT_COLORS_PALLETE = ['#808080','#556b2f','#7f0000','#483d8b','#008000','#008b8b','#000080','#d2691e','#daa520','#8fbc8f','#800080','#b03060','#ff4500','#ffff00','#00ff00','#00ff7f','#dc143c','#00ffff','#00bfff','#0000ff','#a020f0','#adff2f','#1e90ff','#90ee90','#add8e6','#ff1493','#7b68ee','#ee82ee','#ffdead','#ffc0cb'][::-1]",
"sns.set(style='whitegrid', font_scale=1.33)",
"sns.set_palette(DISTINCT_COLORS_PALLETE)",
"",
"# sns.palplot(sns.color_palette(DISTINCT_COLORS_PALLETE)) #render the pallete",
"",
"N_COLORS = min(30, len(DISTINCT_COLORS_PALLETE))",
"y=np.random.randint(0, N_COLORS+1, size=(100))",
"ax = plt.figure(figsize=(20,10)).gca() #w,h",
"sns.countplot(y, ax=ax);"
]
},
'---',
{
'name': 'histogram/kde',
'snippet': ['data = np.random.randn(100)',
"#plt.hist(data, density=True, bins='auto',",
'# alpha=0.7, rwidth=0.95);',
'',
"sns.kdeplot(data, color = 'red', linewidth = 2, shade = True);"
]
},
{
"name": "countplot (series value_counts())",
"snippet": [
"ax = sns.countplot(df.target)",
"ax.set_xticklabels(class_names);"
]
},
{
'name': 'scatter/regplot',
'snippet': ['x = np.arange(100)',
'y=np.random.randint(0, 100, size=(100))',
'#sns.scatterplot(x,y)',
'sns.regplot(x,y)'
]
},
{
"name": "BoxEn plot: better than box plot",
"snippet": [
"# https://towardsdatascience.com/5-lesser-known-seaborn-plots-most-people-dont-know-82e5a54baea8",
"tips = sns.load_dataset('tips')",
"#sns.boxplot(x='day', y='total_bill', data=tips) ",
"sns.boxenplot(x='day', y='total_bill', data=tips) "
]
},
{
'name': 'correlation matrix',
'snippet': [
'#https://seaborn.pydata.org/examples/many_pairwise_correlations.html',
'#https://blog.algorexhealth.com/2017/09/10-heatmaps-10-python-libraries/',
'plt.figure(figsize=(18,18)) # (w,h)',
'',
'corr = df1.corr()',
'mask = np.zeros_like(corr, dtype=np.bool)',
'mask[np.triu_indices_from(mask)] = True',
'',
"#p=sns.heatmap(corr, annot=True,cmap ='RdYlGn', mask=mask)",
'',
'# Generate a custom diverging colormap',
'cmap = sns.diverging_palette(220, 10, as_cmap=True)',
'',
'p=sns.heatmap(corr, annot=True, cmap=cmap, mask=mask, center=0,',
' square=True, linewidths=.5, cbar_kws={"shrink": .5}, fmt=".2f")'
]
},
{
"name": "heatmap (better than matshow)",
"snippet": ['#https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/',
"ax = plt.figure(figsize=(10, 10)).gca()",
"mat = np.random.random((10, 10)) * 2 - 1",
"p = sns.heatmap(mat,",
" cmap=sns.diverging_palette(220, 10, as_cmap=True),",
" annot=True, linewidths=.5, ",
" cbar_kws={'shrink': .5},",
" center=0,",
" square=True,",
" vmin=-1, vmax=1",
" )",
"labels = [chr(i) for i in ord('a') + np.arange(10)]",
"ax.set_xticklabels(labels, rotation=45)",
"ax.set_yticklabels(labels, rotation=45)"
]
},
{
"name": "Clustered Heatmap/Corr mat",
"snippet": [
"#https://towardsdatascience.com/5-lesser-known-seaborn-plots-most-people-dont-know-82e5a54baea8",
"# load boston housing ...",
"",
"corr = df.iloc[:, :-1].corr() #features only",
"#https://seaborn.pydata.org/examples/many_pairwise_correlations.html",
"#https://blog.algorexhealth.com/2017/09/10-heatmaps-10-python-libraries/",
"plt.figure(figsize=(18,18)) # (w,h)",
"",
"mask = np.zeros_like(corr, dtype=np.bool)",
"mask[np.triu_indices_from(mask)] = True",
"",
"#p=sns.heatmap(corr, annot=True,cmap ='RdYlGn', mask=mask)",
"",
"# Generate a custom diverging colormap",
"cmap = sns.diverging_palette(220, 10, as_cmap=True)",
"",
"#p=sns.heatmap(corr, annot=True, cmap=cmap, mask=mask, center=0,",
"# square=True, linewidths=.5, cbar_kws={'shrink': .5}, fmt='.2f')",
"",
"sns.clustermap(corr, ",
" figsize=(18,18), annot=True,",
" cmap=cmap, center=0, square=True, linewidths=.5, fmt='.2f') #, #2d array-like rectangular data",
" #metricstr, #distance metric to use for data (default euclidean)",
" #z_scoreint, #whether to calculate z-scores or not",
" #standard_scaleint) #whether to standardize data or not "
]
},
{
"name": "Ridge plots",
"snippet": [
"sns.set(style='white', rc={'axes.facecolor': (0, 0, 0, 0)})",
"",
"# Create the data",
"rs = np.random.RandomState(1979)",
"x = rs.randn(500)",
"g = np.tile(list('ABCDEFGHIJ'), 50)",
"df = pd.DataFrame(dict(x=x, g=g))",
"m = df.g.map(ord)",
"df['x'] += m",
"",
"# Initialize the FacetGrid object",
"# pal = sns.c(10, rot=-.25, light=.7)",
"g = sns.FacetGrid(df, row='g', hue='g', aspect=15, height=.5, palette='coolwarm')",
"",
"# Draw the densities in a few steps",
"g.map(sns.kdeplot, 'x', clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)",
"g.map(sns.kdeplot, 'x', clip_on=False, color='w', lw=2, bw=.2)",
"g.map(plt.axhline, y=0, lw=2, clip_on=False)",
"",
"",
"# Define and use a simple function to label the plot in axes coordinates",
"def label(x, color, label):",
" ax = plt.gca()",
" ax.text(0, .2, label, fontweight='bold', color='k',",
" ha='left', va='center', transform=ax.transAxes)",
"",
"",
"g.map(label, 'x')",
"",
"# Set the subplots to overlap",
"g.fig.subplots_adjust(hspace=-.25)",
"",
"# Remove axes details that don't play well with overlap",
"g.set_titles('')",
"g.set(yticks=[])",
"g.despine(bottom=True, left=True)"
]
},
],
},
{
'name': 'Plot.ly',
'sub-menu': [
{
"name": "Timeseries line chart (x is Date)",
"snippet": [
"# Using graph_objects",
"import plotly.graph_objects as go",
"",
"import pandas as pd",
"df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv')",
"",
"fig = go.Figure([go.Scatter(x=df['Date'], y=df['AAPL.High'])])",
"fig.show()"
]
},
{
"name": "Default discrete color pallete",
"snippet": [
"import plotly.graph_objects as go",
"import numpy as np",
"",
"fig = go.Figure()",
"",
"def hex2rgba(h, alpha=.7):",
" h = h.lstrip('#')",
" rgb = ','.join([str(int(h[i:i+2], 16)) for i in (0, 2, 4)])",
" return f'rgba({rgb},{alpha})'",
" ",
"#default_plotly colormap, adding opacity",
"colors_hex = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']",
"colors = [hex2rgba(c) for c in colors_hex]",
"N = len(colors)",
"",
"fig.add_trace(go.Heatmap(",
" z=[np.arange(N).tolist()],",
" colorscale=[",
" [0, colors[0]],",
" [0.1, colors[0]],",
"",
" [0.1, colors[1]],",
" [0.2, colors[1]],",
"",
" [0.2, colors[2]],",
" [0.3, colors[2]],",
"",
" [0.3, colors[3]],",
" [0.4, colors[3]],",
"",
" [0.4, colors[4]],",
" [0.5, colors[4]],",
"",
" [0.5, colors[5]],",
" [0.6, colors[5]],",
"",
" [0.6, colors[6]],",
" [0.7, colors[6]],",
"",
" [0.7, colors[7]],",
" [0.8, colors[7]],",
"",
" [0.8, colors[8]],",
" [0.9, colors[8]],",
"",
" [0.9, colors[9]],",
" [1.0, colors[9]],",
" ],",
" colorbar=dict(",
" tick0=0,",
" dtick=1",
" )",
"))",
"",
"fig.show()"
]
},
'---',
{
'name': 'confusion matrix',
'snippet': ['import numpy as np',
'from sklearn.metrics import confusion_matrix',
'import plotly.figure_factory as ff',
'',
"NEG_CLASS, POS_CLASS = 'Neg', 'Pos'",
'',
'y_true = np.random.randint(0, high=1+1, size=(100,))',
'y_pred = np.random.randint(0, high=1+1, size=(100,))',
'# y_pred = y_true',
'cm = confusion_matrix(y_true, y_pred)',
'',
'cm_ = cm.ravel()',
"norm_cm_ = (cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]).ravel()",
"z_text = [[f'TN: {cm_[0]} ({norm_cm_[0]:.2f})', f'FP: {cm_[1]} ({norm_cm_[1]:.2f})'],",
" [f'FN: {cm_[2]} ({norm_cm_[2]:.2f})', f'TP: {cm_[3]} ({norm_cm_[3]:.2f})']]",
'',
'#use different colors for pos/neg',
'cm_masked = cm * np.array([[1,-1],[-1,1]])',
'fig = ff.create_annotated_heatmap(',
' x=[NEG_CLASS, POS_CLASS],',
' y=[NEG_CLASS, POS_CLASS],',
' z=cm_masked,',
' annotation_text=z_text, ',
' reversescale=False,',
' showscale=True,',
" colorscale='RdBu',",
' zmid=0,',
' xgap=2,ygap=2,',
')',
'',
'fig.update_layout(dict(',
" title='Confusion Matrix',",
' xaxis=go.layout.XAxis(',
" title='Predicted label',",
" side='bottom',",
' ),',
' yaxis=go.layout.YAxis(',
" title='True label',",
" autorange='reversed',",
' )))',
"fig['data'][0]['colorbar']['showticklabels'] = False #no tick labels",
'',
'#adjust annot fonts',
'mx = np.max(cm_)',
'med = mx/2',
'for i in range(len(fig.layout.annotations)):',
' fig.layout.annotations[i].font.size = 16',
" fig.layout.annotations[i].font.color = 'white' if (mx-cm_[i]) < med else 'black'",
'',
'iplot(fig, show_link=True)'
]
}]
},
{
'name': 'Altair',
'sub-menu': [
{
"name": "Scatter",
"snippet": [
"import pandas as pd",
"import altair as alt",
"",
"data = pd.DataFrame({'country_id': [1, 2, 3, 4, 5, 6],",
" 'population': [1, 100, 200, 300, 400, 500],",
" 'income': [50, 50, 200, 300, 300, 450]})",
"",
"# data",
"",
"alt.Chart(data).mark_circle(size=200).encode(",
" x='population:Q',",
" y='income:Q',",
" color='country_id:N',",
" tooltip=['country_id', 'population', 'income'])"
]
}]
}]
},
{
'name': 'Datasets',
'sub-menu': [
{
'name': 'Regression',
'sub-menu': [
{
"name": "Boston Housing",
"snippet": [
"import pandas as pd",
"import sklearn.datasets",
"def boston():",
" #from shap: Return the boston housing data in a nice package.",
"",
" d = sklearn.datasets.load_boston()",
" df = pd.DataFrame(data=d.data, columns=d.feature_names) # pylint: disable=E1101",
" return df, d.target # pylint: disable=E1101",
"",
"df, target = boston()",
"df['target'] = target",
"df.head()"
]
},
]
},
{
'name': 'Binary Classification',
'sub-menu': [
{
"name": "Adult census",
"snippet": [
"import numpy as np",
"import pandas as pd",
"import sklearn.datasets",
"def adult(display=False):",
" # from shap: Return the Adult census data in a nice package.",
" dtypes = [",
" ('Age', 'float32'), ('Workclass', 'category'), ('fnlwgt', 'float32'),",
" ('Education', 'category'), ('Education-Num', 'float32'), ('Marital Status', 'category'),",
" ('Occupation', 'category'), ('Relationship', 'category'), ('Race', 'category'),",
" ('Sex', 'category'), ('Capital Gain', 'float32'), ('Capital Loss', 'float32'),",
" ('Hours per week', 'float32'), ('Country', 'category'), ('Target', 'category')",
" ]",
" raw_data = pd.read_csv(",
" '/datasets/adult/adult.data',",
" names=[d[0] for d in dtypes],",
" na_values='?',",
" dtype=dict(dtypes)",
" )",
" data = raw_data.drop(['Education'], axis=1) # redundant with Education-Num",
" filt_dtypes = list(filter(lambda x: not (x[0] in ['Target', 'Education']), dtypes))",
" data['Target'] = data['Target'] == ' >50K'",
" rcode = {",
" 'Not-in-family': 0,",
" 'Unmarried': 1,",
" 'Other-relative': 2,",
" 'Own-child': 3,",
" 'Husband': 4,",
" 'Wife': 5",
" }",
" for k, dtype in filt_dtypes:",
" if dtype == 'category':",
" if k == 'Relationship':",
" data[k] = np.array([rcode[v.strip()] for v in data[k]])",
" else:",
" data[k] = data[k].cat.codes",
"",
" if display:",
" return raw_data.drop(['Education', 'Target', 'fnlwgt'], axis=1), data['Target'].values",
" else:",
" return data.drop(['Target', 'fnlwgt'], axis=1), data['Target'].values",
"",
"df, target = adult()",
"df['target'] = target",
"df.head()"
]
},
{
"name": "Pima diabetes",
"snippet": [
"import numpy as np",
"import pandas as pd",
"import sklearn.datasets",
"def pima():",
" # Returns the Pima diabetes data in a nice package.",
" ",
" raw_data = pd.read_csv(",
" '/datasets/pima/diabetes.csv'",
" )",
" return raw_data.drop(['Outcome'], axis=1), raw_data['Outcome'].values",
"",
"df, target = pima()",
"df['target'] = target",
"df.head()"
]
},
{
"name": "Titanic",
"snippet": [
"import numpy as np",
"import pandas as pd",
"import sklearn.datasets",
"def titanic():",
" # Returns the Titanic data in a nice package.",
" # https://www.kaggle.com/c/titanic/data",
" ",
" raw_data = pd.read_csv(",
" '/datasets/titanic/titanic.csv'",
" )",
" return raw_data.drop(['Survived'], axis=1), raw_data['Survived'].values",
"",
"df, target = titanic()",
"df['target'] = target",
"#df = df[['target', 'Pclass', 'Sex', 'Age', 'Ticket', 'Fare', 'Embarked']]",
"df.head()"
]
},
]
},
{
'name': 'Multi-Class',
'sub-menu': [
{
"name": "Iris",
"snippet": [
"import pandas as pd",
"import sklearn.datasets",
"def iris(display=True):",
" #from shap: Return the classic iris data in a nice package.",
" # display: targets are str, otherwise int",
"",
" d = sklearn.datasets.load_iris()",
" df = pd.DataFrame(data=d.data, columns=d.feature_names) # pylint: disable=E1101",
" if display:",
" return df, [d.target_names[v] for v in d.target] # pylint: disable=E1101",
" else:",
" return df, d.target # pylint: disable=E1101",
"",
"df, target = iris()",
"df['target'] = target",
"df.head()"
]
},
]
},
]
},
'---',
{
'name': 'Bash',
'sub-menu': [
{
'name': 'nice tree of recursive dir listing',
'snippet': ['!tree -d /datasets/dogscats/']
}]
},
{
'name': 'Debugger',
'sub-menu': [
{
'name': 'set a breakpoint/set_trace()',
'snippet': ['#http://wangchuan.github.io/coding/2017/07/12/ipdb-cheat-sheet.html',
'',
'from IPython.core.debugger import set_trace',
'def my_function(x):',
' answer = 42',
' #set_trace() # <-- uncomment!',
' #Python 3.7 has `breakpoint()` built-in!',
' #type `exit` to quit the debugger',
' answer += x',
' return answer',
'',
'my_function(12)'
]
}]
},
{
'name': 'Testing',
'sub-menu': [
{
'name': 'unittest (great assert, no class)',
'snippet': ['import unittest',
"T = unittest.TestCase('__init__')",
'#T.assertEqual((1,2), (2,1))'
]
}]
},
{
'name': 'Formatting & Printing',
'sub-menu': [
{
'name': 'Formatting strings examples (python 3.5)',
"snippet": [
"#https://pyformat.info/",
"#old",
"'s=%s, i=%d' % ('str', 15)",
"",
"#new",
"'{} {}'.format('one', 'two')",
"'{1} {0}'.format('one', 'two')",
"",
"'{:d} {:.2f}'.format(15, 3.1415)",
"",
"#newest",
"data = {'first': 'Hodor', 'last': 'Hodor!'}",
"'{first} {last}'.format(**data)",
"",
"from datetime import datetime",
"'{:%d-%m-%Y %H:%M:%S}'.format(datetime(2001, 2, 3, 16, 5))",
""
]
},
{
'name': 'pprint_color()',
"snippet": [
"from pprint import pformat, pprint",
"",
"from pygments import highlight",
"# from pygments.formatters.terminal import TerminalFormatter # dark theme",
"from pygments.formatters.terminal256 import Terminal256Formatter #light theme",
"from pygments.lexers.python import PythonLexer",
"",
"",
"def pprint_color(obj, *args, **kwargs):",
"# print(highlight(pformat(obj), PythonLexer(), TerminalFormatter()))",
" print(highlight(pformat(obj, *args, **kwargs), PythonLexer(), Terminal256Formatter()))"
]
},
{
'name': 'Print progress in the same line',
'snippet': ['num_episodes = 50000',
'for i in range(1, num_episodes + 1):',
" # Print out which episode we're on, useful for debugging.",
' if i % 100 == 0:',
" print('\rEpisode {}/{}.'.format(i, num_episodes), end='')",
' sys.stdout.flush()'
]
},
{
'name': 'Render JSON (great for hierchical dicts)',
'external-link': 'https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html',
}]
},
{
'name': 'Iter',
'sub-menu': [
{
'name': 'zip and unzip into lists',
'snippet': ['sub1a = [1, 3, 8]; sub2a = [2, 4, 9]',
'l1 = list(zip(sub1a, sub2a)) #[(1, 2), (3, 4), (8, 9)]',
'sub1b, sub2b = list(zip(*l1)) #(1, 3, 8), (2, 4, 9)',
'print(list(sub1b)) #[1, 3, 8]',
'print(list(sub2b)) #[2, 4, 9]'
]
}]
},
{
'name': 'TQDM',
'sub-menu': [
{
'name': 'TQDM for notebook',
'snippet': ['from tqdm.auto import tqdm, trange']
},
{
"name": "TQDM with description",
"snippet": [
"import time",
"from tqdm.auto import tqdm",
"",
"series_list = [str(i) for i in range(100)]",
"with tqdm(total=len(series_list)) as t:",
" for series in series_list:",
" t.set_description(f'Series: `{series}`')",
" t.update()",
"",
" time.sleep(0.1)"
]
},
]
},
{
'name': 'Dictionaries',
'sub-menu': [
{
'name': 'flattening dict',
'snippet': ["def flatten_dict(dd, separator='_', prefix=''):",
' """',
' Flattens a dict, adding separator (and prefix or `level0`) between levels',
' """',
' return {',
' prefix + separator + k if prefix else k: v',
' for kk, vv in dd.items()',
' for k, v in flatten_dict(vv, separator, kk).items()',
' } if isinstance(dd, dict) else {prefix: dd}',
'',
"d = {'a': 1, 'b': {'c':2, 'd':3}}",
"flatten_dict(d, '.')"
]
}]
},
{
'name': 'Parsing',
'sub-menu': [
{
'name': 'from string to dict',
'snippet': ['import ast',
'',
'# Convert from a string to a dictionary',
'ast.literal_eval("{\'a\': 1, \'b\': 2}")'
]
}]
},
{
'name': 'RegEx',
'sub-menu': [
{
"name": "pattern exists?",
"snippet": [
"import re",
"p = re.compile(r'^[A]{0,1}F[p|P]{0,1}[\\d|z]$')",
"assert p.match('AF1') is not None",
"assert p.match('AF1m') is None"
]
},
{
'name': 'search with groups',
'snippet': ['import re',
'',
"regex = r'\\$\\$ (\\(\\w{1,3}\\))$'",
"test_str = '$$\\int S_{xx}(\\omega)d\\omega^{-1/2}$$ (Fp1)'",
'',
'm = re.search(regex, test_str)',
'',
"assert m is not None, 'Could not find regex on `%s`' % test_str",
'',
"print(f'Match {matchNum} was found at {m.start()}-{m.end()}: {m.group()}')",
'',
'for groupNum in range(0, len(m.groups())):',
' groupNum = groupNum + 1',
'',
" print('Group {groupNum} found at {start}-{end}: {group}'.format(groupNum = groupNum, start = m.start(groupNum), end = m.end(groupNum), group = m.group(groupNum)))",
'',
'g = m.groups()',
'print(g)'
]
},
'---',
{
'name': 'regex101.com',
'external-link': 'https://regex101.com/',
}
]
},
{
'name': 'IO',
'sub-menu': [
{
'name': 'Pickling',
'sub-menu': [
{
'name': 'dump and load',
'snippet': ['import pickle',
'',
'a = (df_movies, missing)',
'',
"with open('tmp.pickle', 'wb') as handle:",
' pickle.dump(a, handle)',
'',
"with open('tmp.pickle', 'rb') as handle:",
' pickle.load(handle)'
]
}]
},
{
"name": "Numpy savez",
"snippet": [
"np.savez('out/shap_interaction_values.testset.npz', shap_interaction_values=shap_interaction_values)",
"shap_interaction_values = np.load('out/shap_interaction_values.testset.npz')['shap_interaction_values']"
]
},
'---',
{
"name": "Read file contents to string",
"snippet": [
"with open('data.txt', 'r') as file:",
" data = file.read()"
]
},
{
"name": "Using pathlib to create paths",
"snippet": [
"import pathlib",
"",
"import homer",
"",
"PACKAGE_ROOT = pathlib.Path(homer.__file__).resolve().parent #resolve() normalizes the path (sym links extraction, path correction)",
"TRAINED_MODEL_DIR = PACKAGE_ROOT / 'trained_models'",
"DATASET_DIR = PACKAGE_ROOT / 'datasets'"
]
},
]
},
{
'name': 'Timing',
'sub-menu': [
{
'name': 'with contextmanager',
'snippet': ['import time',
'from contextlib import contextmanager',
'',
'@contextmanager',
'def timer(title):',
' t0 = time.time()',
' yield',
" print('{} - done in {:.0f}s'.format(title, time.time() - t0))",
'',
"with timer('ABC'):",
' time.sleep(2)'
]
}]
},
{
'name': 'Download',
'sub-menu': [
{
'name': 'Download and extract zip with tqdm',
'snippet': ['from urllib.request import urlretrieve',
'from os.path import isfile, isdir',
'from tqdm import tqdm',
'import zipfile',
'',
"dataset_folder_path = 'data'",
"dataset_filename = 'text8.zip'",
"dataset_name = 'Text8 Dataset'",
'',
'class DLProgress(tqdm):',
' last_block = 0',
'',
' def hook(self, block_num=1, block_size=1, total_size=None):',
' self.total = total_size',
' self.update((block_num - self.last_block) * block_size)',
' self.last_block = block_num',
'',
'if not isfile(dataset_filename):',
" with DLProgress(unit='B', unit_scale=True, miniters=1, desc=dataset_name) as pbar:",
' urlretrieve(',
" 'http://mattmahoney.net/dc/text8.zip',",
' dataset_filename,',
' pbar.hook)',
'',
'if not isdir(dataset_folder_path):',
' with zipfile.ZipFile(dataset_filename) as zip_ref:',
' zip_ref.extractall(dataset_folder_path)',
'',
"with open('data/text8') as f:",
' text = f.read()'
]
},
{
'name': 'download url to file',
'snippet': ['import urllib.request',
"urllib.request.urlretrieve('about:blank', 'file.txt')"
]
}]
},
'---',
{
'name': 'EDA',
'sub-menu': [
{
"name": "Predictive Power Score heatmap (seaborn)",
"snippet": [
"#https://towardsdatascience.com/rip-correlation-introducing-the-predictive-power-score-3d90808b9598",
"import ppscore as pps",
"df_pps = pps.matrix_tqdm(df)",
"",
"#https://blog.algorexhealth.com/2017/09/10-heatmaps-10-python-libraries/",
"ax = plt.figure(figsize=(18,18)).gca() # (w,h)",
"",
"mask = np.zeros_like(df_pps.values, dtype=np.bool)",
"mask[np.diag_indices_from(mask)] = True",
"",
"# Generate a custom colormap",
"cmap = sns.color_palette('Blues')",
"# cmap = sns.color_palette('YlOrRd')",
"",
"p=sns.heatmap(df_pps, annot=True, cmap=cmap, mask=mask,",
" square=True, linewidths=.5, cbar_kws={'shrink': .5}, fmt='.2f',",
" ax=ax)",
"",
"labels=df_pps.columns",
"ax.xaxis.tick_top(); ax.tick_params(direction='out', width=1, colors='k', top=True, left=True)",
"",
"ax.set_xticklabels(labels, rotation=90);",
"ax.set_yticklabels(labels, rotation=0);",
"",
"ax.set_ylabel('Predictee')",
"ax.set_xlabel('Predictor');",
"",
"# The `target` row of the matrix tells you that the best univariate predictor of the it",
"# on regression, MAE=0 yield 1.0 score and regressor that always predicts the median yields 0.0 score.",
"# on classification, F1=1 yield 1.0 score and classifier that always predicts the most freq class yields 0.0 score."
]
},
{
"name": "Predictive Power Score heatmap (plotly)",
"snippet": [
"#https://towardsdatascience.com/rip-correlation-introducing-the-predictive-power-score-3d90808b9598",
"fig = df_pps.T.iplot(kind='heatmap', colorscale='Blues', asFigure=True)",
"",
"FONT_SIZE = 10",
"",
"#NOTE: annotations are too heavy for a matrix with ~100 features",
"annotations = []",
"for n, row in enumerate(df_pps.itertuples()):",
" ix = row[0] #predicted",
" for m, val in enumerate(row[1:]): # but index",
" annotations.append(",
" go.layout.Annotation(text=f'{val:.2f}',",
" x=ix,",
" y=df_pps.columns[m],",
" xref='x1',",
" yref='y1',",
" showarrow=False,",
" font=dict(size=FONT_SIZE, color='black' if val<.8 else 'white')))",
"",
"fig.update_layout(autosize=False,",
" width=500,",
" height=500,",
" paper_bgcolor='rgba(0,0,0,0)',",
" plot_bgcolor='rgba(0,0,0,0)',",
" xaxis={",
" 'title': {'text': '<b>Predictor</b>'},",
" 'side': 'top',",
" 'tickfont': {'size': FONT_SIZE}",
" },",
" yaxis={",
" 'title': {'text': '<b>Predictee</b>'},",
" 'autorange': 'reversed',",
" 'tickfont': {'size': FONT_SIZE}",
" },",
" annotations=annotations)",
"fig.show()",
"# The `target` row of the matrix tells you the best univariate predictor of the target.",
"# on regression, MAE=0 yields 1.0 score and a regressor that always predicts the median yields 0.0 score.",
"# on classification, F1=1 yields 1.0 score and a classifier that always predicts the most freq class yields 0.0 score."
]
},
{
"name": "Predictive Power Score target hmap (plotly)",
"snippet": [
"#https://towardsdatascience.com/rip-correlation-introducing-the-predictive-power-score-3d90808b9598",
"target = df_all.columns[-1]",
"feats =df_all.columns[:-1]",
"d = {}",
"for f in feats:",
" res = pps.score(df_all, f, target)",
" d[f] = res['ppscore']",
"",
"df_pps_target = pd.DataFrame([d], index=['target']).T",
"FONT_SIZE = 10",
"fig = df_pps_target.iplot(kind='heatmap', colorscale='Blues', asFigure=True)",
"",
"#NOTE: annotations are too heavy for a matrix with ~100 features",
"annotations = []",
"for n, row in enumerate(df_pps_target.itertuples()):",
" ix = row[0] #predicted",
" for m, val in enumerate(row[1:]): # but index",
" annotations.append(",
" go.layout.Annotation(text='{:.2f}'.format(val).lstrip('0'),",
" x=ix,",
" y=df_pps_target.columns[m],",
" xref='x1',",
" yref='y1',",
" showarrow=False,",
" font=dict(size=FONT_SIZE, color='black' if val<(.8*df_pps_target.values.max()) else 'white')))",
"",
"fig.update_layout(autosize=False,",
" width=1000,",
" height=300,",
" paper_bgcolor='rgba(0,0,0,0)',",
" plot_bgcolor='rgba(0,0,0,0)',",
" xaxis={",
" 'title': {'text': '<b>Predictor</b>'},",
" 'side': 'top',",
" 'tickfont': {'size': FONT_SIZE}",
" },",
" yaxis={",
" 'title': {'text': '<b>Predictee</b>'},",
" 'autorange': 'reversed',",
" 'tickfont': {'size': FONT_SIZE}",
" },",
" annotations=annotations)",
"fig.show()",
"# The `target` row of the matrix tells you the best univariate predictor of the target.",
"# on regression, MAE=0 yields 1.0 score and a regressor that always predicts the median yields 0.0 score.",
"# on classification, F1=1 yields 1.0 score and a classifier that always predicts the most freq class yields 0.0 score."
]
},
]
},
{
'name': 'Numpy',
'sub-menu': [
{
'name': 'reshaping',
'snippet': ['import numpy as np',
'arr = np.random.randn(100) # (100,)',
'col_vec = arr[:, np.newaxis] # (100,1)',
'row_vec = arr[np.newaxis, :] # (1, 100)',
'arr2 = col_vec.ravel() # (100,)',
'print(arr.shape, col_vec.shape, row_vec.shape, arr2.shape)'
]
},
{
"name": "Numpy unique (like Series.value_counts())",
"snippet": [
"y = np.random.randint(2, size=(100,))",
"{v:c for (v,c) in np.unique(y, return_counts=True)}"
]
}]
},
{
'name': 'Pandas',
'sub-menu': [
{
'name': 'fast DataFrame creation',
'snippet': ['import pandas as pd',
'import numpy as np',
"pd.DataFrame(np.random.rand(4,8), columns=list('abcdefgh'))"
]
},
{
'name': 'display options and formatting',
'snippet': ['pd.options.display.max_rows=50',
'pd.options.display.max_columns=200',
'pd.options.display.max_colwidth=60 #no ...',
"pd.options.display.float_format='{:.2f}'.format",
"#pd.reset_option('all') #reset to default",
"#pd.describe_option('rows') #describe all options that contains 'rows' in their name",
'',
'(df.head(10).style.format({',
" 'Age': '{:.1f}',",
" 'Date': '{:%m/%d/%y}'",
'}))'
]
},
{
'name': 'profile report',
'snippet': ['import pandas_profiling',
'pandas_profiling.ProfileReport(df)'
]
},
'---',
{
'name': 'simple filter',
'snippet': ["df = pd.read_csv('http://bit.ly/drinksbycountry')",
"df[(df.continent == 'Europe') & (df.beer_servings > 200)]"
]
},
{
"name": "assert no NaNs",
"snippet": [
"_ = df.dropna(axis=0, subset=df.columns, how='any', inplace=False)",
"assert _.shape == df.shape, '`df` has nans'"
]
},
{
"name": "binnify and return indexes of bins",
"snippet": [
"import pandas as pd",
"import numpy as np",
"np.random.seed(1)",
"df = pd.DataFrame(np.random.randint(0, 9+1, size=(10,1)), columns=list('a'))",
"",
"bins = np.arange(0,9+1,2)",
"df['a_bin'] = np.digitize(df['a'], bins=bins)",
"df"
]
},
{
'name': 'pivot_table',
'snippet': ["df = pd.read_csv('http://bit.ly/kaggletrain') #titanic",
"tbl = df.pivot_table(index='Sex', columns='Pclass', values='Survived', aggfunc='count')",
'#add margins=True, for summation',
"tbl.iplot(kind='bar', barmode='stack')",
'tbl.head()'
]
},
{
"name": "clip() values by lower/upper",
"snippet": [
"import pandas as pd",
"import numpy as np",
"df1 = pd.DataFrame(np.random.randint(2, size=(4,4))*2-1 * np.random.rand(4,4), columns=list('abcd')) #[-1,1]",
"df2 = df1.clip(lower=-0.5,upper=0.5)",
"",
"print(df1.head(1))",
"print(df2.head(1))"
]
},
{
"name": "transform() values using lambda",
"snippet": [
"import pandas as pd",
"import numpy as np",
"df = pd.DataFrame(np.random.rand(4,4), columns=list('abcd')) #[0,1]",
"df = df.transform(lambda x: np.log(x))",
"# df = df.transform([np.sqrt, np.exp])",
"",
"df.head()"
]
},
{
"name": "groupby with named aggregation",
"snippet": [
"import numpy as np",
"import pandas as pd",
"df = pd.read_csv('/datasets/direct_marketing/DirectMarketing.csv')",
"",
"# df[['Age','Salary','AmountSpent']].groupby('Age').agg({'Salary':'mean', 'AmountSpent':'sum'}).round(2)",
"",
"df[['Age','Salary','AmountSpent']].groupby('Age').agg(",
" avgSalary = ('Salary','mean'), #redundant pd.NamedAgg",
" totalSpent = ('AmountSpent','sum'), #this can be a lambda x: also, e.g. np.sum",
" count = ('Age','count') #here is a counter",
")"
]
},
{
"name": "groupby agg (apply() vs transform())",
"snippet": [
"import numpy as np",
"import pandas as pd",
"",
"df = pd.DataFrame({",
" 'restaurant_id': [101,102,103,104,105,106,107],",
" 'address': ['A','B','C','D', 'E', 'F', 'G'],",
" 'city': ['London','London','London','Oxford','Oxford', 'Durham', 'Durham'],",
" 'sales': [10,500,48,12,21,22,14]",
"})",
"",
"g = df.groupby('city')['sales']",
"df_t = g.transform(np.sum) #sum sales by city",
"df_a = g.apply(np.sum)",
"",
"print(df_a) #operates on multiple series",
"print(df_t) #operates on a single series",
"",
"# here is something we can achieve efficiently with transform()",
"(df['sales']/df_t).apply(lambda x: format(x, '.2%')) #% sales per city"
]
},
]
},
{
'name': 'Pre-Processing',
'sub-menu': [
{
"name": "zscore",
"snippet": [
"from scipy.stats import zscore",
"z_data = df_all[df_all.columns[:-1]].apply(zscore)",
"z_data = z_data[(np.abs(z_data) < 4).all(axis=1)] #remove rows with outliers",
"sns.boxplot(data=z_data) #features only"
]
},
{
'name': 'scale() to {0,1} function',
'snippet': ['def scale(x, raw_range=(None, 255), feature_range=(-1, 1)):',
' # scale to (0, 1) ',
' source_range = np.zeros(2)',
' source_range[1] = x.max() if raw_range[1] is None else raw_range[1]',
' source_range[0] = x.min() if raw_range[0] is None else raw_range[0]',
' ',
' x = ((x - source_range[0])/(source_range[1] - source_range[0]))',
' ',
' # scale to feature_range ',
' min, max = feature_range',
' x = x * (max - min) + min',
' return x'
]
},
]
},
{
'name': 'ML',
'sub-menu': [
{
'name': 'Supervised',
'sub-menu': [
{
'name': 'Random Forest',
'sub-menu': [
{
'name': 'RandomForestRegressor',
'snippet': ['from sklearn.ensemble import RandomForestRegressor',
'rf = RandomForestRegressor()',
'rf.fit(X_train, y_train)',
'predictions = rf.predict(X_valid)',
'rmse = np.sqrt(np.mean(np.square(predictions - y_valid)))',
'print(rmse)'
]
}]
},
{
'name': 'SVM',
'sub-menu': [
{
'name': 'svm for binary classification',
'snippet': ['from sklearn import svm',
"# clf = svm.SVC(C=1.0, kernel='rbf', gamma=.7)",
"clf = svm.SVC(kernel='linear', probability=True)",
'',
'clf.fit(X_train, y_train)',
'prob = clf.predict_proba(X_train)[:,1]',
'acc = clf.score(X_train, y_train)',
'print(acc) #WARN: be careful when using imbalanced classes'
]
},
{
'name': 'svm with `rbf` kernel for classification',
'snippet': ['import numpy as np; np.random.seed(90210)',
'from numpy.random import permutation',
'from sklearn import svm, datasets',
'',
'iris = datasets.load_iris()',
'per = permutation(iris.target.size)',
'iris.data = iris.data[per]',
'iris.target = iris.target[per]',
'',
"clf = svm.SVC(C=1.0, kernel='rbf', gamma=.7)",
'clf.fit(iris.data[:90], iris.target[:90])',
'',
'acc = clf.score(iris.data[90:], iris.target[90:])',
'print(acc)'
]
}]
},
{
'name': 'LightGBM',
'sub-menu': [
{
'name': 'train-test classification',
'snippet': ['import lightgbm as lgb',
'from sklearn.metrics import roc_auc_score',
'',
'# Model with default hyperparameters',
"model = lgb.LGBMClassifier(objective = 'binary', random_state=RANDOM_SEED)",
'',
'model.fit(X, y)',
'',
'predictions = model.predict_proba(X_test)[:, 1]',
'auc = roc_auc_score(y_test, predictions)',
'',
"print('The baseline score on the test set is {:.4f}.'.format(auc))"
]
},
{
'name': 'train-cv classification',
'snippet': ['import lightgbm as lgb',
'',
'# Create a lgb dataset',
'train_set = lgb.Dataset(X, label = y)',
'',
'# Perform cross validation with 10 folds (with early stopping)',
'params = {} #default',
"r = lgb.cv(params, train_set, num_boost_round = 10000, nfold = 10, metrics = 'auc', ",
' early_stopping_rounds = 100, verbose_eval = False, seed = RANDOM_SEED)',
'',
'# Highest score',
"r_best = np.max(r['auc-mean'])",
'',
'# Standard deviation of best score',
"r_best_std = r['auc-stdv'][np.argmax(r['auc-mean'])]",
'',
"print('The maximium ROC AUC on the validation set was {:.5f} with std of {:.5f}.'.format(r_best, r_best_std))",
"print('The ideal number of iterations was {}.'.format(np.argmax(r['auc-mean']) + 1))"
]
}]
},
{
"name": "KNN with hpo",
"snippet": [
"import pandas as pd",
"import numpy as np",
"",
"from sklearn.neighbors import KNeighborsClassifier",
"from sklearn.model_selection import GridSearchCV",
"",
"from sklearn.model_selection import cross_val_score",
"from sklearn.model_selection import train_test_split",
"",
"from sklearn.metrics import accuracy_score, classification_report",
"",
"df = pd.read_csv('/datasets/diabetes/diabetes_data.csv')",
"X,y = df.drop(columns=['diabetes']), df['diabetes'].values",
"",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0, stratify=y)",
"",
"knn = KNeighborsClassifier()",
"param_grid = {'n_neighbors': np.arange(start=1, stop=25+1, step=2), 'weights': ['uniform', 'distance']}",
"",
"knn_gscv = GridSearchCV(knn, param_grid, cv=5, verbose=1) #n_jobs=4",
"knn_gscv.fit(X_train, y_train)",
"",
"print(f'best params: {knn_gscv.best_params_}, mean cv score: {knn_gscv.best_score_}')",
"knn = knn_gscv.best_estimator_",
"print(knn)",
"",
"print('')",
"pred = knn.predict(X_test)",
" ",
"# evaluate and return accuracy",
"print(f'Accuracy of best_estimator on test set: {knn.score(X_test, y_test)}')"
]
},
]
},
{
'name': 'Unsupervised',
'sub-menu': [
{
"name": "Clustering metrics",
"snippet": [
"## [clustering metrics](https://scikit-learn.org/stable/modules/clustering.html#k-means)",
"* `Inertia (within-cluster sum-of-squares)`: $\\sum_{i=0}^{n}\\min_{\\mu_j \\in C}(||x_i - \\mu_j||^2)$",
"* `(Adjusted) Rand Index`: compares `labels_true` to `labels_pred` being permutation-invariant (random labeling = 0)",
"* `(Adjusted) Mutual Information`: compares `labels_true` to `labels_pred` being permutation-invariant (random labeling = 0; upper bound = 1)",
"* `Homogeneity, completeness and V-measure` ($[0,1]$, higher is better): ",
" * `Homogeneity`: each cluster contains only members of a single class",
" * `completeness`: all members of a given class are assigned to the same cluster",
" * `V-measure`: Their harmonic mean (with $\\beta=1$)",
"* `Silhouette`: higher score relates to a model with better defined clusters.",
" * Bounded between -1 for incorrect clustering and +1 for highly dense clustering. Zero indicate overlapping clusters.",
" * The score is higher when clusters are dense and well separated, which relates to a standard concept of a cluster."
]
},
{
"name": "IsolationForest for outlier detection",
"snippet": [
"import time",
"",
"import numpy as np",
"import matplotlib",
"import matplotlib.pyplot as plt",
"",
"from sklearn.datasets import make_blobs",
"from sklearn.ensemble import IsolationForest",
"",
"RANDOM_STATE = 90210",
"",
"# Example settings",
"n_samples = 300",
"outliers_fraction = 0.15",
"n_outliers = int(outliers_fraction * n_samples)",
"n_inliers = n_samples - n_outliers",
"",
"",
"# define outlier/anomaly detection methods to be compared",
"algorithm = IsolationForest(contamination=outliers_fraction,",
" random_state=RANDOM_STATE)",
"",
"# Define datasets",
"blobs_params = dict(random_state=RANDOM_STATE, n_samples=n_inliers, n_features=2)",
"X = make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5],",
" **blobs_params)[0]",
"",
"print(f'Shape: {X.shape}')",
"",
"# Compare given classifiers under given settings",
"xx, yy = np.meshgrid(np.linspace(-7, 7, 150),",
" np.linspace(-7, 7, 150))",
"",
"rng = np.random.RandomState(RANDOM_STATE)",
"",
"",
"# Add outliers",
"X = np.concatenate([X, rng.uniform(low=-6, high=6,",
" size=(n_outliers, 2))], axis=0)",
"",
"t0 = time.time()",
"algorithm.fit(X)",
"t1 = time.time()",
"",
"",
"# fit the data and tag outliers",
"y_pred = algorithm.fit(X).predict(X) # [-1,1]",
"y_pred = (y_pred *.5 +.5).astype(int) # [0,1] # zeros are outliers",
"",
"# plot the levels lines and the points",
"Z = algorithm.predict(np.c_[xx.ravel(), yy.ravel()])",
"Z = Z.reshape(xx.shape)",
"plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')",
"",
"colors = np.array(['#377eb8', '#ff7f00'])",
"plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])",
"",
"plt.xlim(-7, 7)",
"plt.ylim(-7, 7)",
"plt.xticks(())",
"plt.yticks(())",
"",
"print('Duration: ' + ('%.2fs' % (t1 - t0)).lstrip('0'))",
"",
"#value_counts",
"y = np.bincount(y_pred)",
"ii = np.nonzero(y)[0]",
"np.vstack((ii, y[ii])).T"
]
},
]
},
'---',
{
'name': 'Dimensionality Reduction',
'sub-menu': [
{
"name": "PCA",
"snippet": [
"import matplotlib.pyplot as plt",
"from sklearn.datasets import make_classification",
"from sklearn.decomposition import PCA",
"X, y = make_classification(1000, 5, n_informative=1, n_classes=2, n_clusters_per_class=1, random_state=0)",
"n_components = 2",
"pca = PCA(n_components=n_components)",
"X_ = pca.fit_transform(X)",
"",
"print('explained_variance_ratio_:', pca.explained_variance_ratio_)",
"# sns.barplot(np.arange(n_components)+1, pca.explained_variance_ratio_)",
"print('singular_values_:', pca.singular_values_)",
"",
"ax = plt.figure(figsize=(8,8)).gca()",
"ax.scatter(X_[:, 0], X_[:, 1], c=y, cmap=plt.cm.Spectral)"
]
},
{
"name": "T-SNE",
"snippet": [
"import matplotlib.pyplot as plt",
"from sklearn.datasets import make_classification",
"from sklearn.manifold import TSNE",
"X, y = make_classification(1000, 5, n_informative=1, n_classes=2, n_clusters_per_class=1, random_state=0)",
"n_components = 2",
"",
"#It is highly recommended to use another dimensionality reduction method (e.g. PCA for dense data or TruncatedSVD for sparse data) ",
"# to reduce the number of dimensions to a reasonable amount (e.g. 50) if the number of features is very high.",
"",
"tsne = TSNE(n_components=n_components, init='pca',",
" random_state=0)",
"",
"X_ = tsne.fit_transform(X)",
"",
"ax = plt.figure(figsize=(8,8)).gca()",
"ax.scatter(X_[:, 0], X_[:, 1], c=y, cmap=plt.cm.Spectral)",
"",
"print('n_iter_:', tsne.n_iter_)",
"print('kl_divergence_:', tsne.kl_divergence_)"
]
}]
},
'---',
{
'name': 'binary classification metrices',
'snippet': [
'* **Sensitivity** (*TPR/recall*) - proportion of positives that are correctly classified',
'* **Specificity** (*TNR*) - proportion of negatives that are correctly classified',
'',
"* **FPR** (*1-Specificity*) - When it's actually no, how often does it predict yes?",
'',
'* **Precision** - proportion of true positives out of all detected positives'
]
},
{
'name': 'binary classification AUC ROC and PR',
'snippet': ['from sklearn.metrics import roc_auc_score, average_precision_score',
'',
'auc_roc = roc_auc_score(y_train, prob, sample_weight=None)',
"print('auc_roc', auc_roc)",
'auc_pr = average_precision_score(y_train, prob, sample_weight=None)',
"print('auc_pr', auc_pr)",
'',
'from sklearn.utils import compute_sample_weight as sklearn_compute_sample_weight',
"sample_weight = sklearn_compute_sample_weight(class_weight='balanced', y=y_train)",
'',
'auc_roc = roc_auc_score(y_train, prob, sample_weight=sample_weight)',
"print('auc_roc (using sample_weight)', auc_roc)",
'auc_pr = average_precision_score(y_train, prob, sample_weight=sample_weight)',
"print('auc_pr (using sample_weight)', auc_pr)"
]
},
{
'name': 'plotly ROC for binary classification',
'snippet': ['from sklearn.metrics import roc_curve, roc_auc_score',
'',
'y_true = np.random.randint(0, high=1+1, size=(100,))',
'y_pred = np.random.rand(*(100,))',
'# y_pred = y_true',
'',
'use_sample_weight = False',
'sample_weight=None',
'from sklearn.utils import compute_sample_weight as sklearn_compute_sample_weight',
'if use_sample_weight:',
" sample_weight = sklearn_compute_sample_weight(class_weight='balanced', y=y_true)",
'',
'fpr, tpr, thresholds = roc_curve(y_true,y_pred, sample_weight=sample_weight)',
'auc = roc_auc_score(y_true,y_pred, sample_weight=sample_weight)',
'',
'lw = 2',
'',
'trace1 = go.Scatter(',
' x=fpr,',
' y=tpr,',
" mode='lines',",
" line=dict(color='darkorange', width=lw),",
" fill='tonexty',",
" name=f'ROC curve<br>(area = {auc:.4f})',",
" customdata=[f'TH: {t:.2f}' for t in thresholds],",
" hovertemplate='FPR: %{x:.2f}<br>' + 'TPR: %{y:.2f}<br>' +",
" '%{customdata}<br>' + '<extra></extra>',",
' showlegend=True',
')',
'',
'#WARN: when classes are imbalanced this might not be accurate',
'trace2 = go.Scatter(x=[0, 1.01],',
' y=[0, 1.01],',
" mode='lines',",
" line=dict(color='navy', width=lw, dash='dash'),",
" name=f'Random classifier',",
' showlegend=True)',
'',
"layout = go.Layout(title='<b>R</b>eceiver <b>O</b>perating <b>C</b>haracteristic curve' +",
" ('<br><i>(with sample weighting)</i>' if use_sample_weight else ''),",
" xaxis=dict(title='<b>FPR</b> (1-Specificity)<br>Incorrectly predicted positives', range=[0.001, 1.01]),",
" yaxis=dict(title='<b>TPR</b> (Sensitivity)<br>Positives detected out of all positives', range=[0.001, 1.01]))",
'',
'fig = go.Figure(data=[trace1, trace2], layout=layout)',
'',
'',
"#plotly.io.write_image(fig, 'figures/sup1a.pdf')",
'',
'iplot(fig, show_link=True)'
]
},
{
'name': 'plotly PR curve for binary classification',
'snippet': ['from sklearn.metrics import precision_recall_curve, average_precision_score',
'y_true = np.random.randint(0, high=1+1, size=(100,))',
'y_pred = np.random.rand(*(100,))',
'# y_pred = y_true',
'',
'use_sample_weight = False',
'sample_weight=None',
'from sklearn.utils import compute_sample_weight as sklearn_compute_sample_weight',
'if use_sample_weight:',
" sample_weight = sklearn_compute_sample_weight(class_weight='balanced', y=y_true)",
' ',
'precision, recall, thresholds = precision_recall_curve(y_true,y_pred, ',
' sample_weight=sample_weight)',
'',
'#close the curve',
'recall = np.append([1.], recall)',
'precision = np.append([0.], precision)',
'thresholds = np.append([0.], thresholds)',
'',
'auc = average_precision_score(y_true,y_pred, sample_weight=sample_weight)',
'',
'lw = 2',
'',
'trace1 = go.Scatter(',
' x=recall,',
' y=precision,',
" mode='lines',",
" line=dict(color='darkorange', width=lw),",
" fill='tonexty',",
" name=f'PR curve<br>(area = {auc:.4f})',",
" customdata=[f'TH: {t:.2f}' for t in thresholds],",
" hovertemplate='Recall: %{x:.2f}<br>' + 'Precision: %{y:.2f}<br>' +",
" '%{customdata}<br>' + '<extra></extra>',",
' showlegend=True',
')',
'',
'#WARN: when classes are imbalanced this might not be accurate',
'trace2 = go.Scatter(x=[0, 1.01],',
' y=[.5, .5],',
" mode='lines',",
" line=dict(color='navy', width=lw, dash='dash'),",
" name=f'Random classifier',",
' showlegend=True)',
'',
'eps = np.finfo(np.float32).eps',
"layout = go.Layout(title='<b>P</b>recision-<b>R</b>ecall curve' + ",
" ('<br><i>(with sample weighting)</i>' if use_sample_weight else ''),",
" xaxis=dict(title='<b>Recall</b><br>(Positives that were correctly classified)', range=[0.001, 1.01]),",
" yaxis=dict(title='<b>Precision</b><br>(True positives out of all detected positives)', range=[0.001, 1.01]))",
'',
'',
'fig = go.Figure(data=[trace1, trace2], layout=layout)',
'',
"#plotly.io.write_image(fig, 'figures/sup1b.pdf')",
'',
'iplot(fig, show_link=True)'
]
},
{
'name': 'train_test_split',
'snippet': ['from sklearn.model_selection import train_test_split',
'X, X_val, y, y_val = train_test_split(',
' X, y, test_size=0.33, random_state=RANDOM_SEED)'
]
}
]
},
{
'name': 'DL',
'sub-menu': [
{
'name': 'PyTorch',
'sub-menu': [
{
'name': 'imports and GPU',
'snippet': ['import torch',
'import torch.nn as nn',
'import torch.nn.functional as F',
'import torch.optim as optim',
'import torchvision',
'',
"assert torch.cuda.is_available(), 'No GPU!'",
"DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
'print(DEVICE)',
'',
'X = torch.rand(1,5).to(DEVICE)',
'X'
]
},
{
'name': 'torch model summary',
'snippet': ['import torchsummary',
'',
"torchsummary.summary(model, (3, 224, 224), device='cpu')"
]
},
{
'name': 'linear regression on gpu',
'snippet': ['import torch',
'from torch import optim',
'from torch import nn',
'',
'def get_data():',
' from sklearn.datasets import make_regression',
'',
' n_features = 1',
' n_samples = 100',
'',
' X, y = make_regression(',
' n_samples=n_samples,',
' n_features=n_features,',
' noise=10,',
' )',
' ',
' X = torch.from_numpy(X).float()',
' y = torch.from_numpy(y.reshape((n_samples, n_features))).float()',
' ',
' X, y = X.to(device), y.to(device)',
' return X,y',
'',
'class LinReg(nn.Module):',
' def __init__(self, input_dim):',
' super().__init__()',
' self.beta = nn.Linear(input_dim, 1)',
' ',
' def forward(self, X):',
' return self.beta(X)',
'',
"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
'X,y = get_data()',
'',
'n_samples, n_features = X.shape',
'print(X.shape, y.shape)',
'',
'#(Father, Son, Holy Ghost) \\equiv (Model, Loss, Optimizer)',
'model = LinReg(n_features).to(device) ',
'criterion = nn.MSELoss()',
'optimizer = optim.SGD(model.parameters(), lr=1e-1)',
'',
'',
'#Training',
'from tqdm.auto import tqdm, trange',
'for _ in trange(10):',
' # Train step',
' model.train()',
" optimizer.zero_grad() #IMPORTANT: reset (don't accumulate) gradients",
'',
' y_ = model(X)',
' loss = criterion(y_, y)',
'',
' loss.backward() #compute gradients wrt the weights',
' optimizer.step() #apply the learning rule',
'',
' # Eval (suppose to be on the validation data)',
' model.eval()',
' with torch.no_grad():',
' y_ = model(X) ',
'',
'# Vis',
'fig, ax = plt.subplots()',
"ax.plot(X.cpu().numpy(), y_.cpu().numpy(), '.', label='pred')",
"ax.plot(X.cpu().numpy(), y.cpu().numpy(), '.', label='data')",
"ax.set_title(f'MSE: {loss.item():0.1f}')",
'ax.legend();'
]
},
'---',
{
'name': 'Parsimonous MNIST',
'snippet': ["#PyTorch CNN with > 99% accuracy (after 20 epochs) on the MNIST dataset.",
"",
"# ~~~~ Boilerplate ~~~",
"import torch #1.4.0",
"from torch import nn",
"from tqdm.auto import tqdm, trange",
"import numpy as np",
"",
"# ~~~~ Options ~~~",
"opts = {",
" 'lr': 1e-3,",
" 'epochs': 1, #20 achieves 99%",
" 'batch_size': 64",
"}",
"",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')",
"print(device)",
"",
"# ~~~~ Data loading ~~~",
"import torchvision.datasets as dset #0.5.0",
"import torchvision.transforms as transforms",
"dataroot = '/datasets'",
"ds_train = dset.MNIST(root=dataroot, train=True, download=False,",
" transform=transforms.Compose([",
" transforms.ToTensor(),",
" transforms.Normalize((0.1307,), (0.3081,))",
" ]))",
"ds_test = dset.MNIST(root=dataroot, train=False, download=False,",
" transform=transforms.Compose([",
" transforms.ToTensor(),",
" transforms.Normalize((0.1307,), (0.3081,))",
" ]))",
"train_loader = torch.utils.data.DataLoader(dataset=ds_train, batch_size=opts['batch_size'], shuffle=True)",
"test_loader = torch.utils.data.DataLoader(dataset=ds_test, batch_size=opts['batch_size'], shuffle=False)",
"",
"# ~~~~ Model, Optimizer, Loss ~~~",
"class CNN(nn.Module):",
" def __init__(self, input_size=(1,28,28), num_classes=10):",
" super(CNN, self).__init__()",
"",
" self.layer1 = nn.Sequential(",
" nn.Conv2d(input_size[0], 32, kernel_size=5),",
" nn.ReLU(),",
" nn.MaxPool2d(kernel_size=2))",
" ",
" self.layer2 = nn.Sequential(",
" nn.Conv2d(32, 64, kernel_size=5),",
" nn.ReLU(),",
" nn.MaxPool2d(kernel_size=2))",
"",
" self.fc1 = nn.Linear(4 * 4 * 64, num_classes)",
" ",
" ",
" def forward(self, x):",
" # x: (Nx1x28x28) tensor",
" x = self.layer1(x)",
" x = self.layer2(x)",
" x = x.reshape(x.size(0), -1)",
" x = self.fc1(x)",
" return x",
" ",
"model = CNN((1, 28, 28), 10).to(device)",
"optimizer = torch.optim.Adam(model.parameters(), opts['lr'])",
"criterion = torch.nn.CrossEntropyLoss() # loss function",
"",
"# ~~~~ Main loop ~~~",
"for epoch in range(opts['epochs']):",
" model.train()",
" train_loss = []",
" N = len(train_loader)",
" loss_, NUDGE = np.nan, int(N/10)",
" pbar = tqdm(enumerate(train_loader), total=N,",
" desc=f'Epoch[{epoch+1:^3}], Batch[{0+1:^4}], Loss[{loss_:.2f}]')",
" for i, (data, labels) in pbar:",
" data, labels = data.to(device), labels.to(device)",
" outputs = model(data)",
" loss = criterion(outputs, labels)",
" optimizer.zero_grad()",
" loss.backward()",
" optimizer.step()",
" loss_ = loss.item()",
" train_loss.append(loss_)",
" if i%NUDGE == NUDGE-1:",
" pbar.set_description(f'Epoch[{epoch+1:^3}], Batch[{i+1:^4}], Loss[{loss_:.2f}]')",
" ",
" ",
" model.eval()",
" test_loss = []",
" test_accuracy = []",
" for i, (data, labels) in enumerate(test_loader):",
" data, labels = data.to(device), labels.to(device)",
" outputs = model(data)",
" _, predicted = torch.max(outputs.data, 1)",
" loss = criterion(outputs, labels)",
" test_loss.append(loss.item())",
" test_accuracy.append((predicted == labels).sum().item() / predicted.size(0))",
" ",
" print(f'Epoch: {epoch}, train loss: {np.mean(train_loss):.3f}, test loss: {np.mean(test_loss):.3f}, test accuracy: {np.mean(test_accuracy):.3f}')"
]
},
{
"name": "MLP with BCE",
"snippet": [
"# ~~~~ Boilerplate ~~~",
"import torch #1.5.0",
"from torch import nn",
"from tqdm.auto import tqdm, trange",
"import numpy as np",
"",
"np.random.seed(90210)",
"torch.manual_seed(90210)",
"",
"# ~~~~ Options ~~~",
"opts = {",
" 'lr': 1e-3,",
" 'epochs': 10,",
" 'batch_size': 1",
"}",
"",
"# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')",
"device = torch.device('cpu')",
"print(device)",
"",
"# ~~~~ Data loading ~~~",
"from sklearn.datasets import make_classification",
"n_features = 2",
"X, y = make_classification(n_samples=1000, n_features=n_features, n_informative=n_features, n_redundant=0, n_classes=2, n_clusters_per_class=1, random_state=90210)",
"",
"from torch.utils.data import DataLoader, TensorDataset",
"",
"dataset = TensorDataset( torch.FloatTensor(X), torch.FloatTensor(y) )",
"",
"train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=opts['batch_size'], shuffle=False)",
"test_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=opts['batch_size'], shuffle=False)",
"",
"# ~~~~ Model, Optimizer, Loss ~~~",
"class MLP(torch.nn.Module):",
" def __init__(self, input_size, hidden_size):",
" super(MLP, self).__init__()",
" self.fc1 = torch.nn.Linear(input_size, hidden_size)",
" self.relu = torch.nn.ReLU()",
" self.fc2 = torch.nn.Linear(hidden_size, 1)",
" self.sigmoid = torch.nn.Sigmoid()",
" def forward(self, x):",
" hidden = self.fc1(x)",
" relu = self.relu(hidden)",
" output = self.fc2(relu)",
" output = self.sigmoid(output)",
" return output",
" ",
"model = MLP(n_features, 50).to(device)",
"optimizer = torch.optim.Adam(model.parameters(), opts['lr'])",
"# optimizer = torch.optim.SGD(model.parameters(), opts['lr'])",
"criterion = torch.nn.BCELoss() # loss function",
"",
"# ~~~~ Main loop ~~~",
"for epoch in range(opts['epochs']):",
" model.train()",
" train_loss = []",
" N = len(train_loader)",
" loss_, NUDGE = np.nan, int(N/10)",
" pbar = tqdm(enumerate(train_loader), total=N,",
" desc=f'Epoch[{epoch+1:^3}], Batch[{0+1:^4}], Loss[{loss_:.2f}]')",
" for i, (data, labels) in pbar:",
" data, labels = data.to(device), labels.to(device)",
" outputs = model(data)",
" outputs = outputs.squeeze(1)",
" loss = criterion(outputs, labels)",
" optimizer.zero_grad()",
" loss.backward()",
" optimizer.step()",
" loss_ = loss.item()",
" train_loss.append(loss_)",
" if i%NUDGE == NUDGE-1:",
" pbar.set_description(f'Epoch[{epoch+1:^3}], Batch[{i+1:^4}], Loss[{loss_:.2f}]')",
" ",
" ",
" model.eval()",
" test_loss = []",
" test_accuracy = []",
" for i, (data, labels) in enumerate(test_loader):",
" data, labels = data.to(device), labels.to(device)",
" outputs = model(data)",
" outputs = outputs.squeeze(1)",
" predicted = (outputs>0.5).float()",
" loss = criterion(outputs, labels)",
" test_loss.append(loss.item())",
" test_accuracy.append((predicted == labels).sum().item() / opts['batch_size'])",
" ",
" print(f'Epoch: {epoch}, train loss: {np.mean(train_loss):.3f}, test loss: {np.mean(test_loss):.3f}, test accuracy: {np.mean(test_accuracy):.3f}')"
]
}]
},
{
'name': 'Keras',
'sub-menu': [
{
'name': 'plot_model',
'snippet': ['#requirements: graphviz (apt-get), pydot (pip)',
'from IPython.display import SVG',
'from keras.utils.vis_utils import model_to_dot',
'def plot_keras_model(model, show_shapes=True, show_layer_names=True):',
' return SVG(model_to_dot(model, show_shapes=show_shapes,',
" show_layer_names=show_layer_names).create(prog='dot',format='svg'))",
'plot_keras_model(model, show_shapes=True, show_layer_names=False)'
]
}]
},
{
'name': 'TensorFlow',
'sub-menu': [
{
'name': 'assert is using GPU',
'snippet': ['import tensorflow as tf',
"assert tf.test.gpu_device_name(), 'tf does not run on GPU!'"
]
},
{
'name': 'supress warnings',
'snippet': ['#supress tf warnings',
'#https://stackoverflow.com/a/38645250/1640414',
'import os',
"os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # or any {'0', '1', '2'}, higher is less verbose",
'#https://stackoverflow.com/a/51327615/1640414',
'tf.logging.set_verbosity(tf.logging.FATAL)'
]
},
{
'name': 'eval on CPU',
'snippet': ['import tensorflow as tf',
'config = tf.ConfigProto(',
" device_count = {'GPU': 0}",
' )',
'',
"tensor = tf.ones([3,2], dtype=tf.float32, name='ones')",
'',
'with tf.Session(config=config) as sess:',
' print(sess.run(tensor))'
]
}]
},
{
'name': 'OpenAI Gym',
'sub-menu': [
{
'name': 'no render from notebook',
'snippet': ['import gym',
'from gym import wrappers',
"env = gym.make('CartPole-v0')",
'',
"#If you'd like rendering use it outside the notebook",
'#From: https://stackoverflow.com/a/50866507/1640414',
"env = wrappers.Monitor(env, '/tmp/gym', video_callable=False ,force=True)",
'env.reset()',
'print(env.step(env.action_space.sample())) # take a random action',
'env.close()'
]
}]
}]
},
'---',
{
'name': 'Markdown',
'sub-menu': [
{
"name": "Table (with column alignment)",
"snippet": [
"* Table alignment",
"| Syntax | Description | Test Text |",
"| :--- | :----: | ---: |",
"| Header | Title | Here's this |",
"| Paragraph | Text | And more |"
]
},
{
'name': 'Table (with code)',
'snippet': ['from IPython.display import HTML, display',
'import tabulate',
"table = [['Sun',696000,1989100000],",
" ['Earth',6371,5973.6],",
" ['Moon',1737,73.5],",
" ['Mars',3390,641.85]]",
"display(HTML(tabulate.tabulate(table, headers=['h1', 'h2', 'h3'], tablefmt='html')))"
]
},
{
'name': 'add YouTube video',
'snippet': ['from IPython.lib.display import YouTubeVideo',
"YouTubeVideo('Boy3zHVrWB4', start=0)"
]
},
{
'name': 'add IFrame embedding',
'snippet': ['from IPython.display import IFrame',
"IFrame('https://www.desmos.com/calculator/osig1u1uwl?embed', width=350, height=350)"
]
},
{
'name': 'embedded code markdown',
'snippet': ['```bash',
'git clone about:blank',
'```'
]
},
{
"name": "Figure template in HTML",
"snippet": [
"<center>",
"<figure>",
"<img src='http://pyro.ai/_static/img/vae_plots/test_elbo_vae.png' style='width: 550px;'>",
"<figcaption>",
"<font size='+1'><b>Figure 3:</b> How the test ELBO evolves over the course of training. </font>",
"</figcaption>",
"</figure>",
"</center>"
]
},
'---',
{
'name': 'Cheetsheet',
'external-link': 'https://github.com/adam-p/markdown-here/wiki/Markdown-Here-Cheatsheet',
},
{
'name': 'Extended syntax',
'external-link': 'https://www.markdownguide.org/extended-syntax/',
}
]
},
{
'name': 'LaTeX',
'sub-menu': [
{
'name': 'Equations with numbers',
'snippet': ['$$',
'\\begin{equation}',
'dS_A+dS_B>0 \\\\',
'dS_A+dS_B>0',
'\\end{equation}',
'$$'
]
},
{
'name': 'Aligning multiple equations',
'snippet': ['$$\\begin{align*}',
'p_A &\\sim \\text{Uniform}[\\text{low}=0,\\text{high}=1) \\\\',
'p_B &\\sim \\text{Uniform}[\\text{low}=0,\\text{high}=1) \\\\',
'X\\ &\\sim \\text{Bernoulli}(\\text{prob}=p) \\\\',
'\\text{for } i &= 1\\ldots N: \\\\',
' X_i\\ &\\sim \\text{Bernoulli}(p_i)',
'\\end{align*}$$'
]
},
{
'name': 'Vector in matrix notation',
'snippet': ['$\\begin{bmatrix} ',
' 0 \\\\ ',
' 0 \\\\ ',
'\\end{bmatrix}\\in\\text{Null Space}$'
]
}]
},
'---',
{
'name': 'Best practices',
'sub-menu': [
{
'name': 'Static typing',
'external-link': 'https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html',
},
{
'name': 'Styling matplotlib __',
'external-link': 'https://github.com/matplotlib/matplotlib/blob/master/matplotlibrc.template',
},
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment