Last active
June 14, 2022 17:54
-
-
Save shaybensasson/6b11b3de18840bb73432c98fe1ec585f to your computer and use it in GitHub Desktop.
My Jupyter nbextension "Snippets Menu" JSON configuration
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| var MY_SNIPPETS = { | |
| 'name': 'Snippets', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'config file', | |
| 'snippet': ['!cat ~/.local/share/jupyter/nbextensions/snippets_menu/my_snippets.js'] | |
| }, | |
| '---', | |
| { | |
| 'name': 'Header', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'numpy|pandas|matplotlib|seaborn', | |
| "snippet": [ | |
| "%matplotlib inline", | |
| "%config InlineBackend.figure_format = 'retina'", | |
| "", | |
| "import matplotlib", | |
| "import matplotlib.pyplot as plt", | |
| "import seaborn as sns", | |
| "from pylab import rcParams", | |
| "", | |
| "sns.set(style='whitegrid', palette='muted', font_scale=1.33)", | |
| "# plt.style.use('ggplot')", | |
| "", | |
| "HAPPY_COLORS_PALETTE = ['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']", | |
| "", | |
| "sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))", | |
| "", | |
| "rcParams['figure.figsize'] = 10, 8", | |
| "", | |
| "import pandas as pd", | |
| "import numpy as np", | |
| "", | |
| "# x = np.arange(100)", | |
| "y=np.random.randint(0, 5+1, size=(100))", | |
| "#sns.scatterplot(x,y)", | |
| "#sns.regplot(x,y)", | |
| "", | |
| "ax = plt.figure(figsize=(10,5)).gca() #w,h", | |
| "sns.countplot(y, ax=ax)", | |
| "ax.set_xticklabels(labels=HAPPY_COLORS_PALETTE)", | |
| "ax.yaxis.set_major_locator(plt.MaxNLocator(10)) #see https://matplotlib.org/3.1.1/gallery/ticks_and_spines/tick-locators.html" | |
| ] | |
| }, | |
| { | |
| 'name': 'Plot.ly 4.5', | |
| 'snippet': ['#online mode (just comment for offline mode)', | |
| '# import chart_studio', | |
| "# chart_studio.tools.set_credentials_file(username='bensshay', api_key='YuUeRFOAsKffHg3NpLbJ')", | |
| '', | |
| 'from plotly.offline import iplot', | |
| 'import plotly.graph_objects as go', | |
| '', | |
| 'import plotly.io as pio', | |
| "pio.templates.default = 'none' #set theme", | |
| '', | |
| '# Cufflinks wrapper on plotly', | |
| 'import cufflinks', | |
| '', | |
| 'cufflinks.go_offline()', | |
| '', | |
| '# Set global theme', | |
| "cufflinks.set_config_file(world_readable=True, theme='pearl')", | |
| '', | |
| '#After importing cufflinks, plotly plots can be made using df.iplot() and then specifying parameters. ', | |
| '# This is a great replacement for matplotlib!', | |
| '', | |
| '', | |
| '#quick demo', | |
| 'fig = go.Figure(go.Scatter(x=[1, 2, 3, 4], y=[4, 3, 2, 1]))', | |
| "fig.update_layout(title_text='hello world')", | |
| '', | |
| 'iplot(fig)', | |
| '' | |
| ] | |
| }, | |
| { | |
| 'name': 'versions of everything', | |
| 'snippet': ['%reload_ext watermark', | |
| 'import warnings', | |
| '', | |
| 'import os', | |
| "print('VirtualEnv: {}'.format(os.getenv('VIRTUAL_ENV').split('/')[-1]))", | |
| "print('')", | |
| 'with warnings.catch_warnings():', | |
| " warnings.simplefilter('ignore')", | |
| ' %watermark -v --packages numpy,scipy,sklearn,pandas,matplotlib,seaborn,tqdm,keras,tensorflow', | |
| "print('')", | |
| '!cat /usr/local/cuda/version.txt', | |
| '', | |
| '#import pandas as pd', | |
| '#pd.show_versions()' | |
| ] | |
| }, | |
| { | |
| 'name': 'ignore warnings', | |
| 'snippet': ["import warnings; warnings.simplefilter('ignore')"] | |
| }, | |
| { | |
| 'name': 'reload an existing module', | |
| 'snippet': ['#import module', | |
| 'import importlib', | |
| 'importlib.reload(module)' | |
| ] | |
| }, | |
| { | |
| 'name': 'add packages to python path', | |
| 'snippet': ['import sys, os', | |
| "paths = ['~/Homer/', '~/Homer/lib/hyperopt/', '~/Homer/lib/PDPbox/', '~/Homer/lib/PyCEbox/', '~/Homer/lib/ALEPython/']", | |
| 'sys.path.extend([os.path.expanduser(p) for p in paths])' | |
| ] | |
| }, | |
| { | |
| "name": "Float formatting", | |
| "snippet": [ | |
| "np.set_printoptions(formatter={'float_kind': '{:3f}'.format})", | |
| "%precision 3 #ipython float formatter", | |
| "pd.options.display.float_format='{:.3f}'.format" | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'OLD Plot.ly<4.5', | |
| 'snippet': ['import plotly ', | |
| '#online mode', | |
| "plotly.tools.set_credentials_file(username='bensshay', api_key='YuUeRFOAsKffHg3NpLbJ')", | |
| '', | |
| '#offline mode', | |
| '#from plotly.offline import init_notebook_mode, iplot', | |
| '#Always run this the command before at the start of notebook', | |
| '#init_notebook_mode(connected=False)', | |
| '', | |
| '# plotly standard imports', | |
| 'import plotly.graph_objs as go', | |
| 'import plotly.plotly as py', | |
| 'import plotly.figure_factory as ff', | |
| '', | |
| '# Cufflinks wrapper on plotly', | |
| 'import cufflinks', | |
| '', | |
| 'from plotly.offline import iplot', | |
| 'cufflinks.go_offline()', | |
| '', | |
| '# Set global theme', | |
| "cufflinks.set_config_file(world_readable=True, theme='pearl')", | |
| '', | |
| '#After importing cufflinks, plotly plots can be made using df.iplot() and then specifying parameters. ', | |
| '# This is a great replacement for matplotlib!' | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| 'name': 'Thesis', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Boilerplate', | |
| 'sub-menu': [ | |
| { | |
| "name": "1. dirs", | |
| "snippet": [ | |
| "import pandas as pd", | |
| "import numpy as np", | |
| "import pickle", | |
| "", | |
| "import sys, os", | |
| "", | |
| "HOMER_DIR = os.path.expanduser('~/Homer')", | |
| "sys.path.extend([HOMER_DIR])", | |
| "", | |
| "from homer import options", | |
| "RANDOM_SEED = options.RANDOM_SEED", | |
| "", | |
| "STYLES_DIR = os.path.join(HOMER_DIR, 'styles')", | |
| "HOME_DIR = os.path.join(HOMER_DIR, 'Intelligence/v2') #project home_dir", | |
| "DATA_DIR = os.path.join(HOME_DIR, 'data')", | |
| "ENSEMBLES_DIR = os.path.join(HOME_DIR, 'out/ensembles')", | |
| "TOP_FEATS_DIR = os.path.join(ENSEMBLES_DIR, 'top_feats')", | |
| "", | |
| "_ANALYSIS_DIR = os.path.join(HOME_DIR, 'out/analysis')", | |
| "FIGURES_DIR = os.path.join(_ANALYSIS_DIR, 'figures')", | |
| "INTERMEDIATE_DIR = os.path.join(_ANALYSIS_DIR, 'intermediate')" | |
| ] | |
| }, | |
| { | |
| "name": "2. styles", | |
| "snippet": [ | |
| "# IMPORTANT: It is essential that the use.style will be on difference cell than the %matplotlib magic", | |
| "import matplotlib.pyplot as plt", | |
| "import seaborn as sns", | |
| "", | |
| "BASELINE_STYLE = os.path.join(STYLES_DIR, 'baseline.mplstyle')", | |
| "THESIS_STYLE = os.path.join(STYLES_DIR, 'thesis.mplstyle')", | |
| "THESIS_CB_STYLE = os.path.join(STYLES_DIR, 'thesis.colorblind.mplstyle')", | |
| "THESIS_SHAP_STYLE = os.path.join(STYLES_DIR, 'thesis.shap.mplstyle')", | |
| "# plt.style.use([BASELINE_STYLE])", | |
| "plt.style.use([BASELINE_STYLE, THESIS_STYLE])", | |
| "", | |
| "#can be used inside a context manger:", | |
| "#with plt.style.context([BASELINE_STYLE, THESIS_STYLE,THESIS_SHAP_STYLE]):", | |
| "#with plt.rc_context({'axes.grid': False}):", | |
| "" | |
| ] | |
| }, | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'Jupyter', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Magics', | |
| 'sub-menu': [ | |
| { | |
| "name": "autoreload", | |
| "snippet": [ | |
| "%load_ext autoreload", | |
| "", | |
| "%autoreload 2 # reloads all modules every time this cell is executed" | |
| ] | |
| }, | |
| { | |
| "name": "timeit (run multiple times)", | |
| "snippet": [ | |
| "%%timeit -r2 -n3 # 2 runs x 3 iterations/loops", | |
| "import time", | |
| "time.sleep(1)" | |
| ] | |
| }], | |
| }, | |
| '---', | |
| { | |
| 'name': 'print all pathes', | |
| 'snippet': ['!jupyter --path'] | |
| }, | |
| { | |
| 'name': 'auto save when executed', | |
| 'snippet': ['from IPython.display import Javascript', | |
| '', | |
| "script = ''", | |
| 'if (AUTO_SAVE_WHEN_COMPLETE):', | |
| " script = '''", | |
| ' require(["base/js/namespace"],function(Jupyter) {', | |
| ' Jupyter.notebook.save_checkpoint();', | |
| ' });', | |
| " '''", | |
| 'Javascript(script)' | |
| ] | |
| }, | |
| { | |
| 'name': 'time notebook', | |
| 'snippet': ['#Start block', | |
| 'import time', | |
| 'start_time = time.time()', | |
| '', | |
| '#End block', | |
| 'import datetime', | |
| 'duration = str(datetime.timedelta(seconds=time.time()-start_time))', | |
| "print(f'The whole notebook took: {duration}')" | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'Plotting', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Matplotlib', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'subplotting (plt.subplots)', | |
| 'snippet': ['from tqdm.auto import tqdm, trange', | |
| '', | |
| 'N = 6', | |
| 'NCOLS = min(5,N)', | |
| 'NROWS = int(np.ceil(N/NCOLS))', | |
| '# print(N, NROWS, NCOLS)', | |
| 'f, axes = plt.subplots(nrows=NROWS, ncols=NCOLS, figsize=(24,3*NROWS), squeeze=False) #w,h', | |
| '', | |
| 'for i in trange(N):', | |
| ' ax = axes[int(i/NCOLS),i%NCOLS] ', | |
| ' x = np.random.randint(10, size=(10,))', | |
| ' ax.scatter(x,x)', | |
| " ax.set_title('#%s' % i)", | |
| '', | |
| '#delete leftovers', | |
| 'for i in range(N, N + NROWS*NCOLS-N):', | |
| ' ax: plt.Axes = axes[int(i/NCOLS),i%NCOLS]', | |
| ' f.delaxes(ax)', | |
| ' ', | |
| 'plt.tight_layout(w_pad=2.5, h_pad=2) #pads are specified in fraction of fontsize' | |
| ] | |
| }, | |
| { | |
| 'name': 'subplotting (matlab style)', | |
| 'snippet': ['from tqdm.auto import tqdm, trange', | |
| '', | |
| 'N = 6', | |
| 'NCOLS = min(5,N)', | |
| 'NROWS = int(np.ceil(N/NCOLS))', | |
| '# print(N, NROWS, NCOLS)', | |
| '', | |
| '# Matlab style', | |
| 'plt.subplots(figsize=(24,3*NROWS))', | |
| '# plt.subplots_adjust(wspace=0.2,hspace=0.5)', | |
| 'for i in trange(N):', | |
| ' ax = plt.subplot(NROWS,NCOLS,i+1)', | |
| ' ', | |
| ' x = np.random.randint(10, size=(10,))', | |
| ' ax.scatter(x,x)', | |
| " ax.set_title('#%s' % i)", | |
| ' ', | |
| 'plt.tight_layout(w_pad=2.5, h_pad=2) #pads are specified in fraction of fontsize' | |
| ] | |
| }, | |
| { | |
| 'name': 'set plot font_size', | |
| 'snippet': ["ax = plt.subplot(111, xlabel='x', ylabel='y', title='title')", | |
| "ax.scatter([1,2,3], [1,0,3], label='123')", | |
| 'ax.legend()', | |
| 'for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +', | |
| ' ax.get_xticklabels() + ax.get_yticklabels() + ', | |
| ' ax.legend().get_texts()):', | |
| ' item.set_fontsize(14)', | |
| '# more here: https://stackoverflow.com/questions/3899980/how-to-change-the-font-size-on-a-matplotlib-plot' | |
| ] | |
| }, | |
| '---', | |
| { | |
| "name": "3d scatter", | |
| "snippet": [ | |
| "%matplotlib notebook", | |
| "# interactive plots", | |
| "", | |
| "from mpl_toolkits import mplot3d", | |
| "plt.rcParams['figure.figsize'] = 15, 8", | |
| "", | |
| "import numpy as np", | |
| "", | |
| "def f(x, y):", | |
| " return np.sin(np.sqrt(x ** 2 + y ** 2))", | |
| "", | |
| "N = 5000", | |
| "theta = 2 * np.pi * np.random.random(N)", | |
| "r = 6 * np.random.random(N)", | |
| "x = np.ravel(r * np.sin(theta))", | |
| "y = np.ravel(r * np.cos(theta))", | |
| "z = f(x, y)", | |
| "", | |
| "ax = plt.axes(projection='3d')", | |
| "ax.scatter(x, y, z, c=z, alpha=.4, s=50, cmap='viridis');", | |
| "", | |
| "# TIP: adding legend: https://stackoverflow.com/a/20505720/1640414", | |
| "ax.view_init(elev=45, azim=45) #pan using left mouse button, zoom using right mouse button" | |
| ] | |
| }, | |
| { | |
| 'name': '3d (static) plots examples ', | |
| 'external-link': 'https://www.kaggle.com/saurav9786/interactive-3-d-plots-for-data-visualization', | |
| }, | |
| ], | |
| }, | |
| { | |
| 'name': 'Seaborn', | |
| 'sub-menu': [ | |
| { | |
| "name": "Distinct seaborn color pallete", | |
| "snippet": [ | |
| "%matplotlib inline", | |
| "%config InlineBackend.figure_format = 'retina'", | |
| "", | |
| "import matplotlib.pyplot as plt", | |
| "import seaborn as sns", | |
| "import numpy as np", | |
| "", | |
| "#see https://mokole.com/palette.html", | |
| "DISTINCT_COLORS_PALLETE = ['#808080','#556b2f','#7f0000','#483d8b','#008000','#008b8b','#000080','#d2691e','#daa520','#8fbc8f','#800080','#b03060','#ff4500','#ffff00','#00ff00','#00ff7f','#dc143c','#00ffff','#00bfff','#0000ff','#a020f0','#adff2f','#1e90ff','#90ee90','#add8e6','#ff1493','#7b68ee','#ee82ee','#ffdead','#ffc0cb'][::-1]", | |
| "sns.set(style='whitegrid', font_scale=1.33)", | |
| "sns.set_palette(DISTINCT_COLORS_PALLETE)", | |
| "", | |
| "# sns.palplot(sns.color_palette(DISTINCT_COLORS_PALLETE)) #render the pallete", | |
| "", | |
| "N_COLORS = min(30, len(DISTINCT_COLORS_PALLETE))", | |
| "y=np.random.randint(0, N_COLORS+1, size=(100))", | |
| "ax = plt.figure(figsize=(20,10)).gca() #w,h", | |
| "sns.countplot(y, ax=ax);" | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'histogram/kde', | |
| 'snippet': ['data = np.random.randn(100)', | |
| "#plt.hist(data, density=True, bins='auto',", | |
| '# alpha=0.7, rwidth=0.95);', | |
| '', | |
| "sns.kdeplot(data, color = 'red', linewidth = 2, shade = True);" | |
| ] | |
| }, | |
| { | |
| "name": "countplot (series value_counts())", | |
| "snippet": [ | |
| "ax = sns.countplot(df.target)", | |
| "ax.set_xticklabels(class_names);" | |
| ] | |
| }, | |
| { | |
| 'name': 'scatter/regplot', | |
| 'snippet': ['x = np.arange(100)', | |
| 'y=np.random.randint(0, 100, size=(100))', | |
| '#sns.scatterplot(x,y)', | |
| 'sns.regplot(x,y)' | |
| ] | |
| }, | |
| { | |
| "name": "BoxEn plot: better than box plot", | |
| "snippet": [ | |
| "# https://towardsdatascience.com/5-lesser-known-seaborn-plots-most-people-dont-know-82e5a54baea8", | |
| "tips = sns.load_dataset('tips')", | |
| "#sns.boxplot(x='day', y='total_bill', data=tips) ", | |
| "sns.boxenplot(x='day', y='total_bill', data=tips) " | |
| ] | |
| }, | |
| { | |
| 'name': 'correlation matrix', | |
| 'snippet': [ | |
| '#https://seaborn.pydata.org/examples/many_pairwise_correlations.html', | |
| '#https://blog.algorexhealth.com/2017/09/10-heatmaps-10-python-libraries/', | |
| 'plt.figure(figsize=(18,18)) # (w,h)', | |
| '', | |
| 'corr = df1.corr()', | |
| 'mask = np.zeros_like(corr, dtype=np.bool)', | |
| 'mask[np.triu_indices_from(mask)] = True', | |
| '', | |
| "#p=sns.heatmap(corr, annot=True,cmap ='RdYlGn', mask=mask)", | |
| '', | |
| '# Generate a custom diverging colormap', | |
| 'cmap = sns.diverging_palette(220, 10, as_cmap=True)', | |
| '', | |
| 'p=sns.heatmap(corr, annot=True, cmap=cmap, mask=mask, center=0,', | |
| ' square=True, linewidths=.5, cbar_kws={"shrink": .5}, fmt=".2f")' | |
| ] | |
| }, | |
| { | |
| "name": "heatmap (better than matshow)", | |
| "snippet": ['#https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/', | |
| "ax = plt.figure(figsize=(10, 10)).gca()", | |
| "mat = np.random.random((10, 10)) * 2 - 1", | |
| "p = sns.heatmap(mat,", | |
| " cmap=sns.diverging_palette(220, 10, as_cmap=True),", | |
| " annot=True, linewidths=.5, ", | |
| " cbar_kws={'shrink': .5},", | |
| " center=0,", | |
| " square=True,", | |
| " vmin=-1, vmax=1", | |
| " )", | |
| "labels = [chr(i) for i in ord('a') + np.arange(10)]", | |
| "ax.set_xticklabels(labels, rotation=45)", | |
| "ax.set_yticklabels(labels, rotation=45)" | |
| ] | |
| }, | |
| { | |
| "name": "Clustered Heatmap/Corr mat", | |
| "snippet": [ | |
| "#https://towardsdatascience.com/5-lesser-known-seaborn-plots-most-people-dont-know-82e5a54baea8", | |
| "# load boston housing ...", | |
| "", | |
| "corr = df.iloc[:, :-1].corr() #features only", | |
| "#https://seaborn.pydata.org/examples/many_pairwise_correlations.html", | |
| "#https://blog.algorexhealth.com/2017/09/10-heatmaps-10-python-libraries/", | |
| "plt.figure(figsize=(18,18)) # (w,h)", | |
| "", | |
| "mask = np.zeros_like(corr, dtype=np.bool)", | |
| "mask[np.triu_indices_from(mask)] = True", | |
| "", | |
| "#p=sns.heatmap(corr, annot=True,cmap ='RdYlGn', mask=mask)", | |
| "", | |
| "# Generate a custom diverging colormap", | |
| "cmap = sns.diverging_palette(220, 10, as_cmap=True)", | |
| "", | |
| "#p=sns.heatmap(corr, annot=True, cmap=cmap, mask=mask, center=0,", | |
| "# square=True, linewidths=.5, cbar_kws={'shrink': .5}, fmt='.2f')", | |
| "", | |
| "sns.clustermap(corr, ", | |
| " figsize=(18,18), annot=True,", | |
| " cmap=cmap, center=0, square=True, linewidths=.5, fmt='.2f') #, #2d array-like rectangular data", | |
| " #metricstr, #distance metric to use for data (default euclidean)", | |
| " #z_scoreint, #whether to calculate z-scores or not", | |
| " #standard_scaleint) #whether to standardize data or not " | |
| ] | |
| }, | |
| { | |
| "name": "Ridge plots", | |
| "snippet": [ | |
| "sns.set(style='white', rc={'axes.facecolor': (0, 0, 0, 0)})", | |
| "", | |
| "# Create the data", | |
| "rs = np.random.RandomState(1979)", | |
| "x = rs.randn(500)", | |
| "g = np.tile(list('ABCDEFGHIJ'), 50)", | |
| "df = pd.DataFrame(dict(x=x, g=g))", | |
| "m = df.g.map(ord)", | |
| "df['x'] += m", | |
| "", | |
| "# Initialize the FacetGrid object", | |
| "# pal = sns.c(10, rot=-.25, light=.7)", | |
| "g = sns.FacetGrid(df, row='g', hue='g', aspect=15, height=.5, palette='coolwarm')", | |
| "", | |
| "# Draw the densities in a few steps", | |
| "g.map(sns.kdeplot, 'x', clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)", | |
| "g.map(sns.kdeplot, 'x', clip_on=False, color='w', lw=2, bw=.2)", | |
| "g.map(plt.axhline, y=0, lw=2, clip_on=False)", | |
| "", | |
| "", | |
| "# Define and use a simple function to label the plot in axes coordinates", | |
| "def label(x, color, label):", | |
| " ax = plt.gca()", | |
| " ax.text(0, .2, label, fontweight='bold', color='k',", | |
| " ha='left', va='center', transform=ax.transAxes)", | |
| "", | |
| "", | |
| "g.map(label, 'x')", | |
| "", | |
| "# Set the subplots to overlap", | |
| "g.fig.subplots_adjust(hspace=-.25)", | |
| "", | |
| "# Remove axes details that don't play well with overlap", | |
| "g.set_titles('')", | |
| "g.set(yticks=[])", | |
| "g.despine(bottom=True, left=True)" | |
| ] | |
| }, | |
| ], | |
| }, | |
| { | |
| 'name': 'Plot.ly', | |
| 'sub-menu': [ | |
| { | |
| "name": "Timeseries line chart (x is Date)", | |
| "snippet": [ | |
| "# Using graph_objects", | |
| "import plotly.graph_objects as go", | |
| "", | |
| "import pandas as pd", | |
| "df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv')", | |
| "", | |
| "fig = go.Figure([go.Scatter(x=df['Date'], y=df['AAPL.High'])])", | |
| "fig.show()" | |
| ] | |
| }, | |
| { | |
| "name": "Default discrete color pallete", | |
| "snippet": [ | |
| "import plotly.graph_objects as go", | |
| "import numpy as np", | |
| "", | |
| "fig = go.Figure()", | |
| "", | |
| "def hex2rgba(h, alpha=.7):", | |
| " h = h.lstrip('#')", | |
| " rgb = ','.join([str(int(h[i:i+2], 16)) for i in (0, 2, 4)])", | |
| " return f'rgba({rgb},{alpha})'", | |
| " ", | |
| "#default_plotly colormap, adding opacity", | |
| "colors_hex = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']", | |
| "colors = [hex2rgba(c) for c in colors_hex]", | |
| "N = len(colors)", | |
| "", | |
| "fig.add_trace(go.Heatmap(", | |
| " z=[np.arange(N).tolist()],", | |
| " colorscale=[", | |
| " [0, colors[0]],", | |
| " [0.1, colors[0]],", | |
| "", | |
| " [0.1, colors[1]],", | |
| " [0.2, colors[1]],", | |
| "", | |
| " [0.2, colors[2]],", | |
| " [0.3, colors[2]],", | |
| "", | |
| " [0.3, colors[3]],", | |
| " [0.4, colors[3]],", | |
| "", | |
| " [0.4, colors[4]],", | |
| " [0.5, colors[4]],", | |
| "", | |
| " [0.5, colors[5]],", | |
| " [0.6, colors[5]],", | |
| "", | |
| " [0.6, colors[6]],", | |
| " [0.7, colors[6]],", | |
| "", | |
| " [0.7, colors[7]],", | |
| " [0.8, colors[7]],", | |
| "", | |
| " [0.8, colors[8]],", | |
| " [0.9, colors[8]],", | |
| "", | |
| " [0.9, colors[9]],", | |
| " [1.0, colors[9]],", | |
| " ],", | |
| " colorbar=dict(", | |
| " tick0=0,", | |
| " dtick=1", | |
| " )", | |
| "))", | |
| "", | |
| "fig.show()" | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'confusion matrix', | |
| 'snippet': ['import numpy as np', | |
| 'from sklearn.metrics import confusion_matrix', | |
| 'import plotly.figure_factory as ff', | |
| '', | |
| "NEG_CLASS, POS_CLASS = 'Neg', 'Pos'", | |
| '', | |
| 'y_true = np.random.randint(0, high=1+1, size=(100,))', | |
| 'y_pred = np.random.randint(0, high=1+1, size=(100,))', | |
| '# y_pred = y_true', | |
| 'cm = confusion_matrix(y_true, y_pred)', | |
| '', | |
| 'cm_ = cm.ravel()', | |
| "norm_cm_ = (cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]).ravel()", | |
| "z_text = [[f'TN: {cm_[0]} ({norm_cm_[0]:.2f})', f'FP: {cm_[1]} ({norm_cm_[1]:.2f})'],", | |
| " [f'FN: {cm_[2]} ({norm_cm_[2]:.2f})', f'TP: {cm_[3]} ({norm_cm_[3]:.2f})']]", | |
| '', | |
| '#use different colors for pos/neg', | |
| 'cm_masked = cm * np.array([[1,-1],[-1,1]])', | |
| 'fig = ff.create_annotated_heatmap(', | |
| ' x=[NEG_CLASS, POS_CLASS],', | |
| ' y=[NEG_CLASS, POS_CLASS],', | |
| ' z=cm_masked,', | |
| ' annotation_text=z_text, ', | |
| ' reversescale=False,', | |
| ' showscale=True,', | |
| " colorscale='RdBu',", | |
| ' zmid=0,', | |
| ' xgap=2,ygap=2,', | |
| ')', | |
| '', | |
| 'fig.update_layout(dict(', | |
| " title='Confusion Matrix',", | |
| ' xaxis=go.layout.XAxis(', | |
| " title='Predicted label',", | |
| " side='bottom',", | |
| ' ),', | |
| ' yaxis=go.layout.YAxis(', | |
| " title='True label',", | |
| " autorange='reversed',", | |
| ' )))', | |
| "fig['data'][0]['colorbar']['showticklabels'] = False #no tick labels", | |
| '', | |
| '#adjust annot fonts', | |
| 'mx = np.max(cm_)', | |
| 'med = mx/2', | |
| 'for i in range(len(fig.layout.annotations)):', | |
| ' fig.layout.annotations[i].font.size = 16', | |
| " fig.layout.annotations[i].font.color = 'white' if (mx-cm_[i]) < med else 'black'", | |
| '', | |
| 'iplot(fig, show_link=True)' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'Altair', | |
| 'sub-menu': [ | |
| { | |
| "name": "Scatter", | |
| "snippet": [ | |
| "import pandas as pd", | |
| "import altair as alt", | |
| "", | |
| "data = pd.DataFrame({'country_id': [1, 2, 3, 4, 5, 6],", | |
| " 'population': [1, 100, 200, 300, 400, 500],", | |
| " 'income': [50, 50, 200, 300, 300, 450]})", | |
| "", | |
| "# data", | |
| "", | |
| "alt.Chart(data).mark_circle(size=200).encode(", | |
| " x='population:Q',", | |
| " y='income:Q',", | |
| " color='country_id:N',", | |
| " tooltip=['country_id', 'population', 'income'])" | |
| ] | |
| }] | |
| }] | |
| }, | |
| { | |
| 'name': 'Datasets', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Regression', | |
| 'sub-menu': [ | |
| { | |
| "name": "Boston Housing", | |
| "snippet": [ | |
| "import pandas as pd", | |
| "import sklearn.datasets", | |
| "def boston():", | |
| " #from shap: Return the boston housing data in a nice package.", | |
| "", | |
| " d = sklearn.datasets.load_boston()", | |
| " df = pd.DataFrame(data=d.data, columns=d.feature_names) # pylint: disable=E1101", | |
| " return df, d.target # pylint: disable=E1101", | |
| "", | |
| "df, target = boston()", | |
| "df['target'] = target", | |
| "df.head()" | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'Binary Classification', | |
| 'sub-menu': [ | |
| { | |
| "name": "Adult census", | |
| "snippet": [ | |
| "import numpy as np", | |
| "import pandas as pd", | |
| "import sklearn.datasets", | |
| "def adult(display=False):", | |
| " # from shap: Return the Adult census data in a nice package.", | |
| " dtypes = [", | |
| " ('Age', 'float32'), ('Workclass', 'category'), ('fnlwgt', 'float32'),", | |
| " ('Education', 'category'), ('Education-Num', 'float32'), ('Marital Status', 'category'),", | |
| " ('Occupation', 'category'), ('Relationship', 'category'), ('Race', 'category'),", | |
| " ('Sex', 'category'), ('Capital Gain', 'float32'), ('Capital Loss', 'float32'),", | |
| " ('Hours per week', 'float32'), ('Country', 'category'), ('Target', 'category')", | |
| " ]", | |
| " raw_data = pd.read_csv(", | |
| " '/datasets/adult/adult.data',", | |
| " names=[d[0] for d in dtypes],", | |
| " na_values='?',", | |
| " dtype=dict(dtypes)", | |
| " )", | |
| " data = raw_data.drop(['Education'], axis=1) # redundant with Education-Num", | |
| " filt_dtypes = list(filter(lambda x: not (x[0] in ['Target', 'Education']), dtypes))", | |
| " data['Target'] = data['Target'] == ' >50K'", | |
| " rcode = {", | |
| " 'Not-in-family': 0,", | |
| " 'Unmarried': 1,", | |
| " 'Other-relative': 2,", | |
| " 'Own-child': 3,", | |
| " 'Husband': 4,", | |
| " 'Wife': 5", | |
| " }", | |
| " for k, dtype in filt_dtypes:", | |
| " if dtype == 'category':", | |
| " if k == 'Relationship':", | |
| " data[k] = np.array([rcode[v.strip()] for v in data[k]])", | |
| " else:", | |
| " data[k] = data[k].cat.codes", | |
| "", | |
| " if display:", | |
| " return raw_data.drop(['Education', 'Target', 'fnlwgt'], axis=1), data['Target'].values", | |
| " else:", | |
| " return data.drop(['Target', 'fnlwgt'], axis=1), data['Target'].values", | |
| "", | |
| "df, target = adult()", | |
| "df['target'] = target", | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "name": "Pima diabetes", | |
| "snippet": [ | |
| "import numpy as np", | |
| "import pandas as pd", | |
| "import sklearn.datasets", | |
| "def pima():", | |
| " # Returns the Pima diabetes data in a nice package.", | |
| " ", | |
| " raw_data = pd.read_csv(", | |
| " '/datasets/pima/diabetes.csv'", | |
| " )", | |
| " return raw_data.drop(['Outcome'], axis=1), raw_data['Outcome'].values", | |
| "", | |
| "df, target = pima()", | |
| "df['target'] = target", | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "name": "Titanic", | |
| "snippet": [ | |
| "import numpy as np", | |
| "import pandas as pd", | |
| "import sklearn.datasets", | |
| "def titanic():", | |
| " # Returns the Titanic data in a nice package.", | |
| " # https://www.kaggle.com/c/titanic/data", | |
| " ", | |
| " raw_data = pd.read_csv(", | |
| " '/datasets/titanic/titanic.csv'", | |
| " )", | |
| " return raw_data.drop(['Survived'], axis=1), raw_data['Survived'].values", | |
| "", | |
| "df, target = titanic()", | |
| "df['target'] = target", | |
| "#df = df[['target', 'Pclass', 'Sex', 'Age', 'Ticket', 'Fare', 'Embarked']]", | |
| "df.head()" | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'Multi-Class', | |
| 'sub-menu': [ | |
| { | |
| "name": "Iris", | |
| "snippet": [ | |
| "import pandas as pd", | |
| "import sklearn.datasets", | |
| "def iris(display=True):", | |
| " #from shap: Return the classic iris data in a nice package.", | |
| " # display: targets are str, otherwise int", | |
| "", | |
| " d = sklearn.datasets.load_iris()", | |
| " df = pd.DataFrame(data=d.data, columns=d.feature_names) # pylint: disable=E1101", | |
| " if display:", | |
| " return df, [d.target_names[v] for v in d.target] # pylint: disable=E1101", | |
| " else:", | |
| " return df, d.target # pylint: disable=E1101", | |
| "", | |
| "df, target = iris()", | |
| "df['target'] = target", | |
| "df.head()" | |
| ] | |
| }, | |
| ] | |
| }, | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'Bash', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'nice tree of recursive dir listing', | |
| 'snippet': ['!tree -d /datasets/dogscats/'] | |
| }] | |
| }, | |
| { | |
| 'name': 'Debugger', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'set a breakpoint/set_trace()', | |
| 'snippet': ['#http://wangchuan.github.io/coding/2017/07/12/ipdb-cheat-sheet.html', | |
| '', | |
| 'from IPython.core.debugger import set_trace', | |
| 'def my_function(x):', | |
| ' answer = 42', | |
| ' #set_trace() # <-- uncomment!', | |
| ' #Python 3.7 has `breakpoint()` built-in!', | |
| ' #type `exit` to quit the debugger', | |
| ' answer += x', | |
| ' return answer', | |
| '', | |
| 'my_function(12)' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'Testing', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'unittest (great assert, no class)', | |
| 'snippet': ['import unittest', | |
| "T = unittest.TestCase('__init__')", | |
| '#T.assertEqual((1,2), (2,1))' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'Formatting & Printing', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Formatting strings examples (python 3.5)', | |
| "snippet": [ | |
| "#https://pyformat.info/", | |
| "#old", | |
| "'s=%s, i=%d' % ('str', 15)", | |
| "", | |
| "#new", | |
| "'{} {}'.format('one', 'two')", | |
| "'{1} {0}'.format('one', 'two')", | |
| "", | |
| "'{:d} {:.2f}'.format(15, 3.1415)", | |
| "", | |
| "#newest", | |
| "data = {'first': 'Hodor', 'last': 'Hodor!'}", | |
| "'{first} {last}'.format(**data)", | |
| "", | |
| "from datetime import datetime", | |
| "'{:%d-%m-%Y %H:%M:%S}'.format(datetime(2001, 2, 3, 16, 5))", | |
| "" | |
| ] | |
| }, | |
| { | |
| 'name': 'pprint_color()', | |
| "snippet": [ | |
| "from pprint import pformat, pprint", | |
| "", | |
| "from pygments import highlight", | |
| "# from pygments.formatters.terminal import TerminalFormatter # dark theme", | |
| "from pygments.formatters.terminal256 import Terminal256Formatter #light theme", | |
| "from pygments.lexers.python import PythonLexer", | |
| "", | |
| "", | |
| "def pprint_color(obj, *args, **kwargs):", | |
| "# print(highlight(pformat(obj), PythonLexer(), TerminalFormatter()))", | |
| " print(highlight(pformat(obj, *args, **kwargs), PythonLexer(), Terminal256Formatter()))" | |
| ] | |
| }, | |
| { | |
| 'name': 'Print progress in the same line', | |
| 'snippet': ['num_episodes = 50000', | |
| 'for i in range(1, num_episodes + 1):', | |
| " # Print out which episode we're on, useful for debugging.", | |
| ' if i % 100 == 0:', | |
| " print('\rEpisode {}/{}.'.format(i, num_episodes), end='')", | |
| ' sys.stdout.flush()' | |
| ] | |
| }, | |
| { | |
| 'name': 'Render JSON (great for hierchical dicts)', | |
| 'external-link': 'https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html', | |
| }] | |
| }, | |
| { | |
| 'name': 'Iter', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'zip and unzip into lists', | |
| 'snippet': ['sub1a = [1, 3, 8]; sub2a = [2, 4, 9]', | |
| 'l1 = list(zip(sub1a, sub2a)) #[(1, 2), (3, 4), (8, 9)]', | |
| 'sub1b, sub2b = list(zip(*l1)) #(1, 3, 8), (2, 4, 9)', | |
| 'print(list(sub1b)) #[1, 3, 8]', | |
| 'print(list(sub2b)) #[2, 4, 9]' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'TQDM', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'TQDM for notebook', | |
| 'snippet': ['from tqdm.auto import tqdm, trange'] | |
| }, | |
| { | |
| "name": "TQDM with description", | |
| "snippet": [ | |
| "import time", | |
| "from tqdm.auto import tqdm", | |
| "", | |
| "series_list = [str(i) for i in range(100)]", | |
| "with tqdm(total=len(series_list)) as t:", | |
| " for series in series_list:", | |
| " t.set_description(f'Series: `{series}`')", | |
| " t.update()", | |
| "", | |
| " time.sleep(0.1)" | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'Dictionaries', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'flattening dict', | |
| 'snippet': ["def flatten_dict(dd, separator='_', prefix=''):", | |
| ' """', | |
| ' Flattens a dict, adding separator (and prefix or `level0`) between levels', | |
| ' """', | |
| ' return {', | |
| ' prefix + separator + k if prefix else k: v', | |
| ' for kk, vv in dd.items()', | |
| ' for k, v in flatten_dict(vv, separator, kk).items()', | |
| ' } if isinstance(dd, dict) else {prefix: dd}', | |
| '', | |
| "d = {'a': 1, 'b': {'c':2, 'd':3}}", | |
| "flatten_dict(d, '.')" | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'Parsing', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'from string to dict', | |
| 'snippet': ['import ast', | |
| '', | |
| '# Convert from a string to a dictionary', | |
| 'ast.literal_eval("{\'a\': 1, \'b\': 2}")' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'RegEx', | |
| 'sub-menu': [ | |
| { | |
| "name": "pattern exists?", | |
| "snippet": [ | |
| "import re", | |
| "p = re.compile(r'^[A]{0,1}F[p|P]{0,1}[\\d|z]$')", | |
| "assert p.match('AF1') is not None", | |
| "assert p.match('AF1m') is None" | |
| ] | |
| }, | |
| { | |
| 'name': 'search with groups', | |
| 'snippet': ['import re', | |
| '', | |
| "regex = r'\\$\\$ (\\(\\w{1,3}\\))$'", | |
| "test_str = '$$\\int S_{xx}(\\omega)d\\omega^{-1/2}$$ (Fp1)'", | |
| '', | |
| 'm = re.search(regex, test_str)', | |
| '', | |
| "assert m is not None, 'Could not find regex on `%s`' % test_str", | |
| '', | |
| "print(f'Match {matchNum} was found at {m.start()}-{m.end()}: {m.group()}')", | |
| '', | |
| 'for groupNum in range(0, len(m.groups())):', | |
| ' groupNum = groupNum + 1', | |
| '', | |
| " print('Group {groupNum} found at {start}-{end}: {group}'.format(groupNum = groupNum, start = m.start(groupNum), end = m.end(groupNum), group = m.group(groupNum)))", | |
| '', | |
| 'g = m.groups()', | |
| 'print(g)' | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'regex101.com', | |
| 'external-link': 'https://regex101.com/', | |
| } | |
| ] | |
| }, | |
| { | |
| 'name': 'IO', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Pickling', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'dump and load', | |
| 'snippet': ['import pickle', | |
| '', | |
| 'a = (df_movies, missing)', | |
| '', | |
| "with open('tmp.pickle', 'wb') as handle:", | |
| ' pickle.dump(a, handle)', | |
| '', | |
| "with open('tmp.pickle', 'rb') as handle:", | |
| ' pickle.load(handle)' | |
| ] | |
| }] | |
| }, | |
| { | |
| "name": "Numpy savez", | |
| "snippet": [ | |
| "np.savez('out/shap_interaction_values.testset.npz', shap_interaction_values=shap_interaction_values)", | |
| "shap_interaction_values = np.load('out/shap_interaction_values.testset.npz')['shap_interaction_values']" | |
| ] | |
| }, | |
| '---', | |
| { | |
| "name": "Read file contents to string", | |
| "snippet": [ | |
| "with open('data.txt', 'r') as file:", | |
| " data = file.read()" | |
| ] | |
| }, | |
| { | |
| "name": "Using pathlib to create paths", | |
| "snippet": [ | |
| "import pathlib", | |
| "", | |
| "import homer", | |
| "", | |
| "PACKAGE_ROOT = pathlib.Path(homer.__file__).resolve().parent #resolve() normalizes the path (sym links extraction, path correction)", | |
| "TRAINED_MODEL_DIR = PACKAGE_ROOT / 'trained_models'", | |
| "DATASET_DIR = PACKAGE_ROOT / 'datasets'" | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'Timing', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'with contextmanager', | |
| 'snippet': ['import time', | |
| 'from contextlib import contextmanager', | |
| '', | |
| '@contextmanager', | |
| 'def timer(title):', | |
| ' t0 = time.time()', | |
| ' yield', | |
| " print('{} - done in {:.0f}s'.format(title, time.time() - t0))", | |
| '', | |
| "with timer('ABC'):", | |
| ' time.sleep(2)' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'Download', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Download and extract zip with tqdm', | |
| 'snippet': ['from urllib.request import urlretrieve', | |
| 'from os.path import isfile, isdir', | |
| 'from tqdm import tqdm', | |
| 'import zipfile', | |
| '', | |
| "dataset_folder_path = 'data'", | |
| "dataset_filename = 'text8.zip'", | |
| "dataset_name = 'Text8 Dataset'", | |
| '', | |
| 'class DLProgress(tqdm):', | |
| ' last_block = 0', | |
| '', | |
| ' def hook(self, block_num=1, block_size=1, total_size=None):', | |
| ' self.total = total_size', | |
| ' self.update((block_num - self.last_block) * block_size)', | |
| ' self.last_block = block_num', | |
| '', | |
| 'if not isfile(dataset_filename):', | |
| " with DLProgress(unit='B', unit_scale=True, miniters=1, desc=dataset_name) as pbar:", | |
| ' urlretrieve(', | |
| " 'http://mattmahoney.net/dc/text8.zip',", | |
| ' dataset_filename,', | |
| ' pbar.hook)', | |
| '', | |
| 'if not isdir(dataset_folder_path):', | |
| ' with zipfile.ZipFile(dataset_filename) as zip_ref:', | |
| ' zip_ref.extractall(dataset_folder_path)', | |
| '', | |
| "with open('data/text8') as f:", | |
| ' text = f.read()' | |
| ] | |
| }, | |
| { | |
| 'name': 'download url to file', | |
| 'snippet': ['import urllib.request', | |
| "urllib.request.urlretrieve('about:blank', 'file.txt')" | |
| ] | |
| }] | |
| }, | |
| '---', | |
| { | |
| 'name': 'EDA', | |
| 'sub-menu': [ | |
| { | |
| "name": "Predictive Power Score heatmap (seaborn)", | |
| "snippet": [ | |
| "#https://towardsdatascience.com/rip-correlation-introducing-the-predictive-power-score-3d90808b9598", | |
| "import ppscore as pps", | |
| "df_pps = pps.matrix_tqdm(df)", | |
| "", | |
| "#https://blog.algorexhealth.com/2017/09/10-heatmaps-10-python-libraries/", | |
| "ax = plt.figure(figsize=(18,18)).gca() # (w,h)", | |
| "", | |
| "mask = np.zeros_like(df_pps.values, dtype=np.bool)", | |
| "mask[np.diag_indices_from(mask)] = True", | |
| "", | |
| "# Generate a custom colormap", | |
| "cmap = sns.color_palette('Blues')", | |
| "# cmap = sns.color_palette('YlOrRd')", | |
| "", | |
| "p=sns.heatmap(df_pps, annot=True, cmap=cmap, mask=mask,", | |
| " square=True, linewidths=.5, cbar_kws={'shrink': .5}, fmt='.2f',", | |
| " ax=ax)", | |
| "", | |
| "labels=df_pps.columns", | |
| "ax.xaxis.tick_top(); ax.tick_params(direction='out', width=1, colors='k', top=True, left=True)", | |
| "", | |
| "ax.set_xticklabels(labels, rotation=90);", | |
| "ax.set_yticklabels(labels, rotation=0);", | |
| "", | |
| "ax.set_ylabel('Predictee')", | |
| "ax.set_xlabel('Predictor');", | |
| "", | |
| "# The `target` row of the matrix tells you that the best univariate predictor of the it", | |
| "# on regression, MAE=0 yield 1.0 score and regressor that always predicts the median yields 0.0 score.", | |
| "# on classification, F1=1 yield 1.0 score and classifier that always predicts the most freq class yields 0.0 score." | |
| ] | |
| }, | |
| { | |
| "name": "Predictive Power Score heatmap (plotly)", | |
| "snippet": [ | |
| "#https://towardsdatascience.com/rip-correlation-introducing-the-predictive-power-score-3d90808b9598", | |
| "fig = df_pps.T.iplot(kind='heatmap', colorscale='Blues', asFigure=True)", | |
| "", | |
| "FONT_SIZE = 10", | |
| "", | |
| "#NOTE: annotations are too heavy for a matrix with ~100 features", | |
| "annotations = []", | |
| "for n, row in enumerate(df_pps.itertuples()):", | |
| " ix = row[0] #predicted", | |
| " for m, val in enumerate(row[1:]): # but index", | |
| " annotations.append(", | |
| " go.layout.Annotation(text=f'{val:.2f}',", | |
| " x=ix,", | |
| " y=df_pps.columns[m],", | |
| " xref='x1',", | |
| " yref='y1',", | |
| " showarrow=False,", | |
| " font=dict(size=FONT_SIZE, color='black' if val<.8 else 'white')))", | |
| "", | |
| "fig.update_layout(autosize=False,", | |
| " width=500,", | |
| " height=500,", | |
| " paper_bgcolor='rgba(0,0,0,0)',", | |
| " plot_bgcolor='rgba(0,0,0,0)',", | |
| " xaxis={", | |
| " 'title': {'text': '<b>Predictor</b>'},", | |
| " 'side': 'top',", | |
| " 'tickfont': {'size': FONT_SIZE}", | |
| " },", | |
| " yaxis={", | |
| " 'title': {'text': '<b>Predictee</b>'},", | |
| " 'autorange': 'reversed',", | |
| " 'tickfont': {'size': FONT_SIZE}", | |
| " },", | |
| " annotations=annotations)", | |
| "fig.show()", | |
| "# The `target` row of the matrix tells you that the best univariate predictor of the it", | |
| "# on regression, MAE=0 yield 1.0 score and regressor that always predicts the median yields 0.0 score.", | |
| "# on classification, F1=1 yield 1.0 score and classifier that always predicts the most freq class yields 0.0 score." | |
| ] | |
| }, | |
| { | |
| "name": "Predictive Power Score target hmap (plotly)", | |
| "snippet": [ | |
| "#https://towardsdatascience.com/rip-correlation-introducing-the-predictive-power-score-3d90808b9598", | |
| "target = df_all.columns[-1]", | |
| "feats =df_all.columns[:-1]", | |
| "d = {}", | |
| "for f in feats:", | |
| " res = pps.score(df_all, f, target)", | |
| " d[f] = res['ppscore']", | |
| "", | |
| "df_pps_target = pd.DataFrame([d], index=['target']).T", | |
| "FONT_SIZE = 10", | |
| "fig = df_pps_target.iplot(kind='heatmap', colorscale='Blues', asFigure=True)", | |
| "", | |
| "#NOTE: annotations are too heavy for a matrix with ~100 features", | |
| "annotations = []", | |
| "for n, row in enumerate(df_pps_target.itertuples()):", | |
| " ix = row[0] #predicted", | |
| " for m, val in enumerate(row[1:]): # but index", | |
| " annotations.append(", | |
| " go.layout.Annotation(text='{:.2f}'.format(val).lstrip('0'),", | |
| " x=ix,", | |
| " y=df_pps_target.columns[m],", | |
| " xref='x1',", | |
| " yref='y1',", | |
| " showarrow=False,", | |
| " font=dict(size=FONT_SIZE, color='black' if val<(.8*df_pps_target.values.max()) else 'white')))", | |
| "", | |
| "fig.update_layout(autosize=False,", | |
| " width=1000,", | |
| " height=300,", | |
| " paper_bgcolor='rgba(0,0,0,0)',", | |
| " plot_bgcolor='rgba(0,0,0,0)',", | |
| " xaxis={", | |
| " 'title': {'text': '<b>Predictor</b>'},", | |
| " 'side': 'top',", | |
| " 'tickfont': {'size': FONT_SIZE}", | |
| " },", | |
| " yaxis={", | |
| " 'title': {'text': '<b>Predictee</b>'},", | |
| " 'autorange': 'reversed',", | |
| " 'tickfont': {'size': FONT_SIZE}", | |
| " },", | |
| " annotations=annotations)", | |
| "fig.show()", | |
| "# The `target` row of the matrix tells you that the best univariate predictor of the it", | |
| "# on regression, MAE=0 yield 1.0 score and regressor that always predicts the median yields 0.0 score.", | |
| "# on classification, F1=1 yield 1.0 score and classifier that always predicts the most freq class yields 0.0 score." | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'Numpy', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'reshaping', | |
| 'snippet': ['import numpy as np', | |
| 'arr = np.random.randn(100) # (100,)', | |
| 'col_vec = arr[:, np.newaxis] # (100,1)', | |
| 'row_vec = arr[np.newaxis, :] # (1, 100)', | |
| 'arr2 = col_vec.ravel() # (100,)', | |
| 'print(arr.shape, col_vec.shape, row_vec.shape, arr2.shape)' | |
| ] | |
| }, | |
| { | |
| "name": "Numpy unique (like Series.value_counts())", | |
| "snippet": [ | |
| "y = np.random.randint(2, size=(100,))", | |
| "{v:c for (v,c) in np.unique(y, return_counts=True)}" | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'Pandas', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'fast DataFrame creation', | |
| 'snippet': ['import pandas as pd', | |
| 'import numpy as np', | |
| "pd.DataFrame(np.random.rand(4,8), columns=list('abcdefgh'))" | |
| ] | |
| }, | |
| { | |
| 'name': 'display options and formatting', | |
| 'snippet': ['pd.options.display.max_rows=50', | |
| 'pd.options.display.max_columns=200', | |
| 'pd.options.display.max_colwidth=60 #no ...', | |
| "pd.options.display.float_format='{:.2f}'.format", | |
| "#pd.reset_drinoption('all') #reset to default", | |
| "#pd.describe_option('rows') #describe all options that contains 'rows' in their name", | |
| '', | |
| '(df.head(10).style.format({', | |
| " 'Age': '{:.1f}',", | |
| " 'Date': '{:%m/%d/%y}'", | |
| '}))' | |
| ] | |
| }, | |
| { | |
| 'name': 'profile report', | |
| 'snippet': ['import pandas_profiling', | |
| 'pandas_profiling.ProfileReport(df)' | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'simple filter', | |
| 'snippet': ["df = pd.read_csv('http://bit.ly/drinksbycountry')", | |
| "df[(df.continent == 'Europe') & (df.beer_servings > 200)]" | |
| ] | |
| }, | |
| { | |
| "name": "assert no NaNs", | |
| "snippet": [ | |
| "_ = df.dropna(axis=0, subset=df.columns, how='any', inplace=False)", | |
| "assert _.shape == df.shape, '`df` has nans'" | |
| ] | |
| }, | |
| { | |
| "name": "binnify and return indexes of bins", | |
| "snippet": [ | |
| "import pandas as pd", | |
| "import numpy as np", | |
| "np.random.seed(1)", | |
| "df = pd.DataFrame(np.random.randint(0, 9+1, size=(10,1)), columns=list('a'))", | |
| "", | |
| "bins = np.arange(0,9+1,2)", | |
| "df['a_bin'] = np.digitize(df['a'], bins=bins)", | |
| "df" | |
| ] | |
| }, | |
| { | |
| 'name': 'pivot_table', | |
| 'snippet': ["df = pd.read_csv('http://bit.ly/kaggletrain') #titanic", | |
| "tbl = df.pivot_table(index='Sex', columns='Pclass', values='Survived', aggfunc='count')", | |
| '#add margins=True, for summation', | |
| "tbl.iplot(kind='bar', barmode='stack')", | |
| 'tbl.head()' | |
| ] | |
| }, | |
| { | |
| "name": "clip() values by lower/upper", | |
| "snippet": [ | |
| "import pandas as pd", | |
| "import numpy as np", | |
| "df1 = pd.DataFrame(np.random.randint(2, size=(4,4))*2-1 * np.random.rand(4,4), columns=list('abcd')) #[-1,1]", | |
| "df2 = df1.clip(lower=-0.5,upper=0.5)", | |
| "", | |
| "print(df1.head(1))", | |
| "print(df2.head(1))" | |
| ] | |
| }, | |
| { | |
| "name": "transform() values using lambda", | |
| "snippet": [ | |
| "import pandas as pd", | |
| "import numpy as np", | |
| "df = pd.DataFrame(np.random.rand(4,4), columns=list('abcd')) #[0,1]", | |
| "df = df.transform(lambda x: np.log(x))", | |
| "# df = df.transform([np.sqrt, np.exp])", | |
| "", | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "name": "groupby with named aggregation", | |
| "snippet": [ | |
| "import numpy as np", | |
| "import pandas as pd", | |
| "df = pd.read_csv('/datasets/direct_marketing/DirectMarketing.csv')", | |
| "", | |
| "# df[['Age','Salary','AmountSpent']].groupby('Age').agg({'Salary':'mean', 'AmountSpent':'sum'}).round(2)", | |
| "", | |
| "df[['Age','Salary','AmountSpent']].groupby('Age').agg(", | |
| " avgSalary = ('Salary','mean'), #redundant pd.NamedAgg", | |
| " totalSpent = ('AmountSpent','sum'), #this can be a lambda x: also, e.g. np.sum", | |
| " count = ('Age','count') #here is a counter", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "name": "groupby agg (apply() vs transform())", | |
| "snippet": [ | |
| "import numpy as np", | |
| "import pandas as pd", | |
| "", | |
| "df = pd.DataFrame({", | |
| " 'restaurant_id': [101,102,103,104,105,106,107],", | |
| " 'address': ['A','B','C','D', 'E', 'F', 'G'],", | |
| " 'city': ['London','London','London','Oxford','Oxford', 'Durham', 'Durham'],", | |
| " 'sales': [10,500,48,12,21,22,14]", | |
| "})", | |
| "", | |
| "g = df.groupby('city')['sales']", | |
| "df_t = g.transform(np.sum) #sum sales by city", | |
| "df_a = g.apply(np.sum)", | |
| "", | |
| "print(df_a) #operates on multiple series", | |
| "print(df_t) #operates on a single series", | |
| "", | |
| "# here is something we can achieve efficently with transform()", | |
| "(df['sales']/df_t).apply(lambda x: format(x, '.2%')) #% sales per city" | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'Pre-Processing', | |
| 'sub-menu': [ | |
| { | |
| "name": "zscore", | |
| "snippet": [ | |
| "from scipy.stats import zscore", | |
| "z_data = df_all[df_all.columns[:-1]].apply(zscore)", | |
| "z_data = z_data[(np.abs(z_data) < 4).all(axis=1)] #remove rows with outliers", | |
| "sns.boxplot(data=z_data) #features only" | |
| ] | |
| }, | |
| { | |
| 'name': 'scale() to {0,1} function', | |
| 'snippet': ['def scale(x, raw_range=(None, 255), feature_range=(-1, 1)):', | |
| ' # scale to (0, 1) ', | |
| ' source_range = np.zeros(2)', | |
| ' source_range[1] = x.max() if raw_range[1] is None else raw_range[1]', | |
| ' source_range[0] = x.min() if raw_range[0] is None else raw_range[0]', | |
| ' ', | |
| ' x = ((x - source_range[0])/(source_range[1] - source_range[0]))', | |
| ' ', | |
| ' # scale to feature_range ', | |
| ' min, max = feature_range', | |
| ' x = x * (max - min) + min', | |
| ' return x' | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'ML', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Supervised', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Random Forest', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'RandomForestRegressor', | |
| 'snippet': ['from sklearn.ensemble import RandomForestRegressor', | |
| 'rf = RandomForestRegressor()', | |
| 'rf.fit(X_train, y_train)', | |
| 'predictions = rf.predict(X_valid)', | |
| 'rmse = np.sqrt(np.mean(np.square(predictions - y_valid)))', | |
| 'print(rmse)' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'SVM', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'svm for binary classification', | |
| 'snippet': ['from sklearn import svm', | |
| "# clf = svm.SVC(C=1.0, kernel='rbf', gamma=.7)", | |
| "clf = svm.SVC(kernel='linear', probability=True)", | |
| '', | |
| 'clf.fit(X_train, y_train)', | |
| 'prob = clf.predict_proba(X_train)[:,1]', | |
| 'acc = clf.score(X_train, y_train)', | |
| 'print(acc) #WARN: becareful when using imbalanced classes' | |
| ] | |
| }, | |
| { | |
| 'name': 'svm with `rbf` kernel for classification', | |
| 'snippet': ['import numpy as np; np.random.seed(90210)', | |
| 'from numpy.random import permutation', | |
| 'from sklearn import svm, datasets', | |
| '', | |
| 'iris = datasets.load_iris()', | |
| 'per = permutation(iris.target.size)', | |
| 'iris.data = iris.data[per]', | |
| 'iris.target = iris.target[per]', | |
| '', | |
| "clf = svm.SVC(C=1.0, kernel='rbf', gamma=.7)", | |
| 'clf.fit(iris.data[:90], iris.target[:90])', | |
| '', | |
| 'acc = clf.score(iris.data[90:], iris.target[90:])', | |
| 'print(acc)' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'LightGBM', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'train-test classification', | |
| 'snippet': ['import lightgbm as lgb', | |
| 'from sklearn.metrics import roc_auc_score', | |
| '', | |
| '# Model with default hyperparameters', | |
| "model = lgb.LGBMClassifier(objective = 'binary', random_state=RANDOM_SEED)", | |
| '', | |
| 'model.fit(X, y)', | |
| '', | |
| 'predictions = model.predict_proba(X_test)[:, 1]', | |
| 'auc = roc_auc_score(y_test, predictions)', | |
| '', | |
| "print('The baseline score on the test set is {:.4f}.'.format(auc))" | |
| ] | |
| }, | |
| { | |
| 'name': 'train-cv classification', | |
| 'snippet': ['import lightgbm as lgb', | |
| '', | |
| '# Create a lgb dataset', | |
| 'train_set = lgb.Dataset(X, label = y)', | |
| '', | |
| '# Perform cross validation with 10 folds (with early stopping)', | |
| 'params = {} #default', | |
| "r = lgb.cv(params, train_set, num_boost_round = 10000, nfold = 10, metrics = 'auc', ", | |
| ' early_stopping_rounds = 100, verbose_eval = False, seed = RANDOM_SEED)', | |
| '', | |
| '# Highest score', | |
| "r_best = np.max(r['auc-mean'])", | |
| '', | |
| '# Standard deviation of best score', | |
| "r_best_std = r['auc-stdv'][np.argmax(r['auc-mean'])]", | |
| '', | |
| "print('The maximium ROC AUC on the validation set was {:.5f} with std of {:.5f}.'.format(r_best, r_best_std))", | |
| "print('The ideal number of iterations was {}.'.format(np.argmax(r['auc-mean']) + 1))" | |
| ] | |
| }] | |
| }, | |
| { | |
| "name": "KNN with hpo", | |
| "snippet": [ | |
| "import pandas as pd", | |
| "import numpy as np", | |
| "", | |
| "from sklearn.neighbors import KNeighborsClassifier", | |
| "from sklearn.model_selection import GridSearchCV", | |
| "", | |
| "from sklearn.model_selection import cross_val_score", | |
| "from sklearn.model_selection import train_test_split", | |
| "", | |
| "from sklearn.metrics import accuracy_score, classification_report", | |
| "", | |
| "df = pd.read_csv('/datasets/diabetes/diabetes_data.csv')", | |
| "X,y = df.drop(columns=['diabetes']), df['diabetes'].values", | |
| "", | |
| "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0, stratify=y)", | |
| "", | |
| "knn = KNeighborsClassifier()", | |
| "param_grid = {'n_neighbors': np.arange(start=1, stop=25+1, step=2), 'weights': ['uniform', 'distance']}", | |
| "", | |
| "knn_gscv = GridSearchCV(knn, param_grid, cv=5, verbose=1) #n_jobs=4", | |
| "knn_gscv.fit(X_train, y_train)", | |
| "", | |
| "print(f'best params: {knn_gscv.best_params_}, mean cv score: {knn_gscv.best_score_}", | |
| "')", | |
| "knn = knn_gscv.best_estimator_", | |
| "print(knn)", | |
| "", | |
| "print('')", | |
| "pred = knn.predict(X_test)", | |
| " ", | |
| "# evaluate and return accuracy", | |
| "print(f'Accuracy of best_estimator on test set: {knn.score(X_test, y_test)}')" | |
| ] | |
| }, | |
| ] | |
| }, | |
| { | |
| 'name': 'Unsupervised', | |
| 'sub-menu': [ | |
| { | |
| "name": "Clustering metrics", | |
| "snippet": [ | |
| "## [clustering metrics](https://scikit-learn.org/stable/modules/clustering.html#k-means)", | |
| "* `Inertia (within-cluster sum-of-squares)`: $\\sum_{i=0}^{n}\\min_{\\mu_j \\in C}(||x_i - \\mu_j||^2)$", | |
| "* `(Adjusted) Random Index`: compares `labels_true` to `labels pred` being permutation-invariant (random labeling = 0)", | |
| "* `(Adjusted) Mutual Information`: compares `labels_true` to `labels pred` being permutation-invariant (random labeing = 0; upper bound = 1)", | |
| "* `Homogeneity, completeness and V-measure` ($[0,1]$, highr is better): ", | |
| " * `Homogeneity`: each cluster contains only members of a single class", | |
| " * `completeness`: all members of a given class are assigned to the same cluster", | |
| " * `V-measure`: Their harmonic mean (with eta=1$)", | |
| "* `Silhouette`: higher score relates to a model with better defined clusters.", | |
| " * Bounded between -1 for incorrect clustering and +1 for highly dense clustering. Zero indicate overlapping clusters.", | |
| " * The score is higher when clusters are dense and well separated, which relates to a standard concept of a cluster." | |
| ] | |
| }, | |
| { | |
| "name": "IsolationForest for outlier detection", | |
| "snippet": [ | |
| "import time", | |
| "", | |
| "import numpy as np", | |
| "import matplotlib", | |
| "import matplotlib.pyplot as plt", | |
| "", | |
| "from sklearn.datasets import make_blobs", | |
| "from sklearn.ensemble import IsolationForest", | |
| "", | |
| "RANDOM_STATE = 90210", | |
| "", | |
| "# Example settings", | |
| "n_samples = 300", | |
| "outliers_fraction = 0.15", | |
| "n_outliers = int(outliers_fraction * n_samples)", | |
| "n_inliers = n_samples - n_outliers", | |
| "", | |
| "", | |
| "# define outlier/anomaly detection methods to be compared", | |
| "algorithm = IsolationForest(contamination=outliers_fraction,", | |
| " random_state=RANDOM_STATE)", | |
| "", | |
| "# Define datasets", | |
| "blobs_params = dict(random_state=RANDOM_STATE, n_samples=n_inliers, n_features=2)", | |
| "X = make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5],", | |
| " **blobs_params)[0]", | |
| "", | |
| "print(f'Shape: {X.shape}')", | |
| "", | |
| "# Compare given classifiers under given settings", | |
| "xx, yy = np.meshgrid(np.linspace(-7, 7, 150),", | |
| " np.linspace(-7, 7, 150))", | |
| "", | |
| "rng = np.random.RandomState(RANDOM_STATE)", | |
| "", | |
| "", | |
| "# Add outliers", | |
| "X = np.concatenate([X, rng.uniform(low=-6, high=6,", | |
| " size=(n_outliers, 2))], axis=0)", | |
| "", | |
| "t0 = time.time()", | |
| "algorithm.fit(X)", | |
| "t1 = time.time()", | |
| "", | |
| "", | |
| "# fit the data and tag outliers", | |
| "y_pred = algorithm.fit(X).predict(X) # [-1,1]", | |
| "y_pred = (y_pred *.5 +.5).astype(int) # [0,1] # zeros are outliers", | |
| "", | |
| "# plot the levels lines and the points", | |
| "Z = algorithm.predict(np.c_[xx.ravel(), yy.ravel()])", | |
| "Z = Z.reshape(xx.shape)", | |
| "plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')", | |
| "", | |
| "colors = np.array(['#377eb8', '#ff7f00'])", | |
| "plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])", | |
| "", | |
| "plt.xlim(-7, 7)", | |
| "plt.ylim(-7, 7)", | |
| "plt.xticks(())", | |
| "plt.yticks(())", | |
| "", | |
| "print('Duration: ' + ('%.2fs' % (t1 - t0)).lstrip('0'))", | |
| "", | |
| "#value_counts", | |
| "y = np.bincount(y_pred)", | |
| "ii = np.nonzero(y)[0]", | |
| "np.vstack((ii, y[ii])).T" | |
| ] | |
| }, | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'Dimensionality Reduction', | |
| 'sub-menu': [ | |
| { | |
| "name": "PCA", | |
| "snippet": [ | |
| "import matplotlib.pyplot as plt", | |
| "from sklearn.datasets import make_classification", | |
| "from sklearn.decomposition import PCA", | |
| "X, y = make_classification(1000, 5, n_informative=1, n_classes=2, n_clusters_per_class=1, random_state=0)", | |
| "n_components = 2", | |
| "pca = PCA(n_components=n_components)", | |
| "X_ = pca.fit_transform(X)", | |
| "", | |
| "print('explained_variance_ratio_:', pca.explained_variance_ratio_)", | |
| "# sns.barplot(np.arange(n_components)+1, pca.explained_variance_ratio_)", | |
| "print('singular_values_:', pca.singular_values_)", | |
| "", | |
| "ax = plt.figure(figsize=(8,8)).gca()", | |
| "ax.scatter(X_[:, 0], X_[:, 1], c=y, cmap=plt.cm.Spectral)" | |
| ] | |
| }, | |
| { | |
| "name": "T-SNE", | |
| "snippet": [ | |
| "import matplotlib.pyplot as plt", | |
| "from sklearn.datasets import make_classification", | |
| "from sklearn.manifold import TSNE", | |
| "X, y = make_classification(1000, 5, n_informative=1, n_classes=2, n_clusters_per_class=1, random_state=0)", | |
| "n_components = 2", | |
| "", | |
| "#It is highly recommended to use another dimensionality reduction method (e.g. PCA for dense data or TruncatedSVD for sparse data) ", | |
| "# to reduce the number of dimensions to a reasonable amount (e.g. 50) if the number of features is very high.", | |
| "", | |
| "tsne = TSNE(n_components=n_components, init='pca',", | |
| " random_state=0)", | |
| "", | |
| "X_ = tsne.fit_transform(X)", | |
| "", | |
| "ax = plt.figure(figsize=(8,8)).gca()", | |
| "ax.scatter(X_[:, 0], X_[:, 1], c=y, cmap=plt.cm.Spectral)", | |
| "", | |
| "print('n_iter_:', tsne.n_iter_)", | |
| "print('kl_divergence_:', tsne.kl_divergence_)" | |
| ] | |
| }] | |
| }, | |
| '---', | |
| { | |
| 'name': 'binary classification metrices', | |
| 'snippet': [ | |
| '* **Sensitivity** (*TPR/recall*) - proportion of positives that are correctly classified', | |
| '* **Specificity** (*TNR*) - proportion of negatives that are correctly classified', | |
| '', | |
| "* **FPR** (*1-Specificity*) - When it's actually no, how often does it predict yes?", | |
| '', | |
| '* **Precision** - proportion of true positives out of all detected positives' | |
| ] | |
| }, | |
| { | |
| 'name': 'binary classification AUC ROC and PR', | |
| 'snippet': ['from sklearn.metrics import roc_auc_score, average_precision_score', | |
| '', | |
| 'auc_roc = roc_auc_score(y_train, prob, sample_weight=None)', | |
| "print('auc_roc', auc_roc)", | |
| 'auc_pr = average_precision_score(y_train, prob, sample_weight=None)', | |
| "print('auc_pr', auc_pr)", | |
| '', | |
| 'from sklearn.utils import compute_sample_weight as sklearn_compute_sample_weight', | |
| "sample_weight = sklearn_compute_sample_weight(class_weight='balanced', y=y_train)", | |
| '', | |
| 'auc_roc = roc_auc_score(y_train, prob, sample_weight=sample_weight)', | |
| "print('auc_roc (using sample_weight)', auc_roc)", | |
| 'auc_pr = average_precision_score(y_train, prob, sample_weight=sample_weight)', | |
| "print('auc_pr (using sample_weight)', auc_pr)" | |
| ] | |
| }, | |
| { | |
| 'name': 'plotly ROC for binary classification', | |
| 'snippet': ['from sklearn.metrics import roc_curve, roc_auc_score', | |
| '', | |
| 'y_true = np.random.randint(0, high=1+1, size=(100,))', | |
| 'y_pred = np.random.rand(*(100,))', | |
| '# y_pred = y_true', | |
| '', | |
| 'use_sample_weight = False', | |
| 'sample_weight=None', | |
| 'from sklearn.utils import compute_sample_weight as sklearn_compute_sample_weight', | |
| 'if use_sample_weight:', | |
| " sample_weight = sklearn_compute_sample_weight(class_weight='balanced', y=y_true)", | |
| '', | |
| 'fpr, tpr, thresholds = roc_curve(y_true,y_pred, sample_weight=sample_weight)', | |
| 'auc = roc_auc_score(y_true,y_pred, sample_weight=sample_weight)', | |
| '', | |
| 'lw = 2', | |
| '', | |
| 'trace1 = go.Scatter(', | |
| ' x=fpr,', | |
| ' y=tpr,', | |
| " mode='lines',", | |
| " line=dict(color='darkorange', width=lw),", | |
| " fill='tonexty',", | |
| " name=f'ROC curve<br>(area = {auc:.4f})',", | |
| " customdata=[f'TH: {t:.2f}' for t in thresholds],", | |
| " hovertemplate='FPR: %{x:.2f}<br>' + 'TPR: %{y:.2f}<br>' +", | |
| " '%{customdata}<br>' + '<extra></extra>',", | |
| ' showlegend=True', | |
| ')', | |
| '', | |
| '#WARN: when classes are imbalanced this might not be accurate', | |
| 'trace2 = go.Scatter(x=[0, 1.01],', | |
| ' y=[0, 1.01],', | |
| " mode='lines',", | |
| " line=dict(color='navy', width=lw, dash='dash'),", | |
| " name=f'Random classifier',", | |
| ' showlegend=True)', | |
| '', | |
| "layout = go.Layout(title='<b>R</b>eceiver <b>O</b>perating <b>C</b>haracteristic curve' +", | |
| " ('<br><i>(with sample weighting)</i>' if use_sample_weight else ''),", | |
| " xaxis=dict(title='<b>FPR</b> (1-Specificity)<br>Incorrectly predicted positives', range=[0.001, 1.01]),", | |
| " yaxis=dict(title='<b>TPR</b> (Sensitivity)<br>Positives detected out of all positives', range=[0.001, 1.01]))", | |
| '', | |
| 'fig = go.Figure(data=[trace1, trace2], layout=layout)', | |
| '', | |
| '', | |
| "#plotly.io.write_image(fig, 'figures/sup1a.pdf')", | |
| '', | |
| 'iplot(fig, show_link=True)' | |
| ] | |
| }, | |
| { | |
| 'name': 'plotly PR curve for binary classification', | |
| 'snippet': ['from sklearn.metrics import precision_recall_curve, average_precision_score', | |
| 'y_true = np.random.randint(0, high=1+1, size=(100,))', | |
| 'y_pred = np.random.rand(*(100,))', | |
| '# y_pred = y_true', | |
| '', | |
| 'use_sample_weight = False', | |
| 'sample_weight=None', | |
| 'from sklearn.utils import compute_sample_weight as sklearn_compute_sample_weight', | |
| 'if use_sample_weight:', | |
| " sample_weight = sklearn_compute_sample_weight(class_weight='balanced', y=y_true)", | |
| ' ', | |
| 'precision, recall, thresholds = precision_recall_curve(y_true,y_pred, ', | |
| ' sample_weight=sample_weight)', | |
| '', | |
| '#close the curve', | |
| 'recall = np.append([1.], recall)', | |
| 'precision = np.append([0.], precision)', | |
| 'thresholds = np.append([0.], thresholds)', | |
| '', | |
| 'auc = average_precision_score(y_true,y_pred, sample_weight=sample_weight)', | |
| '', | |
| 'lw = 2', | |
| '', | |
| 'trace1 = go.Scatter(', | |
| ' x=recall,', | |
| ' y=precision,', | |
| " mode='lines',", | |
| " line=dict(color='darkorange', width=lw),", | |
| " fill='tonexty',", | |
| " name=f'ROC curve<br>(area = {auc:.4f})',", | |
| " customdata=[f'TH: {t:.2f}' for t in thresholds],", | |
| " hovertemplate='Recall: %{x:.2f}<br>' + 'Precision: %{y:.2f}<br>' +", | |
| " '%{customdata}<br>' + '<extra></extra>',", | |
| ' showlegend=True', | |
| ')', | |
| '', | |
| '#WARN: when classes are imbalanced this might not be accurate', | |
| 'trace2 = go.Scatter(x=[0, 1.01],', | |
| ' y=[.5, .5],', | |
| " mode='lines',", | |
| " line=dict(color='navy', width=lw, dash='dash'),", | |
| " name=f'Random classifier',", | |
| ' showlegend=True)', | |
| '', | |
| 'eps = np.finfo(np.float32).eps', | |
| "layout = go.Layout(title='<b>P</b>recision-<b>R</b>ecall curve' + ", | |
| " ('<br><i>(with sample weighting)' if use_sample_weight else ''),", | |
| " xaxis=dict(title='<b>Recall</b><br>(Positives that were correctly classified)', range=[0.001, 1.01]),", | |
| " yaxis=dict(title='<b>Precision</b><br>(Positives detected out of all positives)', range=[0.001, 1.01]))", | |
| '', | |
| '', | |
| 'fig = go.Figure(data=[trace1, trace2], layout=layout)', | |
| '', | |
| "#plotly.io.write_image(fig, 'figures/sup1b.pdf')", | |
| '', | |
| 'iplot(fig, show_link=True)' | |
| ] | |
| }, | |
| { | |
| 'name': 'train_test_split', | |
| 'snippet': ['from sklearn.model_selection import train_test_split', | |
| 'X, X_val, y, y_val = train_test_split(', | |
| ' X, y, test_size=0.33, random_state=RANDOM_SEED)' | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| 'name': 'DL', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'PyTorch', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'imports and GPU', | |
| 'snippet': ['import torch', | |
| 'import torch.nn as nn', | |
| 'import torch.nn.functional as F', | |
| 'import torch.optim as optim', | |
| 'import torchvision', | |
| '', | |
| "assert torch.cuda.is_available(), 'No GPU!'", | |
| "DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')", | |
| 'print(DEVICE)', | |
| '', | |
| 'X = torch.rand(1,5).to(DEVICE)', | |
| 'X' | |
| ] | |
| }, | |
| { | |
| 'name': 'torch model summary', | |
| 'snippet': ['import torchsummary', | |
| '', | |
| "torchsummary.summary(model, (3, 224, 224), device='cpu')" | |
| ] | |
| }, | |
| { | |
| 'name': 'linear regression on gpu', | |
| 'snippet': ['import torch', | |
| 'from torch import optim', | |
| 'from torch import nn', | |
| '', | |
| 'def get_data():', | |
| ' from sklearn.datasets import make_regression', | |
| '', | |
| ' n_features = 1', | |
| ' n_samples = 100', | |
| '', | |
| ' X, y = make_regression(', | |
| ' n_samples=n_samples,', | |
| ' n_features=n_features,', | |
| ' noise=10,', | |
| ' )', | |
| ' ', | |
| ' X = torch.from_numpy(X).float()', | |
| ' y = torch.from_numpy(y.reshape((n_samples, n_features))).float()', | |
| ' ', | |
| ' X, y = X.to(device), y.to(device)', | |
| ' return X,y', | |
| '', | |
| 'class LinReg(nn.Module):', | |
| ' def __init__(self, input_dim):', | |
| ' super().__init__()', | |
| ' self.beta = nn.Linear(input_dim, 1)', | |
| ' ', | |
| ' def forward(self, X):', | |
| ' return self.beta(X)', | |
| '', | |
| "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')", | |
| 'X,y = get_data()', | |
| '', | |
| 'n_samples, n_features = X.shape', | |
| 'print(X.shape, y.shape)', | |
| '', | |
| '#(Father, Son, Holy Ghost) \\equiv (Model, Loss, Optimizer)', | |
| 'model = LinReg(n_features).to(device) ', | |
| 'criterion = nn.MSELoss()', | |
| 'optimizer = optim.SGD(model.parameters(), lr=1e-1)', | |
| '', | |
| '', | |
| '#Training', | |
| 'from tqdm.auto import tqdm, trange', | |
| 'for _ in trange(10):', | |
| ' # Train step', | |
| ' model.train()', | |
| " optimizer.zero_grad() #IMPORTANT: reset (don't accumulate) gradients", | |
| '', | |
| ' y_ = model(X)', | |
| ' loss = criterion(y_, y)', | |
| '', | |
| ' loss.backward() #compute gradients wrt the weights', | |
| ' optimizer.step() #apply the learning rule', | |
| '', | |
| ' # Eval (suppose to be on the validation data)', | |
| ' model.eval()', | |
| ' with torch.no_grad():', | |
| ' y_ = model(X) ', | |
| '', | |
| '# Vis', | |
| 'fig, ax = plt.subplots()', | |
| "ax.plot(X.cpu().numpy(), y_.cpu().numpy(), '.', label='pred')", | |
| "ax.plot(X.cpu().numpy(), y.cpu().numpy(), '.', label='data')", | |
| "ax.set_title(f'MSE: {loss.item():0.1f}')", | |
| 'ax.legend();' | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'Parsimonous MNIST', | |
| 'snippet': ["#PyTorch ANN with > 99% accuracy (after 20 epochs) on the MNIST dataset.", | |
| "", | |
| "# ~~~~ Boilerplate ~~~", | |
| "import torch #1.4.0", | |
| "from torch import nn", | |
| "from tqdm.auto import tqdm, trange", | |
| "import numpy as np", | |
| "", | |
| "# ~~~~ Options ~~~", | |
| "opts = {", | |
| " 'lr': 1e-3,", | |
| " 'epochs': 1, #20 achieves 99%", | |
| " 'batch_size': 64", | |
| "}", | |
| "", | |
| "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')", | |
| "print(device)", | |
| "", | |
| "# ~~~~ Data loading ~~~", | |
| "import torchvision.datasets as dset #0.5.0", | |
| "import torchvision.transforms as transforms", | |
| "dataroot = '/datasets'", | |
| "ds_train = dset.MNIST(root=dataroot, train=True, download=False,", | |
| " transform=transforms.Compose([", | |
| " transforms.ToTensor(),", | |
| " transforms.Normalize((0.1307,), (0.3081,))", | |
| " ]))", | |
| "ds_test = dset.MNIST(root=dataroot, train=False, download=False,", | |
| " transform=transforms.Compose([", | |
| " transforms.ToTensor(),", | |
| " transforms.Normalize((0.1307,), (0.3081,))", | |
| " ]))", | |
| "train_loader = torch.utils.data.DataLoader(dataset=ds_train, batch_size=opts['batch_size'], shuffle=True)", | |
| "test_loader = torch.utils.data.DataLoader(dataset=ds_test, batch_size=opts['batch_size'], shuffle=False)", | |
| "", | |
| "# ~~~~ Model, Optimizer, Loss ~~~", | |
| "class CNN(nn.Module):", | |
| " def __init__(self, input_size=(1,28,28), num_classes=10):", | |
| " super(CNN, self).__init__()", | |
| "", | |
| " self.layer1 = nn.Sequential(", | |
| " nn.Conv2d(input_size[0], 32, kernel_size=5),", | |
| " nn.ReLU(),", | |
| " nn.MaxPool2d(kernel_size=2))", | |
| " ", | |
| " self.layer2 = nn.Sequential(", | |
| " nn.Conv2d(32, 64, kernel_size=5),", | |
| " nn.ReLU(),", | |
| " nn.MaxPool2d(kernel_size=2))", | |
| "", | |
| " self.fc1 = nn.Linear(4 * 4 * 64, num_classes)", | |
| " ", | |
| " ", | |
| " def forward(self, x):", | |
| " # x: (Nx1x28x28) tensor", | |
| " x = self.layer1(x)", | |
| " x = self.layer2(x)", | |
| " x = x.reshape(x.size(0), -1)", | |
| " x = self.fc1(x)", | |
| " return x", | |
| " ", | |
| "model = CNN((1, 28, 28), 10).to(device)", | |
| "optimizer = torch.optim.Adam(model.parameters(), opts['lr'])", | |
| "criterion = torch.nn.CrossEntropyLoss() # loss function", | |
| "", | |
| "# ~~~~ Main loop ~~~", | |
| "for epoch in range(opts['epochs']):", | |
| " model.train()", | |
| " train_loss = []", | |
| " N = len(train_loader)", | |
| " loss_, NUDGE = np.nan, int(N/10)", | |
| " pbar = tqdm(enumerate(train_loader), total=N,", | |
| " desc=f'Epoch[{epoch+1:^3}], Batch[{0+1:^4}], Loss[{loss_:.2f}]')", | |
| " for i, (data, labels) in pbar:", | |
| " data, labels = data.to(device), labels.to(device)", | |
| " outputs = model(data)", | |
| " loss = criterion(outputs, labels)", | |
| " optimizer.zero_grad()", | |
| " loss.backward()", | |
| " optimizer.step()", | |
| " loss_ = loss.item()", | |
| " train_loss.append(loss_)", | |
| " if i%NUDGE == NUDGE-1:", | |
| " pbar.set_description(f'Epoch[{epoch+1:^3}], Batch[{i+1:^4}], Loss[{loss_:.2f}]')", | |
| " ", | |
| " ", | |
| " model.eval()", | |
| " test_loss = []", | |
| " test_accuracy = []", | |
| " for i, (data, labels) in enumerate(test_loader):", | |
| " data, labels = data.to(device), labels.to(device)", | |
| " outputs = model(data)", | |
| " _, predicted = torch.max(outputs.data, 1)", | |
| " loss = criterion(outputs, labels)", | |
| " test_loss.append(loss.item())", | |
| " test_accuracy.append((predicted == labels).sum().item() / predicted.size(0))", | |
| " ", | |
| " print(f'Epoch: {epoch}, train loss: {np.mean(train_loss):.3f}, test loss: {np.mean(test_loss):.3f}, test accuracy: {np.mean(test_accuracy):.3f}')" | |
| ] | |
| }, | |
| { | |
| "name": "MLP with BCE", | |
| "snippet": [ | |
| "# ~~~~ Boilerplate ~~~", | |
| "import torch #1.5.0", | |
| "from torch import nn", | |
| "from tqdm.auto import tqdm, trange", | |
| "import numpy as np", | |
| "", | |
| "np.random.seed(90210)", | |
| "torch.manual_seed(90210)", | |
| "", | |
| "# ~~~~ Options ~~~", | |
| "opts = {", | |
| " 'lr': 1e-3,", | |
| " 'epochs': 10,", | |
| " 'batch_size': 1", | |
| "}", | |
| "", | |
| "# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')", | |
| "device = torch.device('cpu')", | |
| "print(device)", | |
| "", | |
| "# ~~~~ Data loading ~~~", | |
| "from sklearn.datasets import make_classification", | |
| "n_features = 2", | |
| "X, y = make_classification(n_samples=1000, n_features=n_features, n_informative=n_features, n_redundant=0, n_classes=2, n_clusters_per_class=1, random_state=90210)", | |
| "", | |
| "from torch.utils.data import DataLoader, TensorDataset", | |
| "", | |
| "dataset = TensorDataset( torch.FloatTensor(X), torch.FloatTensor(y) )", | |
| "", | |
| "train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=opts['batch_size'], shuffle=False)", | |
| "test_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=opts['batch_size'], shuffle=False)", | |
| "", | |
| "# ~~~~ Model, Optimizer, Loss ~~~", | |
| "class MLP(torch.nn.Module):", | |
| " def __init__(self, input_size, hidden_size):", | |
| " super(MLP, self).__init__()", | |
| " self.fc1 = torch.nn.Linear(input_size, hidden_size)", | |
| " self.relu = torch.nn.ReLU()", | |
| " self.fc2 = torch.nn.Linear(hidden_size, 1)", | |
| " self.sigmoid = torch.nn.Sigmoid()", | |
| " def forward(self, x):", | |
| " hidden = self.fc1(x)", | |
| " relu = self.relu(hidden)", | |
| " output = self.fc2(relu)", | |
| " output = self.sigmoid(output)", | |
| " return output", | |
| " ", | |
| "model = MLP(n_features, 50).to(device)", | |
| "optimizer = torch.optim.Adam(model.parameters(), opts['lr'])", | |
| "# optimizer = torch.optim.SGD(model.parameters(), opts['lr'])", | |
| "criterion = torch.nn.BCELoss() # loss function", | |
| "", | |
| "# ~~~~ Main loop ~~~", | |
| "for epoch in range(opts['epochs']):", | |
| " model.train()", | |
| " train_loss = []", | |
| " N = len(train_loader)", | |
| " loss_, NUDGE = np.nan, int(N/10)", | |
| " pbar = tqdm(enumerate(train_loader), total=N,", | |
| " desc=f'Epoch[{epoch+1:^3}], Batch[{0+1:^4}], Loss[{loss_:.2f}]')", | |
| " for i, (data, labels) in pbar:", | |
| " data, labels = data.to(device), labels.to(device)", | |
| " outputs = model(data)", | |
| " outputs = outputs.squeeze(1)", | |
| " loss = criterion(outputs, labels)", | |
| " optimizer.zero_grad()", | |
| " loss.backward()", | |
| " optimizer.step()", | |
| " loss_ = loss.item()", | |
| " train_loss.append(loss_)", | |
| " if i%NUDGE == NUDGE-1:", | |
| " pbar.set_description(f'Epoch[{epoch+1:^3}], Batch[{i+1:^4}], Loss[{loss_:.2f}]')", | |
| " ", | |
| " ", | |
| " model.eval()", | |
| " test_loss = []", | |
| " test_accuracy = []", | |
| " for i, (data, labels) in enumerate(test_loader):", | |
| " data, labels = data.to(device), labels.to(device)", | |
| " outputs = model(data)", | |
| " outputs = outputs.squeeze(1)", | |
| " predicted = (outputs>0.5).float()", | |
| " loss = criterion(outputs, labels)", | |
| " test_loss.append(loss.item())", | |
| " test_accuracy.append((predicted == labels).sum().item() / opts['batch_size'])", | |
| " ", | |
| " print(f'Epoch: {epoch}, train loss: {np.mean(train_loss):.3f}, test loss: {np.mean(test_loss):.3f}, test accuracy: {np.mean(test_accuracy):.3f}')" | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'Keras', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'plot_model', | |
| 'snippet': ['#requirements: graphviz (apt-get), pydot (pip)', | |
| 'from IPython.display import SVG', | |
| 'from keras.utils.vis_utils import model_to_dot', | |
| 'def plot_keras_model(model, show_shapes=True, show_layer_names=True):', | |
| ' return SVG(model_to_dot(model, show_shapes=show_shapes,', | |
| " show_layer_names=show_layer_names).create(prog='dot',format='svg'))", | |
| 'plot_keras_model(model, show_shapes=True, show_layer_names=False)' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'TensorFlow', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'assert is using GPU', | |
| 'snippet': ['import tensorflow as tf', | |
| "assert tf.test.gpu_device_name(), 'tf does not run on GPU!'" | |
| ] | |
| }, | |
| { | |
| 'name': 'supress warnings', | |
| 'snippet': ['#supress tf warnings', | |
| '#https://stackoverflow.com/a/38645250/1640414', | |
| 'import os', | |
| "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # or any {'0', '1', '2'}, higher is less verbose", | |
| '#https://stackoverflow.com/a/51327615/1640414', | |
| 'tf.logging.set_verbosity(tf.logging.FATAL)' | |
| ] | |
| }, | |
| { | |
| 'name': 'eval on CPU', | |
| 'snippet': ['import tensorflow as tf', | |
| 'config = tf.ConfigProto(', | |
| " device_count = {'GPU': 0}", | |
| ' )', | |
| '', | |
| "tensor = tf.ones([3,2], dtype=tf.float32, name='ones')", | |
| '', | |
| 'with tf.Session(config=config) as sess:', | |
| ' print(sess.run(tensor))' | |
| ] | |
| }] | |
| }, | |
| { | |
| 'name': 'OpenAI Gym', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'no render from notebook', | |
| 'snippet': ['import gym', | |
| 'from gym import wrappers', | |
| "env = gym.make('CartPole-v0')", | |
| '', | |
| "#If you'd like rendering use it outside the notebook", | |
| '#From: https://stackoverflow.com/a/50866507/1640414', | |
| "env = wrappers.Monitor(env, '/tmp/gym', video_callable=False ,force=True)", | |
| 'env.reset()', | |
| 'print(env.step(env.action_space.sample())) # take a random action', | |
| 'env.close()' | |
| ] | |
| }] | |
| }] | |
| }, | |
| '---', | |
| { | |
| 'name': 'Markdown', | |
| 'sub-menu': [ | |
| { | |
| "name": "Table (with column alignment)", | |
| "snippet": [ | |
| "* Table alignment", | |
| "| Syntax | Description | Test Text |", | |
| "| :--- | :----: | ---: |", | |
| "| Header | Title | Here's this |", | |
| "| Paragraph | Text | And more |" | |
| ] | |
| }, | |
| { | |
| 'name': 'Table (with code)', | |
| 'snippet': ['from IPython.display import HTML, display', | |
| 'import tabulate', | |
| "table = [['Sun',696000,1989100000],", | |
| " ['Earth',6371,5973.6],", | |
| " ['Moon',1737,73.5],", | |
| " ['Mars',3390,641.85]]", | |
| "display(HTML(tabulate.tabulate(table, headers=['h1', 'h2', 'h3'], tablefmt='html')))" | |
| ] | |
| }, | |
| { | |
| 'name': 'add YouTube video', | |
| 'snippet': ['from IPython.lib.display import YouTubeVideo', | |
| "YouTubeVideo('Boy3zHVrWB4', start=0)" | |
| ] | |
| }, | |
| { | |
| 'name': 'add IFrame embedding', | |
| 'snippet': ['from IPython.display import IFrame', | |
| "IFrame('https://www.desmos.com/calculator/osig1u1uwl?embed', width=350, height=350)" | |
| ] | |
| }, | |
| { | |
| 'name': 'embedded code markdown', | |
| 'snippet': ['```bash', | |
| 'git clone about:blank', | |
| '```' | |
| ] | |
| }, | |
| { | |
| "name": "Figure template in HTML", | |
| "snippet": [ | |
| "<center>", | |
| "<figure>", | |
| "<img src='http://pyro.ai/_static/img/vae_plots/test_elbo_vae.png' style='width: 550px;'>", | |
| "<figcaption>", | |
| "<font size='+1'><b>Figure 3:</b> How the test ELBO evolves over the course of training. </font>", | |
| "</figcaption>", | |
| "</figure>", | |
| "</center>" | |
| ] | |
| }, | |
| '---', | |
| { | |
| 'name': 'Cheetsheet', | |
| 'external-link': 'https://github.com/adam-p/markdown-here/wiki/Markdown-Here-Cheatsheet', | |
| }, | |
| { | |
| 'name': 'Extended syntax', | |
| 'external-link': 'https://www.markdownguide.org/extended-syntax/', | |
| } | |
| ] | |
| }, | |
| { | |
| 'name': 'LaTeX', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Equations with numbers', | |
| 'snippet': ['$$', | |
| '\\begin{equation}', | |
| 'dS_A+dS_B>0 \\\\', | |
| 'dS_A+dS_B>0', | |
| '\\end{equation}', | |
| '$$' | |
| ] | |
| }, | |
| { | |
| 'name': 'Aligning multiple equations', | |
| 'snippet': ['$$\\begin{align*}', | |
| 'p_A &\\sim \\text{Uniform}[\\text{low}=0,\\text{high}=1) \\\\', | |
| 'p_B &\\sim \\text{Uniform}[\\text{low}=0,\\text{high}=1) \\\\', | |
| 'X\\ &\\sim \\text{Bernoulli}(\\text{prob}=p) \\\\', | |
| '\\text{for } i &= 1\\ldots N: \\\\', | |
| ' X_i\\ &\\sim \\text{Bernoulli}(p_i)', | |
| '\\end{align*}$$' | |
| ] | |
| }, | |
| { | |
| 'name': 'Vector in matrix notation', | |
| 'snippet': ['$\\begin{bmatrix} ', | |
| ' 0 \\\\ ', | |
| ' 0 \\\\ ', | |
| '\\end{bmatrix}\\in\\text{Null Space}$' | |
| ] | |
| }] | |
| }, | |
| '---', | |
| { | |
| 'name': 'Best practices', | |
| 'sub-menu': [ | |
| { | |
| 'name': 'Static typing', | |
| 'external-link': 'https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html', | |
| }, | |
| { | |
| 'name': 'Styling matplotlib __', | |
| 'external-link': 'https://github.com/matplotlib/matplotlib/blob/master/matplotlibrc.template', | |
| }, | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment