Last active
January 21, 2020 12:04
-
-
Save psychemedia/9b7808d81e3ee3461444330f3b0971ac to your computer and use it in GitHub Desktop.
Simple visualisation of cell structure (md and code cells) for a Jupyter notebook. For more context, see https://blog.ouseful.info/2019/12/16/fragment-visualising-jupyter-notebook-structure/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # Simple Notebook Visualiser
#
# Simple notebook visualiser for one or more Jupyter notebooks.
#
# Visualises markdown and code cells, with block size determined by code cell line count and estimated screen line count for markdown cells.
# + | |
import math | |
import matplotlib.pyplot as plt | |
def nb_vis(cell_map, img_file='', linewidth = 5, w=20, gap=None, gap_boost=1, gap_colour='lightgrey'):
    """Visualise notebook gross cell structure.

    Each notebook is drawn as one horizontal row of coloured bars, one bar per
    cell. A bar's length is the cell's line count plus one, so that very small
    cells remain visible.

    Args:
        cell_map: Either a list of ``(line_count, colour)`` tuples describing a
            single notebook, or a dict mapping a label (e.g. the notebook path)
            to such a list when plotting several notebooks.
        img_file: If non-empty, the figure is saved to this path.
        linewidth: Line width used to draw the bars.
        w: Figure width, passed to ``plt.subplots(figsize=...)``; the figure
            height is one unit per notebook row.
        gap: Length of the separator between consecutive cells. If ``None``,
            it is set to 1% of the longest notebook's total line count
            (rounded up) multiplied by ``gap_boost``.
        gap_boost: Multiplier applied to the automatically computed gap.
        gap_colour: Colour of the separator bar; a falsy value leaves the gap
            blank.

    Raises:
        TypeError: If ``cell_map`` is neither a list nor a dict.
    """
    #Normalise the input so a single notebook is handled as a one-row plot.
    #Validate before any plotting so bad input fails with a clear error.
    if isinstance(cell_map, list):
        notebooks = {'': cell_map}
    elif isinstance(cell_map, dict):
        notebooks = cell_map
    else:
        raise TypeError(
            'cell_map must be a list (single notebook) or a dict (multiple notebooks), not {}'.format(
                type(cell_map).__name__))

    if gap is None:
        #If we are generating a plot for multiple notebooks, use the largest overall length
        max_overall_len = max(
            (sum(length for length, _ in cells) for cells in notebooks.values()),
            default=0)
        #Set the gap at 1% of the overall length
        gap = math.ceil(max_overall_len * 0.01) * gap_boost

    #One row per notebook; keep at least one row so an empty dict does not
    #produce a degenerate zero-height figure
    fig, ax = plt.subplots(figsize=(w, max(1, len(notebooks))))

    def plotter(cells, row, label='', header_gap = 0.2):
        """Plot visualisation of gross cell structure for a single notebook."""
        #Plot notebook path
        ax.text(0, row, label)
        row = row + header_gap
        pos = 0
        for cell in cells:
            #Add a coloured bar between cells (not before the first one)
            if pos > 0:
                if gap_colour:
                    ax.plot([pos, pos + gap], [row, row], gap_colour, linewidth=linewidth)
                pos = pos + gap
            end = pos + cell[0] + 1 #Make tiny cells slightly bigger
            ax.plot([pos, end], [row, row], cell[1], linewidth=linewidth)
            pos = end

    for row, (label, cells) in enumerate(notebooks.items()):
        plotter(cells, row, label)

    ax.axis('off')
    #Rows are numbered from 0 upwards, so flip the axis to put the first notebook at the top
    ax.invert_yaxis()
    if img_file:
        fig.savefig(img_file)
# - | |
# Define the colour map for different cell types: | |
#Colour used to draw each notebook cell type. 'raw' is included because it is
#a valid nbformat v4 cell type alongside 'markdown' and 'code'.
VIS_COLOUR_MAP = {'markdown':'cornflowerblue','code':'pink','raw':'khaki'}
#Number of characters assumed to fit on one screen line when estimating how
#many screen lines a cell's text takes up
LINE_WIDTH = 160
# The following function will find one or more notebooks on a path and generate cell maps for each of them. All the cell maps are then passed for visualisation on the same canvas.
# + | |
import nbformat | |
import os | |
import textwrap | |
def nb_vis_parse_nb(path, img_file='', linewidth = 5, w=20, **kwargs):
    """Parse one or more notebooks on a path and visualise their cell structure.

    If ``path`` is a directory it is walked recursively and every ``.ipynb``
    file outside the excluded directories is profiled; otherwise ``path`` is
    treated as a single notebook file. The resulting cell map(s) are passed to
    ``nb_vis``.

    Args:
        path: Path to a notebook file or to a directory containing notebooks.
        img_file: Passed through to ``nb_vis``.
        linewidth: Passed through to ``nb_vis``.
        w: Passed through to ``nb_vis``.
        **kwargs: Further keyword arguments forwarded to ``nb_vis``
            (for example ``gap``, ``gap_boost`` or ``gap_colour``).
    """
    #Colour for cell types that have no entry in VIS_COLOUR_MAP, so that an
    #unmapped type is still drawn rather than raising a KeyError
    fallback_colour = 'khaki'

    def _count_screen_lines(txt, width=LINE_WIDTH):
        """Count the number of screen lines that a possibly overflowing text takes up."""
        #Model screen flow: split a line if it is more than `width` characters long.
        #textwrap.wrap() returns [] for an empty or whitespace-only line, but such
        #a line still occupies one screen line, so count at least one per line.
        return sum(max(1, len(textwrap.wrap(line, width))) for line in txt.split('\n'))

    def _nb_vis_parse_nb(fn):
        """Parse a notebook and generate the nb_vis cell map for it.

        Returns an empty list if `fn` is not an existing .ipynb file.
        """
        _, fn_ext = os.path.splitext(fn)
        if fn_ext != '.ipynb' or not os.path.isfile(fn):
            return []

        #Notebook files are UTF-8 encoded JSON; don't rely on the platform default encoding
        with open(fn, 'r', encoding='utf-8') as f:
            nb = nbformat.reads(f.read(), as_version=4)

        return [(_count_screen_lines(cell['source']),
                 VIS_COLOUR_MAP.get(cell['cell_type'], fallback_colour))
                for cell in nb.cells]

    def _dir_walker(path, exclude = 'default'):
        """Profile all the notebooks in a specific directory and in any child directories."""
        if exclude == 'default':
            exclude_paths = ['.ipynb_checkpoints', '.git', '.ipynb', '__MACOSX']
        else:
            #If we set exclude, we need to pass it as a list
            exclude_paths = exclude
        excluded = set(exclude_paths)

        nb_multidir_cell_map = {}
        for _path, dirs, files in os.walk(path):
            #Compare path components using '/' whatever the platform separator is
            if excluded.intersection(_path.replace(os.sep, '/').split('/')):
                #Everything below an excluded directory is excluded too, so stop descending
                dirs[:] = []
                continue
            #Profile that directory...
            for _f in files:
                fn = os.path.join(_path, _f)
                cell_map = _nb_vis_parse_nb(fn)
                if cell_map:
                    nb_multidir_cell_map[fn] = cell_map
        return nb_multidir_cell_map

    if os.path.isdir(path):
        cell_map = _dir_walker(path)
    else:
        cell_map = _nb_vis_parse_nb(path)

    nb_vis(cell_map, img_file, linewidth, w, **kwargs)
# - | |
# Test a single notebook mapper: | |
#Single notebook: plotted as a single row
TEST_NOTEBOOK = 'Notebook_profile_test.ipynb'
nb_vis_parse_nb(TEST_NOTEBOOK)
# Test a plot of multiple notebooks down a path:
#Both calls below write to the same image file, so the second overwrites the first
_TEST_NOTEBOOK_DIR = '../Documents/GitHub/tm351-undercertainty/notebooks/tm351/Part 02 Notebooks'
nb_vis_parse_nb(_TEST_NOTEBOOK_DIR, img_file='test-nbvis.png',
                linewidth=10, gap=1, gap_colour='white')
nb_vis_parse_nb(_TEST_NOTEBOOK_DIR, img_file='test-nbvis.png',
                linewidth=10, gap=0)
# Can we see the saved test file?
#
# 
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment