Skip to content

Instantly share code, notes, and snippets.

@jessstringham
Last active May 20, 2018 21:25
Show Gist options
  • Save jessstringham/1ff8ec24dafc0fcff15d4a0e88be074e to your computer and use it in GitHub Desktop.
Save jessstringham/1ff8ec24dafc0fcff15d4a0e88be074e to your computer and use it in GitHub Desktop.
'''This is hacky code I use to convert Jupyter notebooks into Jekyll posts.
Notebooks' first line should be
# Title
and `Title` will be used as the post's title.
I convert LaTeX into the form MathJax needs.
To make plots work, see `get_maybe_image_code` comment.
'''
import re
import os
import sys
import nbformat
# Notebook format version handed to nbformat.read when loading the notebook.
NB_VERSION = 4
# Front-matter template written at the top of the post; `{}` is filled with
# the post title.
HEADER = '''---
title: '{}'
tags: [jupyter]
layout: post
mathjax: true
---
'''
# Base URL the notebook's filename is appended to when linking back to it.
GITHUB_PATH = 'https://github.com/jessstringham/blog/tree/master/notebooks'
# Folder checked for image files; also used as the URL prefix that replaces
# `images/` in markdown cells.
ASSETS_PATH = 'assets'
# Highlight tags placed before and after each code cell's source.
SOURCE_CODE_PREFIX = '\n\n{% highlight python %}\n'
SOURCE_CODE_SUFFIX = '\n{% endhighlight %}\n\n'
def header_from_path(path, title):
    '''Return the front-matter with the notebook title, followed by the first
    line of the post: a link to the notebook on GitHub.

    path: path of the notebook. Only its final component (the filename) is
        used; it is appended to GITHUB_PATH to build the link.
    title: the post title, substituted into HEADER.

    Returns the formatted HEADER plus one markdown link line.
    '''
    # HEADER wraps the title in single quotes. Inside a single-quoted YAML
    # scalar a literal ' has to be written as '', otherwise a title such as
    # "What's new" produces front-matter that does not parse.
    yaml_title = title.replace("'", "''")
    filename = os.path.basename(path)
    link = '[This post is also a Jupyter notebook!]({}/{})\n'.format(
        GITHUB_PATH,
        filename
    )
    return HEADER.format(yaml_title) + link
def replace_single_dollar_signs(text):
    r'''Rewrite inline $\LaTeX$ as \\( \LaTeX \\) and leave $$\LaTeX$$ alone.

    My Jekyll setup renders $$...$$ okay, but needs $...$ to be replaced.
    This is a small character-by-character state machine rather than a
    regex, with these states:

    text             outside any math
    one_dollar       just saw a `$`; not yet known whether it is `$` or `$$`
    in_state         inside inline `$...$` math
    two_dollar       inside display `$$...$$` math
    exit_two_dollar  saw the first `$` of a closing `$$`

    text: the markdown to convert.

    Returns the converted string. A lone `$` that is the very last character
    is kept as-is; an inline `$...` that is never closed keeps its opening
    `\\( ` without a matching close.
    '''
    # Collect pieces and join once instead of repeated string concatenation.
    pieces = []
    state = 'text'
    for letter in text:
        if state == 'text':
            if letter == '$':
                # Hold the `$` back until the next character tells us
                # whether this is `$` or `$$`.
                state = 'one_dollar'
            else:
                pieces.append(letter)
        elif state == 'one_dollar':
            if letter == '$':
                state = 'two_dollar'
                # The first `$` was held back, so emit both here.
                pieces.append('$$')
            else:
                state = 'in_state'
                pieces.append('\\\\( ')
                pieces.append(letter)
        elif state == 'in_state':
            if letter == '$':
                state = 'text'
                pieces.append(' \\\\)')
            else:
                pieces.append(letter)
        elif state == 'two_dollar':
            if letter == '$':
                state = 'exit_two_dollar'
            pieces.append(letter)
        elif state == 'exit_two_dollar':
            # This should be the second `$` of the closing `$$`.
            state = 'text'
            pieces.append(letter)
    if state == 'one_dollar':
        # The text ended right after a held-back `$`; don't silently drop it.
        pieces.append('$')
    return ''.join(pieces)
def check_image_link(image_filename):
    '''Print a reminder if `image_filename` is not present under ASSETS_PATH.'''
    expected_location = os.path.join(ASSETS_PATH, image_filename)
    if os.path.exists(expected_location):
        return
    print('remember to move {} to {}'.format(image_filename, ASSETS_PATH))
def process_md(source):
    '''Convert the source of one markdown cell into markdown for the post.

    Steps, in order:
    1. print a reminder for every `images/...` file that is referenced but
       missing from the assets folder,
    2. convert inline `$...$` math,
    3. point `images/` links at the assets folder,
    4. turn `(name.ipynb)` link targets into `({% post_url name %})`.

    source: the markdown cell's text.

    Returns the converted text.
    '''
    # Check that image files shown in the notebook exist in my blog images
    # folder. This must run before the `images/` prefix is rewritten below.
    for image in re.findall(r'images/(.*?)[)"\']', source):
        check_image_link(image)
    # handle LaTeX
    source = replace_single_dollar_signs(source)
    # then update links to the blog images links
    source = source.replace('images/', '/{}/'.format(ASSETS_PATH))
    # Update links to other notebooks in the folder to other posts. The target
    # may not contain parentheses, so the match cannot start at an earlier `(`
    # on the same line (for example the `\\(` emitted for inline math) and
    # swallow the text in between.
    source = re.sub(
        r'\(([^()]*?)\.ipynb\)',
        r'({% post_url \1 %})',
        source
    )
    return source
def get_maybe_image_code(source):
    '''Get the markdown image link for a code cell's saved plot, if any.

    This is a little hacky: in the notebook, I add a function
    `maybe_save_plot`:

        SAVE = True
        def maybe_save_plot(filename):
            if SAVE:
                plt.tight_layout()
                plt.savefig('images/' + filename, bbox_inches="tight")

    I call this before the `plt.show()`s. I run the notebook and it saves all
    the plots. When I generate the blog post, I append the image after
    codeblocks that contain `maybe_save_plot`.

    At the moment my blog reads images from a separate assets folder, so I
    need to remember to manually move them over. That's what the print
    statement in `check_image_link` is for.

    source: the code cell's text.

    Returns a markdown image link for `<name>.png` under the assets folder,
    or '' when no line matches. Only calls that begin at the very start of a
    line and pass a single-quoted name are recognised, and if several lines
    match, only the last one is kept.
    '''
    maybe_image = ''
    for line in source.split('\n'):
        # Raw string so `\(` reaches the regex engine as written instead of
        # being an invalid Python string escape.
        save_plot_match = re.match(r"maybe_save_plot\('(.*)'\)", line)
        if save_plot_match:
            save_name = save_plot_match.group(1)
            filename = save_name + '.png'
            check_image_link(filename)
            # Build the link from ASSETS_PATH so it stays in step with the
            # folder that check_image_link inspects.
            maybe_image = '![](/{}/{})'.format(ASSETS_PATH, filename)
    return maybe_image
def process_code(source):
    '''Wrap a code cell's source in the highlight tags, followed by the
    markdown for its saved plot (if any) and a trailing newline.
    '''
    image_markdown = get_maybe_image_code(source)
    return ''.join([
        SOURCE_CODE_PREFIX,
        source,
        SOURCE_CODE_SUFFIX,
        image_markdown,
        '\n',
    ])
def extract_title(source):
    '''Grab the title out of the first line, like "something" from
    "# something".

    Any run of leading `#` characters and the surrounding whitespace are
    removed, so "#something", "## something" and a first line without a
    heading marker all give a clean title instead of one with its first two
    characters chopped off.

    source: the text of the notebook's first cell.

    Returns a tuple `(title, rest)`, where `rest` is everything after the
    first line ('' if there is only one line).
    '''
    first_line, _, rest = source.partition('\n')
    title = first_line.lstrip('#').strip()
    return title, rest
if __name__ == '__main__':
    # Usage: <this script> NOTEBOOK_PATH OUTPUT_PATH
    if len(sys.argv) < 3:
        sys.exit('usage: {} NOTEBOOK_PATH OUTPUT_PATH'.format(sys.argv[0]))
    path = sys.argv[1]
    output_path = sys.argv[2]

    # Notebook files are UTF-8 JSON; don't depend on the platform's default
    # encoding when reading them.
    with open(path, encoding='utf-8') as f:
        nb = nbformat.read(f, NB_VERSION)
    cells = nb['cells']
    if not cells:
        sys.exit('{} has no cells, so there is no title to extract'.format(path))

    # Extract the title from the first cell and update the local
    # representation of the cell's source
    title, first_cell_source = extract_title(cells[0]['source'])
    cells[0]['source'] = first_cell_source

    result_lines = []
    for cell in cells:
        source = cell['source']
        # Empty cells (including a first cell that held only the title) are
        # skipped.
        if source:
            if cell['cell_type'] == 'markdown':
                result_lines.append(process_md(source))
            elif cell['cell_type'] == 'code':
                result_lines.append(process_code(source))
            else:
                print('I don\'t know how to process cells of type {}'.format(cell['cell_type']))
    result_lines = [header_from_path(path, title)] + result_lines

    # write the post!
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(result_lines))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment