Last active
May 20, 2018 21:25
-
-
Save jessstringham/1ff8ec24dafc0fcff15d4a0e88be074e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''This is hacky code I use to convert Jupyter notebooks into Jekyll posts. | |
Notebooks' first line should be | |
# Title | |
and `Title` will be used as the post's title. | |
I convert LaTeX into the form MathJax needs. | |
To make plots work, see `get_maybe_image_code` comment. | |
''' | |
import re | |
import os | |
import sys | |
import nbformat | |
# Notebook format version passed to nbformat.read when loading the notebook.
NB_VERSION = 4
# Front matter written at the top of every post; the `{}` placeholder is
# filled with the notebook title by header_from_path.
HEADER = '''---
title: '{}'
tags: [jupyter]
layout: post
mathjax: true
---
'''
# Base URL that a notebook's file name is appended to when linking to it.
GITHUB_PATH = 'https://github.com/jessstringham/blog/tree/master/notebooks'
# Folder checked for image files; also used as the URL prefix that replaces
# `images/` in markdown cells.
ASSETS_PATH = 'assets'
# Liquid highlight tags placed before and after each code cell's source.
SOURCE_CODE_PREFIX = '\n\n{% highlight python %}\n'
SOURCE_CODE_SUFFIX = '\n{% endhighlight %}\n\n'
def header_from_path(path, title):
    '''Return the top of the post: front matter plus a link to the notebook.

    The front matter is the HEADER template filled in with `title`; it is
    followed by one markdown line linking to the notebook under GITHUB_PATH.

    path: path of the notebook; only its final component is used in the link.
    title: post title. It is placed inside a single-quoted YAML scalar, so any
        single quote in it is doubled (YAML's escape for that quoting style).
    '''
    filename = os.path.basename(path)
    link = '[This post is also a Jupyter notebook!]({}/{})\n'.format(
        GITHUB_PATH,
        filename,
    )
    # Without this, a title such as "Don't panic" would end the YAML string
    # early and break the front matter.
    yaml_safe_title = title.replace("'", "''")
    return HEADER.format(yaml_safe_title) + link
def replace_single_dollar_signs(text):
    r'''Rewrite inline `$...$` math in `text` and return the new string.

    Display math written as `$$...$$` is copied through unchanged. An inline
    span `$x$` becomes `\\( x \\)`: two backslashes and an open paren, a
    space, the contents, a space, then two backslashes and a close paren.

    This is a small hand-written state machine rather than a regex:
      text            - outside math
      one_dollar      - just saw one `$`; not yet known if inline or display
      in_state        - inside inline math
      two_dollar      - inside display math
      exit_two_dollar - saw the first `$` of the closing `$$`

    A lone `$` at the very end of `text` is kept as-is. Inline math that is
    opened but never closed is left with only its opening delimiter.
    '''
    pieces = []
    state = 'text'
    for letter in text:
        if state == 'text':
            if letter == '$':
                # Hold the dollar back until the next character tells us
                # whether this is `$` or `$$`.
                state = 'one_dollar'
            else:
                pieces.append(letter)
        elif state == 'one_dollar':
            if letter == '$':
                state = 'two_dollar'
                # The first `$` was suppressed, so emit both here.
                pieces.append('$$')
            else:
                state = 'in_state'
                pieces.append('\\\\( ')
                pieces.append(letter)
        elif state == 'in_state':
            if letter == '$':
                state = 'text'
                pieces.append(' \\\\)')
            else:
                pieces.append(letter)
        elif state == 'two_dollar':
            if letter == '$':
                state = 'exit_two_dollar'
            pieces.append(letter)
        elif state == 'exit_two_dollar':
            # This is expected to be the second `$` of the closing `$$`.
            state = 'text'
            pieces.append(letter)
    if state == 'one_dollar':
        # The input ended right after a single `$`; emit it rather than
        # silently dropping it.
        pieces.append('$')
    return ''.join(pieces)
def check_image_link(image_filename):
    '''Print a reminder if `image_filename` is not found under ASSETS_PATH.'''
    expected_location = os.path.join(ASSETS_PATH, image_filename)
    if os.path.exists(expected_location):
        return
    print('remember to move {} to {}'.format(image_filename, ASSETS_PATH))
def process_md(source):
    '''Convert the source of one markdown cell into post markdown.

    Steps, in order:
      1. For every `images/...` reference, print a reminder if the file is
         not present under ASSETS_PATH.
      2. Rewrite single-dollar LaTeX via replace_single_dollar_signs.
      3. Replace every `images/` with `/<ASSETS_PATH>/`.
      4. Turn `(name.ipynb)` link targets into `({% post_url name %})`.

    Returns the converted text.
    '''
    # An image name runs from `images/` up to the first `)`, `"` or `'`.
    # Raw string: the pattern contains backslashes meant for the regex engine.
    images = re.findall(r'images/(.*?)[)"\']', source)
    for image in images:
        check_image_link(image)

    # handle LaTeX
    source = replace_single_dollar_signs(source)

    # then update links to the blog images links
    source = source.replace('images/', '/{}/'.format(ASSETS_PATH))

    # update links to other notebooks in the folder to other posts
    source = re.sub(
        r'\((.*?)\.ipynb\)',
        r'({% post_url \1 %})',
        source,
    )
    return source
def get_maybe_image_code(source):
    '''Return markdown that embeds the plot saved by a code cell, or ''.

    This is a little hacky: in the notebook, I add a function
    `maybe_save_plot`:

        SAVE = True
        def maybe_save_plot(filename):
            if SAVE:
                plt.tight_layout()
                plt.savefig('images/' + filename, bbox_inches="tight")

    I call this before the `plt.show()`s. I run the notebook and it saves all
    the plots. When I generate the blog post, I append the image after
    codeblocks that contain `maybe_save_plot`.

    Only lines that begin with `maybe_save_plot('name')` are recognised. If a
    cell has several such lines, the image of the last one is returned.

    At the moment my blog reads images from a separate assets folder, so I
    need to remember to manually move them over. That is what the printed
    reminder from check_image_link is for.
    '''
    # Raw string so `\(` and `\)` reach the regex engine as literal parens.
    save_plot_pattern = re.compile(r"maybe_save_plot\('(.*)'\)")

    maybe_image = ''
    for line in source.split('\n'):
        save_plot_match = save_plot_pattern.match(line)
        if save_plot_match:
            filename = save_plot_match.group(1) + '.png'
            check_image_link(filename)
            # Build the URL from ASSETS_PATH, the same way process_md
            # rewrites `images/` links.
            maybe_image = '![](/{}/{})'.format(ASSETS_PATH, filename)
    return maybe_image
def process_code(source):
    '''Wrap a code cell's source in highlight tags, followed by its plot.

    The plot markdown comes from get_maybe_image_code and is empty when the
    cell saves no plot. The result always ends with a newline.
    '''
    pieces = [
        SOURCE_CODE_PREFIX,
        source,
        SOURCE_CODE_SUFFIX,
        get_maybe_image_code(source),
        '\n',
    ]
    return ''.join(pieces)
def extract_title(source):
    '''Split the first cell into the post title and the rest of the cell.

    The title is the first line without its markdown heading marker, like
    "something" from "# something". A first line that does not start with
    "# " is handled by dropping any leading "#" characters and surrounding
    whitespace, instead of blindly cutting off its first two characters.

    Returns a tuple `(title, remaining_source)`, where `remaining_source` is
    every line after the first, joined with newlines.
    '''
    lines = source.split('\n')
    first_line = lines[0]
    if first_line.startswith('# '):
        title = first_line[len('# '):]
    else:
        title = first_line.lstrip('#').strip()
    return title, '\n'.join(lines[1:])
if __name__ == '__main__':
    # Usage: <script> NOTEBOOK_PATH OUTPUT_PATH
    # Reads the notebook, converts each cell, and writes the Jekyll post.
    import io

    if len(sys.argv) < 3:
        sys.exit('usage: {} NOTEBOOK_PATH OUTPUT_PATH'.format(sys.argv[0]))
    path = sys.argv[1]
    output_path = sys.argv[2]

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with io.open(path, encoding='utf-8') as f:
        nb = nbformat.read(f, NB_VERSION)
    cells = nb['cells']
    if not cells:
        sys.exit('{} has no cells, so there is no title to extract'.format(path))

    # Extract the title from the first cell and update the local
    # representation of the cell's source
    title, first_cell_source = extract_title(cells[0]['source'])
    cells[0]['source'] = first_cell_source

    result_lines = []
    for cell in cells:
        source = cell['source']
        # Cells with empty source are skipped entirely.
        if source:
            if cell['cell_type'] == 'markdown':
                result_lines.append(process_md(source))
            elif cell['cell_type'] == 'code':
                result_lines.append(process_code(source))
            else:
                print('I don\'t know how to process cells of type {}'.format(cell['cell_type']))
    result_lines = [header_from_path(path, title)] + result_lines

    # write the post!
    with io.open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(result_lines))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment