Created
September 11, 2014 05:58
-
-
Save satyamsatyarthi/26f3d560200f8537f648 to your computer and use it in GitHub Desktop.
Convert IPython notebooks to Jekyll-compatible markdown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json #for reading .ipynb | |
import sys #python version for proper unicode support | |
import os #directory and path operations | |
import errno #safe directory creation | |
import argparse #command line args | |
import re #latex to liquid | |
# Command-line interface: one positional notebook path plus an overwrite flag.
parser = argparse.ArgumentParser(description='Convert IPython notebooks to Jekyll Markdown with Liquid Tags.')
# nargs=1 makes `inputs.filename` a one-element list; it is indexed with [0] wherever it is used.
parser.add_argument('filename', metavar='file', type=str, nargs=1, help='full path to .ipynb file')
parser.add_argument('--f', action='store_true', help='overwrite existing files without warning.')
inputs = parser.parse_args()

imgBaseDir = 'images'  # directory containing images (relative to base Jekyll directory)
autogenPostDir = '_autogen'  # directory containing autogenerated posts (within _posts)

# Figure out the names of the output locations.
fDir, fName = os.path.split(inputs.filename[0])
# Strip only a trailing '.ipynb'. str.replace() would also remove the text
# from the middle of a name such as 'notes.ipynb.old.ipynb'.
if fName.endswith('.ipynb'):
    fName = fName[:-len('.ipynb')]
# The Jekyll base directory is taken to be the parent of the directory that
# holds this script.
fullPath = os.path.dirname(os.path.realpath(__file__))
jekyllDir = os.path.split(fullPath)[0]
imgFullDir = os.path.join(jekyllDir, imgBaseDir, fName)
mdFName = os.path.join(jekyllDir, '_posts', autogenPostDir, fName + '.md')
def makedirSafe(dirPath):
    """Create dirPath (including missing parents) if it does not exist yet.

    An already existing directory is not an error.

    Raises:
        OSError: if creation fails for any other reason, including when
            dirPath exists but is not a directory.
    """
    try:
        os.makedirs(dirPath)
    except OSError as exception:
        # EEXIST is only harmless when the thing that exists is a directory;
        # a regular file at that path would make every later write fail.
        if exception.errno != errno.EEXIST or not os.path.isdir(dirPath):
            raise
# Make sure the directory for autogenerated posts exists before writing to it.
makedirSafe(os.path.join(jekyllDir, '_posts', autogenPostDir))

# Refuse to clobber an existing post unless the user asked for it with --f.
if not inputs.f and os.path.exists(mdFName):
    raise IOError('The markdown file to be generated already exists.\n Run with --f to overwrite.')

# For Unicode support across Python versions: Python 2's built-in open() has
# no `encoding` argument, so substitute io.open, which does.
if sys.version_info < (3,):
    import io
    _open_func_bak = open  # Make a back up, just in case
    open = io.open
# Expressions to convert to Liquid math tags. Each pattern captures the text
# before the math, the math itself, and the text after it, none of which may
# contain a '$'. Raw strings keep '\$' from being treated as an (invalid)
# string escape sequence.
displayRe = re.compile(r'([^\$]*)\$\$([^\$]+)\$\$([^\$]*)')  # $$ ... $$
inlineRe = re.compile(r'([^\$]*)\$([^\$]+)\$([^\$]*)')  # $ ... $
#Code cells have inputs and outputs
def processCodeCell(codeCell):
    """Convert one notebook code cell into markdown text plus image data.

    Args:
        codeCell: dict describing a code cell. Reads 'input', 'outputs' and,
            when the input is not an empty list, 'language'.

    Returns:
        dict with two keys:
            'text': the source wrapped in a Liquid highlight block, followed
                by the text (or LaTeX converted to Liquid math tags) of every
                'pyout' output.
            'images': list of {'format': 'svg', 'imgData': ...} dicts, one per
                'display_data' output that carries an 'svg' entry.
    """
    # NOTE(review): the indentation of the original was lost; the nesting
    # below (trailing newline inside the input branch, second `else` paired
    # with the 'pyout' test) is the reading assumed here -- confirm.
    pieces = []
    images = []

    if codeCell['input'] != []:
        # Doubled braces are literal braces for str.format().
        pieces.append('{{% highlight {language} %}}\n'.format(language=codeCell['language']))
        pieces.extend(codeCell['input'])
        pieces.append('\n{% endhighlight %}')
        pieces.append('\n')

    for out in codeCell['outputs']:
        outputType = out['output_type']
        if outputType == 'pyout':
            # `in` instead of dict.has_key(), which no longer exists in Python 3.
            if isinstance(out, dict) and 'latex' in out:
                for line in out['latex']:
                    line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
                    line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
                    pieces.append(line)
            else:
                pieces.extend(out['text'])
        elif outputType == 'display_data' and 'svg' in out:
            images.append({'format': 'svg', 'imgData': out['svg']})
        # Any other output type is ignored.

    return {'text': ''.join(pieces), 'images': images}
#Markdown cells only have md text
def processMarkdownCell(mdCell):
    """Return the cell's markdown with $...$ / $$...$$ math turned into Liquid tags.

    Each entry of mdCell['source'] is first run through the display-math
    pattern, then through the inline-math pattern, and the converted entries
    are concatenated in order.
    """
    converted = []
    for rawLine in mdCell['source']:
        withDisplay = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', rawLine)
        converted.append(inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', withDisplay))
    return ''.join(converted)
#%%
# Load the notebook. Notebook files are UTF-8 encoded JSON, so state the
# encoding instead of relying on the platform default.
with open(inputs.filename[0], 'r', encoding='utf-8') as f:
    rd = json.load(f)
#only need to do one worksheet for now.
wb = rd['worksheets'][0]
#%%
out = []  # markdown text of every converted cell, in notebook order
nImages = 0  # running count of image files written; also numbers the files
# Prefix for image links in the generated markdown. This is a URL, not a
# filesystem path, so it is joined with '/' regardless of the OS separator.
imgDirString = '/'.join(['{{site.url}}', imgBaseDir, fName, ''])
for cell in wb['cells']:
    if cell['cell_type'] != 'code':
        # Every non-code cell is treated as markdown.
        out.append(processMarkdownCell(cell) + '\n')
        continue

    codeCellOut = processCodeCell(cell)
    cellText = codeCellOut['text']
    for image in codeCellOut['images']:
        if nImages == 0:
            # First image of the notebook: create its image directory lazily.
            makedirSafe(imgFullDir)
        imgName = fName + str(nImages) + '.' + image['format']
        imgFullPath = os.path.join(imgFullDir, imgName)
        if os.path.exists(imgFullPath) and not inputs.f:
            raise IOError('Image file: ' + imgName + ' already exists.\n Run with --f to overwrite')
        #svg files are just xml so we can generate the output
        if image['format'] == 'svg':
            with open(imgFullPath, 'w+', encoding='utf-8') as f:
                for line in image['imgData']:
                    f.write(line)
        else:
            raise TypeError('You encountered a ' + image['format'] + ' file. I don\'t know how to deal with that.')
        #generate image tag
        cellText += u'\n![' + imgName + '](' + imgDirString + imgName + ')'
        nImages += 1
    out.append(cellText + u'\n')
#write markdown file
with open(mdFName, 'w', encoding='utf-8') as f:
    for cell in out:
        f.write(cell)
        f.write(u'\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment