satyamsatyarthi · September 11, 2014 05:58
diff --git a/ipynb2jekyll.py b/ipynb2jekyll.py
 import json #for reading .ipynb
 import sys #python version for proper unicode support
 import os #directory and path operations
 import errno #safe directory creation
 import argparse #command line args
 import re #latex to liquid 

 #command line interface: one positional notebook path plus an overwrite switch.
 #nargs=1 makes inputs.filename a one-element list, hence the [0] below.
 parser = argparse.ArgumentParser(description='Convert IPython notebooks to Jekyll Markdown with Liquid Tags.')
 parser.add_argument('filename', metavar='file', type=str, nargs=1, help='full path to .ipynb file')
 parser.add_argument('--f', action='store_true', help='overwrite existing files without warning.')
 inputs = parser.parse_args()


 imgBaseDir = 'images' #directory containing images (relative to  base Jekyll directory)
 autogenPostDir = '_autogen' #directory containing autogenerated posts (within _posts)

 #figure out directory names and create autogen directories if necessary
 fDir, fName = os.path.split(inputs.filename[0])
 #strip only a trailing '.ipynb'; str.replace would also delete it from the middle of a name
 if fName.endswith('.ipynb'):
     fName = fName[:-len('.ipynb')]
 fullPath = os.path.dirname(os.path.realpath(__file__)) #directory holding this script
 jekyllDir = os.path.split(fullPath)[0] #parent of the script directory
 imgFullDir = os.path.join(jekyllDir, imgBaseDir, fName)
 mdFName = os.path.join(jekyllDir, '_posts', autogenPostDir, fName + '.md')

 def makedirSafe(dirPath):
     """Create dirPath, including missing parents, unless it already exists.

     An already-existing directory is not an error. Every other failure
     propagates as OSError, including the case where dirPath exists but is
     not a directory.
     """
     try:
         os.makedirs(dirPath)
     except OSError as exception:
         #EEXIST is also reported when a regular file occupies the path, so
         #only swallow it when the directory really is there
         if exception.errno != errno.EEXIST or not os.path.isdir(dirPath):
             raise

 #make sure the autogenerated posts directory is there before anything is written
 makedirSafe(os.path.join(jekyllDir, '_posts', autogenPostDir))

 #refuse to clobber an existing post unless --f was given
 if not inputs.f and os.path.exists(mdFName):
     raise IOError('The markdown file to be generated already exists.\n Run with --f to overwrite.')

 #for Unicode support across python versions: Python 2's builtin open()
 #takes no encoding argument, io.open does
 if sys.version_info < (3,):
     import io
     _open_func_bak = open #keep a handle on the original builtin, just in case
     open = io.open

 #expressions to convert to Liquid math tags
 #raw strings: '\$' is not a valid string escape, so the non-raw form only
 #worked by accident and triggers a warning on modern Python. The compiled
 #pattern text is unchanged.
 #groups: 1 = text before the math, 2 = the math itself, 3 = text after it
 displayRe = re.compile(r'([^\$]*)\$\$([^\$]+)\$\$([^\$]*)') #$$...$$ display math
 inlineRe = re.compile(r'([^\$]*)\$([^\$]+)\$([^\$]*)') #$...$ inline math
    
 #Code cells have inputs and outputs
 def processCodeCell(codeCell):
     """Turn one notebook code cell into Liquid-tagged text plus image payloads.

     codeCell is a dict read with the keys 'input', 'language' and 'outputs'.

     Returns a dict with two keys:
       'text'   -- the input wrapped in a highlight tag, followed by the
                   text (or math-converted latex) of every 'pyout' output
       'images' -- one {'format': 'svg', 'imgData': ...} entry per
                   'display_data' output that carries an 'svg' key

     A cell whose input is an empty list yields empty text and no images;
     its outputs are not examined. Output types other than 'pyout' and
     'display_data' are ignored.
     """
     result = {'text': '', 'images': []}
     if codeCell['input'] == []:
         return result

     #collect the pieces and join once instead of growing a string with +=
     parts = ['{{% highlight {language} %}}\n'.format(language=codeCell['language'])]
     parts.extend(codeCell['input'])
     parts.append('\n{% endhighlight %}')
     parts.append('\n')

     for out in codeCell['outputs']:
         outputType = out['output_type']
         if outputType == 'pyout':
             #'in' instead of dict.has_key, which no longer exists on Python 3
             if isinstance(out, dict) and 'latex' in out:
                 for line in out['latex']:
                     line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
                     line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
                     parts.append(line)
             else:
                 parts.extend(out['text'])
         elif outputType == 'display_data' and 'svg' in out:
             result['images'].append({'format': 'svg', 'imgData': out['svg']})

     result['text'] = ''.join(parts)
     return result


 #Markdown cells only have md text
 def processMarkdownCell(mdCell):
     """Return the cell's 'source' lines as one string with math delimiters
     rewritten to Liquid tags: display math first, then inline math.
     """
     converted = []
     for rawLine in mdCell['source']:
         withDisplay = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', rawLine)
         converted.append(inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', withDisplay))
     return ''.join(converted)

 #%%
 #read the notebook explicitly as UTF-8 rather than the platform default
 #encoding (open is io.open on Python 2, so the keyword works there too)
 with open(inputs.filename[0], 'r', encoding='utf-8') as f:
     rd = json.load(f)


 #only need to do one worksheet for now. 
 wb = rd['worksheets'][0]

 #%%
 out = [] #converted text, one entry per cell
 nImages = 0 #running image count, used to number the image files
 #this is a URL prefix, not a filesystem path: always join with '/' so the
 #generated links do not get backslashes on Windows. The trailing '' keeps
 #the final separator.
 imgDirString = '/'.join(('{{site.url}}', imgBaseDir, fName, ''))

 for cell in wb['cells']:
     #anything that is not a code cell is rendered as markdown
     if cell['cell_type'] != 'code':
         out.append(processMarkdownCell(cell)+'\n')
         continue

     codeCellOut = processCodeCell(cell)
     cellText = codeCellOut['text']
     cellImages = codeCellOut['images']

     #create the image directory lazily, right before the first image is written
     if cellImages and nImages == 0:
         makedirSafe(imgFullDir)

     for image in cellImages:
         imgFormat = image['format']
         imgName = fName + str(nImages) + '.' + imgFormat
         imgFullPath = os.path.join(imgFullDir, imgName)

         #never overwrite an existing image unless --f was given
         if not inputs.f and os.path.exists(imgFullPath):
             raise IOError('Image file: ' + imgName + ' already exists.\n Run with --f to overwrite')

         if imgFormat != 'svg':
             raise TypeError('You encountered a ' + imgFormat + ' file. I don\'t know how to deal with that.')

         #svg files are just xml so we can generate the output
         with open(imgFullPath, 'w+', encoding='utf-8') as f:
             f.writelines(image['imgData'])

         #generate image tag
         cellText += u'\n![' + imgName + '](' + imgDirString + imgName + ')'
         nImages += 1

     out.append(cellText + u'\n')



 #write markdown file
 with open(mdFName, 'w', encoding='utf-8') as mdFile:
     #every converted cell is followed by one extra newline
     for chunk in out:
         mdFile.write(chunk + u'\n')
	import json #for reading .ipynb
	import sys #python version for proper unicode support
	import os #directory and path operations
	import errno #safe directory creation
	import argparse #command line args
	import re #latex to liquid

	#command line interface: one positional notebook path plus an overwrite switch.
	#nargs=1 makes inputs.filename a one-element list, hence the [0] below.
	parser = argparse.ArgumentParser(description='Convert IPython notebooks to Jekyll Markdown with Liquid Tags.')
	parser.add_argument('filename', metavar='file', type=str, nargs=1, help='full path to .ipynb file')
	parser.add_argument('--f', action='store_true', help='overwrite existing files without warning.')
	inputs = parser.parse_args()


	imgBaseDir = 'images' #directory containing images (relative to base Jekyll directory)
	autogenPostDir = '_autogen' #directory containing autogenerated posts (within _posts)

	#figure out directory names and create autogen directories if necessary
	fDir, fName = os.path.split(inputs.filename[0])
	#strip only a trailing '.ipynb'; str.replace would also delete it from the middle of a name
	if fName.endswith('.ipynb'):
	    fName = fName[:-len('.ipynb')]
	fullPath = os.path.dirname(os.path.realpath(__file__)) #directory holding this script
	jekyllDir = os.path.split(fullPath)[0] #parent of the script directory
	imgFullDir = os.path.join(jekyllDir, imgBaseDir, fName)
	mdFName = os.path.join(jekyllDir, '_posts', autogenPostDir, fName + '.md')

	def makedirSafe(dirPath):
	    """Create dirPath, including missing parents, unless it already exists.

	    An already-existing directory is not an error. Every other failure
	    propagates as OSError, including the case where dirPath exists but is
	    not a directory.
	    """
	    try:
	        os.makedirs(dirPath)
	    except OSError as exception:
	        #EEXIST is also reported when a regular file occupies the path, so
	        #only swallow it when the directory really is there
	        if exception.errno != errno.EEXIST or not os.path.isdir(dirPath):
	            raise

	#make sure the autogenerated posts directory is there before anything is written
	makedirSafe(os.path.join(jekyllDir, '_posts', autogenPostDir))

	#output file already exists: refuse to clobber it unless --f was given
	if os.path.exists(mdFName) and not inputs.f:
	    raise IOError('The markdown file to be generated already exists.\n Run with --f to overwrite.')

	#for Unicode support across python versions: Python 2's builtin open()
	#takes no encoding argument, io.open does
	if sys.version_info[0] < 3:
	    import io
	    _open_func_bak = open # Make a back up, just in case
	    open = io.open

	#expressions to convert to Liquid math tags
	#the outer groups need '*' so that math at the very start or end of a line
	#still matches; without it a non-'$' character was required on both sides.
	#raw strings avoid the invalid '\$' string escape.
	#groups: 1 = text before the math, 2 = the math itself, 3 = text after it
	displayRe = re.compile(r'([^\$]*)\$\$([^\$]+)\$\$([^\$]*)') #$$...$$ display math
	inlineRe = re.compile(r'([^\$]*)\$([^\$]+)\$([^\$]*)') #$...$ inline math

	#Code cells have inputs and outputs
	def processCodeCell(codeCell):
	    """Turn one notebook code cell into Liquid-tagged text plus image payloads.

	    codeCell is a dict read with the keys 'input', 'language' and 'outputs'.

	    Returns a dict with two keys:
	      'text'   -- the input wrapped in a highlight tag, followed by the
	                  text (or math-converted latex) of every 'pyout' output
	      'images' -- one {'format': 'svg', 'imgData': ...} entry per
	                  'display_data' output that carries an 'svg' key

	    A cell whose input is an empty list yields empty text and no images;
	    its outputs are not examined. Output types other than 'pyout' and
	    'display_data' are ignored.
	    """
	    result = {'text': '', 'images': []}
	    if codeCell['input'] == []:
	        return result

	    #collect the pieces and join once instead of growing a string with +=
	    parts = ['{{% highlight {language} %}}\n'.format(language=codeCell['language'])]
	    parts.extend(codeCell['input'])
	    parts.append('\n{% endhighlight %}')
	    parts.append('\n')

	    for out in codeCell['outputs']:
	        outputType = out['output_type']
	        if outputType == 'pyout':
	            #'in' instead of dict.has_key, which no longer exists on Python 3
	            if isinstance(out, dict) and 'latex' in out:
	                for line in out['latex']:
	                    line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
	                    line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
	                    parts.append(line)
	            else:
	                parts.extend(out['text'])
	        elif outputType == 'display_data' and 'svg' in out:
	            result['images'].append({'format': 'svg', 'imgData': out['svg']})

	    result['text'] = ''.join(parts)
	    return result


	#Markdown cells only have md text
	def processMarkdownCell(mdCell):
	    """Return the cell's 'source' lines as one string with math delimiters
	    rewritten to Liquid tags: display math first, then inline math.
	    """
	    result = ''
	    for line in mdCell['source']:
	        line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
	        line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
	        result += line

	    return result

	#%%
	#read the notebook explicitly as UTF-8 rather than the platform default
	#encoding (open is io.open on Python 2, so the keyword works there too)
	with open(inputs.filename[0], 'r', encoding='utf-8') as f:
	    rd = json.load(f)


	#only need to do one worksheet for now.
	wb = rd['worksheets'][0]

	#%%
	out = [] #converted text, one entry per cell
	nImages = 0 #running image count, used to number the image files
	#this is a URL prefix, not a filesystem path: always join with '/' so the
	#generated links do not get backslashes on Windows. The trailing '' keeps
	#the final separator.
	imgDirString = '/'.join(('{{site.url}}', imgBaseDir, fName, ''))

	for cell in wb['cells']:
	    if cell['cell_type'] == 'code':
	        codeCellOut = processCodeCell(cell)
	        cellText = codeCellOut['text']
	        if codeCellOut['images'] != []:
	            #at least one image found: create the image directory before the first write
	            if nImages == 0:
	                makedirSafe(imgFullDir)

	            for image in codeCellOut['images']:
	                imgName = fName + str(nImages) + '.' + image['format']
	                imgFullPath = os.path.join(imgFullDir, imgName)

	                #never overwrite an existing image unless --f was given
	                if os.path.exists(imgFullPath) and not inputs.f:
	                    raise IOError('Image file: ' + fName + str(nImages) + '.' + image['format'] + ' already exists.\n Run with --f to overwrite')

	                #svg files are just xml so we can generate the output
	                if image['format'] == 'svg':
	                    with open(imgFullPath, 'w+', encoding='utf-8') as f:
	                        for line in image['imgData']:
	                            f.write(line)
	                else:
	                    raise TypeError('You encountered a ' + image['format'] + ' file. I don\'t know how to deal with that.')

	                #generate image tag
	                cellText += u'\n![' + imgName + '](' + imgDirString + imgName + ')'
	                nImages += 1
	        out.append(cellText + u'\n')

	    else:
	        #anything that is not a code cell is rendered as markdown
	        out.append(processMarkdownCell(cell)+'\n')



	#write markdown file
	with open(mdFName, 'w', encoding='utf-8') as f:
	    #every converted cell is followed by one extra newline
	    for cell in out:
	        f.write(cell)
	        f.write(u'\n')