Skip to content

Instantly share code, notes, and snippets.

@kangwonlee
Created March 8, 2017 07:21
Show Gist options
  • Save kangwonlee/1d5c03314acd256051e1c0352de70c81 to your computer and use it in GitHub Desktop.
Save kangwonlee/1d5c03314acd256051e1c0352de70c81 to your computer and use it in GitHub Desktop.
Unpack IPython notebook
'''unpack ipynb'''
import io
import json
import os
import re
import sys
from pprint import pprint
def dont_do_anything(fw, cell_dict):
    """Fallback handler: report a cell whose type has no dedicated handler.

    Nothing is written to the output file; the cell type and then the whole
    cell are printed to standard output instead.

    Args:
        fw: output file object. Accepted so the signature matches the other
            cell handlers, but never used.
        cell_dict: notebook cell as a dict; must contain a "cell_type" key.
    """
    # A single pre-formatted argument gives the same output whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("won't do anything about %s" % (cell_dict["cell_type"],))
    print(cell_dict)
def handle_heading(fw, cell_dict):
    """Write a heading cell to *fw* as a banner of ``#`` comment lines.

    Sample heading cell::

        {'cell_type': 'heading',
         'level': 1,
         'metadata': {},
         'source': ['HYPOTHESIS TESTING EXERCISES - SOLUTION']}

    The banner is a rule of 60 ``#`` characters, one ``# ``-prefixed line per
    source fragment, and a closing rule of 60 ``#`` characters. The cell's
    'level' is not used.

    Args:
        fw: writable file-like object receiving the generated Python text.
        cell_dict: heading cell; its 'source' is either a list of text
            fragments or one plain string.

    Returns:
        The text that was written to *fw*.
    """
    source = cell_dict['source']
    if not isinstance(source, (list, tuple)):
        # A bare string would otherwise be iterated character by character,
        # producing one comment line per character; split it into lines.
        source = source.splitlines(True)

    rule = '#' * 60 + '\n'
    body = ''.join('# ' + line + '\n' for line in source)
    output = rule + body + rule
    fw.write(output)
    return output
def handle_markdown(fw, cell_dict):
    r"""Write a markdown (or raw) cell to *fw* as a bare string literal.

    Sample markdown cell::

        {'cell_type': 'markdown',
         'metadata': {},
         'source': ["Verify the validity of Benford's law when applied to 1)
                     the population of a country; 2) the number of breast
                     cancer cases in each country.\n",
                    '\n',
                    '1. Collect a count of the first digits of all the
                     numbers in the data sets\n',
                    "2. Use a statistical tests to compare the observed count
                     to the one expected by Benford's law"]}

    The cell text is emitted between triple double quotes so that it shows up
    in the generated script as an unassigned string.

    Args:
        fw: writable file-like object receiving the generated Python text.
        cell_dict: cell whose 'source' holds the text, as a list of fragments
            or as one string.

    Returns:
        None.
    """
    text = ''.join(cell_dict['source'])
    # A triple double quote inside the text would close the literal early.
    text = text.replace('"""', "'''")
    # The literal is raw: markdown often carries backslashes (LaTeX such as
    # \xi or \unit), which a normal literal would read as escape sequences
    # and could make the generated file fail to compile.
    fw.write('r"""\n')
    fw.write(text)
    # The newline before the closing quotes also keeps a trailing quote or
    # backslash in the text from touching the terminator.
    fw.write('\n"""\n')
def handle_code(fw, cell_dict):
    r"""Write the input lines of a code cell to *fw*, then a separator.

    Sample code cell::

        {'cell_type': 'code',
         'collapsed': False,
         'input': ['%matplotlib inline\n',
                   '\n',
                   'import numpy as np\n',
                   'import pandas as pd\n',
                   'import matplotlib.pyplot as plt\n',
                   'import statsmodels.api as sm\n',
                   'from scipy import stats'],
         'language': 'python',
         'metadata': {},
         'outputs': [],
         'prompt_number': 1}

    Per input line:

    * a line starting with ``%`` (magic command) is commented out with ``#``;
    * a line ending in ``?`` that is not a comment is rewritten as a
      ``help(...)`` call;
    * any other line is copied unchanged.

    Args:
        fw: writable file-like object receiving the generated Python text.
        cell_dict: code cell; 'input' is a list of source lines or one string.

    Returns:
        None.
    """
    source = cell_dict.get("input") or []
    if not isinstance(source, (list, tuple)):
        # A bare string would otherwise be processed one character at a time.
        source = source.splitlines(True)

    for code in source:
        # startswith() instead of code[0]: an empty fragment must not raise.
        if code.startswith('%'):
            fw.write('#')
        code_strip = code.strip()
        if code_strip.endswith('?') and not code_strip.startswith('#'):
            fw.write('help(')
            # Drop every trailing '?', so "obj??" becomes help(obj) rather
            # than the invalid help(obj?).
            fw.write(code_strip.rstrip('?'))
            fw.write(')\n')
        else:
            fw.write(code)

    # NOTE(review): this reads the key "output", while the sample cell above
    # stores results under 'outputs'; as written the loop looks like it never
    # runs for such cells. Kept unchanged -- confirm the intended key and the
    # shape of its items before enabling it.
    for code in cell_dict.get("output", []):
        fw.write('## ')
        fw.write(code)

    # Terminate the last input line (it may lack a newline) and close the
    # cell with a rule of 20 '#' characters followed by a blank line.
    fw.write('\n')
    fw.write('#' * 20)
    fw.write('\n\n')
# Dispatch table: notebook cell type -> function that writes that cell to the
# output file. 'raw' cells are emitted the same way as markdown cells.
handler = dict(
    heading=handle_heading,
    code=handle_code,
    markdown=handle_markdown,
    raw=handle_markdown,
)
def _load_notebook(path):
    """Parse the notebook file at *path* (UTF-8 JSON) into Python objects."""
    # The notebook is JSON, so it is parsed with json rather than eval():
    # eval() would execute arbitrary expressions found in the file and fails
    # with NameError on the JSON literal ``null``.
    with io.open(path, 'r', encoding='utf-8') as notebook_file:
        return json.load(notebook_file)


def _without_triple_double_quotes(cell):
    """Return *cell* with triple double quotes in its text made single.

    Code cells are returned untouched, because rewriting quotes inside code
    could change the program. The input cell is never modified in place.
    """
    if cell.get('cell_type') == 'code' or 'source' not in cell:
        return cell
    source = cell['source']
    if isinstance(source, (list, tuple)):
        cleaned = [line.replace('"""', "'''") for line in source]
    else:
        cleaned = source.replace('"""', "'''")
    return dict(cell, source=cleaned)


def unpack(filename):
    """Convert the notebook *filename* into a ``.py`` script beside it.

    The extension of *filename* is replaced by ``.ipynb`` to locate the
    notebook and by ``.py`` to name the script. Nothing happens when the
    script already exists (it is never overwritten) or when the notebook does
    not exist (no empty script is left behind).

    Only cells found under the notebook's 'worksheets' entries are read. Each
    worksheet that has cells is written as ``from pylab import *``, the
    converted cells, and a closing banner plus ``show()``.

    Args:
        filename: path to the notebook, with or without the ``.ipynb``
            extension.

    Returns:
        None.

    Raises:
        Exception: whatever parsing or conversion raises is re-raised after
            the notebook path has been printed.
    """
    base_name, extension = os.path.splitext(filename)
    if ".ipynb" != extension:
        filename = base_name + ".ipynb"
    pyname = base_name + ".py"

    if os.path.exists(pyname):
        return
    # Checked before the output is opened: otherwise a mistyped name would
    # create an empty script that blocks every later conversion.
    if not os.path.exists(filename):
        return

    try:
        # Parsed first, so an unreadable notebook leaves no script behind.
        notebook = _load_notebook(filename)
        with open(pyname, 'w') as fw:
            for worksheet in notebook.get('worksheets') or []:
                cells = worksheet.get('cells') or []
                if not cells:
                    continue
                fw.write("from pylab import *\n")
                for cell in cells:
                    # process cell, or don't do anything
                    call_this = handler.get(cell['cell_type'],
                                            dont_do_anything)
                    call_this(fw, _without_triple_double_quotes(cell))
                # to present the result at least at the end
                fw.write('print (" The presented result might be overlapping. ".center(60, "*"))')
                fw.write("\nshow()\n")
    except Exception:
        # Name the notebook that failed, then let the error propagate.
        print(filename)
        raise
if "__main__" == __name__:
    # Script entry point: convert every notebook named on the command line.
    if len(sys.argv) < 2:
        # Without an argument, exit with a usage message instead of an
        # IndexError traceback.
        sys.exit("usage: %s NOTEBOOK [NOTEBOOK ...]" % sys.argv[0])
    for notebook_name in sys.argv[1:]:
        unpack(notebook_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment