Created
March 8, 2017 07:21
-
-
Save kangwonlee/1d5c03314acd256051e1c0352de70c81 to your computer and use it in GitHub Desktop.
Unpack iPython notebook
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''unpack ipynb''' | |
import os | |
from pprint import pprint | |
import re | |
import sys | |
def dont_do_anything(fw, cell_dict):
    '''Report a cell whose type has no dedicated handler; write nothing.

    Args:
        fw: Output file object.  Accepted so the signature matches the other
            cell handlers, but never used.
        cell_dict: Notebook cell dictionary; must contain 'cell_type'.
    '''
    # A print() call with one pre-formatted argument produces the same text
    # under the Python 2 print statement and the Python 3 print function.
    print("won't do anything about %s" % (cell_dict["cell_type"],))
    print(cell_dict)
def handle_heading(fw, cell_dict):
    '''Write a heading cell as a banner of '#' comment lines.

    Sample heading cell:

        {'cell_type': 'heading',
         'level': 1,
         'metadata': {},
         'source': ['HYPOTHESIS TESTING EXERCISES - SOLUTION']}

    Args:
        fw: Writable text file object that receives the banner.
        cell_dict: Heading cell dictionary.  'source' holds the heading text
            as a list of strings; a single string is accepted as well.

    Returns:
        The exact text that was written to fw.
    '''
    rule = '#' * 60 + '\n'

    source = cell_dict['source']
    if isinstance(source, str):
        # Iterating a bare string would emit one comment line per character.
        source = [source]

    pieces = [rule]
    for entry in source:
        # Prefix every physical line.  An entry may end with a newline or
        # contain embedded newlines, and any text left without '# ' would
        # become live code in the generated script.  An empty entry still
        # yields one empty comment line.
        for line in entry.splitlines() or ['']:
            pieces.append('# ' + line + '\n')
    pieces.append(rule)

    output = ''.join(pieces)
    fw.write(output)
    return output
def handle_markdown(fw, cell_dict):
    '''Write a markdown (or raw) cell as a triple-double-quoted string.

    Sample markdown cell:

        {'cell_type': 'markdown',
         'metadata': {},
         'source': ["Verify the validity of Benford's law ...",
                    '1. Collect a count of the first digits ...',
                    '2. Use a statistical test to compare ...']}

    Args:
        fw: Writable text file object that receives the string literal.
        cell_dict: Cell dictionary; the entries of 'source' are concatenated
            unchanged between the opening and closing quotes.
    '''
    text = ''.join(cell_dict['source'])

    # A literal """ inside the cell would close the emitted string early and
    # turn the rest of the text into code, so downgrade it to '''.  Joining
    # the entries first also catches a """ split across two entries.
    text = text.replace('"""', "'''")

    # NOTE(review): backslashes are still written verbatim into a non-raw
    # string, so sequences such as \x in the markdown are read as escapes
    # when the generated file is loaded.
    fw.write('"""\n' + text + '\n"""\n')
def handle_code(fw, cell_dict):
    '''Write the input lines of a code cell, then a '#' separator.

    Sample code cell:

        {'cell_type': 'code',
         'collapsed': False,
         'input': ['%matplotlib inline', 'import numpy as np', ...],
         'language': 'python',
         'metadata': {},
         'outputs': [],
         'prompt_number': 1}

    Each entry of 'input' is written as-is, except that:
      * an entry starting with '%' (magic command) gets a leading '#';
      * an entry ending in '?' or '??' (object help request) is rewritten
        as a help(...) call.

    Args:
        fw: Writable text file object that receives the code.
        cell_dict: Code cell dictionary; 'input' and 'output' are optional.
    '''
    for line in cell_dict.get("input", []):
        # Magic command: comment it out.  startswith() instead of line[0]
        # so that an empty entry does not raise IndexError.
        if line.startswith('%'):
            fw.write('#')

        stripped = line.strip()
        # A trailing '?' only marks a help request when the line carries no
        # comment; wrapping "x = 1  # why?" in help( ) would let the comment
        # swallow the closing parenthesis.
        # NOTE(review): a '#' inside a string literal also disables the
        # rewrite; such a line is written verbatim.
        if stripped.endswith('?') and '#' not in stripped:
            # rstrip removes both the 'obj?' and the 'obj??' forms.
            fw.write('help(')
            fw.write(stripped.rstrip('?'))
            fw.write(')\n')
        else:
            fw.write(line)

    # NOTE(review): the sample cell stores its results under 'outputs', while
    # this reads 'output'.  Kept unchanged -- confirm which key is intended
    # and whether its entries are plain strings before switching.
    for result in cell_dict.get("output", []):
        fw.write('## ')
        fw.write(result)
        fw.write('\n')

    # Separator after every cell.  It starts with '#', so it is still a
    # comment when the last input line had no trailing newline.
    fw.write('#' * 20)
    fw.write('\n\n')
# Dispatch table: notebook cell type -> function that writes that cell.
# 'raw' cells are written the same way as markdown cells.
handler = dict(
    heading=handle_heading,
    code=handle_code,
    markdown=handle_markdown,
    raw=handle_markdown,
)
def unpack(filename):
    '''Convert an IPython notebook into a .py script stored next to it.

    The notebook path is filename with its extension forced to '.ipynb';
    the script path is the same base name with '.py'.  Nothing happens when
    the script already exists or the notebook is missing.  In particular no
    empty script is created, since that would block every later run.

    For each worksheet that has cells, the script receives a
    'from pylab import *' line, the text produced by the per-cell handlers,
    and a closing notice followed by 'show()'.

    Args:
        filename: Path of the notebook, with or without the '.ipynb'
            extension.

    Returns:
        None.

    Raises:
        Any exception raised while parsing the notebook or writing a cell.
        For failures while writing cells the notebook path is printed first.
    '''
    base, ext = os.path.splitext(filename)
    if ".ipynb" != ext:
        filename = base + ".ipynb"
    pyname = base + ".py"

    # Never overwrite an existing script, and do not create one when there
    # is no notebook to read.
    if os.path.exists(pyname) or not os.path.exists(filename):
        return

    with open(filename, 'r') as f:
        txt = f.read()

    # Replace all triple double quotes with triple single quotes to avoid
    # possible confusion with the quotes written around markdown cells.
    txt = txt.replace('"""', "'''")

    # The notebook is JSON; map its three literals onto Python values so the
    # text can be evaluated.  'null' was previously undefined, so any
    # notebook containing it failed with NameError.
    # SECURITY: eval() runs whatever expression the file contains.  Only use
    # this on notebooks you trust.
    json_literals = {'false': False, 'true': True, 'null': None}
    d = eval(txt, json_literals)

    # The output is opened only after the notebook parsed successfully, and
    # the with-block closes it even when a handler raises.
    with open(pyname, 'w') as fw:
        try:
            for worksheet in d.get('worksheets', []):
                cells = worksheet.get('cells', [])
                if not cells:
                    continue
                fw.write("from pylab import *\n")
                for cell in cells:
                    # process cell, or don't do anything
                    call_this = handler.get(cell['cell_type'],
                                            dont_do_anything)
                    call_this(fw, cell)
                # to present the result at least at the end
                fw.write('print (" The presented result might be overlapping. ".center(60, "*"))')
                fw.write("\nshow()\n")
        except Exception:
            # Say which notebook failed, then let the error propagate.
            print(filename)
            raise
if "__main__" == __name__:
    # Unpack every notebook named on the command line.  Without an argument,
    # print a usage line instead of failing with IndexError on sys.argv[1].
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s NOTEBOOK [NOTEBOOK ...]\n" % sys.argv[0])
        sys.exit(1)
    for notebook in sys.argv[1:]:
        unpack(notebook)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment