kangwonlee · March 8, 2017 07:21
diff --git a/unpack_ipynb.py b/unpack_ipynb.py
 '''unpack ipynb'''
 import os
 from pprint import pprint
 import re
 import sys


 def dont_do_anything(fw, cell_dict):
     '''Fallback handler: report a cell that has no converter.

     Nothing is written to fw.  The cell type and then the whole cell
     dictionary are printed to standard output.

     fw        -- output file object; accepted so the signature matches
                  the other handlers, never used here
     cell_dict -- cell dictionary; must contain the key "cell_type"
                  (KeyError otherwise)
     '''
     # One parenthesised argument prints the same text whether print is the
     # Python 2 statement or the Python 3 function.
     print("won't do anything about %s" % (cell_dict["cell_type"],))
     print(cell_dict)


 def handle_heading(fw, cell_dict):
     '''Render a heading cell as a comment banner.

     sample heading cell
     {'cell_type': 'heading',
      'level': 1,
      'metadata': {},
      'source': ['HYPOTHESIS TESTING EXERCISES - SOLUTION']}

     Every entry of cell_dict['source'] becomes one "# ..." line placed
     between two rules of 60 '#' characters.

     fw        -- object whose write() receives the banner
     cell_dict -- cell dictionary; must contain the key 'source'

     Returns the banner text that was written.
     '''
     rule = '#' * 60 + '\n'
     titles = ['# ' + text + '\n' for text in cell_dict['source']]
     banner = rule + ''.join(titles) + rule

     fw.write(banner)
     return banner


 def handle_markdown(fw, cell_dict):
     r'''Copy the text of a markdown cell into fw as a triple-quoted string.

     sample markdown cell
     {'cell_type': 'markdown',
      'metadata': {},
      'source': ['first line\n',
                 '\n',
                 'last line']}

     The entries of cell_dict['source'] are concatenated unchanged and
     wrapped between a line holding three double quotes and, after an extra
     newline, another such line.

     fw        -- object whose write() receives the text
     cell_dict -- cell dictionary; must contain the key 'source'
     '''
     body = ''.join(cell_dict['source'])
     fw.write('"""\n' + body + '\n"""\n')


 def handle_code(fw, cell_dict):
     r'''Write the source lines of a code cell to fw.

     sample code cell
     {'cell_type': 'code',
      'collapsed': False,
      'input': ['%matplotlib inline\n',
                '\n',
                'import numpy as np\n',
                'from scipy import stats'],
      'language': 'python',
      'metadata': {},
      'outputs': [],
      'prompt_number': 1}

     Each entry of cell_dict["input"] is copied to fw, except that
       * an entry starting with '%' (magic command) is prefixed with '#';
       * an entry ending in '?' that contains no '#' is rewritten as a
         help(...) call on its own line.
     Entries found under the key "output" follow, each prefixed with '## '.
     The cell is closed with a newline, a rule of 20 '#' characters and a
     blank line.

     fw        -- object whose write() receives the text
     cell_dict -- cell dictionary; "input" and "output" are both optional
     '''
     for code in cell_dict.get("input", []):
         code_strip = code.strip()

         # startswith() is safe on an empty string, where code[0] would
         # raise IndexError
         if code.startswith('%'):
             fw.write('#')

         # A '?' that merely ends a trailing comment is not a help request,
         # so any '#' on the line disables the rewrite.
         if code_strip.endswith('?') and '#' not in code_strip:
             # rstrip removes every trailing '?': "name??" -> help(name)
             fw.write('help(' + code_strip.rstrip('?') + ')\n')
         else:
             fw.write(code)

     # NOTE(review): the sample cell above stores results under "outputs"
     # while this lookup uses "output" -- confirm which key is intended.
     for code in cell_dict.get("output", []):
         fw.write('## ')
         fw.write(code)

     fw.write('\n')
     fw.write('#' * 20)
     fw.write('\n\n')


 # Dispatch table: notebook cell type -> function that writes such a cell.
 # 'raw' cells are written by the same function as 'markdown' cells.
 handler = dict(
     heading=handle_heading,
     code=handle_code,
     markdown=handle_markdown,
     raw=handle_markdown,
 )


 def unpack(filename):
     '''Convert an .ipynb notebook into a .py script stored beside it.

     filename -- notebook path; when its extension is not ".ipynb", that
                 extension is substituted before the file is looked up

     The script path is the notebook path with the extension ".py".
     Nothing is done when that script already exists or when the notebook
     is missing.  Otherwise, for every entry of the notebook's "worksheets"
     list that has cells, a pylab import line is written, each cell is
     passed to the function registered for its 'cell_type' in `handler`
     (dont_do_anything when there is none), and a closing print/show()
     pair is appended.

     Returns None.  If decoding or converting fails, the notebook path is
     printed and the exception is re-raised.
     '''
     root, ext = os.path.splitext(filename)
     if ".ipynb" != ext:
         filename = root + ".ipynb"
     pyname = root + ".py"

     # An existing script is never overwritten.  The notebook is checked
     # before the script is created, so a missing notebook cannot leave an
     # empty .py behind that would block every later conversion.
     if os.path.exists(pyname) or not os.path.exists(filename):
         return

     with open(filename, 'r') as f:
         txt = f.read()

     # swap triple double quotes in the raw text for triple single quotes
     txt = txt.replace('"""', "'''")

     try:
         # SECURITY: eval() executes whatever expression the file holds, so
         # only convert notebooks you trust.  TODO(review): json.loads() is
         # the safe parser; switching needs a check of how the handlers
         # cope with the string type it returns.
         # The mapping gives the JSON literals a Python meaning; without
         # the "null" entry such a notebook fails with NameError.
         json_names = {'false': False, 'true': True, 'null': None}
         d = eval(txt, {}, json_names)

         # the with-block closes the script even when a handler raises
         with open(pyname, 'w') as fw:
             for worksheet in d.get('worksheets') or []:
                 cells = worksheet.get('cells') or []
                 if not cells:
                     continue
                 fw.write("from pylab import *\n")
                 for cell in cells:
                     # process cell, or don't do anything
                     call_this = handler.get(cell['cell_type'],
                                             dont_do_anything)
                     call_this(fw, cell)
                 # to present the result at least at the end
                 fw.write('print (" The presented result might be overlapping. ".center(60, "*"))')
                 fw.write("\nshow()\n")
     except Exception:
         print(filename)
         raise


 if "__main__" == __name__:
     # Without an argument, stop with a usage message rather than an
     # IndexError traceback; with several, convert each notebook in turn.
     if len(sys.argv) < 2:
         sys.exit("usage: %s NOTEBOOK [NOTEBOOK ...]" % sys.argv[0])
     for notebook in sys.argv[1:]:
         unpack(notebook)
	'''unpack ipynb'''
	import os
	from pprint import pprint
	import re
	import sys


	def dont_do_anything(fw, cell_dict):
		'''Fallback handler: report a cell that has no converter.

		Nothing is written to fw.  The cell type and then the whole cell
		dictionary are printed to standard output.

		fw        -- output file object; accepted so the signature matches
		             the other handlers, never used here
		cell_dict -- cell dictionary; must contain the key "cell_type"
		             (KeyError otherwise)
		'''
		# One parenthesised argument prints the same text whether print is
		# the Python 2 statement or the Python 3 function.
		print("won't do anything about %s" % (cell_dict["cell_type"],))
		print(cell_dict)


	def handle_heading(fw, cell_dict):
		'''Render a heading cell as a comment banner.

		sample heading cell
		{'cell_type': 'heading',
		 'level': 1,
		 'metadata': {},
		 'source': ['HYPOTHESIS TESTING EXERCISES - SOLUTION']}

		Every entry of cell_dict['source'] becomes one "# ..." line placed
		between two rules of 60 '#' characters.

		fw        -- object whose write() receives the banner
		cell_dict -- cell dictionary; must contain the key 'source'

		Returns the banner text that was written.
		'''
		rule = '#' * 60 + '\n'
		titles = ['# ' + text + '\n' for text in cell_dict['source']]
		banner = rule + ''.join(titles) + rule

		fw.write(banner)
		return banner


	def handle_markdown(fw, cell_dict):
		r'''Copy the text of a markdown cell into fw as a triple-quoted string.

		sample markdown cell
		{'cell_type': 'markdown',
		 'metadata': {},
		 'source': ['first line\n',
		            '\n',
		            'last line']}

		The entries of cell_dict['source'] are concatenated unchanged and
		wrapped between a line holding three double quotes and, after an
		extra newline, another such line.

		fw        -- object whose write() receives the text
		cell_dict -- cell dictionary; must contain the key 'source'
		'''
		body = ''.join(cell_dict['source'])
		fw.write('"""\n' + body + '\n"""\n')


	def handle_code(fw, cell_dict):
		r'''Write the source lines of a code cell to fw.

		sample code cell
		{'cell_type': 'code',
		 'collapsed': False,
		 'input': ['%matplotlib inline\n',
		           '\n',
		           'import numpy as np\n',
		           'from scipy import stats'],
		 'language': 'python',
		 'metadata': {},
		 'outputs': [],
		 'prompt_number': 1}

		Each entry of cell_dict["input"] is copied to fw, except that
		  * an entry starting with '%' (magic command) is prefixed with '#';
		  * an entry ending in '?' that contains no '#' is rewritten as a
		    help(...) call on its own line.
		Entries found under the key "output" follow, each prefixed with
		'## '.  The cell is closed with a newline, a rule of 20 '#'
		characters and a blank line.

		fw        -- object whose write() receives the text
		cell_dict -- cell dictionary; "input" and "output" are both optional
		'''
		for code in cell_dict.get("input", []):
			code_strip = code.strip()

			# startswith() is safe on an empty string, where code[0] would
			# raise IndexError
			if code.startswith('%'):
				fw.write('#')

			# A '?' that merely ends a trailing comment is not a help
			# request, so any '#' on the line disables the rewrite.
			if code_strip.endswith('?') and '#' not in code_strip:
				# rstrip removes every trailing '?': "name??" -> help(name)
				fw.write('help(' + code_strip.rstrip('?') + ')\n')
			else:
				fw.write(code)

		# NOTE(review): the sample cell above stores results under "outputs"
		# while this lookup uses "output" -- confirm which key is intended.
		for code in cell_dict.get("output", []):
			fw.write('## ')
			fw.write(code)

		fw.write('\n')
		fw.write('#' * 20)
		fw.write('\n\n')


	# Dispatch table: notebook cell type -> function that writes such a cell.
	# 'raw' cells are written by the same function as 'markdown' cells.
	handler = dict(
		heading=handle_heading,
		code=handle_code,
		markdown=handle_markdown,
		raw=handle_markdown,
	)


	def unpack(filename):
		'''Convert an .ipynb notebook into a .py script stored beside it.

		filename -- notebook path; when its extension is not ".ipynb", that
		            extension is substituted before the file is looked up

		The script path is the notebook path with the extension ".py".
		Nothing is done when that script already exists or when the notebook
		is missing.  Otherwise, for every entry of the notebook's
		"worksheets" list that has cells, a pylab import line is written,
		each cell is passed to the function registered for its 'cell_type'
		in `handler` (dont_do_anything when there is none), and a closing
		print/show() pair is appended.

		Returns None.  If decoding or converting fails, the notebook path is
		printed and the exception is re-raised.
		'''
		root, ext = os.path.splitext(filename)
		if ".ipynb" != ext:
			filename = root + ".ipynb"
		pyname = root + ".py"

		# An existing script is never overwritten.  The notebook is checked
		# before the script is created, so a missing notebook cannot leave
		# an empty .py behind that would block every later conversion.
		if os.path.exists(pyname) or not os.path.exists(filename):
			return

		with open(filename, 'r') as f:
			txt = f.read()

		# swap triple double quotes in the raw text for triple single quotes
		txt = txt.replace('"""', "'''")

		try:
			# SECURITY: eval() executes whatever expression the file holds,
			# so only convert notebooks you trust.  TODO(review):
			# json.loads() is the safe parser; switching needs a check of
			# how the handlers cope with the string type it returns.
			# The mapping gives the JSON literals a Python meaning; without
			# the "null" entry such a notebook fails with NameError.
			json_names = {'false': False, 'true': True, 'null': None}
			d = eval(txt, {}, json_names)

			# the with-block closes the script even when a handler raises
			with open(pyname, 'w') as fw:
				for worksheet in d.get('worksheets') or []:
					cells = worksheet.get('cells') or []
					if not cells:
						continue
					fw.write("from pylab import *\n")
					for cell in cells:
						# process cell, or don't do anything
						call_this = handler.get(cell['cell_type'],
						                        dont_do_anything)
						call_this(fw, cell)
					# to present the result at least at the end
					fw.write('print (" The presented result might be overlapping. ".center(60, "*"))')
					fw.write("\nshow()\n")
		except Exception:
			print(filename)
			raise


	if "__main__" == __name__:
		# Without an argument, stop with a usage message rather than an
		# IndexError traceback; with several, convert each notebook in turn.
		if len(sys.argv) < 2:
			sys.exit("usage: %s NOTEBOOK [NOTEBOOK ...]" % sys.argv[0])
		for notebook in sys.argv[1:]:
			unpack(notebook)
No results found