jvanasco · October 24, 2016 16:23
diff --git a/import_logger.py b/import_logger.py
 if True:
    print("===> installing import_logger_orverride")
    # everything below is imported up front, before `__builtin__.__import__`
    # is replaced further down in this file
    import os
    import psutil
    import pdb
    import pprint
    import __builtin__
    import logging
    import sys

    # setup the memory vars
    _this_process = psutil.Process(os.getpid())
    # newer psutil releases spell this `memory_info`; older ones only have
    # `get_memory_info`.  Prefer the new name and fall back to the old one so
    # the hook works on either.
    _f_get_memory_info = (getattr(_this_process, 'memory_info', None)
                          or _this_process.get_memory_info)

    def GET_MEMORY():
        """Return the first field of psutil's memory info for this process.

        psutil documents that first field as the resident set size (RSS).
        """
        return _f_get_memory_info()[0]

    # set up the dirs
    # we'll log to `{CWD}/imports_parser/runs/{VERSION}`, in which `VERSION`
    # is a zero-padded counter (000, 001, 002, etc)
    REPORTS_DIR_BASE = os.path.join("imports_parser", "runs")
    if not os.path.exists(REPORTS_DIR_BASE):
        os.makedirs(REPORTS_DIR_BASE)
    dirs = [i for i in os.listdir(REPORTS_DIR_BASE)
            if os.path.isdir(os.path.join(REPORTS_DIR_BASE, i))
            ]
    # start counting from the number of existing run dirs, but skip past any
    # name that is already taken (e.g. after an older run dir was deleted) so
    # `os.makedirs` below cannot fail on an existing path
    max_dirs = len(dirs)
    while os.path.exists(os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)):
        max_dirs += 1
    REPORTS_DIR_RUN = os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)
    print("===-  Logging to %s" % REPORTS_DIR_RUN)
    os.makedirs(REPORTS_DIR_RUN)
    # one log for imports that succeeded, one for imports that failed
    writer_success = open(os.path.join(REPORTS_DIR_RUN, 'imports.txt'), 'a')
    writer_error = open(os.path.join(REPORTS_DIR_RUN, 'errors.txt'), 'a')

    # we need this still: keep a handle on the original importer so the
    # override can delegate to it
    realimport = __builtin__.__import__

    # our override
    def import_logger_orverride(name, *args, **kwargs):
        """Replacement for `__import__` that logs memory use around each import.

        Delegates to `realimport` with exactly the arguments received and
        returns its result unchanged.  Memory is sampled with `GET_MEMORY()`
        before and after the real import, and one line is written per call:

            import|{package},{caller_file},{growth},{start},{finish}

        Successful imports go to `writer_success`.  An `ImportError` whose
        message starts with "No module named" is written to `writer_error`.
        Every exception raised by the real import is re-raised to the caller.
        """
        _mem_start = GET_MEMORY()
        _package_name = name
        if len(args) == 4:
            # positional call with (globals, locals, fromlist, level): tag the
            # name with the fromlist, replacing commas so the log line stays
            # comma-delimited
            _package_name = "%s.%s" % (name,
                                       str(args[2]).replace(',', '|'))
        # use sys._getframe, because the `inspect` module leaves a circular reference that won't clean up (even with an explicit delete)
        try:
            _frame = sys._getframe(1)
        except ValueError:
            # the call stack is not deep enough: there is no caller frame
            _frame = None
        _caller_file = "<>"
        if _frame is not None:
            # `__file__` is a module-level global, so read it from the frame's
            # globals; the frame's locals only contain it when the import
            # statement runs at module level
            _caller_file = _frame.f_globals.get('__file__') or "<>"
        # release the frame reference before the (possibly long) real import
        del _frame
        _line_template = "import|%s,%s,%s,%s,%s\n"
        try:
            _imported = realimport(name, *args, **kwargs)
        except ImportError as e:
            # only "module not found" failures are logged; everything is re-raised
            if str(e).startswith("No module named"):
                _mem_finish = GET_MEMORY()
                _mem_growth = _mem_finish - _mem_start
                writer_error.write(_line_template % (_package_name, _caller_file, _mem_growth, _mem_start, _mem_finish))
            raise
        _mem_finish = GET_MEMORY()
        _mem_growth = _mem_finish - _mem_start
        writer_success.write(_line_template % (_package_name, _caller_file, _mem_growth, _mem_start, _mem_finish))
        return _imported

    # swap the builtin importer for the logging wrapper, then report completion
    setattr(__builtin__, '__import__', import_logger_orverride)
    print("<=== import_logger_orverride installed")
diff --git a/transformer.py b/transformer.py
 import os
 import pprint

 # directory of the run to process; manually change this to pick another run
 versions_dir = "runs/001/"

 # written by `import_logger.py`
 fname_imports = "imports.txt"
 fname_errors = "errors.txt"

 # read every logged line up front; the `with` block closes the file handle
 # as soon as the lines are in memory
 with open(os.path.join(versions_dir, fname_imports)) as _imports_file:
    raw_data = _imports_file.readlines()

 # first pass: turn each logged line into a list of string fields
 data_formatted = []
 for (idx, row) in enumerate(raw_data):
    # input is """import|{imported},{caller},{growth},{pre},{post}"""
    # output is """{idx},{imported},{caller},{growth},{pre},{post}"""
    # `idx` is the position in the raw file, so skipped lines leave gaps
    if row.startswith('import|'):
        fields = [str(idx)]
        fields.extend(piece.strip() for piece in row[len('import|'):].split(','))
        data_formatted.append(fields)

 # second pass: express each row's growth and post-import memory as a
 # percentage of the peak.  The peak is the largest `post` value in any row
 # (column 5 once `idx` has been inserted); an empty log yields 0.0 instead
 # of raising.
 maxxed = max([float(row[5]) for row in data_formatted] or [0.0])
 for row in data_formatted:
    # input is """{idx},{imported},{caller},{growth},{pre},{post}"""
    # output is """{idx},{imported},{caller},{growth},{pre},{post},{pct_growth},{pct_overall}"""
    _growth = float(row[3]) if row[3] else 0
    # report '0' rather than dividing by a zero peak
    if _growth and maxxed:
        as_percent_growth = str((_growth / maxxed)*100)
    else:
        as_percent_growth = '0'
    row.append(as_percent_growth)
    if maxxed:
        as_percent_overall = str((float(row[5]) / maxxed)*100)
    else:
        as_percent_overall = '0'
    row.append(as_percent_overall)

 # okay now let's try and figure out the level
 # (these three are initialised here but not updated by the loop below)
 current_max_mem = 0
 current_recursion = 0
 seen = {}
 # lookup tables filled by the loop:
 #   bypre / bypost : memory value -> list of (row_id, pre, post) tuples
 #   callers        : caller file  -> set of names it imported
 #   bys            : imported name -> set of caller files
 bypre = {}
 bypost = {}
 callers = {}
 bys = {}
 for row in data_formatted:
    # convert every field first, so a malformed row fails before any table is touched
    row_id = int(row[0])
    row_name = row[1]
    row_caller = row[2]
    row_growth = int(row[3]) if row[3] else 0
    row_pre = int(row[4])
    row_post = int(row[5])

    callers.setdefault(row_caller, set()).add(row_name)
    bys.setdefault(row_name, set()).add(row_caller)

    _entry = (row_id, row_pre, row_post)
    bypre.setdefault(row_pre, []).append(_entry)
    bypost.setdefault(row_post, []).append(_entry)

 # pretty-printed dumps of the two relationship maps; each handle is closed
 # by its `with` block so the data is flushed to disk
 with open(os.path.join(versions_dir, 'callers.txt'), 'w') as _report:
    _report.write(pprint.pformat(callers))
 with open(os.path.join(versions_dir, 'bys.txt'), 'w') as _report:
    _report.write(pprint.pformat(bys))

 # prepend the header row, then flatten every row to one CSV line
 data_formatted.insert(0, ['idx', 'imported', 'caller', 'growth', 'pre', 'post', 'pct-growth', 'pct-overall'])
 data_formatted = [','.join(row) for row in data_formatted]
 with open(os.path.join(versions_dir, 'imports-processed.csv'), 'w') as _report:
    _report.write('\n'.join(data_formatted))
	if True:
	    print("===> installing import_logger_orverride")
	    # everything below is imported up front, before `__builtin__.__import__`
	    # is replaced at the end of this block
	    import os
	    import psutil
	    import pdb
	    import pprint
	    import __builtin__
	    import logging
	    import sys

	    # setup the memory vars
	    _this_process = psutil.Process(os.getpid())
	    # newer psutil releases spell this `memory_info`; older ones only have
	    # `get_memory_info`.  Prefer the new name and fall back to the old one.
	    _f_get_memory_info = (getattr(_this_process, 'memory_info', None)
	                          or _this_process.get_memory_info)

	    def GET_MEMORY():
	        """Return the first field of psutil's memory info for this process.

	        psutil documents that first field as the resident set size (RSS).
	        """
	        return _f_get_memory_info()[0]

	    # set up the dirs
	    # we'll log to `{CWD}/imports_parser/runs/{VERSION}`, in which `VERSION`
	    # is a zero-padded counter (000, 001, 002, etc)
	    REPORTS_DIR_BASE = os.path.join("imports_parser", "runs")
	    if not os.path.exists(REPORTS_DIR_BASE):
	        os.makedirs(REPORTS_DIR_BASE)
	    dirs = [i for i in os.listdir(REPORTS_DIR_BASE)
	            if os.path.isdir(os.path.join(REPORTS_DIR_BASE, i))
	            ]
	    # start from the number of existing run dirs, but skip past any name
	    # that is already taken so `os.makedirs` below cannot fail on it
	    max_dirs = len(dirs)
	    while os.path.exists(os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)):
	        max_dirs += 1
	    REPORTS_DIR_RUN = os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)
	    print("===- Logging to %s" % REPORTS_DIR_RUN)
	    os.makedirs(REPORTS_DIR_RUN)
	    # one log for imports that succeeded, one for imports that failed
	    writer_success = open(os.path.join(REPORTS_DIR_RUN, 'imports.txt'), 'a')
	    writer_error = open(os.path.join(REPORTS_DIR_RUN, 'errors.txt'), 'a')

	    # we need this still: the override delegates to the original importer
	    realimport = __builtin__.__import__

	    # our override
	    def import_logger_orverride(name, *args, **kwargs):
	        """Replacement for `__import__` that logs memory use around each import.

	        Delegates to `realimport` with exactly the arguments received and
	        returns its result unchanged.  Memory is sampled with `GET_MEMORY()`
	        before and after the real import, and one line is written per call:

	            import|{package},{caller_file},{growth},{start},{finish}

	        Successful imports go to `writer_success`.  An `ImportError` whose
	        message starts with "No module named" is written to `writer_error`.
	        Every exception raised by the real import is re-raised to the caller.
	        """
	        _mem_start = GET_MEMORY()
	        _package_name = name
	        if len(args) == 4:
	            # positional call with (globals, locals, fromlist, level): tag
	            # the name with the fromlist, replacing commas so the log line
	            # stays comma-delimited
	            _package_name = "%s.%s" % (name,
	                                       str(args[2]).replace(',', '|'))
	        # use sys._getframe, because the `inspect` module leaves a circular reference that won't clean up (even with an explicit delete)
	        try:
	            _frame = sys._getframe(1)
	        except ValueError:
	            # the call stack is not deep enough: there is no caller frame
	            _frame = None
	        _caller_file = "<>"
	        if _frame is not None:
	            # `__file__` is a module-level global, so read it from the
	            # frame's globals; the locals only contain it at module level
	            _caller_file = _frame.f_globals.get('__file__') or "<>"
	        # release the frame reference before the real import runs
	        del _frame
	        _line_template = "import|%s,%s,%s,%s,%s\n"
	        try:
	            _imported = realimport(name, *args, **kwargs)
	        except ImportError as e:
	            # only "module not found" failures are logged; everything is re-raised
	            if str(e).startswith("No module named"):
	                _mem_finish = GET_MEMORY()
	                _mem_growth = _mem_finish - _mem_start
	                writer_error.write(_line_template % (_package_name, _caller_file, _mem_growth, _mem_start, _mem_finish))
	            raise
	        _mem_finish = GET_MEMORY()
	        _mem_growth = _mem_finish - _mem_start
	        writer_success.write(_line_template % (_package_name, _caller_file, _mem_growth, _mem_start, _mem_finish))
	        return _imported

	    # install the override
	    __builtin__.__import__ = import_logger_orverride
	    print("<=== import_logger_orverride installed")
	import os
	import pprint

	# directory of the run to process; manually change this to pick another run
	versions_dir = "runs/001/"

	# written by `import_logger.py`
	fname_imports = "imports.txt"
	fname_errors = "errors.txt"

	# read every logged line up front; the `with` block closes the file handle
	with open(os.path.join(versions_dir, fname_imports)) as _imports_file:
	    raw_data = _imports_file.readlines()

	# first pass, to python
	data_formatted = []
	for (idx, row) in enumerate(raw_data):
	    # input is """import|{imported},{caller},{growth},{pre},{post}"""
	    if not row.startswith('import|'):
	        continue
	    # drop the 7-character 'import|' prefix
	    row = row[7:]
	    vs = [c.strip() for c in row.split(',')]
	    # output is """{idx},{imported},{caller},{growth},{pre},{post}"""
	    vs.insert(0, str(idx))
	    data_formatted.append(vs)

	# second pass: express each row's growth and post-import memory as a
	# percentage of the peak.  The peak is the largest `post` value in any row
	# (column 5 once `idx` has been inserted); an empty log yields 0.0 instead
	# of raising.
	maxxed = max([float(row[5]) for row in data_formatted] or [0.0])
	for row in data_formatted:
	    # input is """{idx},{imported},{caller},{growth},{pre},{post}"""
	    # output is """{idx},{imported},{caller},{growth},{pre},{post},{pct_growth},{pct_overall}"""
	    _growth = float(row[3]) if row[3] else 0
	    # report '0' rather than dividing by a zero peak
	    if _growth and maxxed:
	        as_percent_growth = str((_growth / maxxed)*100)
	    else:
	        as_percent_growth = '0'
	    row.append(as_percent_growth)
	    if maxxed:
	        as_percent_overall = str((float(row[5]) / maxxed)*100)
	    else:
	        as_percent_overall = '0'
	    row.append(as_percent_overall)

	# okay now let's try and figure out the level
	# (these three are initialised here but not updated by the loop below)
	current_max_mem = 0
	current_recursion = 0
	seen = {}
	# lookup tables filled by the loop:
	#   bypre / bypost : memory value -> list of (row_id, pre, post) tuples
	#   callers        : caller file  -> set of names it imported
	#   bys            : imported name -> set of caller files
	bypre = {}
	bypost = {}
	callers = {}
	bys = {}
	for row in data_formatted:
	    row_id = int(row[0])
	    row_name = row[1]
	    row_caller = row[2]
	    row_growth = int(row[3]) if row[3] else 0
	    row_pre = int(row[4])
	    row_post = int(row[5])

	    if row_caller not in callers:
	        callers[row_caller] = set()
	    callers[row_caller].add(row_name)

	    if row_name not in bys:
	        bys[row_name] = set()
	    bys[row_name].add(row_caller)

	    if row_pre not in bypre:
	        bypre[row_pre] = []
	    bypre[row_pre].append((row_id, row_pre, row_post))

	    if row_post not in bypost:
	        bypost[row_post] = []
	    bypost[row_post].append((row_id, row_pre, row_post))

	# pretty-printed dumps of the two relationship maps; each handle is closed
	# by its `with` block so the data is flushed to disk
	with open(os.path.join(versions_dir, 'callers.txt'), 'w') as _report:
	    _report.write(pprint.pformat(callers))
	with open(os.path.join(versions_dir, 'bys.txt'), 'w') as _report:
	    _report.write(pprint.pformat(bys))

	# prepend the header row, then flatten every row to one CSV line
	data_formatted.insert(0, ['idx', 'imported', 'caller', 'growth', 'pre', 'post', 'pct-growth', 'pct-overall'])
	data_formatted = [','.join(row) for row in data_formatted]
	with open(os.path.join(versions_dir, 'imports-processed.csv'), 'w') as _report:
	    _report.write('\n'.join(data_formatted))