Created
October 24, 2016 16:23
-
-
Save jvanasco/d378b8f65ead664b822d71c278bca09c to your computer and use it in GitHub Desktop.
Use `import_logger.py` to "decorate" CPython's `import` statement and log the actual memory growth of each import to a file. `transformer.py` then applies some quick, lightweight transformations to the logger's output, producing a spreadsheet-readable CSV. This is far from perfect, but it can be very helpful.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if True:  # presumably a manual on/off switch for the whole logger -- TODO confirm
    print("===> installing import_logger_orverride")

    import os
    import psutil
    import pdb
    import pprint
    try:
        import __builtin__
    except ImportError:
        # Python 3 renamed the module; alias it so the rest of the script is unchanged
        import builtins as __builtin__
    import logging
    import sys

    # setup the memory vars
    _this_process = psutil.Process(os.getpid())
    # the call is named differently across psutil versions: prefer `memory_info`
    # and fall back to the older `get_memory_info` spelling when it is missing
    _f_get_memory_info = getattr(_this_process, 'memory_info', None)
    if _f_get_memory_info is None:
        _f_get_memory_info = _this_process.get_memory_info
    # first field of the returned tuple (rss, per the psutil docs)
    GET_MEMORY = lambda: _f_get_memory_info()[0]

    # set up the dirs
    # we'll log to `{CWD}/imports_parser/runs/{VERSION}` in which `VERSION` is 000, 001, 002, etc
    REPORTS_DIR_BASE = os.path.join("imports_parser", "runs")
    if not os.path.exists(REPORTS_DIR_BASE):
        os.makedirs(REPORTS_DIR_BASE)
    dirs = [i for i in os.listdir(REPORTS_DIR_BASE)
            if os.path.isdir(os.path.join(REPORTS_DIR_BASE, i))
            ]
    max_dirs = len(dirs)
    REPORTS_DIR_RUN = os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)
    # if an earlier run directory was deleted, the count alone can name a
    # directory that already exists; advance until the name is free
    while os.path.exists(REPORTS_DIR_RUN):
        max_dirs += 1
        REPORTS_DIR_RUN = os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)
    print("===- Logging to %s" % REPORTS_DIR_RUN)
    os.makedirs(REPORTS_DIR_RUN)

    # these stay open for the life of the process: imports can still happen
    # during interpreter shutdown, so they are deliberately never closed here
    writer_success = open(os.path.join(REPORTS_DIR_RUN, 'imports.txt'), 'a')
    writer_error = open(os.path.join(REPORTS_DIR_RUN, 'errors.txt'), 'a')

    # we need this still
    realimport = __builtin__.__import__

    def _import_log_line(package_name, caller_file, mem_start):
        """Sample memory now and format one `import|...` log record."""
        mem_finish = GET_MEMORY()
        mem_growth = mem_finish - mem_start
        return "import|%s,%s,%s,%s,%s\n" % (package_name, caller_file, mem_growth, mem_start, mem_finish)

    # our override
    def import_logger_orverride(name, *args, **kwargs):
        """Drop-in replacement for `__import__` that logs memory growth.

        Delegates to the real `__import__` with the same arguments and returns
        its result.  A record of the form
        `import|{imported},{caller},{growth},{pre},{post}` is appended to
        `imports.txt` on success, or to `errors.txt` when the import fails
        with an ImportError whose message starts with "No module named".
        Every exception raised by the real import is re-raised unchanged.
        """
        _mem_start = GET_MEMORY()
        _package_name = name
        if len(args) == 4:
            # positional call is (globals, locals, fromlist, level); commas in
            # the fromlist repr are swapped out so the record stays comma-delimited
            _package_name = "%s.%s" % (name,
                                       str(args[2]).replace(',', '|'))
        # use sys._getframe, because the `inspect` module leaves a circular reference that won't clean up (even with an explicit delete)
        _frame = sys._getframe(1)
        try:
            # read the caller's module globals: `f_locals` only holds `__file__`
            # for module-level code, so imports inside functions lost their caller
            _caller_file = _frame.f_globals.get('__file__', "<>")
            try:
                _imported = realimport(name, *args, **kwargs)
            except ImportError as e:
                # `str(e)` instead of `e.message`, which only exists on Python 2
                if str(e).startswith("No module named"):
                    writer_error.write(_import_log_line(_package_name, _caller_file, _mem_start))
                raise
            writer_success.write(_import_log_line(_package_name, _caller_file, _mem_start))
            return _imported
        finally:
            # drop the frame reference explicitly so it cannot keep a cycle alive
            del _frame

    # install the override
    __builtin__.__import__ = import_logger_orverride
    print("<=== import_logger_orverride installed")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pprint | |
# manually change this, because I'm lazy.
versions_dir = "runs/001/"

# written by `import_logger.py`
fname_imports = "imports.txt"
fname_errors = "errors.txt"

# read the whole log up front; `with` guarantees the handle gets closed
with open(os.path.join(versions_dir, fname_imports)) as _imports_fh:
    raw_data = _imports_fh.readlines()

# first pass, to python
data_formatted = []
_skipped_rows = 0
for (idx, row) in enumerate(raw_data):
    # input is """import|{imported},{caller},{growth},{pre},{post}"""
    if not row.startswith('import|'):
        continue
    vs = [c.strip() for c in row[len('import|'):].split(',')]
    if len(vs) != 5:
        # e.g. a final line cut short mid-write, or a stray comma in a field;
        # the later passes index columns by position, so drop the row here
        _skipped_rows += 1
        continue
    # output is """{idx},{imported},{caller},{growth},{pre},{post}"""
    vs.insert(0, str(idx))
    data_formatted.append(vs)
if _skipped_rows:
    print("skipped %d malformed row(s) in %s" % (_skipped_rows, fname_imports))
# second pass, calculate the max
# rows are [idx, imported, caller, growth, pre, post], so `post` is column 5;
# the denominator is the largest `post` value seen (0.0 when there are no rows)
maxxed = max([float(row[5]) for row in data_formatted] or [0.0])
for row in data_formatted:
    # input is """{idx},{imported},{caller},{growth},{pre},{post}"""
    # output is """{idx},{imported},{caller},{growth},{pre},{post},{pct_growth},{pct_overall}"""
    as_percent_growth = '0'
    _growth = float(row[3]) if row[3] else 0
    # guard on `maxxed` as well so an all-zero log cannot divide by zero
    if _growth and maxxed:
        as_percent_growth = str((_growth / maxxed) * 100)
    row.append(as_percent_growth)
    as_percent_overall = '0'
    if maxxed:
        as_percent_overall = str((float(row[5]) / maxxed) * 100)
    row.append(as_percent_overall)
# okay now let's try and figure out the level
current_max_mem = 0
current_recursion = 0
seen = {}
# index the rows four ways:
#   callers: caller file   -> set of names it imported
#   bys:     imported name -> set of caller files that imported it
#   bypre:   `pre` value   -> list of (idx, pre, post) tuples
#   bypost:  `post` value  -> list of (idx, pre, post) tuples
bypre = {}
bypost = {}
callers = {}
bys = {}
for row in data_formatted:
    # columns are """{idx},{imported},{caller},{growth},{pre},{post},..."""
    row_id = int(row[0])
    row_name = row[1]
    row_caller = row[2]
    row_growth = int(row[3]) if row[3] else 0
    row_pre = int(row[4])
    row_post = int(row[5])
    _mem_span = (row_id, row_pre, row_post)
    callers.setdefault(row_caller, set()).add(row_name)
    bys.setdefault(row_name, set()).add(row_caller)
    bypre.setdefault(row_pre, []).append(_mem_span)
    bypost.setdefault(row_post, []).append(_mem_span)
# dump the two lookup tables as pretty-printed python literals;
# `with` makes sure each file is flushed and closed before moving on
with open(os.path.join(versions_dir, 'callers.txt'), 'w') as _out:
    _out.write(pprint.pformat(callers))
with open(os.path.join(versions_dir, 'bys.txt'), 'w') as _out:
    _out.write(pprint.pformat(bys))

# prepend the header row, then flatten every row to one comma-joined line
data_formatted.insert(0, ['idx', 'imported', 'caller', 'growth', 'pre', 'post', 'pct-growth', 'pct-overall'])
data_formatted = [','.join(row) for row in data_formatted]
with open(os.path.join(versions_dir, 'imports-processed.csv'), 'w') as _out:
    _out.write('\n'.join(data_formatted))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment