Forked from ericsnowcurrently/profile-type_new.py
Last active
January 14, 2025 20:22
-
-
Save mdboom/9873e7cfb226706821894f674a933609 to your computer and use it in GitHub Desktop.
a script to instrument and profile type_new()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
import logging | |
import os | |
import os.path | |
import re | |
import shutil | |
import subprocess | |
import sys | |
import textwrap | |
import time | |
#SHORTCUT = False | |
SHORTCUT = True | |
VERBOSITY = 3 # logging.INFO | |
logger = logging.getLogger() | |
####################################### | |
# logging | |
def verbosity_to_loglevel(verbosity, *, maxlevel=logging.CRITICAL): | |
return max(1, # 0 disables it, so we use the next lowest. | |
min(maxlevel, | |
maxlevel - verbosity * 10)) | |
def loglevel_to_verbosity(level, *, maxlevel=logging.CRITICAL): | |
return max(0, maxlevel - level) // 10 | |
def configure_logger(logger, verbosity=VERBOSITY, *, | |
maxlevel=logging.CRITICAL, | |
): | |
level = verbosity_to_loglevel(verbosity) | |
logger.setLevel(level) | |
if not logger.handlers: | |
handler = logging.StreamHandler(sys.stdout) | |
handler.setLevel(level) | |
logger.addHandler(handler) | |
def get_verbosity(): | |
loglevel = logger.getEffectiveLevel() | |
assert loglevel > 0, loglevel | |
return loglevel_to_verbosity(loglevel) | |
####################################### | |
# OS utils | |
def resolve_cmd_capture(capture=None): | |
if capture is None: | |
return not (get_verbosity() < VERBOSITY) | |
elif isinstance(capture, str) and capture.startswith('?'): | |
if get_verbosity() < VERBOSITY: | |
return None | |
else: | |
return capture[1:] | |
else: | |
return capture | |
def run_cmd(cmd, *args, capture=None, cwd=None, env=None, quiet=False): | |
capture = resolve_cmd_capture(capture) | |
kwargs = dict( | |
cwd=cwd, | |
env=env, | |
) | |
if capture is True: | |
kwargs['stdout'] = subprocess.PIPE | |
kwargs['stderr'] = subprocess.PIPE | |
kwargs['text'] = True | |
elif capture == 'stdout': | |
kwargs['stdout'] = subprocess.PIPE | |
kwargs['text'] = True | |
elif capture == 'stderr': | |
kwargs['stderr'] = subprocess.PIPE | |
kwargs['text'] = True | |
elif capture == '+stderr': | |
kwargs['stdout'] = subprocess.PIPE | |
kwargs['stderr'] = subprocess.STDOUT | |
kwargs['text'] = True | |
elif capture: | |
raise ValueError(f'unsupported capture {capture!r}') | |
if os.path.basename(cmd) == cmd: | |
raise NotImplementedError(repr(cmd)) | |
cmd = shutil.which(cmd) | |
argv = [cmd, *args] | |
if not quiet: | |
if env: | |
logger.info(f'# running: {" ".join(argv)} ({cwd or os.getcwd()}) {env}') | |
else: | |
logger.info(f'# running: {" ".join(argv)} ({cwd or os.getcwd()})') | |
proc = subprocess.run(argv, **kwargs) | |
return proc.returncode, proc.stdout, proc.stderr | |
####################################### | |
# git | |
GIT = shutil.which('git') | |
CWD = os.getcwd() | |
def find_repo_root(*, fail=False): | |
rc, stdout, stderr = run_cmd( | |
GIT, 'rev-parse', '--show-toplevel', | |
capture=True, | |
quiet=True, | |
) | |
if rc != 0: | |
if os.path.exists(os.path.join(CWD, 'cpython', '.git')): | |
return os.path.join(CWD, 'cpython') | |
if fail: | |
sys.exit('ERROR: repo root not found') | |
return None | |
assert stdout.strip() | |
return os.path.abspath(stdout.strip()) | |
def resolve_repo_root(reporoot, *, fail=False): | |
if reporoot == '': | |
return os.get_cwd() | |
elif reporoot is None: | |
return find_repo_root(fail=fail) | |
else: | |
return os.path.abspath(reporoot) | |
####################################### | |
# C code | |
def render_header_file(name, body): | |
name = os.path.basename(name).replace('.', ' ').split()[0] | |
name = f'{name.upper()}_H' | |
return os.linesep.join([ | |
f'#ifndef {name}', | |
f'#define {name}', | |
'', | |
'', | |
textwrap.dedent(body), | |
'', | |
f'#endif /* {name} */', | |
'' | |
]) | |
def get_func_vartype(header, funcname): | |
header = header.replace('\r\n', ' ').replace('\n', ' ') | |
vartype, _ = header.split(f' {funcname}(') | |
vartype = vartype.strip() | |
if vartype.startswith('static '): | |
vartype = vartype[7:].lstrip() | |
return vartype | |
def find_function(text, name): | |
starting = 0 | |
while True: | |
starting = text.find(f'\n{name}(', starting) | |
if starting < 0: | |
return None, None, None, None | |
body = text.find('\n{\n', starting) | |
if body < 0: | |
return None, None, None, None | |
body += 1 | |
if ';' not in text[starting:body]: | |
# It must have been a forward declaration. | |
break | |
starting += 1 | |
start = text.rfind('\n', 0, starting-1) | |
if start < 0: | |
return None, None, None, None | |
start += 1 | |
assert start < starting, (starting, start, body) | |
end = text.find('\n}\n', body) | |
if end < 0: | |
return None, None, None, None | |
end += 2 | |
return ( | |
text[:start], | |
text[start:body], | |
text[body:end], | |
text[end:], | |
) | |
def find_text_section(text, start_marker, end_marker, *, start=0): | |
assert start_marker.endswith('\n'), repr(start_marker) | |
assert end_marker.endswith('\n'), repr(end_marker) | |
if start < 0: | |
end = len(text) + start + 1 | |
pos2 = text.rfind(end_marker, 0, end) | |
if pos2 < 0: | |
if text.rfind(start_marker, 0, end) >= 0: | |
raise ValueError('missing end marker') | |
return None, None, None | |
pos1 = text.rfind(start_marker, 0, pos2) | |
if pos1 < 0: | |
raise ValueError('missing start marker') | |
else: | |
pos1 = text.find(start_marker, start) | |
if pos1 < 0: | |
if text.find(end_marker, start) >= 0: | |
raise ValueError('missing start marker') | |
return None, None, None | |
pos2 = text.find(end_marker, pos1) | |
if pos2 < 0: | |
raise ValueError('missing end marker') | |
start = text.rfind('\n', 0, pos1) + 1 | |
section = text[start:pos2+len(end_marker)] | |
indent = text[start:pos1] | |
assert section | |
assert indent.strip() == '', repr(section) | |
assert text[text.rfind('\n', pos2):pos2].strip() == '', repr(section) | |
return start, section, indent | |
####################################### | |
# cpython | |
class CPythonConfig(namedtuple('CPythonConfig', 'debug freethreading pgo lto')): | |
CACHEFILE = '../python-config.cache' | |
prefix = None | |
cflags = None | |
ipv6 = None | |
expat = None | |
ffi = None | |
optimizelevel = 0 | |
@classmethod | |
def from_opts(cls, opts): | |
if not opts: | |
return cls( | |
debug=True, | |
freethreading=False, | |
pgo=False, | |
lto=False, | |
) | |
elif isinstance(opts, str): | |
kwargs = {} | |
for opt in opts.split(): | |
if opts == '--with-pydebug': | |
kwargs['debug'] = True | |
elif opts == '--enable-optimizations': | |
kwargs['pgo'] = True | |
elif opts == '--with-lto': | |
kwargs['lto'] = True | |
elif opts == '--disable-gil': | |
kwargs['freethreading'] = True | |
else: | |
raise ValueError(f'unsupported config opt {opt!r}') | |
else: | |
raise NotImplementedError(repr(opts)) | |
def __new__(cls, debug=False, freethreading=False, pgo=False, lto=False): | |
return super().__new__( | |
cls, | |
debug=debug, | |
freethreading=freethreading, | |
pgo=pgo, | |
lto=lto, | |
) | |
def as_configure_args(self, cachefile=''): | |
argv = [] | |
if cachefile == '': | |
cachefile = self.CACHEFILE | |
if cachefile: | |
argv.append(f'--cache-file={cachefile}') | |
if self.prefix: | |
argv.append(f'--prefix={self.prefix}') | |
cflags = self.cflags | |
if self.optimizelevel is not None: | |
assert not cflags or not isinstance(cflags, str), cflags | |
cflags = [*(cflags or ()), f'-O{self.optimizelevel}'] | |
if cflags: | |
# XXX Support some generic CFlags type? | |
if not isinstance(cflags, str): | |
cflags = ' '.join(cflags) | |
argv.append(f'CFLAGS={cflags}') | |
if self.debug: | |
argv.append('--with-pydebug') | |
if self.freethreading: | |
argv.append('--disable-gil') | |
if self.ipv6: | |
argv.append('--enable-ipv6') | |
if self.expat == 'system': | |
argv.append('--with-system-expat') | |
elif self.expat: | |
raise NotImplementedError(self.expat) | |
if self.ffi == 'system': | |
argv.append('--with-system-ffi') | |
elif self.ffi: | |
raise NotImplementedError(self.ffi) | |
if self.pgo: | |
argv.append('--enable-optimizations') | |
if self.lto: | |
argv.append('--with-lto') | |
return argv | |
class CPythonBuildSpec(namedtuple('CPythonBuildSpec', 'builddir cfg')): | |
MAKE = shutil.which('make') | |
OUT_OF_TREE_CLEAN_FILES = [ | |
'python', | |
'Programs/python.o', | |
'Python/frozen_modules/importlib._bootstrap.h', | |
] | |
@classmethod | |
def add_cli(cls, parser): | |
parser.add_argument('--build-dir') | |
parser.add_argument('--build-config') | |
parser.add_argument('--force-build', dest='forcebuild', | |
action='store_const', const=True) | |
def handle_args(args, reporoot=None): | |
ns = vars(args) | |
builddir = ns.pop('build_dir') | |
if not builddir: | |
builddir = resolve_repo_root(reporoot) | |
cfg = ns.pop('build_config') | |
if cfg is not None: | |
if not builddir: | |
parser.error('missing --build-dir') | |
cfg = CPythonConfig.from_opts(cfg) | |
if not builddir and not cfg: | |
if args.forcebuild: | |
parser.error('missing --build-dir') | |
return None | |
# We leave args.forcebuild in place. | |
return cls(builddir, cfg) | |
return handle_args | |
def verify(self, python, *, fail=True): | |
# XXX Check it somehow. | |
return True | |
if fail: | |
raise ValueError(f'build {self!r} doesn\'t match given python {python!r}') | |
return False | |
def prepare_for_build(self, reporoot=None, *extraheaders): | |
reporoot = resolve_repo_root(reporoot, fail=True) | |
if self.builddir == reporoot: | |
return | |
# else it is an out-of-tree build. | |
os.makedirs(self.builddir, exist_ok=True) | |
for relfile in extraheaders: | |
src = os.path.join(reporoot, relfile) | |
tgt = os.path.join(self.builddir, relfile) | |
if os.path.exists(src): | |
dirname = os.path.dirname(tgt) | |
if dirname: | |
os.makedirs(dirname, exist_ok=True) | |
shutil.copyfile(src, tgt) | |
rc = -1 | |
if os.path.exists(os.path.join(reporoot, 'Makefile')): | |
filename = os.path.join(reporoot, 'python') | |
if os.path.exists(filename): | |
os.unlink(filename) | |
rc, _, _ = run_cmd(self.MAKE, 'clean', cwd=reporoot, capture=True) | |
if rc != 0: | |
for relfile in self.OUT_OF_TREE_CLEAN_FILES: | |
filename = os.path.join(reporoot, relfile) | |
if os.path.exists(filename): | |
os.unlink(filename) | |
def get_configure_argv(self, reporoot=None, cachefile=None): | |
reporoot = resolve_repo_root(reporoot, fail=True) | |
cmd = os.path.join(reporoot, 'configure') | |
return [cmd, *self.cfg.as_configure_args()] | |
def get_build_argv(self): | |
return [self.MAKE, '-j8', "CFLAGS='-Werror'"] | |
def build_cpython(spec=None, reporoot=None, *extraheaders, hide=None): | |
logger.info('# building cpython') | |
if spec is None: | |
spec = CPythonBuildSpec() | |
reporoot = resolve_repo_root(reporoot, fail=True) | |
capture = '+stderr' if hide else '?+stderr' | |
spec.prepare_for_build(reporoot, *extraheaders) | |
if spec.cfg: | |
rc, stdout, _ = run_cmd( | |
*spec.get_configure_argv(reporoot), | |
cwd=spec.builddir, | |
capture=capture, | |
) | |
if not spec.cfg or rc == 0: | |
if not os.path.exists(os.path.join(reporoot, 'Makefile')): | |
raise Exception('build not configured') | |
rc, stdout, _ = run_cmd( | |
*spec.get_build_argv(), | |
cwd=spec.builddir, | |
capture='+stderr' if hide else '?+stderr', | |
) | |
if rc != 0: | |
if not stdout: | |
raise NotImplementedError(rc) | |
elif resolve_cmd_capture(capture): | |
print(stdout) | |
raise NotImplementedError(rc) | |
else: | |
raise NotImplementedError(rc, stdout) | |
def run_python_text(python, script, *, | |
nosite=True, | |
capture=None, | |
env=None, | |
): | |
if capture is not False: | |
capture = 'stdout' if capture else '?stdout' | |
argv = [python] | |
if nosite: | |
argv.append('-S') | |
argv.extend(['-c', script]) | |
start = time.time() | |
rc, stdout, _ = run_cmd(*argv, capture=capture, env=env) | |
pytotal = Duration(time.time() - start) | |
assert rc == 0, rc | |
return stdout, pytotal | |
####################################### | |
# other utils | |
def touch(filename): | |
with open(filename, 'a') as outfile: | |
outfile.write('') | |
class Timestamp(float): | |
def __repr__(self): | |
return f'{type(self).__name__}({float.__repr__(self)})' | |
def __str__(self): | |
return f'{self:.6f}s' | |
def __sub__(self, other): | |
return Duration(super().__sub__(other)) | |
def __rsub__(self, other): | |
return Duration(super().__rsub__(other)) | |
class Duration(float): | |
def __repr__(self): | |
return f'{type(self).__name__}({float.__repr__(self)})' | |
def __str__(self): | |
return self.render() | |
def __add__(self, other): | |
return type(self)(super().__add__(other)) | |
def __radd__(self, other): | |
return type(self)(super().__radd__(other)) | |
def __sub__(self, other): | |
return type(self)(super().__sub__(other)) | |
def __rsub__(self, other): | |
return type(self)(super().__rsub__(other)) | |
def __mul__(self, other): | |
return type(self)(super().__mul__(other)) | |
def __rmul__(self, other): | |
return type(self)(super().__rmul__(other)) | |
def __div__(self, other): | |
return type(self)(super().__div__(other)) | |
def __rdiv__(self, other): | |
return type(self)(super().__rdiv__(other)) | |
def __truediv__(self, other): | |
return type(self)(super().__truediv__(other)) | |
def __rtruediv__(self, other): | |
return type(self)(super().__rtruediv__(other)) | |
def render(self, units=None): | |
if units is None: | |
if self < 0.001: | |
units = 'ns' | |
elif self < 1: | |
units = 'ms' | |
else: | |
units = 's' | |
if units == 's': | |
return f'{self:,.6f}s' | |
elif units == 'ms': | |
return f'{self * 1_000:,.3f}ms' | |
elif units == 'ns': | |
return f'{int(self * 1000000):,}ns' | |
else: | |
raise ValueError(f'unsupported units {units!r}') | |
class SourceLocation(namedtuple('SourceLocation', 'filename lno func')): | |
def __new__(cls, filename, lno, func): | |
if not lno and not isinstance(lno, int): | |
lno = None | |
self = super().__new__( | |
cls, | |
filename or None, | |
int(lno) if lno is not None else None, | |
func or None, | |
) | |
return self | |
####################################### | |
# profile logging | |
class LogEntry(namedtuple('LogEntry', 'timestamp source context kind message')): | |
class KINDS: | |
ENTER = 'ENTER' | |
PREFIX = 'PREFIX' # MISC | |
START = 'START' | |
INFIX = 'INFIX' # MISC | |
STOP = 'STOP' | |
POSTFIX = 'POSTFIX' # MISC | |
RETURN = 'RETURN' | |
_KINDS = {v: v | |
for k, v in vars(KINDS).items() | |
if not k.startswith('_')} | |
C_ENTER = 'func-entered' | |
C_START = 'started' | |
C_MISC = 'misc' | |
C_STOP = 'stopped' | |
C_RETURN = 'func-returning' | |
# (timestamp, ctx, kind, msg) | |
C_SIMPLE_FORMAT = '{}:{}:{}:{}' | |
# (timestamp, file, line, func, ctx, kind, msg) | |
C_FORMAT = '{} ({}:{}:{}) {}:{}:{}' | |
REGEX = re.compile(r""" | |
^ | |
( \d+ (?: \. \d+ )? ) # <timestamp> | |
(?: | |
(?: | |
\s+ | |
\( | |
( [./\\\w] [^:]* ) # <file> | |
: | |
( \d+ ) # <lno> | |
: | |
( \w+ ) # <func> | |
\) | |
\s+ | |
) | |
| | |
: | |
) | |
( \w[-\w]* )? # <ctx> | |
: | |
( \w[-\w]* ) # <kind> | |
: | |
( .* ) # <msg> | |
$ | |
""", re.VERBOSE) | |
@classmethod | |
def parse_all(cls, lines): | |
if isinstance(lines, str): | |
lines = lines.splitlines() | |
active = [] | |
prev = None | |
for line in lines: | |
entry, implicit = cls._parse(line, active, prev) | |
if entry is None: | |
continue | |
for entry in cls._expand_implicit(entry, implicit, prev): | |
if entry.kind is cls.KINDS.ENTER: | |
active.append(entry) | |
elif entry.kind is cls.KINDS.START: | |
active.append(entry) | |
elif entry.kind is cls.KINDS.STOP: | |
assert active[-1].kind is cls.KINDS.START, (entry, active) | |
active.pop() | |
elif entry.kind is cls.KINDS.RETURN: | |
assert active[-1].kind is cls.KINDS.ENTER, (entry, active) | |
active.pop() | |
yield entry | |
prev = entry | |
assert not active, active | |
@classmethod | |
def _parse(cls, line, active=None, prev=None): | |
line = line.strip() | |
if not line or line.startswith('#'): | |
return None, None | |
m = cls.REGEX.match(line) | |
if not m: | |
raise ValueError(f'unsupported log entry {line!r}') | |
(timestamp, | |
filename, lno, func, | |
ctx, ckind, msg, | |
) = m.groups() | |
loc = SourceLocation(filename, lno, func) if filename else None | |
orig_ctx = ctx | |
ctx = cls._resolve_context(ctx, func, msg) | |
if ctx is None: | |
raise ValueError(f'missing context in {line!r}') | |
kind, implicit = cls._resolve_kind(ckind or msg, ctx, active, prev, line) | |
self = cls(timestamp, loc, ctx, kind, msg) | |
if ctx == orig_ctx: | |
self._raw = line | |
return self, implicit | |
@classmethod | |
def _resolve_context(cls, context, func, msg): | |
if context: | |
return context | |
elif func: | |
return func | |
elif msg and re.match(r'^[-\w]+$', msg): | |
return msg | |
else: | |
return None | |
@classmethod | |
def _resolve_kind(cls, ckind, ctx, active, prev, line): | |
if ckind == cls.C_ENTER: | |
kind = cls.KINDS.ENTER | |
elif ckind == cls.C_START: | |
kind = cls.KINDS.START | |
elif ckind == cls.C_MISC: | |
kind = None | |
elif ckind == cls.C_STOP: | |
kind = cls.KINDS.STOP | |
elif ckind == cls.C_RETURN: | |
kind = cls.KINDS.RETURN | |
else: | |
raise ValueError(f'unsupported ckind {ckind!r} in {line!r}') | |
if active is None: | |
return kind, None | |
assert not active or active[-1].kind is not cls.KINDS.RETURN, active | |
assert not active or active[-1].kind is not cls.KINDS.STOP, active | |
implicit = None | |
if kind is cls.KINDS.ENTER: | |
pass | |
elif kind is cls.KINDS.START: | |
if not active: | |
implicit = cls.KINDS.ENTER | |
elif active[-1].ctx != ctx: | |
implicit = cls.KINDS.ENTER | |
elif active[-1].kind is cls.KINDS.ENTER: | |
pass | |
elif active[-1].kind is cls.KINDS.START: | |
raise ValueError(f'already started for {line!r}') | |
else: | |
raise NotImplementedError | |
elif kind is None: | |
if not active: | |
implicit = cls.KINDS.ENTER | |
kind = cls.KINDS.PREFIX | |
elif active[-1].ctx != ctx: | |
implicit = cls.KINDS.ENTER | |
kind = cls.KINDS.PREFIX | |
elif active[-1].kind is cls.KINDS.ENTER: | |
if prev is not None and prev.kind is cls.KINDS.PREFIX: | |
kind = cls.KINDS.PREFIX | |
else: | |
kind = cls.KINDS.POSTFIX | |
elif active[-1].kind is cls.KINDS.START: | |
kind = cls.KINDS.INFIX | |
else: | |
raise NotImplementedError | |
elif kind is cls.KINDS.STOP: | |
if not active: | |
raise ValueError(f'missing start for {line!r}') | |
elif active[-1].ctx != ctx: | |
raise ValueError(f'missing start for {line!r}') | |
elif active[-1].kind is cls.KINDS.ENTER: | |
if prev is active[-1]: | |
implicit = cls.KINDS.START | |
else: | |
raise ValueError(f'missing start for {line!r}') | |
elif active[-1].kind is cls.KINDS.START: | |
pass | |
else: | |
raise NotImplementedError | |
elif kind is cls.KINDS.RETURN: | |
if not active: | |
raise ValueError(f'missing function enter for {line!r}') | |
elif active[-1].ctx != ctx: | |
raise ValueError(f'missing function enter for {line!r}') | |
elif active[-1].kind is cls.KINDS.ENTER: | |
if prev is active[-1]: | |
implicit = cls.KINDS.START | |
elif active[-1].kind is cls.KINDS.START: | |
implicit = cls.KINDS.STOP | |
else: | |
raise NotImplementedError | |
else: | |
raise NotImplementedError(kind) | |
return kind, implicit | |
@classmethod | |
def _expand_implicit(cls, entry, implicit, prev): | |
if not implicit: | |
yield entry | |
return | |
if entry.kind is cls.KINDS.ENTER: | |
raise NotImplementedError('implicit not expected', entry) | |
elif entry.kind is cls.KINDS.PREFIX: | |
assert implcit is cls.KINDS.ENTER, (implicit, entry) | |
yield entry._as_implicit(implicit) | |
elif entry.kind is cls.KINDS.START: | |
assert implicit is cls.KINDS.ENTER, (implicit, entry) | |
yield entry._as_implicit(implicit) | |
elif entry.kind is cls.KINDS.INFIX: | |
raise NotImplementedError('implicit not expected', entry) | |
elif entry.kind is cls.KINDS.STOP: | |
raise NotImplementedError('implicit not expected', entry) | |
elif entry.kind is cls.KINDS.POSTFIX: | |
raise NotImplementedError('implicit not expected', entry) | |
elif entry.kind is cls.KINDS.RETURN: | |
if implicit is cls.KINDS.START: | |
assert prev.ctx == entry.ctx, (prev, entry) | |
assert prev.kind is cls.KINDS.ENTER, (prev, entry) | |
yield prev._as_implicit(implicit) | |
yield entry._as_implicit(cls.KINDS.STOP) | |
else: | |
assert implicit is cls.KINDS.STOP, (implicit, entry) | |
yield entry._as_implicit(implicit) | |
else: | |
NotImplementedError(entry) | |
yield entry | |
def __new__(cls, timestamp, source, context, kind, message): | |
self = super().__new__( | |
cls, | |
Timestamp(timestamp) if timestamp is not None else None, | |
source or None, | |
context or None, | |
kind or None, | |
message, | |
) | |
return self | |
def __str__(self): | |
try: | |
raw = self._raw | |
except AttributeError: | |
raw = None | |
if raw is not None: | |
return raw | |
cls = type(self) | |
args = [ | |
str(self.timestamp) if self.timestamp is not None else '', | |
] | |
if self.source is not None: | |
fmt = cls.C_FORMAT | |
assert self.source.filename, self.source | |
assert self.source.lno is not None, self.source | |
assert self.source.func, self.source | |
args.extend(self.source) | |
else: | |
fmt = cls.C_SIMPLE_FORMAT | |
if self.kind is cls.KINDS.ENTER: | |
kind = cls.C_ENTER | |
elif self.kind is cls.KINDS.PREFIX: | |
kind = cls.C_MISC | |
elif self.kind is cls.KINDS.START: | |
kind = cls.C_START | |
elif self.kind is cls.KINDS.INFIX: | |
kind = cls.C_MISC | |
elif self.kind is cls.KINDS.STOP: | |
kind = cls.C_STOP | |
elif self.kind is cls.KINDS.POSTFIX: | |
kind = cls.C_MISC | |
elif self.kind is cls.KINDS.RETURN: | |
kind = cls.C_RETURN | |
else: | |
raise NotImplementedError(self) | |
args.extend([ | |
self.ctx or '', | |
kind or '', | |
self.msg or '', | |
]) | |
return fmt.format(*args) | |
@property | |
def location(self): | |
return self.source | |
@property | |
def loc(self): | |
return self.source | |
@property | |
def ctx(self): | |
return self.context | |
@property | |
def msg(self): | |
return self.message | |
def _as_implicit(self, implicit): | |
implicit = self._replace(kind=implicit, message=None) | |
implicit._raw = None | |
return implicit | |
class LogEntryInterval(namedtuple('LogEntryInterval', 'start stop')): | |
@property | |
def duration(self): | |
return self.stop.timestamp - self.start.timestamp | |
class ProfilerHelperFile: | |
FILE = None | |
@classmethod | |
def from_raw(cls, raw): | |
if isinstance(raw, cls): | |
return cls | |
elif isinstance(raw, str): | |
return cls(raw) | |
elif raw is None: | |
return cls() | |
else: | |
raise TypeError(f'unsupported raw value {raw!r}') | |
def __init__(self, filename=None): | |
if not filename: | |
filename = self.FILE | |
if not filename: | |
raise ValueError('missing filename') | |
self._filename = filename | |
self._relative = not os.path.isabs(filename) | |
def __repr__(self): | |
return f'{type(self).__name__}(filename={self._filename!r})' | |
def __str__(self): | |
return self._filename | |
@property | |
def filename(self): | |
return self._filename | |
@property | |
def include(self): | |
if not self._relative: | |
raise Exception('must be relative') | |
if not self._filename.startswith('Include'): | |
return self._filename | |
if 'pycore_' in self._filename: | |
return os.path.basename(self._filename) | |
_, _, name = self._filename.partition('Include') | |
return name[1:] | |
@property | |
def isrelative(self): | |
return self._relative | |
def as_relative(self, reporoot): | |
if self._relative: | |
return self | |
if not self._filename.startswith(reporoot + '/'): | |
raise NotImplementedError(reporoot) | |
relfile = os.path.relpath(self._filename, reporoot) | |
cls = type(self) | |
return cls(relfile) | |
def as_absolute(self, reporoot=None): | |
if not self._relative: | |
if reporoot: | |
if not self._filename.startswith(reporoot + '/'): | |
raise NotImplementedError(reporoot) | |
return self | |
reporoot = resolve_repo_root(reporoot) | |
filename = os.path.join(reporoot, self._filename) | |
filename = os.path.abspath(filename) | |
cls = type(self) | |
return cls(filename) | |
def render(self, *includes): | |
if not includes: | |
return self._render() | |
lines = [f'#include "{incl}"' for incl in includes] | |
lines.append('') | |
lines.append(self._render()) | |
return '\n'.join(lines) | |
def write(self, *includes, force=False): | |
if self._relative: | |
raise NotImplementedError | |
text = self.render(*includes) | |
if not text.endswith('\n'): | |
text += '\n' | |
actual = self._read() | |
if actual is not None: | |
before, actual = self._split(actual) | |
if not force and text == actual: | |
return False | |
else: | |
before = None | |
if before and not actual: | |
with open(self._filename, 'a') as outfile: | |
outfile.write(text) | |
else: | |
with open(self._filename, 'w') as outfile: | |
if before: | |
outfile.write(before) | |
outfile.write(text) | |
return True | |
def _render(self): | |
raise NotImplementedError | |
def _read(self): | |
try: | |
with open(self._filename) as infile: | |
text = infile.read() | |
except FileNotFoundError: | |
text = None | |
return text | |
def _split(self, text): | |
return None, text | |
# def _check_dirty(self, expected, actual=None): | |
# if actual is None: | |
# actual = self._read() | |
# if actual is None: | |
# # not found | |
# return True | |
# return actual != expected | |
class ProfilerHelperInstalled(ProfilerHelperFile): | |
# It gets installed at the end. | |
MARKER = f'added by {__file__}' | |
def __init__(self, filename=None, helper=None, marker=None): | |
if not marker: | |
marker = self.MARKER | |
super().__init__(filename) | |
self._helper = helper | |
self._marker = marker | |
self._start_marker = f'// {marker}' | |
self._end_marker = None | |
@property | |
def helper(self): | |
return self._helper | |
@property | |
def marker(self): | |
return self._marker | |
def render(self): | |
return super().render() | |
def write(self, force=False): | |
return super().write(force=force) | |
def _render(self): | |
if self._helper is None: | |
raise ValueError(f'helper not set') | |
if self._helper.isrelative: | |
include = self._helper.include | |
else: | |
dirname, basename = os.path.split(self._helper.filename) | |
include = os.path.join( | |
os.path.relpath(dirname, os.path.dirname(self._filename)), | |
basename, | |
) | |
text = '\n'.join([ | |
'', | |
self._start_marker, | |
f'#include "{include}"', | |
'', | |
]) | |
return text | |
def _split(self, text): | |
pos = text.rfind(f'\n{self._start_marker}') | |
if pos < 0: | |
return text, None | |
else: | |
return text[:pos], text[pos:] | |
class ProfilerHelpersHeader(ProfilerHelperFile): | |
FILE = os.path.join('Include', '_profiling_helpers.h') | |
LOGFILE = 'profiler.log' | |
PREAMBLE = rf""" | |
#define _PyProfiler_SIMPLE_FMT \ | |
"{LogEntry.C_SIMPLE_FORMAT.format('%f', '%s', '%s', '%s')}\n" | |
#define _PyProfiler_FMT \ | |
"{LogEntry.C_FORMAT.format('%f', '%s', '%d', '%s', '%s', '%s', '%s')}\n" | |
/* event kinds */ | |
#define _PyProfiler_EVENT_ENTER "{LogEntry.C_ENTER}" | |
#define _PyProfiler_EVENT_START "{LogEntry.C_START}" | |
#define _PyProfiler_EVENT_MISC "{LogEntry.C_MISC}" | |
#define _PyProfiler_EVENT_STOP "{LogEntry.C_STOP}" | |
#define _PyProfiler_EVENT_RETURN "{LogEntry.C_RETURN}" | |
#define _PyProfiler_DEFAULT_LOGFILE "{{defaultlogfile}}" | |
"""[1:-1] | |
TEXT = r""" | |
PyAPI_FUNC(FILE *) _PyProfiler_EnsureLogfile(void); | |
static PyTime_t offset = 0; | |
typedef struct { | |
struct { | |
const char *file; | |
int line; | |
const char *func; | |
} source; | |
PyTime_t time; | |
const char *ctx; | |
const char *kind; | |
const char *msg; | |
} _PyProfiler_event_t; | |
static inline void | |
_PyProfiler_write_event(FILE *outfile, _PyProfiler_event_t *evt) | |
{ | |
fprintf(outfile, _PyProfiler_SIMPLE_FMT, | |
PyTime_AsSecondsDouble(evt->time), evt->ctx, evt->kind, evt->msg); | |
} | |
static inline void | |
_PyProfiler_write_event_full(FILE *outfile, _PyProfiler_event_t *evt) | |
{ | |
fprintf(outfile, _PyProfiler_FMT, | |
PyTime_AsSecondsDouble(evt->time - offset), | |
evt->source.file, evt->source.line, evt->source.func, | |
evt->ctx, evt->kind, evt->msg); | |
} | |
#define _PyProfiler_WRITE_EVENT(OUTFILE, CTX, KIND, MSG) \ | |
do { \ | |
_PyProfiler_event_t event = { \ | |
.ctx = CTX, \ | |
.kind = KIND, \ | |
.msg = MSG, \ | |
}; \ | |
/* We could also use PyTime_TimeRaw(). */ \ | |
(void)PyTime_PerfCounterRaw(&event.time); \ | |
_PyProfiler_write_event(OUTFILE, &event); \ | |
} while (0) | |
/* copied from pycore_initconfig.h */ | |
#ifdef _MSC_VER | |
/* Visual Studio 2015 doesn't implement C99 __func__ in C */ | |
# define _PyProfiler_GET_FUNC() __FUNCTION__ | |
#else | |
# define _PyProfiler_GET_FUNC() __func__ | |
#endif | |
#define _PyProfiler_WRITE_FULL_EVENT(OUTFILE, CTX, KIND, MSG) \ | |
do { \ | |
_PyProfiler_event_t event = { \ | |
.source = { \ | |
.file = __FILE__, \ | |
.line = __LINE__, \ | |
.func = _PyProfiler_GET_FUNC(), \ | |
}, \ | |
.ctx = CTX, \ | |
.kind = KIND, \ | |
.msg = MSG, \ | |
}; \ | |
/* We could also use PyTime_TimeRaw(). */ \ | |
(void)PyTime_PerfCounterRaw(&event.time); \ | |
_PyProfiler_write_event_full(OUTFILE, &event); \ | |
PyTime_t offset_delta; \ | |
(void)PyTime_PerfCounterRaw(&offset_delta); \ | |
offset += offset_delta - event.time; \ | |
} while (0) | |
#ifdef PY_PROFILER_FORCE_STDOUT | |
# define _PyProfiler_ADD_ENTER() \ | |
_PyProfiler_WRITE_FULL_EVENT(stdout, "", _PyProfiler_EVENT_ENTER, "") | |
# define _PyProfiler_ADD_START(ctx) \ | |
_PyProfiler_WRITE_FULL_EVENT(stdout, ctx, _PyProfiler_EVENT_START, "") | |
# define _PyProfiler_ADD_OTHER(ctx, msg) \ | |
_PyProfiler_WRITE_FULL_EVENT(stdout, ctx, _PyProfiler_EVENT_MISC, msg) | |
# define _PyProfiler_ADD_STOP(ctx) \ | |
_PyProfiler_WRITE_FULL_EVENT(stdout, ctx, _PyProfiler_EVENT_STOP, "") | |
# define _PyProfiler_ADD_RETURN() \ | |
_PyProfiler_WRITE_FULL_EVENT(stdout, "", _PyProfiler_EVENT_RETURN, "") | |
#else | |
# define _PyProfiler_ADD_ENTER() \ | |
static FILE *logfile = NULL; \ | |
if (logfile == NULL) { \ | |
logfile = _PyProfiler_EnsureLogfile(); \ | |
} \ | |
_PyProfiler_WRITE_FULL_EVENT(logfile, "", _PyProfiler_EVENT_ENTER, ""); | |
# define _PyProfiler_ADD_START(ctx) \ | |
_PyProfiler_WRITE_FULL_EVENT(logfile, ctx, _PyProfiler_EVENT_START, ""); | |
# define _PyProfiler_ADD_OTHER(ctx, msg) \ | |
_PyProfiler_WRITE_FULL_EVENT(logfile, ctx, _PyProfiler_EVENT_MISC, msg); | |
# define _PyProfiler_ADD_STOP(ctx) \ | |
_PyProfiler_WRITE_FULL_EVENT(logfile, ctx, _PyProfiler_EVENT_STOP, ""); | |
# define _PyProfiler_ADD_RETURN() \ | |
_PyProfiler_WRITE_FULL_EVENT(logfile, "", _PyProfiler_EVENT_RETURN, ""); | |
#endif | |
"""[1:-1] | |
def render(self, *includes): | |
if includes: | |
raise NotImplementedError(includes) | |
return super().render() | |
def write(self, force=False): | |
return super().write(force=force) | |
def _render(self): | |
cls = type(self) | |
text = '\n'.join([ | |
cls.PREAMBLE.format(defaultlogfile=cls.LOGFILE), | |
cls.TEXT, | |
]) | |
text = render_header_file(self._filename, text) | |
return text | |
class ProfilerHelpersImpl(ProfilerHelperFile): | |
FILE = os.path.join('Python', '_profiling_helpers.c') | |
TEXT = textwrap.dedent(r""" | |
FILE * | |
_PyProfiler_EnsureLogfile(void) | |
{ | |
static FILE *logfile = NULL; | |
if (logfile != NULL) { | |
return logfile; | |
} | |
// We do not worry about encodings for the filename or file. | |
const char *filename = getenv("PROFILING_LOGFILE"); | |
#ifdef PROFILING_LOGFILE | |
if (filename == NULL) { | |
filename = ##PROFILING_LOGFILE; | |
} | |
#endif | |
if (filename == NULL || strlen(filename) == 0) { | |
filename = _PyProfiler_DEFAULT_LOGFILE; | |
if (filename == NULL) { | |
filename = "-"; | |
} | |
else if (strlen(filename) == 0) { | |
filename = "profile.log"; | |
} | |
assert(filename != NULL); | |
} | |
if (strcmp(filename, "-") == 0) { | |
logfile = stdout; | |
} | |
else { | |
logfile = fopen(filename, "w"); | |
} | |
if (logfile == NULL) { | |
PyObject *filenameobj = PyUnicode_FromString(filename); | |
PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, filenameobj); | |
Py_DECREF(filenameobj); | |
} | |
return logfile; | |
} | |
"""[1:-1]) | |
def _render(self): | |
return self.TEXT | |
class ProfilerHelperFiles(namedtuple('ProfilerHelperFiles', | |
'header impl implincluded')): | |
IMPLINCLUDED = os.path.join('Python', 'pylifecycle.c') | |
@classmethod | |
def _resolve_implincluded(cls, implincluded, impl): | |
if implincluded is None: | |
implincluded = cls.IMPLINCLUDED | |
if not implincluded: | |
return None | |
resolved = ProfilerHelperInstalled.from_raw(implincluded) | |
if resolved.helper is None: | |
resolved._helper = impl | |
elif resolved.helper != impl: | |
raise ValueError(f'implincluded does not match impl') | |
return resolved | |
@classmethod | |
def _resolve_relative(cls, header, impl, implincluded): | |
if header.isrelative: | |
if not impl.isrelative: | |
raise ValueError(f'expected relative path for impl, got {impl!r}') | |
if implincluded and not implincluded.isrelative: | |
raise ValueError(f'expected relative path for implincluded, got {implincluded!r}') | |
return True | |
else: | |
if impl.isrelative: | |
raise ValueError(f'expected absolute path for impl, got {impl!r}') | |
if implincluded and implincluded.isrelative: | |
raise ValueError(f'expected absolute path for implincluded, got {implincluded!r}') | |
return False | |
def __new__(cls, header=None, impl=None, implincluded=None): | |
header = ProfilerHelpersHeader.from_raw(header) | |
impl = ProfilerHelpersImpl.from_raw(impl) | |
implincluded = cls._resolve_implincluded(implincluded, impl) | |
relative = cls._resolve_relative(header, impl, implincluded) | |
self = super().__new__(cls, header, impl, implincluded) | |
self._isrelative = relative | |
return self | |
@property | |
def isrelative(self): | |
return self._isrelative | |
def as_absolute(self, reporoot=None): | |
if not self._isrelative: | |
return self | |
cls = type(self) | |
copied = cls( | |
os.path.join(reporoot, self.header.filename), | |
os.path.join(reporoot, self.impl.filename), | |
(os.path.join(reporoot, self.implincluded.filename) | |
if self.implincluded | |
else None), | |
) | |
copied._isrelative = True | |
return copied | |
def as_relative(self, reporoot): | |
if self._isrelative: | |
return self | |
cls = type(self) | |
copied = cls( | |
os.path.relpath(self.header.filename, reporoot), | |
os.path.relpath(self.impl.filename, reporoot), | |
(os.path.relpath(self.implincluded.filename, reporoot) | |
if self.implincluded | |
else None), | |
) | |
copied._isrelative = False | |
return copied | |
def write(self, reporoot=None, *, force=False): | |
reporoot = resolve_repo_root(reporoot) | |
files = self.as_absolute(reporoot) | |
relfiles = self.as_relative(reporoot) | |
dirty1 = files.header.write(force=force) | |
include = relfiles.header.include | |
dirty2 = files.impl.write(include, force=force) | |
return dirty1 or dirty2 | |
def install(self, reporoot=None, *, force=False): | |
reporoot = resolve_repo_root(reporoot) | |
files = self.as_absolute(reporoot) | |
return files.implincluded.write(force=force) | |
# def write(self, reporoot=None, *, force=False): | |
# # First prep the header file. | |
# headers_text = '\n'.join([ | |
# cls.HEADER_PREAMBLE.format(defaultlogfile=cls.LOGFILE), | |
# cls.HEADER_TEXT, | |
# ]) | |
# headers_text = render_header_file(self.header, headers_text) | |
# if os.path.exists(self.header) and not force: | |
# with open(self.header) as infile: | |
# actual = infile.read() | |
# if headers_text == actual: | |
# headers_text = None | |
# | |
# # Then prep the impl file. | |
# include = os.path.relpath(headerfile, reporoot) | |
# impl_text = '\n'.join([ | |
# f'#include "{include}"', | |
# '', | |
# cls.IMPL_TEXT, | |
# '', | |
# ]) | |
# if os.path.exists(implfile): | |
# with open(implfile) as infile: | |
# actual = infile.read() | |
# if impl_text == actual: | |
# impl_text = None | |
# | |
# def install(self, reporoot=None, *, force=False): | |
# # Then prep the file that includes the impl, if applicable. | |
# if implincluded: | |
# marker = f'// added by {__file__}' | |
# include = os.path.relpath(implfile, implincluded) | |
# implincluded_text = '\n'.join([ | |
# '', | |
# marker, | |
# f'#include "{}"', | |
# '', | |
# ]) | |
# if os.path.exists(implincluded): | |
# with open(implincluded) as infile: | |
# actual = infile.read() | |
# if actual.endswith(implincluded_text): | |
# if force: | |
# pos = actual.rfind(f'\n{marker)') | |
# implincluded_actual = actual[:pos] | |
# else: | |
# implincluded_text = None | |
# | |
# # Finally, write out the helpers. | |
# dirty = False | |
# if headers_text: | |
# logger.info(f'# Adding helpers in {headerfile}') | |
# with open(headerfile, 'w') as outfile: | |
# outfile.write(headers_text) | |
# dirty = True | |
# if impl_text: | |
# logger.info(f'# Adding helpers in {implfile}') | |
# with open(implfile, 'w') as outfile: | |
# outfile.write(impl_text) | |
# dirty = True | |
# if implincluded and implincluded_text: | |
# logger.info(f'# Installing helpers in {implincluded}') | |
# if implincluded_actual: | |
# with open(implincluded, 'a') as outfile: | |
# outfile.write(implincluded_text) | |
# else: | |
# with open(implincluded, 'w') as outfile: | |
# outfile.write(implincluded_actual) | |
# outfile.write(implincluded_text) | |
# dirty = True | |
# | |
# return dirty | |
# if logfile is None or logfile == '-': | |
# parts.append(cls.STDOUT_LOGGING_CODE) | |
# else: | |
# if not logfile: | |
# # XXX CWD? | |
# logfile = cls.LOGFILE | |
# elif logfile.endswith('/') or os.path.isdir(logfile): | |
# logfile = os.path.join(logfile[:-1], cls.LOGFILE) | |
# os.makedirs(os.path.dirname(logfile), exist_ok=True) | |
# parts.append(cls.FILE_LOGGING_CODE.format(logfile=logfile)) | |
# text = '\n'.join(parts) | |
# text = render_header_file(filename, text) | |
# | |
# if os.path.exists(filename) and not force: | |
# # XXX Match logfile? | |
# return False, logfile | |
# | |
# logger.info(f'# Adding helpers in {filename}') | |
# with open(filename, 'w') as outfile: | |
# outfile.write(text) | |
# return True, logfile | |
class ProfilerHelpers: | |
@classmethod | |
def from_files(cls, header=None, impl=None, implincluded=None, *, | |
reporoot=None, | |
): | |
files = ProfilerHelperFiles(header, impl, implincluded) | |
self = cls(files) | |
if reporoot: | |
self._relfiles = files.as_relative(reporoot) | |
return self | |
@classmethod | |
def from_relfiles(cls, reporoot, header=None, impl=None, implincluded=None): | |
relfiles = ProfilerHelperFiles(header, impl, implincluded) | |
files = relfiles.as_absolute(reporoot) | |
self = cls(files) | |
self._relfiles = relfiles | |
return self | |
def __init__(self, files): | |
self._files = files | |
@property | |
def files(self): | |
return self._files | |
@property | |
def relfiles(self): | |
try: | |
return self._relfiles | |
except AttributeError: | |
self._relfiles = self._files.as_relfiles() | |
return self._relfiles | |
# def add(self, reporoot=None, *, force=False): | |
# if self._relative: | |
# raise NotImplementedError | |
# reporoot = resolve_repo_root(reporoot) | |
# filename = self._files.header.filename | |
# filename = os.path.join(reporoot, relfile) | |
# dirty = self.files.write(force=force) | |
# return dirty | |
# # Now install them. | |
# | |
# targetfile = os.path.join(reporoot, 'Include', 'Python.h') | |
# with open(targetfile) as infile: | |
# text = infile.read() | |
# marker = f'profiling helpers added by {__file__}' | |
# text = cls._install_helpers(text, marker, filename, | |
# addapi=False, bottom=True) | |
# if text is None: | |
# if dirty: | |
# touch(targetfile) | |
# return dirty | |
# else: | |
# logger.info(f'# Installing helpers in {targetfile}') | |
# with open(targetfile, 'w') as outfile: | |
# outfile.write(text) | |
# dirty = True | |
# | |
# return dirty | |
def wrap_func(self, filename, funcname, install=True): | |
with open(filename) as infile: | |
text = infile.read() | |
dirty = False | |
marker = f'profiling added by {__file__}' | |
start_marker = f'// START: {marker}' | |
end_marker = f'// END: {marker}' | |
before, header, body, after = find_function(text, funcname) | |
if body is None: | |
raise NotImplementedError(filename, funcname) | |
assert before is not None and header is not None and after is not None, (before, header, body, after) | |
lines = body.splitlines() | |
# Add the enter logging. | |
wrapped = [] | |
for line in lines: | |
if not wrapped: | |
assert line.endswith('{'), repr(line) | |
wrapped.append(line) | |
else: | |
if line.lstrip() != start_marker: | |
indent = line[:len(line) - len(line.lstrip())] | |
wrapped.append(f'{indent}{start_marker}') | |
wrapped.append(f'{indent}_PyProfiler_ADD_ENTER();') | |
wrapped.append(f'{indent}{end_marker}') | |
dirty = True | |
break | |
else: | |
raise NotImplementedError(body) | |
# Add the return logging. | |
lines = iter(lines) | |
prev3 = prev2 = '' | |
prev1 = next(lines) # the opening "{" | |
returnvoid = True | |
end = False | |
for line in lines: | |
assert not end, repr(line) | |
if line == '}': | |
if returnvoid: | |
if prev1.lstrip() != end_marker: | |
wrapped.append(f' {start_marker}') | |
wrapped.append(f' _PyProfiler_ADD_RETURN();') | |
wrapped.append(f' {end_marker}') | |
else: | |
assert prev3.lstrip() == start_marker, (funcname, prev3) | |
else: | |
assert prev1.lstrip() != end_marker, (funcname, prev1) | |
end = True | |
elif line.lstrip().startswith('return '): | |
returnvoid = False | |
if prev1.lstrip() != end_marker: | |
indent = line[:len(line) - len(line.lstrip())] | |
if '(' in line: | |
_, expr = line.split('return ') | |
while not expr.endswith(';'): | |
raise NotImplementedError | |
expr = expr[:-1] | |
vartype = get_func_vartype(header, funcname) | |
wrapped.append(indent + '{') | |
wrapped.append(f'{indent} {vartype} res = {expr};') | |
wrapped.append(f'{indent} {start_marker}') | |
wrapped.append(f'{indent} _PyProfiler_ADD_RETURN();') | |
wrapped.append(f'{indent} {end_marker}') | |
wrapped.append(f'{indent} return res;') | |
line = indent + '}' | |
else: | |
wrapped.append(f'{indent}{start_marker}') | |
wrapped.append(f'{indent}_PyProfiler_ADD_RETURN();') | |
wrapped.append(f'{indent}{end_marker}') | |
dirty = True | |
else: | |
assert prev3.lstrip() == start_marker, (funcname, prev3, line) | |
elif line.lstrip() == 'return;': | |
if prev1.lstrip() != end_marker: | |
indent = line[:len(line) - len(line.lstrip())] | |
wrapped.append(f'{indent}{start_marker}') | |
wrapped.append(f'{indent}_PyProfiler_ADD_RETURN();') | |
wrapped.append(f'{indent}{end_marker}') | |
dirty = True | |
else: | |
assert prev3.lstrip() == start_marker, (funcname, prev3, line) | |
wrapped.append(line) | |
prev3 = prev2 | |
prev2 = prev1 | |
prev1 = line | |
body = '\n'.join(wrapped) | |
if dirty: | |
if install: | |
installed = self._install(before, marker) | |
if installed is not None: | |
before = installed | |
logger.info(f'# Wrapping {funcname}() in {filename}') | |
text = f'{before}{header}{body}{after}' | |
with open(filename, 'w') as outfile: | |
outfile.write(text) | |
return dirty | |
def _install(self, text, marker, *, | |
addapi=True, | |
top=None, | |
bottom=False, | |
): | |
if top is None: | |
top = not bottom | |
else: | |
assert top or bottom, (top, bottom) | |
assert marker.rstrip() == marker, repr(marker) | |
start_marker = f'// START: {marker}\n' | |
end_marker = f'// END: {marker}\n' | |
helpers_h = self.relfiles.header.include | |
lines = [ | |
start_marker.rstrip(), | |
*(('#include "Python.h"',) if addapi else ()), | |
f'#include "{os.path.basename(helpers_h)}"', | |
end_marker.rstrip(), | |
'', | |
] | |
expected = '\n'.join(lines) | |
before = after = '' | |
body = text | |
if top: | |
before = expected + '\n' | |
(pos, section, indent, | |
) = find_text_section(text, start_marker, end_marker, start=0) | |
if section: | |
assert pos == 0, (pos, section) | |
assert indent == '', (indent, section) | |
if section == expected: | |
assert text.startswith(before), f'\n{before!r}\n---\n{section!r}\n---\n{text[:len(section)]!r}' | |
before = '' | |
else: | |
assert not text.startswith(before), f'\n{before!r}\n---\n{section!r}\n---\n{text[:len(section)]!r}' | |
body = text[len(section):] | |
else: | |
assert not text.startswith(before), f'\n{before!r}\n---\n{section!r}\n---\n{text[:len(before)]!r}' | |
if bottom: | |
after = '\n' + expected | |
(pos, section, indent, | |
) = find_text_section(text, start_marker, end_marker, start=-1) | |
if section: | |
assert pos + len(section) == len(text), (pos, section) | |
assert indent == '', (indent, section) | |
if section == expected: | |
assert text.endswith(after), f'\n{after!r}\n---\n{section!r}\n---\n{text[-len(section):]!r}' | |
after = '' | |
else: | |
assert not text.endswith(after), f'\n{after!r}\n---\n{section!r}\n---\n{text[-len(section):]!r}' | |
body = text[:pos] | |
else: | |
assert not text.endswith(after), f'\n{after!r}\n---\n{section!r}\n---\n{text[-len(after):]!r}' | |
if not before: | |
if not after: | |
return None | |
return body + after | |
elif not after: | |
return before + body | |
else: | |
return before + body + after | |
####################################### | |
# analysis | |
class ProfileNode(namedtuple('ProfileNode', 'context entries')): | |
FORMATS = ['summary', 'tree', 'raw'] | |
@classmethod | |
def from_entries(cls, entries): | |
entries = iter(entries) | |
node = cls._resolve_tree(entries) | |
while node is not None: | |
yield node | |
node = cls._resolve_tree(entries) | |
@classmethod | |
def _resolve_tree(cls, entries, entry=None): | |
if entry is None: | |
try: | |
entry = next(entries) | |
except StopIteration: | |
return None | |
assert entry.kind is LogEntry.KINDS.ENTER, repr(entry) | |
if entry.source is not None: | |
filename = entry.source.filename | |
func = entry.source.func | |
else: | |
filename = func = None | |
ctx = entry.ctx | |
if not ctx: | |
raise ValueError(f'missing context in {entry!r}') | |
node = [entry] | |
for entry in entries: | |
if entry.ctx != ctx: | |
subtree = cls._resolve_tree(entries, entry) | |
node.append(subtree) | |
else: | |
assert filename is None or (entry.source.filename == filename), entry | |
if entry.kind is LogEntry.KINDS.RETURN: | |
node.append(entry) | |
break | |
else: | |
assert entry.kind is not LogEntry.KINDS.ENTER, entry | |
node.append(entry) | |
self = cls(ctx, tuple(node)) | |
return self | |
@property | |
def duration(self): | |
return self.end - self.start | |
@property | |
def own_duration(self): | |
children = self.other_duration | |
if children is None: | |
return self.duration | |
return self.duration - children | |
@property | |
def other_duration(self): | |
children = None | |
for entry in self.entries: | |
if isinstance(entry, ProfileNode): | |
if children is None: | |
children = 0.0 | |
children += entry.duration | |
return children if children is not None else None | |
@property | |
def start(self): | |
return self.entries[0].timestamp | |
@property | |
def end(self): | |
return self.entries[-1].timestamp | |
@property | |
def intervals(self): | |
raise NotImplementedError | |
@property | |
def children(self): | |
return self.entries[1:-1] | |
def flatten(self): | |
for entry in self.entries: | |
if isinstance(entry, ProfileNode): | |
yield from entry.flatten() | |
else: | |
yield entry | |
def flatten_nodes(self): | |
yield self | |
for node in self.entries: | |
if isinstance(node, ProfileNode): | |
yield from node.flatten_nodes() | |
def summarize(self): | |
return ProfileNodeSummary.with_children( | |
self.entries[0].source.filename, | |
self.entries[0].source.func, | |
self.context, | |
self.duration, | |
self.own_duration, | |
self.other_duration, | |
) | |
def collect_summary(self, collected): | |
for node in self.flatten_nodes(): | |
# XXX Collate contexts? | |
s = node.summarize() | |
key = (s.filename, s.func) | |
try: | |
count, total, children = collected[key] | |
except KeyError: | |
count = 0 | |
total = 0.0 | |
children = None | |
count += 1 | |
total += s.duration | |
if s.children is not None: | |
if children is None: | |
children = Duration(0.0) | |
children += s.children | |
collected[key] = (count, total, children) | |
def render(self, fmt='summary'): | |
if fmt == 'summary': | |
yield self._render_summary() | |
elif fmt == 'raw': | |
yield from self._render_raw() | |
else: | |
raise ValueError(f'unsupported fmt {fmt!r}') | |
def render_all(self, fmt='tree'): | |
if fmt == 'tree' or not fmt: | |
yield from self._render_tree() | |
elif fmt == 'summary': | |
yield self._render_summary() | |
for node in self.entries: | |
if isinstance(node, ProfileNode): | |
yield from node.render_all(fmt) | |
elif fmt == 'raw': | |
for node in self.flatten(): | |
yield str(entry) | |
else: | |
raise ValueError(f'unsupported fmt {fmt!r}') | |
def _render_tree(self, depth=0, singleindent=' '): | |
indent = singleindent * depth | |
yield indent + self._render_summary() | |
for node in self.entries: | |
if not isinstance(node, ProfileNode): | |
continue | |
yield from node._render_tree(depth+1, singleindent) | |
def _render_summary(self): | |
return str(self.summarize()) | |
def _render_raw(self): | |
for entry in self.entries: | |
if isinstance(entry, ProfileNode): | |
continue | |
yield str(entry) | |
class ProfileNodeSummary(namedtuple('ProfileNodeSummary', 'filename func context duration own')): | |
@classmethod | |
def with_children(cls, filename, func, context, duration, own, children): | |
self = cls(filename, func, context, duration, own) | |
self._children = children | |
return self | |
def __str__(self): | |
context = self.context | |
if not context or context == self.func: | |
context = '' | |
duration = self.duration.render('ns') | |
if self.own == self.duration: | |
return f'{self.filename}:{self.func}() -{context}- {duration:>5}' | |
else: | |
own = self.own.render('ns') | |
return f'{self.filename}:{self.func}() -{context}- {duration:>5} {own:>5}' | |
@property | |
def children(self): | |
try: | |
return self._children | |
except AttributeError: | |
if self.own == self.duration: | |
return None | |
return self.duration - self.own | |
####################################### | |
# targets | |
TARGETS = 'profiling-targets.txt' | |
def resolve_targets(targets, reporoot=None): | |
if isinstance(targets, str): | |
targets = [targets] | |
reporoot = resolve_repo_root(reporoot) | |
if not reporoot: | |
raise Exception | |
resolved = [] | |
for target in _iter_raw_targets(targets): | |
relfile, sep, func = target.rpartition(':') | |
if not sep or not relfile or not func: | |
raise ValueError(f'invalid target {target!r}') | |
filename = os.path.join(reporoot, relfile) | |
loc = SourceLocation(filename, None, func) | |
if loc not in resolved: | |
resolved.append(loc) | |
return resolved | |
def _iter_raw_targets(targets): | |
for _target in targets: | |
for target in _target.replace(',', ' ').split(): | |
if target.startswith('file:'): | |
filename = target[5:] | |
for target in _read_targets_file(filename): | |
if target.startswith('file:'): | |
raise NotImplementedError(filename, target) | |
yield target | |
else: | |
yield target | |
def _read_targets_file(filename): | |
with open(filename) as infile: | |
text = infile.read() | |
for line in text.splitlines(): | |
line = line.strip() | |
if not line or line.startswith('#'): | |
continue | |
yield line | |
def prepare_targets(helpers, targets, install=True): | |
if not targets: | |
raise ValueError('missing targets') | |
# filename = os.path.join(reporoot, 'Objects', 'typeobject.c') | |
# return ProfilingUtils., installwrap_func(filename, 'type_new') | |
dirty = False | |
for filename, _, func in targets: | |
if helpers.wrap_func(filename, func, install): | |
dirty = True | |
return dirty | |
def prepare_main(helpers, reporoot, install=True): | |
targetfile = os.path.join(reporoot, 'Programs', 'python.c') | |
targets = [ | |
(targetfile, 'wmain'), | |
(targetfile, 'main'), | |
] | |
dirty = False | |
for filename, func in targets: | |
if helpers.wrap_func(filename, func, install): | |
dirty = True | |
return dirty | |
####################################### | |
# command helpers | |
def resolve_python(python, reporoot, build, *, fail=True, force=False): | |
built = os.path.join(build.builddir, 'python') if build else None | |
dev = os.path.join(reporoot, 'python') if reporoot else None | |
resolved = python or built or dev or None | |
if not resolved: | |
return None, None, None | |
if resolved == built: | |
builddir = build.builddir | |
elif resolved == dev: | |
builddir = reporoot | |
else: | |
builddir = None | |
if os.path.exists(resolved): | |
if resolved == python: | |
if python == built: | |
if not force and not build.verify(python, fail=fail): | |
builddir = None | |
elif python == dev: | |
if not force: | |
# XXX Check it somehow? | |
pass | |
exists = True | |
elif not fail: | |
exists = False | |
else: | |
if resolved == python: | |
if builddir: | |
raise ValueError(f'given python executable {python!r} not built yet') | |
else: | |
raise ValueError(f'given python executable {python!r} not found') | |
else: | |
raise ValueError(f'python executable not built yet (in {builddir})') | |
return resolved, exists, builddir | |
def prepare_for_build(reporoot, targets=None, *, | |
helpers=None, | |
install=None, | |
builddir=None, | |
): | |
if not targets: | |
relfile = TARGETS | |
for targetsfile in [ | |
*((os.path.join(builddir, relfile),) if builddir else ()), | |
os.path.join(reporoot, relfile), | |
os.path.join(reporoot, '..', relfile), | |
]: | |
if os.path.exists(targetsfile): | |
targets = [f'file:{targetsfile}'] | |
break | |
else: | |
targetsfile = None | |
if install is False: | |
targets = () | |
else: | |
raise ValueError('missing targets') | |
if targetsfile: | |
targets = [f'file:{targetsfile}'] | |
targets = resolve_targets(targets, reporoot) | |
if install is not False: | |
if helpers is None: | |
helpers = ProfilerHelpers.from_files(reporoot=reporoot) | |
force = bool(install) | |
dirty1 = helpers.files.write(force=force) | |
# dirty1 = helpers.add(reporoot, force=forceinstall) | |
if dirty1: | |
helpers.files.install(force=force) | |
dirty2 = prepare_main(helpers, reporoot, install=True) | |
dirty3 = prepare_targets(helpers, targets, install=True) | |
dirty = dirty1 or dirty2 or dirty3 | |
else: | |
dirty = None | |
return dirty | |
def prepare(reporoot, targets=None, *, | |
helpers=None, | |
install=None, | |
build=None, | |
forcebuild=False, | |
): | |
built = None | |
# if helpers is None: | |
# helpers = ProfilerHelpers.from_files() | |
dirty = prepare_for_build( | |
reporoot=reporoot, | |
targets=targets, | |
helpers=helpers, | |
install=install, | |
builddir=build.builddir if build else None, | |
) | |
# helpers_h = helpers.relfiles.header.include | |
if dirty: | |
built = build_cpython(build, reporoot, hide=False) | |
forcebuild = False | |
if forcebuild: | |
built = build_cpython(build, reporoot) | |
return built, dirty | |
def run_for_profile(python=None, *, | |
logfile=None, | |
builddir=None, | |
withsite=True, | |
capture=True, | |
): | |
if not python: | |
reporoot = find_repo_root(rfail=True) | |
python = os.path.join(reporoot, 'python') | |
if not os.path.exists(python): | |
build = CPythonBuildSpec(builddir or reporoot, cfg=None) | |
python, _ = prepare(reporoot, build=build) | |
assert python | |
elif not os.path.exists(python): | |
raise ValueError(f'given python executable {python!r} not found') | |
env = {} | |
if logfile: | |
if os.path.exists(logfile): | |
# XXX rotate it? | |
os.unlink(logfile) | |
env['PROFILING_LOGFILE'] = logfile | |
stdout, pytotal = run_python_text( | |
python, 'pass', | |
nosite=not withsite, | |
capture=capture, | |
env=env or None, | |
) | |
if logfile: | |
if stdout: | |
with open(logfile, 'w') as outfile: | |
outfile.write(stdout) | |
elif os.path.exists(logfile): | |
with open(logfile) as infile: | |
stdout = infile.read() | |
elif not capture: | |
logfile = None | |
stdout = None | |
else: | |
stdout = '' | |
elif not stdout: | |
stdout = '' if capture else None | |
return stdout, pytotal | |
def show_profile_data(data, pytotal=None, fmt='tree'): | |
if isinstance(data, str): | |
entries = LogEntry.parse_all(data) | |
else: | |
entries = data | |
if fmt != 'tree': | |
for root in ProfileNode.from_entries(entries): | |
yield from root.render_all(fmt) | |
return | |
summaries = {} | |
roots = iter(ProfileNode.from_entries(entries)) | |
try: | |
root = next(roots) | |
except StopIteration: | |
pass | |
else: | |
loc = root.entries[0].loc | |
if loc.func in ('main', 'wmain'): | |
#assert loc.filename == 'Programs/python.c' | |
if list(roots): | |
raise NotImplementedError | |
pytotal = root.duration | |
roots = iter(root.children) | |
else: | |
yield from root.render_all(fmt) | |
root.collect_summary(summaries) | |
for root in roots: | |
yield from root.render_all(fmt) | |
root.collect_summary(summaries) | |
byfile = {} | |
maxfile = maxfunc = maxcount = maxavg = maxownavg = 0 | |
for (filename, func), (count, total, children) in summaries.items(): | |
maxfile = max(maxfile, len(filename)) | |
maxfunc = max(maxfunc, len(func)) | |
maxcount = max(maxcount, count) | |
assert isinstance(total, Duration), repr(total) | |
avg = total / count | |
maxavg = max(maxavg, avg) | |
if children is not None: | |
assert isinstance(children, Duration), repr(children) | |
ownavg = (total - children) / count | |
maxownavg = max(maxownavg, ownavg) | |
else: | |
ownavg = None | |
try: | |
funcs = byfile[filename] | |
except KeyError: | |
funcs = byfile[filename] = {} | |
assert func not in funcs, func | |
funcs[func] = (count, total, avg, ownavg) | |
widths = ( | |
max(maxfile, maxfunc+4) - 4, | |
len(str(maxcount)), | |
len(maxavg.render('ns')), | |
5 if maxavg < 10 else 6, | |
) | |
fmt = ' {:%d} {:>%d} times, avg {:>%d} = {:>%d}' % widths | |
if maxownavg: | |
ownwidth = len(maxownavg.render('ns')) | |
ownpctwidth = 5 if maxownavg < 10 else 6 | |
ownfmt = fmt + ' (own {:>%d} = {:>%d})' % (ownwidth, ownpctwidth) | |
yield '' | |
for filename, funcs in byfile.items(): | |
# for func, (count, avg) in funcs.items(): | |
# yield f'{filename + ":" + func + "()":50} {count:>5} times, avg {avg}' | |
yield filename | |
rows = sorted(funcs.items(), key=(lambda v: v[1]), reverse=True) | |
for func, (count, total, avg, ownavg) in rows: | |
pct = float(total / pytotal).__format__('.2%') | |
avg = avg.render('ns') | |
if ownavg is None: | |
# yield f' {func + "()":30} {count:>5} times, avg {avg:>4} {pct}' | |
yield fmt.format(func, count, avg, pct) | |
else: | |
ownpct = float(ownavg / pytotal).__format__('.2%') | |
ownavg = ownavg.render('ns') | |
# yield f' {func + "()":30} {count:>5} times, avg {avg:>4} {pct} ({ownavg} {ownpct})' | |
yield ownfmt.format(func, count, avg, pct, ownavg, ownpct) | |
yield '' | |
yield f'total: {pytotal.render("ns")}' | |
####################################### | |
# commands | |
class CommandFailure(RuntimeError): | |
... | |
def cmd_prepare(reporoot=None, targets=None, *, | |
install=None, | |
build=None, | |
forcebuild=False, | |
): | |
reporoot = resolve_repo_root(reporoot, fail=True) | |
if logfile == '-': | |
logfile = None | |
built, installed = prepare( | |
reporoot=reporoot, | |
targets=targets, | |
install=install, | |
build=build, | |
forcebuild=forcebuild, | |
) | |
if built: | |
yield f'prepared python executable (with profiling): {built}' | |
elif installed: | |
yield f'prepared to build python executable (with profiling) in {reporoot}' | |
else: | |
yield 'did nothing' | |
def cmd_run(python=None, *, | |
logfile=None, | |
builddir=None, | |
withsite=True, | |
): | |
if logfile != '-': | |
if not logfile: | |
logfile = ProfilerHelpersHeader.LOGFILE | |
yield f'logging to {logfile}' | |
stdout, pytotal = run_for_profile( | |
python=python, | |
logfile=logfile, | |
builddir=builddir, | |
withsite=withsite, | |
capture=not SHORTCUT, | |
) | |
if logfile: | |
yield f'raw profile data written to {logfile}' | |
elif stdout: | |
for line in stdout.splitlines(): | |
yield line | |
elif stdout is None: | |
yield 'results written to stdout or unknown log file' | |
else: | |
yield 'results written to unknown log file' | |
yield 'ran in {pytotal} seconds' | |
def cmd_show(): | |
... | |
def cmd_default(python=None, logfile=None, targets=None, *, | |
reporoot=None, | |
install=None, | |
build=None, | |
forcebuild=None, | |
withsite=True, | |
analyze=False, | |
fmt='tree', | |
): | |
if logfile != '-': | |
if not logfile: | |
logfile = ProfilerHelpersHeader.LOGFILE | |
yield f'logging to {logfile}' | |
reporoot = resolve_repo_root(reporoot, fail=True) | |
(python, exists, builddir, | |
) = resolve_python(python, reporoot, build, fail=False) | |
if not python: | |
raise CommandFailure('missing python arg') | |
elif builddir: | |
if not exists: | |
forcebuild = True | |
built, _ = prepare( | |
reporoot=reporoot, | |
targets=targets, | |
install=install, | |
build=build, | |
forcebuild=forcebuild, | |
) | |
assert not built or built == python, (built, python) | |
else: | |
if not exists: | |
raise CommandFailure(f'given python executable {python!r} not found') | |
if install: | |
raise CommandFailure('got unexpected install arg') | |
if forcebuild: | |
raise CommandFailure('got unexpected forcebuild arg') | |
if build: | |
raise CommandFailure(f'got unexpected build arg {build!r}') | |
stdout, pytotal = run_for_profile( | |
python=python, | |
logfile=logfile, | |
withsite=withsite, | |
capture=not SHORTCUT or fmt != 'raw', | |
) | |
if stdout: | |
yield from show_profile_data(stdout, pytotal, fmt) | |
COMMANDS = { | |
None: cmd_default, | |
'prepare': cmd_prepare, | |
'run': cmd_run, | |
'show': cmd_show, | |
} | |
####################################### | |
# the script | |
def parse_args(argv=sys.argv[1:], prog=sys.argv[0]): | |
import argparse | |
parser = argparse.ArgumentParser(prog=prog) | |
parser.add_argument('-v', '--verbose', action='count', default=0) | |
parser.add_argument('-q', '--quiet', action='count', default=0) | |
parser.add_argument('--reporoot') | |
parser.add_argument('--force-install', dest='install', | |
action='store_const', const=True) | |
parser.add_argument('--no-install', dest='install', | |
action='store_const', const=False) | |
parser.add_argument('--target', dest='targets', action='append') | |
handle_build = CPythonBuildSpec.add_cli(parser) | |
parser.add_argument('--python') | |
parser.add_argument('--with-site', dest='withsite', | |
action='store_const', const=True) | |
parser.add_argument('--no-site', '-S', dest='withsite', | |
action='store_const', const=False) | |
parser.set_defaults(withsite=True) | |
parser.add_argument('--format', dest='fmt', | |
choices=ProfileNode.FORMATS, default='tree') | |
parser.add_argument('logfile', nargs='?') | |
args = parser.parse_args(argv) | |
ns = vars(args) | |
cmd = None | |
verbose = ns.pop('verbose') | |
quiet = ns.pop('quiet') | |
verbosity = VERBOSITY + verbose - quiet | |
args.reporoot = resolve_repo_root(args.reporoot) | |
args.build = handle_build(args, args.reporoot) | |
return cmd, ns, verbosity | |
def main(cmd, cmd_kwargs): | |
try: | |
run_cmd = COMMANDS[cmd] | |
except KeyError: | |
sys.exit(f'ERROR: unsupported command {cmd!r}') | |
try: | |
for line in run_cmd(**cmd_kwargs): | |
print(line) | |
except CommandFailure as exc: | |
sys.exit(f'ERROR: {exc}') | |
if __name__ == '__main__': | |
cmd, cmd_kwargs, verbosity = parse_args() | |
configure_logger(logger, verbosity) | |
main(cmd, cmd_kwargs) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment