Skip to content

Instantly share code, notes, and snippets.

@perrygeo
Last active February 27, 2017 15:14
Show Gist options
  • Save perrygeo/db45cce469ca93d3d96dd7f4d253ba11 to your computer and use it in GitHub Desktop.
Save perrygeo/db45cce469ca93d3d96dd7f4d253ba11 to your computer and use it in GitHub Desktop.
from __future__ import print_function, division
from collections import defaultdict
from functools import wraps
import os
import json
import logging
import threading
import time
import psutil
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def mean(xs):
    """Return the arithmetic mean of the numbers in ``xs``.

    ``xs`` must support ``len()``. An empty sequence raises
    ``ZeroDivisionError`` because the sum is divided by a length of zero.
    """
    return sum(xs) / len(xs)
def bytes_to_MB(b):
    """Convert a byte count to megabytes (1 MB = 10**6 bytes)."""
    return b * 1e-6
def profile_function(cpu=True, mem=True, disk=False, poll_interval=0.2,
                     human=False, **extra_kwargs):
    """Build a decorator that profiles the current process while a function runs.

    While the decorated function executes, a background thread samples the
    current process through ``psutil`` every ``poll_interval`` seconds. When
    the function returns, the mean and max of each sampled metric are logged
    at INFO level together with the function name, module and elapsed time.
    The decorated function's return value is passed through unchanged.

    Args:
        cpu: Sample ``Process.cpu_percent()`` when true.
        mem: Sample resident memory (``memory_info().rss``) in MB when true.
        disk: Sample ``io_counters()`` read/write bytes in MB when true.
            Disabled with a warning if the platform does not support it.
        poll_interval: Seconds to wait between samples.
        human: Log a multi-line human-readable summary instead of a compact
            JSON document.
        **extra_kwargs: Extra key/value pairs merged into the logged record.

    Returns:
        A decorator to apply to the function being profiled.
    """
    def decorator(func):
        @wraps(func)
        def proffunc(*args, **kwargs):
            obs = defaultdict(list)
            # An Event (instead of a plain flag plus sleep) lets the observer
            # wake up and exit as soon as the profiled call finishes.
            stop = threading.Event()
            parent = psutil.Process(os.getpid())

            # Not all platforms support per-process IO metrics
            track_disk = disk
            if disk and not hasattr(parent, 'io_counters'):
                logger.warning(
                    'io_counters not supported on this platform, skipping disk profiling')
                track_disk = False

            def observe():
                """Append one sample per metric to ``obs`` until ``stop`` is set."""
                poll_disk = track_disk
                while not stop.is_set():
                    with parent.oneshot():
                        if cpu:
                            obs['cpu'].append(
                                parent.cpu_percent(interval=None))
                        if mem:
                            obs['mem'].append(
                                bytes_to_MB(parent.memory_info().rss))
                        if poll_disk:
                            try:
                                io = parent.io_counters()
                            except NotImplementedError:
                                logger.warning(
                                    'io_counters not supported, '
                                    'kernel needs CONFIG_TASK_IO_ACCOUNTING')
                                # Warn once rather than on every poll.
                                poll_disk = False
                            else:
                                obs['read'].append(bytes_to_MB(io.read_bytes))
                                obs['write'].append(bytes_to_MB(io.write_bytes))
                    stop.wait(poll_interval)

            observer = threading.Thread(target=observe)
            # Never let the observer keep the interpreter alive on its own.
            observer.daemon = True
            observer.start()

            # Run main func and track elapsed time. The finally clause stops
            # the observer even when func raises, and joining it guarantees
            # no sample is appended while the observations are aggregated.
            start = time.time()
            try:
                res = func(*args, **kwargs)
                elapsed = round(time.time() - start, 4)
            finally:
                stop.set()
                observer.join()

            agg_data = {
                'name': func.__name__,
                'module': func.__module__,
                'elapsed': elapsed}
            agg_data.update(extra_kwargs)

            # Aggregate observations
            for key, vals in obs.items():
                # special case, first cpu reading of zero is not valid
                if key == 'cpu' and vals[0] == 0:
                    vals = vals[1:]
                if vals:
                    agg_data[key] = {
                        "mean": round(mean(vals), 1),
                        "max": round(max(vals), 1)}

            # output and return
            if human:
                # cpu/mem may be absent (metric disabled or no valid sample),
                # so supply placeholders instead of failing with KeyError.
                fields = dict(agg_data)
                fields.setdefault('cpu', 'n/a')
                fields.setdefault('mem', 'n/a')
                logger.info(
                    "{module}.{name}\ntime: {elapsed}\n"
                    "CPU: {cpu}\nMem: {mem}\n".format(**fields))
            else:
                logger.info(json.dumps(agg_data, separators=(',', ':')))
            return res
        return proffunc
    return decorator
import time
from profile_function import profile_function
@profile_function(cpu=True, mem=True, poll_interval=0.1)
def main():
    """Example workload for the profiler; returns the final list length.

    Allocates a large list, sleeps, grows the list and then spins in a
    countdown loop, so the profiled run mixes memory growth, idle time and
    busy CPU time.
    """
    a = list(range(int(3e7)))
    n = 1e7
    time.sleep(1)
    a.extend(list(range(int(1e7))))
    while n > 0:
        n -= 1
    return len(a)
# Run the example workload when executed as a script and print its result.
if __name__ == '__main__':
    print(main())
@perrygeo
Copy link
Author

perrygeo commented Feb 26, 2017

The log output looks like the following (pretty-printed here; the actual log line is compact JSON without indentation or spaces):

{
  "name": "main",
  "module": "__main__",
  "elapsed": 4.872,
  "cpu": {
    "mean": 53.4,
    "max": 99.9
  },
  "mem": {
    "mean": 1110.5,
    "max": 1497.1
  }
}
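  • mem: Resident memory (RSS) of the process in MB (1 MB = 10^6 bytes); mean and max over the polled samples
  • cpu: CPU usage of the process as a percentage of one CPU core; mean and max over the polled samples
  • elapsed: Total run time of the function, in seconds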

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment