ionelmc · February 12, 2020 09:26
diff --git a/.gitignore b/.gitignore
 *.cbor
 *.log
 *.xml
 *.yaml
 *.json
 *.msgpack
 *.html
 *.marshal
 *.pickle
 .tox
 .idea
 .cache
 *.pyc

diff --git a/README.rst b/README.rst
diff --git a/conftest.py b/conftest.py
 def pytest_benchmark_generate_json(config, benchmarks, include_data):
    """Build the benchmark JSON report with ``include_data`` forced to False.

    Delegates to the hook implementation of the same name in
    ``pytest_benchmark.plugin``. ``config`` and ``benchmarks`` are passed
    through unchanged; the ``include_data`` value received here is ignored.
    """
    # Imported lazily, and reached through the module so the plugin's function
    # does not shadow this hook's own name inside the body.
    from pytest_benchmark import plugin as benchmark_plugin

    default_hook = benchmark_plugin.pytest_benchmark_generate_json
    return default_hook(config=config, benchmarks=benchmarks, include_data=False)
diff --git a/data.py b/data.py
 import json
 import sys

 import dicttoxml
 import marshal
 import msgpack
 import yaml
 import cbor

 from util import is_format_needed

 try:
    import cPickle as pickle
 except ImportError:
    import pickle

 if __name__ == '__main__':
    # Source document: a JSON file named by the first CLI argument, otherwise
    # a synthetic payload dominated by one very large string.
    if len(sys.argv) <= 1:
        data = {
            "foo": [{
                "bar": [
                    'A"\\ :,;\n1' * 20000000,
                ],
                "b": [
                    1, 0.333, True,
                ],
                "c": None,
            }]
        }
    else:
        with open(sys.argv[1]) as source:
            data = json.load(source)

    # One row per output format: (format name, file name, open() mode, writer).
    # Each writer receives the already-open output file.
    writers = [
        ('json', 'data.json', 'w',
         lambda out: json.dump(data, out)),
        ('yaml', 'data.yaml', 'w',
         lambda out: yaml.dump(data, out)),
        ('msgpack', 'data.msgpack', 'wb',
         lambda out: msgpack.dump(data, out, use_bin_type=True)),
        ('pickle', 'data.pickle', 'wb',
         lambda out: pickle.dump(data, out, protocol=pickle.HIGHEST_PROTOCOL)),
        ('marshal', 'data.marshal', 'wb',
         lambda out: marshal.dump(data, out)),
        ('xml', 'data.xml', 'wb',
         lambda out: out.write(dicttoxml.dicttoxml(data))),
        ('cbor', 'data.cbor', 'wb',
         lambda out: cbor.dump(data, out)),
    ]

    for fmt, filename, mode, write in writers:
        if not is_format_needed(fmt):
            continue
        # The file is opened (and truncated) before the progress message is
        # printed, exactly as each hand-written stanza did.
        with open(filename, mode) as out:
            print('Creating %s ...' % filename)
            write(out)
diff --git a/memory.py b/memory.py
 #!/usr/bin/env python
 from __future__ import division

 import argparse
 import os
 import sys

 from util import IMPLEMENTATIONS
 from util import get_loader
 from util import open_data

 # Memory measurement backend, read from the BM_MEM environment variable and
 # lower-cased; "valgrind" is used when the variable is not set.
 backend = os.environ.get("BM_MEM", "valgrind").lower()


 def run(impl):
    """Read the whole data file for ``impl`` and feed it to that implementation's loader.

    The file is fully read and closed before the loader is looked up and
    called. The loader's result is discarded and nothing is returned.
    """
    with open_data(impl) as source:
        payload = source.read()

    get_loader(impl)(payload)

 # Command line: an optional ``--save`` value and an optional positional
 # ``impl``; both parse as None when they are not given.
 parser = argparse.ArgumentParser()
 parser.add_argument("--save")
 parser.add_argument("impl", nargs="?")

 if __name__ == '__main__':
    # Two modes. With an ``impl`` argument this process only performs the load
    # (this is how the valgrind backend re-invokes the script). Without one it
    # measures every importable implementation and prints a report.
    args = parser.parse_args()
    if args.impl:
        run(args.impl)
    else:
        import operator
        import subprocess

        results = {}
        for impl in IMPLEMENTATIONS:
            # Skip implementations whose module cannot be imported here.
            try:
                __import__(impl)
            except ImportError:
                continue

            print('Testing memory use for %r ...' % impl)
            if backend == 'valgrind':
                massif_log = './massif-%s.log' % impl
                subprocess.check_call([
                    # 'strace',
                    # '-o',
                    # './strace-%s.log' % impl,
                    'valgrind',
                    '--tool=massif',
                    '--massif-out-file=%s' % massif_log,
                    '--pages-as-heap=yes',
                    '--heap=yes',
                    '--threshold=0',
                    '--max-snapshots=1000',
                    '--peak-inaccuracy=0',
                    sys.executable,
                    __file__,
                    impl
                ])
                # The result is the largest mem_heap_B value found in the log.
                memory = 0
                with open(massif_log) as fh:
                    for line in fh:
                        if line.startswith('mem_heap_B='):
                            memory = max(memory, int(line.split('=')[-1]))
                results[impl] = memory
            elif backend == 'maxrss':
                pid = os.fork()
                if pid:
                    # Parent: wait for the child and read its resource usage.
                    _, status, usage = os.wait4(pid, 0)
                    # ru_maxrss is in kilobytes on Linux but in bytes on macOS.
                    rss_unit = 1 if sys.platform == 'darwin' else 1024
                    peak_bytes = usage.ru_maxrss * rss_unit
                    if status:
                        # wait4() returns an encoded wait status, not an exit
                        # code. Decode it so the message shows the real code,
                        # or the negated signal number if the child was killed.
                        if os.WIFEXITED(status):
                            exit_code = os.WEXITSTATUS(status)
                        else:
                            exit_code = -os.WTERMSIG(status)
                        raise RuntimeError("Failed to run loader. Exit code: %s. Used: %s Mb" % (
                            exit_code, peak_bytes / 1024 / 1024
                        ))
                    results[impl] = peak_bytes
                else:
                    # Child: always leave through os._exit() so it never falls
                    # back into the parent's loop.
                    try:
                        run(impl)
                    except Exception:
                        import traceback

                        traceback.print_exc()
                        os._exit(5)
                    finally:
                        os._exit(0)
            else:
                raise RuntimeError("Unknown BM_MEM backend %r" % backend)

        print('MEMORY USAGE:')
        for impl, memory in sorted(results.items(), key=operator.itemgetter(1)):
            print('{:>20}: {:>7,.1f} Mb'.format(impl, memory / 1024 / 1024))

        if args.save:
            import json

            dirname = os.path.dirname(args.save)
            # A bare file name has an empty dirname, and os.makedirs('') raises,
            # so only create the directory when there is one to create.
            if dirname and not os.path.exists(dirname):
                os.makedirs(dirname)

            with open(args.save, 'w') as fh:
                json.dump(results, fh)
diff --git a/plot.ipynb b/plot.ipynb
diff --git a/plot.py b/plot.py
diff --git a/speed.py b/speed.py
diff --git a/tox.ini b/tox.ini
diff --git a/util.py b/util.py
	*.cbor
	*.log
	*.xml
	*.yaml
	*.json
	*.msgpack
	*.html
	*.marshal
	*.pickle
	.tox
	.idea
	.cache
	*.pyc
	def pytest_benchmark_generate_json(config, benchmarks, include_data):
	    """Build the benchmark JSON report with ``include_data`` forced to False.

	    Delegates to the hook implementation of the same name in
	    ``pytest_benchmark.plugin``. ``config`` and ``benchmarks`` are passed
	    through unchanged; the ``include_data`` value received here is ignored.
	    """
	    # Imported lazily, and reached through the module so the plugin's
	    # function does not shadow this hook's own name inside the body.
	    from pytest_benchmark import plugin as benchmark_plugin

	    default_hook = benchmark_plugin.pytest_benchmark_generate_json
	    return default_hook(config=config, benchmarks=benchmarks, include_data=False)
	import json
	import sys

	import dicttoxml
	import marshal
	import msgpack
	import yaml
	import cbor

	from util import is_format_needed

	try:
	import cPickle as pickle
	except ImportError:
	import pickle

	if __name__ == '__main__':
	    # Source document: a JSON file named by the first CLI argument,
	    # otherwise a synthetic payload dominated by one very large string.
	    if len(sys.argv) > 1:
	        with open(sys.argv[1]) as source:
	            data = json.load(source)
	    else:
	        data = {
	            "foo": [{
	                "bar": [
	                    'A"\\ :,;\n1' * 20000000,
	                ],
	                "b": [
	                    1, 0.333, True,
	                ],
	                "c": None,
	            }]
	        }

	    # One row per output format: (format name, file name, open() mode,
	    # writer). Each writer receives the already-open output file.
	    writers = [
	        ('json', 'data.json', 'w',
	         lambda out: json.dump(data, out)),
	        ('yaml', 'data.yaml', 'w',
	         lambda out: yaml.dump(data, out)),
	        ('msgpack', 'data.msgpack', 'wb',
	         lambda out: msgpack.dump(data, out, use_bin_type=True)),
	        ('pickle', 'data.pickle', 'wb',
	         lambda out: pickle.dump(data, out, protocol=pickle.HIGHEST_PROTOCOL)),
	        ('marshal', 'data.marshal', 'wb',
	         lambda out: marshal.dump(data, out)),
	        ('xml', 'data.xml', 'wb',
	         lambda out: out.write(dicttoxml.dicttoxml(data))),
	        ('cbor', 'data.cbor', 'wb',
	         lambda out: cbor.dump(data, out)),
	    ]

	    for fmt, filename, mode, write in writers:
	        if not is_format_needed(fmt):
	            continue
	        # Open (and truncate) the file first, then announce it, then write.
	        with open(filename, mode) as out:
	            print('Creating %s ...' % filename)
	            write(out)
	#!/usr/bin/env python
	from __future__ import division

	import argparse
	import os
	import sys

	from util import IMPLEMENTATIONS
	from util import get_loader
	from util import open_data

	# Memory measurement backend, read from the BM_MEM environment variable and
	# lower-cased; "valgrind" is used when the variable is not set.
	backend = os.environ.get("BM_MEM", "valgrind").lower()


	def run(impl):
	    """Read the whole data file for ``impl`` and feed it to that implementation's loader.

	    The file is fully read and closed before the loader is looked up and
	    called. The loader's result is discarded and nothing is returned.
	    """
	    with open_data(impl) as fh:
	        data = fh.read()

	    loader = get_loader(impl)
	    loader(data)

	# Command line: an optional ``--save`` value and an optional positional
	# ``impl``; both parse as None when they are not given.
	parser = argparse.ArgumentParser()
	parser.add_argument("--save")
	parser.add_argument("impl", nargs="?")

	if __name__ == '__main__':
	    # Two modes. With an ``impl`` argument this process only performs the
	    # load (this is how the valgrind backend re-invokes the script). Without
	    # one it measures every importable implementation and prints a report.
	    args = parser.parse_args()
	    if args.impl:
	        run(args.impl)
	    else:
	        import operator
	        import subprocess

	        results = {}
	        for impl in IMPLEMENTATIONS:
	            # Skip implementations whose module cannot be imported here.
	            try:
	                __import__(impl)
	            except ImportError:
	                continue

	            print('Testing memory use for %r ...' % impl)
	            if backend == 'valgrind':
	                massif_log = './massif-%s.log' % impl
	                subprocess.check_call([
	                    # 'strace',
	                    # '-o',
	                    # './strace-%s.log' % impl,
	                    'valgrind',
	                    '--tool=massif',
	                    '--massif-out-file=%s' % massif_log,
	                    '--pages-as-heap=yes',
	                    '--heap=yes',
	                    '--threshold=0',
	                    '--max-snapshots=1000',
	                    '--peak-inaccuracy=0',
	                    sys.executable,
	                    __file__,
	                    impl
	                ])
	                # The result is the largest mem_heap_B value in the log.
	                memory = 0
	                with open(massif_log) as fh:
	                    for line in fh:
	                        if line.startswith('mem_heap_B='):
	                            memory = max(memory, int(line.split('=')[-1]))
	                results[impl] = memory
	            elif backend == 'maxrss':
	                pid = os.fork()
	                if pid:
	                    # Parent: wait for the child and read its resource usage.
	                    _, status, usage = os.wait4(pid, 0)
	                    # ru_maxrss is in kilobytes on Linux but in bytes on macOS.
	                    rss_unit = 1 if sys.platform == 'darwin' else 1024
	                    peak_bytes = usage.ru_maxrss * rss_unit
	                    if status:
	                        # wait4() returns an encoded wait status, not an exit
	                        # code. Decode it so the message shows the real code,
	                        # or the negated signal number if the child was killed.
	                        if os.WIFEXITED(status):
	                            exit_code = os.WEXITSTATUS(status)
	                        else:
	                            exit_code = -os.WTERMSIG(status)
	                        raise RuntimeError("Failed to run loader. Exit code: %s. Used: %s Mb" % (
	                            exit_code, peak_bytes / 1024 / 1024
	                        ))
	                    results[impl] = peak_bytes
	                else:
	                    # Child: always leave through os._exit() so it never
	                    # falls back into the parent's loop.
	                    try:
	                        run(impl)
	                    except Exception:
	                        import traceback

	                        traceback.print_exc()
	                        os._exit(5)
	                    finally:
	                        os._exit(0)
	            else:
	                raise RuntimeError("Unknown BM_MEM backend %r" % backend)

	        print('MEMORY USAGE:')
	        for impl, memory in sorted(results.items(), key=operator.itemgetter(1)):
	            print('{:>20}: {:>7,.1f} Mb'.format(impl, memory / 1024 / 1024))

	        if args.save:
	            import json

	            dirname = os.path.dirname(args.save)
	            # A bare file name has an empty dirname, and os.makedirs('')
	            # raises, so only create the directory when there is one.
	            if dirname and not os.path.exists(dirname):
	                os.makedirs(dirname)

	            with open(args.save, 'w') as fh:
	                json.dump(results, fh)