Created
September 11, 2012 16:27
-
-
Save esc/3699663 to your computer and use it in GitHub Desktop.
Benchmark for python-blosc no-copy-on-compression patch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 2aa62ca0f9b03df90202ef8c7a739939ba6f6395 Mon Sep 17 00:00:00 2001 | |
From: Valentin Haenel <[email protected]> | |
Date: Tue, 11 Sep 2012 13:40:46 +0200 | |
Subject: [PATCH] don't copy large buffers, only small ones | |
--- | |
blosc/blosc_extension.c | 23 ++++++++++------------- | |
1 file changed, 10 insertions(+), 13 deletions(-) | |
diff --git a/blosc/blosc_extension.c b/blosc/blosc_extension.c | |
index 7f3bd1dbe0..44ba66d169 100644 | |
--- a/blosc/blosc_extension.c | |
+++ b/blosc/blosc_extension.c | |
@@ -61,8 +61,8 @@ PyDoc_STRVAR(compress__doc__, | |
static PyObject * | |
PyBlosc_compress(PyObject *self, PyObject *args) | |
{ | |
- PyObject *result_str = NULL; | |
- void *input, *output; | |
+ PyObject *output = NULL; | |
+ void *input; | |
int clevel, shuffle, cbytes; | |
int nbytes, typesize; | |
@@ -72,7 +72,7 @@ PyBlosc_compress(PyObject *self, PyObject *args) | |
return NULL; | |
/* Alloc memory for compression */ | |
- output = malloc(nbytes+BLOSC_MAX_OVERHEAD); | |
+ output = PyBytes_FromStringAndSize(NULL, nbytes+BLOSC_MAX_OVERHEAD); | |
if (output == NULL) { | |
PyErr_SetString(PyExc_MemoryError, | |
"Can't allocate memory to compress data"); | |
@@ -82,22 +82,19 @@ PyBlosc_compress(PyObject *self, PyObject *args) | |
/* Compress */ | |
Py_BEGIN_ALLOW_THREADS; | |
cbytes = blosc_compress(clevel, shuffle, (size_t)typesize, (size_t)nbytes, | |
- input, output, nbytes+BLOSC_MAX_OVERHEAD); | |
+ input, PyBytes_AS_STRING(output), nbytes+BLOSC_MAX_OVERHEAD); | |
Py_END_ALLOW_THREADS; | |
if (cbytes < 0) { | |
blosc_error(cbytes, "while compressing data"); | |
- free(output); | |
return NULL; | |
} | |
- | |
- /* This forces a copy of the output, but anyway */ | |
- result_str = PyBytes_FromStringAndSize((char *)output, cbytes); | |
- | |
- /* Free the initial buffer */ | |
- free(output); | |
- | |
- return result_str; | |
+ /* Attempt to resize, if it's much smaller, a copy is required. */ | |
+ if (_PyBytes_Resize(&output, cbytes) < 0){ | |
+ /* the memory exception will have been set, hopefully */ | |
+ return NULL; | |
+ } | |
+ return output; | |
} | |
PyDoc_STRVAR(decompress__doc__, | |
-- | |
1.7.9.5 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 0ba7255b793023b1af108b619c2c384aa0dfc22d Mon Sep 17 00:00:00 2001 | |
From: Valentin Haenel <[email protected]> | |
Date: Mon, 10 Sep 2012 20:38:52 +0200 | |
Subject: [PATCH] use no-copy strategy from python-snappy | |
--- | |
blosc/blosc_extension.c | 36 +++++++++++++++++++++++------------- | |
1 file changed, 23 insertions(+), 13 deletions(-) | |
diff --git a/blosc/blosc_extension.c b/blosc/blosc_extension.c | |
index 7f3bd1dbe0..2b16ce8c6b 100644 | |
--- a/blosc/blosc_extension.c | |
+++ b/blosc/blosc_extension.c | |
@@ -13,6 +13,24 @@ | |
#include "Python.h" | |
#include "blosc.h" | |
+#define RESIZE_TOLERATION 0.75 | |
+ | |
+/* Shamelessly copied from python-snappy */ | |
+static inline PyObject * | |
+maybe_resize(PyObject *str, size_t expected_size, size_t actual_size) | |
+{ | |
+ // Tolerate up to 25% slop, to reduce the likelihood of | |
+ // reallocation and copying. | |
+ if (actual_size != expected_size) { | |
+ if (actual_size < expected_size * RESIZE_TOLERATION) { | |
+ _PyBytes_Resize(&str, actual_size); | |
+ return str; | |
+ } | |
+ Py_SIZE(str) = actual_size; | |
+ } | |
+ return str; | |
+} | |
+ | |
static PyObject *BloscError; | |
@@ -61,8 +79,8 @@ PyDoc_STRVAR(compress__doc__, | |
static PyObject * | |
PyBlosc_compress(PyObject *self, PyObject *args) | |
{ | |
- PyObject *result_str = NULL; | |
- void *input, *output; | |
+ PyObject *output = NULL; | |
+ void *input; | |
int clevel, shuffle, cbytes; | |
int nbytes, typesize; | |
@@ -72,7 +90,7 @@ PyBlosc_compress(PyObject *self, PyObject *args) | |
return NULL; | |
/* Alloc memory for compression */ | |
- output = malloc(nbytes+BLOSC_MAX_OVERHEAD); | |
+ output = PyBytes_FromStringAndSize(NULL, nbytes+BLOSC_MAX_OVERHEAD); | |
if (output == NULL) { | |
PyErr_SetString(PyExc_MemoryError, | |
"Can't allocate memory to compress data"); | |
@@ -82,22 +100,14 @@ PyBlosc_compress(PyObject *self, PyObject *args) | |
/* Compress */ | |
Py_BEGIN_ALLOW_THREADS; | |
cbytes = blosc_compress(clevel, shuffle, (size_t)typesize, (size_t)nbytes, | |
- input, output, nbytes+BLOSC_MAX_OVERHEAD); | |
+ input, PyBytes_AS_STRING(output), nbytes+BLOSC_MAX_OVERHEAD); | |
Py_END_ALLOW_THREADS; | |
if (cbytes < 0) { | |
blosc_error(cbytes, "while compressing data"); | |
- free(output); | |
return NULL; | |
} | |
- | |
- /* This forces a copy of the output, but anyway */ | |
- result_str = PyBytes_FromStringAndSize((char *)output, cbytes); | |
- | |
- /* Free the initial buffer */ | |
- free(output); | |
- | |
- return result_str; | |
+ return maybe_resize(output, nbytes, cbytes); | |
} | |
PyDoc_STRVAR(decompress__doc__, | |
-- | |
1.7.9.5 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import time | |
import numpy | |
import blosc | |
def do(str_):
    """Compress ``str_`` once with blosc and report sizes and timing.

    Prints, in order: the length of the input, the length of the
    compressed result, and the wall-clock seconds spent inside the single
    ``blosc.compress(str_, typesize=8)`` call.
    """
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as a Python 3 function call, so the script runs on both.
    print(len(str_))
    tic = time.time()
    compressed = blosc.compress(str_, typesize=8)
    toc = time.time()
    print(len(compressed))
    print("Time: %.2f seconds" % (toc - tic))
# Number of samples per benchmark input. Kept as an int because
# numpy.linspace requires an integer ``num`` (a float such as 2e8 is
# rejected by current NumPy releases).
items = 2 * 10 ** 8

# First input: evenly spaced values from 1 to 100.
array_ = numpy.linspace(1, 100, items)
# tobytes() returns the same raw buffer copy as the deprecated tostring().
str1 = array_.tobytes()
do(str1)
# Drop the first data set before building the second one.
del array_
del str1

# Second input: uniformly random values, same element count.
array_ = numpy.random.rand(items)
str2 = array_.tobytes()
do(str2)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# Repository to benchmark and the commit every patch is applied on top of.
url='https://github.com/esc/python-blosc'
base='29f168afe4'

# Reuse an existing checkout if one is present; otherwise clone, and abort
# when the clone fails so later steps never run without a checkout.
if [ ! -d python-blosc ]; then
    git clone "$url" || exit 1
fi
# do_bench COMMIT PATCHFILE
# Clean the python-blosc checkout, check out COMMIT, try to apply PATCHFILE
# with `git am`, rebuild the extension in place, then run ./bench.py with
# the checkout on PYTHONPATH.
do_bench(){
    # Bail out if the checkout is missing: otherwise the destructive
    # `git clean -dfx` below would run in the caller's directory.
    cd python-blosc || return 1
    git clean -dfx
    git checkout "$1"
    # The result of `git am` is deliberately not checked: the baseline run
    # passes a placeholder patch name and must still be benchmarked.
    git am "$2"
    python setup.py build_ext -i
    cd .. || return 1
    echo "Benchmark: $1 with patch: $2"
    PYTHONPATH=python-blosc ./bench.py
}
# Run the three benchmarks in order, always starting from $base:
#   1. foo                        - fake patch file, i.e. the unpatched base commit
#   2. ...python-snappy.patch     - the python-snappy style
#   3. ...only-small-ones.patch   - my style
for patch in \
    foo \
    ../0001-use-no-copy-strategy-from-python-snappy.patch \
    ../0001-don-t-copy-large-buffers-only-small-ones.patch
do
    do_bench "$base" "$patch"
done

# Remove the temporary checkout once all runs are finished.
rm -rf python-blosc
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment