Created
June 30, 2012 06:32
-
-
Save ambroff/3022655 to your computer and use it in GitHub Desktop.
Comparing JSON and Thrift serialization speed and data size.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
import timeit

import jsonlib
import lz4
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
from thrift.protocol.TCompactProtocol import TCompactProtocol
from thrift.transport.TTransport import TMemoryBuffer

from example.ttypes import Item
from example.ttypes import ItemSet
def thrift_obj_to_dict(obj):
    """Convert a Thrift-style struct into a plain dict.

    Walks ``obj.thrift_spec`` and, for every non-empty spec entry, uses the
    entry's element at index 2 as the attribute name to read from ``obj``.
    Attributes that are missing or ``None`` are left out of the result.

    Values that themselves expose a ``thrift_spec`` are converted
    recursively, as are the members of lists and the values of dicts, so
    the result contains only plain containers and leaf values.

    Args:
        obj: object with an iterable ``thrift_spec`` attribute.

    Returns:
        dict mapping field names to their (converted) values.
    """
    def _to_plain(value):
        # Nested struct: anything carrying its own spec is converted too.
        if getattr(value, 'thrift_spec', None) is not None:
            return thrift_obj_to_dict(value)
        if isinstance(value, list):
            return [_to_plain(member) for member in value]
        if isinstance(value, dict):
            return dict((key, _to_plain(val)) for key, val in value.items())
        return value

    result = {}
    for field_spec in obj.thrift_spec:
        # Spec sequences may contain empty placeholder entries; skip them.
        if not field_spec:
            continue
        field_name = field_spec[2]
        field_value = getattr(obj, field_name, None)
        if field_value is not None:
            result[field_name] = _to_plain(field_value)
    return result
# Build the benchmark fixture: 1000 Item structs plus the equivalent plain
# dicts, so the Thrift and JSON tests serialize the same data.
# (A larger run can use xrange(1000, 5000) instead.)
item_list = []
dict_list = []
for i in xrange(1000, 2000):
    new_item = Item(
        id=i,
        timestamp=i + 1000,
        object_id=i + 2000,
        type=random.choice(['foo', 'bar']),
        recipient_id=i + 3000,
        sender_id=i + 4000,
        metadata={str(i): str(i + 9)})
    item_list.append(new_item)
    dict_list.append(thrift_obj_to_dict(new_item))

# Thrift-side payload and its JSON-side counterpart.
TEST_OBJECT = ItemSet(new_events=item_list)
TEST_DICT = {'new_events': dict_list}
def test_thrift(compact=False):
    """Serialize TEST_OBJECT with Thrift, read it back, and return its size.

    Args:
        compact: when true, encode and decode with TCompactProtocol;
            otherwise use TBinaryProtocol.

    Returns:
        ``len()`` of the serialized payload taken from the output buffer.
    """
    # Choose the protocol once so the write and read sides cannot diverge.
    protocol_class = TCompactProtocol if compact else TBinaryProtocol

    transport_out = TMemoryBuffer()
    protocol_out = protocol_class(transport_out)
    TEST_OBJECT.write(protocol_out)

    # The serialized payload could be written out to disk
    # to be read in at a different time.
    payload = transport_out.getvalue()

    transport_in = TMemoryBuffer(payload)
    protocol_in = protocol_class(transport_in)
    new_set = ItemSet()
    new_set.read(protocol_in)
    return len(payload)
# Number of timed repetitions per benchmark. Kept small for quick runs;
# 1000000 was the alternative value left commented out here.
ITERATIONS = 100
def test_json(compress=False):
    """Serialize TEST_DICT to JSON, read it back, and return its size.

    Args:
        compress: when true, the JSON output is additionally passed through
            ``lz4.compress`` and decompressed again before being parsed.

    Returns:
        ``len()`` of the encoded data: the compressed data when ``compress``
        is true, otherwise the JSON output itself.
    """
    data = jsonlib.write(TEST_DICT)
    if compress:
        data = lz4.compress(data)
        json_text = lz4.uncompress(data)
    else:
        json_text = data
    jsonlib.read(json_text)
    return len(data)
# Run every benchmark and print its elapsed time and encoded size.
# Single-argument print(...) emits the same text as the original
# comma-separated print statements and parses on Python 2 and 3.
print('== %s iterations serializing and deserializing a large object =='
      % ITERATIONS)

# Each entry: (heading, benchmark function, flag passed to that function).
for heading, bench, flag in (
        ('THRIFT:', test_thrift, False),
        ('COMPACT THRIFT:', test_thrift, True),
        ('JSON:', test_json, False),
        ('LZ4 COMPRESSED JSON:', test_json, True)):
    print(heading)
    # Timed statement is e.g. 'test_thrift()' or 'test_thrift(True)'; the
    # setup string imports the function from the running script.
    stmt = '%s(%s)' % (bench.__name__, 'True' if flag else '')
    setup = 'from __main__ import %s' % bench.__name__
    elapsed = timeit.timeit(stmt, setup, number=ITERATIONS)
    print(' -  %s seconds' % elapsed)
    print(' -  %s bytes' % bench(flag))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace py example
/* A single event record: six required fields plus optional metadata. */
struct Item {
    /* The ID of this event. */
    1: required i64 id,

    /* When this event occurred UTC. */
    2: required i64 timestamp,

    /* The ID of the object related to this event. */
    3: required i64 object_id,

    /* Type of this event or object. */
    4: required string type,

    /* The account_id for the user receiving this event. */
    5: required i64 recipient_id,

    /* ID of the user that triggered this event. */
    6: required i64 sender_id,

    /* Additional metadata that you may want to attach to this item. */
    7: optional map<string, string> metadata
}
/* A collection of Item records, carried in the new_events list. */
struct ItemSet {
    1: required list<Item> new_events
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
== 100 iterations serializing and deserializing a large object ==
THRIFT:
 -  27.6038601398 seconds
 -  91009 bytes
COMPACT THRIFT:
 -  44.0193760395 seconds
 -  34005 bytes
JSON:
 -  1.46023511887 seconds
 -  123016 bytes
LZ4 COMPRESSED JSON:
 -  1.53419780731 seconds
 -  34556 bytes
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment