Last active
August 29, 2015 14:15
-
-
Save esheffield/3514988368eb42db1203 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
----------------------------------------------------------------------- | |
10000 User objects | |
----------------------------------------------------------------------- | |
[eddie@localhost avro]$ python compare.py | |
Creating 10000 users with meta size 0 | |
Done! | |
Serializing with Avro... | |
Difference: 0:00:02.865458 | |
*************** | |
Serializing with JSON... | |
Difference: 0:00:00.487032 | |
Done! | |
[eddie@localhost avro]$ du -b avro | |
5910110 avro | |
[eddie@localhost avro]$ du -b json | |
1353120 json | |
----------------------------------------------------------------------- | |
[eddie@localhost avro]$ python compare.py | |
Creating 10000 users with meta size 50 | |
Done! | |
Serializing with Avro... | |
Difference: 0:00:11.440458 | |
*************** | |
Serializing with JSON... | |
Difference: 0:00:00.750708 | |
Done! | |
[eddie@localhost avro]$ du -b avro | |
23954045 avro | |
[eddie@localhost avro]$ du -b json | |
22357188 json | |
----------------------------------------------------------------------- | |
100000 User objects | |
----------------------------------------------------------------------- | |
[eddie@localhost avro]$ python compare.py | |
Creating 100000 users with meta size 0 | |
Done! | |
Serializing with Avro... | |
Difference: 0:00:29.126173 | |
*************** | |
Serializing with JSON... | |
Difference: 0:00:05.138391 | |
Done! | |
[eddie@localhost avro]$ | |
[eddie@localhost avro]$ du -b avro | |
58995214 avro | |
[eddie@localhost avro]$ du -b json | |
14018502 json | |
----------------------------------------------------------------------- | |
[eddie@localhost avro]$ python compare.py | |
Creating 100000 users with meta size 50 | |
Done! | |
Serializing with Avro... | |
Difference: 0:01:57.889641 | |
*************** | |
Serializing with JSON... | |
Difference: 0:00:07.903804 | |
Done! | |
[eddie@localhost avro]$ du -b avro | |
239206740 avro | |
[eddie@localhost avro]$ du -b json | |
223830205 json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import string | |
import random | |
import json | |
import avro.schema | |
from avro.datafile import DataFileReader, DataFileWriter | |
from avro.io import DatumReader, DatumWriter | |
# Number of synthetic user records to generate and serialize.
USER_COUNT = 100000

# Number of random key/value pairs placed in each user's "meta" map.
META_SIZE = 0

# Values a user's "size" field is drawn from.
SIZES = [
    "LARGE",
    "MEDIUM",
    "SMALL",
]
def strgen(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    By default the alphabet is the ASCII uppercase letters plus the digits.
    A ``size`` of zero (or less) yields the empty string.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def rand_str_or_none(size=6, none_chance=0.25):
    """Return a random string of length ``size``, or ``None``.

    ``None`` is returned with probability ``none_chance``: 0.0 never yields
    ``None`` and 1.0 (or more) always does. Otherwise the result comes from
    ``strgen(size=size)``.
    """
    # random.random() is uniform on [0.0, 1.0), so this test succeeds with
    # probability exactly none_chance. Comparing randint(0, 1000) against
    # 1000 * none_chance would be slightly off because randint is inclusive
    # on both ends (1001 outcomes), making even none_chance=0 return None
    # occasionally.
    if random.random() < none_chance:
        return None
    return strgen(size=size)
def rand_int_or_none(min=0, max=100, none_chance=0.25):
    """Return a random integer in ``[min, max]`` (inclusive), or ``None``.

    ``None`` is returned with probability ``none_chance``: 0.0 never yields
    ``None`` and 1.0 (or more) always does.

    The parameter names ``min`` and ``max`` shadow the builtins; they are
    kept because callers pass them by keyword.
    """
    # random.random() is uniform on [0.0, 1.0), so this test succeeds with
    # probability exactly none_chance. Comparing randint(0, 1000) against
    # 1000 * none_chance would be slightly off because randint is inclusive
    # on both ends (1001 outcomes), making even none_chance=0 return None
    # occasionally.
    if random.random() < none_chance:
        return None
    return random.randint(min, max)
def make_user(meta_size=50):
    """Build one random user record as a plain dict.

    The record has the keys ``name`` (random string of 6-15 characters),
    ``favorite_number`` (int in 0..1000 or None), ``favorite_color``
    (random string or None), ``size`` (one of ``SIZES``) and ``meta``
    (a dict of random string keys to random string values).

    ``meta_size`` is the number of meta entries generated. Keys are random,
    so a collision would leave the map with fewer entries than requested.
    """
    user = {
        "name": strgen(size=random.randint(6, 15)),
        "favorite_number": rand_int_or_none(min=0, max=1000),
        "favorite_color": rand_str_or_none(),
        # random.choice follows the actual length of SIZES, so the list can
        # grow or shrink without a hard-coded index bound going stale.
        "size": random.choice(SIZES),
        "meta": {},
    }
    for _ in range(meta_size):
        user["meta"][strgen()] = strgen(size=random.randint(6, 50))
    return user
# --- Generate the in-memory data set -------------------------------------
print("Creating %d users with meta size %d" % (USER_COUNT, META_SIZE))
users = [make_user(meta_size=META_SIZE) for _ in range(USER_COUNT)]
print("Done!")

# --- Avro: one container file per user -----------------------------------
print("Serializing with Avro...")
# Read the schema inside a with-block so the file handle is closed promptly.
# Schema parsing stays outside the timed region.
with open("user.avsc") as schema_file:
    schema1 = avro.schema.parse(schema_file.read())
start = datetime.datetime.now()
for i, user in enumerate(users):
    # The with-block guarantees the output handle is released even if
    # append() raises; writer.close() still runs on the success path to
    # finish the Avro file before the handle goes away.
    with open("avro/users_%d.avro" % i, "wb") as out:
        writer = DataFileWriter(out, DatumWriter(), schema1)
        writer.append(user)
        writer.close()
end = datetime.datetime.now()
print("Difference: %s" % (end - start))
print("***************")

# --- JSON: one text file per user ----------------------------------------
print("Serializing with JSON...")
start = datetime.datetime.now()
for i, user in enumerate(users):
    with open("json/users_%d.json" % i, "w") as out:
        out.write(json.dumps(user))
end = datetime.datetime.now()
print("Difference: %s" % (end - start))
print("Done!")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "namespace": "example.avro",
  "type": "record",
  "name": "User",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "favorite_number", "type": ["int", "null"]},
    {"name": "favorite_color", "type": ["null", "string"], "default": null},
    {
      "name": "size",
      "type": [{
        "name": "Size",
        "type": "enum",
        "symbols": ["LARGE", "MEDIUM", "SMALL"]
      }, "null"],
      "default": "SMALL"
    },
    {"name": "meta", "type": {"type": "map", "values": "string"}}
  ]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment