Skip to content

Instantly share code, notes, and snippets.

@esheffield
Last active August 29, 2015 14:15
Show Gist options
  • Save esheffield/3514988368eb42db1203 to your computer and use it in GitHub Desktop.
Save esheffield/3514988368eb42db1203 to your computer and use it in GitHub Desktop.
-----------------------------------------------------------------------
10000 User objects
-----------------------------------------------------------------------
[eddie@localhost avro]$ python compare.py
Creating 10000 users with meta size 0
Done!
Serializing with Avro...
Difference: 0:00:02.865458
***************
Serializing with JSON...
Difference: 0:00:00.487032
Done!
[eddie@localhost avro]$ du -b avro
5910110 avro
[eddie@localhost avro]$ du -b json
1353120 json
-----------------------------------------------------------------------
[eddie@localhost avro]$ python compare.py
Creating 10000 users with meta size 50
Done!
Serializing with Avro...
Difference: 0:00:11.440458
***************
Serializing with JSON...
Difference: 0:00:00.750708
Done!
[eddie@localhost avro]$ du -b avro
23954045 avro
[eddie@localhost avro]$ du -b json
22357188 json
-----------------------------------------------------------------------
100000 User objects
-----------------------------------------------------------------------
[eddie@localhost avro]$ python compare.py
Creating 100000 users with meta size 0
Done!
Serializing with Avro...
Difference: 0:00:29.126173
***************
Serializing with JSON...
Difference: 0:00:05.138391
Done!
[eddie@localhost avro]$
[eddie@localhost avro]$ du -b avro
58995214 avro
[eddie@localhost avro]$ du -b json
14018502 json
-----------------------------------------------------------------------
[eddie@localhost avro]$ python compare.py
Creating 100000 users with meta size 50
Done!
Serializing with Avro...
Difference: 0:01:57.889641
***************
Serializing with JSON...
Difference: 0:00:07.903804
Done!
[eddie@localhost avro]$ du -b avro
239206740 avro
[eddie@localhost avro]$ du -b json
223830205 json
import datetime
import string
import random
import json
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
# Number of user records generated and then serialized by each format.
USER_COUNT = 100000
# Number of random key/value pairs placed in each user's "meta" map.
META_SIZE = 0
# Allowed values for a user's "size" field.
SIZES = ["LARGE", "MEDIUM", "SMALL"]
def strgen(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    Every character is chosen independently with ``random.choice``.  By
    default the alphabet is the ASCII uppercase letters plus the digits 0-9.
    A ``size`` of 0 yields the empty string.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def rand_str_or_none(size=6, none_chance=0.25):
    """Return a random string of ``size`` characters, or ``None``.

    ``None`` is returned with probability ``none_chance``: a value of 0 (or
    less) never yields ``None`` and a value of 1 (or more) always does.
    Otherwise the result is ``strgen(size=size)``.
    """
    # random.random() is uniform on [0.0, 1.0), so this comparison is true
    # with probability none_chance.  The earlier
    # ``randint(0, 1000) <= 1000 * none_chance`` test drew from 1001 inclusive
    # values, so it was slightly biased and could return None even when
    # none_chance was 0.
    if random.random() < none_chance:
        return None
    return strgen(size=size)
def rand_int_or_none(min=0, max=100, none_chance=0.25):
    """Return a random integer in ``[min, max]`` (inclusive), or ``None``.

    ``None`` is returned with probability ``none_chance``: a value of 0 (or
    less) never yields ``None`` and a value of 1 (or more) always does.
    The parameter names shadow the ``min``/``max`` builtins; they are kept
    because callers pass them by keyword.
    """
    # random.random() is uniform on [0.0, 1.0), so this comparison is true
    # with probability none_chance.  The earlier
    # ``randint(0, 1000) <= 1000 * none_chance`` test drew from 1001 inclusive
    # values, so it was slightly biased and could return None even when
    # none_chance was 0.
    if random.random() < none_chance:
        return None
    return random.randint(min, max)
def make_user(meta_size=50):
    """Build one random user record as a plain dict.

    The record has the keys ``name`` (6-15 random characters),
    ``favorite_number`` (an int in 0..1000 or ``None``), ``favorite_color``
    (a random string or ``None``), ``size`` (one entry of ``SIZES``) and
    ``meta``.

    ``meta_size`` is the number of random key/value pairs generated for
    ``meta``; keys are 6 characters and values are 6-50 characters.  Should
    two generated keys collide, the map ends up with fewer entries.
    """
    return {
        "name": strgen(size=random.randint(6, 15)),
        "favorite_number": rand_int_or_none(min=0, max=1000),
        "favorite_color": rand_str_or_none(),
        # Choose from SIZES directly instead of hard-coding index bounds, so
        # the pick stays correct if SIZES gains or loses entries.
        "size": random.choice(SIZES),
        "meta": {
            strgen(): strgen(size=random.randint(6, 50))
            for _ in range(meta_size)
        },
    }
# Benchmark driver: generate the users once, then time how long it takes to
# write every user to its own file, first as Avro and then as JSON.
# The avro/ and json/ output directories and user.avsc must already exist.
print("Creating %d users with meta size %d" % (USER_COUNT, META_SIZE))
users = [make_user(meta_size=META_SIZE) for _ in range(USER_COUNT)]
print("Done!")

print("Serializing with Avro...")
# Read the schema through a context manager so its file handle is closed.
with open("user.avsc") as schema_file:
    schema1 = avro.schema.parse(schema_file.read())
start = datetime.datetime.now()
for i, user in enumerate(users):
    writer = DataFileWriter(open("avro/users_%d.avro" % i, "wb"), DatumWriter(), schema1)
    try:
        writer.append(user)
    finally:
        # Close even if append() raises so handles are not leaked while
        # looping over many thousands of files.
        writer.close()
end = datetime.datetime.now()
print("Difference: %s" % (end - start))

print("***************")

print("Serializing with JSON...")
start = datetime.datetime.now()
for i, user in enumerate(users):
    with open("json/users_%d.json" % i, "w") as json_file:
        # Keep dumps() plus a single write() so the timed work is the same
        # as in earlier runs of this benchmark.
        json_file.write(json.dumps(user))
end = datetime.datetime.now()
print("Difference: %s" % (end - start))
print("Done!")
{
"namespace": "example.avro",
"type": "record",
"name": "User",
"fields": [
{"name": "name", "type": "string"},
{"name": "favorite_number", "type": ["int", "null"]},
{"name": "favorite_color", "type": [ "null", "string"], "default": null },
{
"name": "size",
"type": [{
"name": "Size",
"type": "enum",
"symbols": ["LARGE", "MEDIUM", "SMALL"]
}, "null"],
"default": "SMALL"
},
{"name": "meta", "type": {"type": "map", "values": "string"}}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment