Skip to content

Instantly share code, notes, and snippets.

@pfreixes
Last active July 18, 2018 21:00
Show Gist options
  • Save pfreixes/881db840e36d844fc2f8f278ed0e0766 to your computer and use it in GitHub Desktop.
Save pfreixes/881db840e36d844fc2f8f278ed0e0766 to your computer and use it in GitHub Desktop.
Comparing Proto vs JSON with Python
syntax = "proto2";
package tutorial;
// A person record: a name, a numeric id, an optional email address and any
// number of phone numbers.
message Person {
required string name = 1;
required int32 id = 2;
optional string email = 3;
// Kind of phone line a PhoneNumber refers to.
enum PhoneType {
MOBILE = 0;
HOME = 1;
WORK = 2;
}
// A single phone entry; `type` defaults to HOME when it is not set.
message PhoneNumber {
required string number = 1;
optional PhoneType type = 2 [default = HOME];
}
// Zero or more phone numbers belonging to this person.
repeated PhoneNumber phones = 4;
}
import json
import os
from timeit import timeit

import ujson
from google.protobuf import json_format

from address_book_pb2 import Person
# Number of times each benchmark statement is executed by timeit; report()
# divides the total elapsed time by this value to print a per-call average.
SAMPLES = 100
def proto_deserialize(data):
    """Parse serialized protobuf bytes into a ``Person`` message.

    Args:
        data: Wire-format bytes, e.g. the output of ``SerializeToString()``.

    Returns:
        The parsed ``Person`` message. The benchmark loop discards it; it is
        returned instead of being bound to an unused local.
    """
    return Person.FromString(data)
def json_deserialize(data):
    """Decode a JSON document with the standard-library ``json`` module.

    Args:
        data: JSON text to decode.

    Returns:
        The decoded Python object. The benchmark loop discards it; it is
        returned instead of being bound to an unused local.
    """
    return json.loads(data)
def ujson_deserialize(data):
    """Decode a JSON document with ``ujson``.

    Args:
        data: JSON text to decode.

    Returns:
        The decoded Python object. The benchmark loop discards it; it is
        returned instead of being bound to an unused local.
    """
    return ujson.loads(data)
def proto_serialize(proto):
    """Serialize a protobuf message to its binary wire format.

    Args:
        proto: A message object exposing ``SerializeToString()``.

    Returns:
        The serialized bytes. The benchmark loop discards them; they are
        returned instead of being bound to an unused local.
    """
    return proto.SerializeToString()
def json_serialize(data):
    """Encode a Python object as JSON with the standard-library ``json`` module.

    Args:
        data: A JSON-serializable Python object.

    Returns:
        The JSON text. The benchmark loop discards it; it is returned instead
        of being bound to an unused local.
    """
    return json.dumps(data)
def ujson_serialize(data):
    """Encode a Python object as JSON with ``ujson``.

    Args:
        data: A JSON-serializable Python object.

    Returns:
        The JSON text. The benchmark loop discards it; it is returned instead
        of being bound to an unused local.
    """
    return ujson.dumps(data)
def build_data(cnt):
    """Build the benchmark fixture: one person dict carrying ``cnt`` phones.

    Args:
        cnt: Number of phone entries to generate.

    Returns:
        A dict with the keys ``name``, ``id``, ``email`` and ``phones``, where
        ``phones`` is a list of ``cnt`` separate but identical phone dicts.
    """
    phone_entries = []
    for _ in range(cnt):
        # A fresh dict per entry so that no two list items share an object.
        phone_entries.append({'number': '666 666 666', 'type': 0})

    person = {}
    person['name'] = 'foo'
    person['id'] = 1
    # NOTE(review): this literal looks like an e-mail obfuscation placeholder
    # rather than a real address -- confirm the intended value.
    person['email'] = '[email protected]'
    person['phones'] = phone_entries
    return person
def report(f, phones, len_, time):
    """Print one benchmark result line.

    Args:
        f: The benchmarked function; only its ``__name__`` is printed.
        phones: Number of phone records in the fixture.
        len_: Size in bytes of the payload being reported.
        time: Total elapsed time for ``SAMPLES`` executions; it is divided by
            ``SAMPLES`` so the printed figure is a per-call average.
    """
    average = time / SAMPLES
    line = "{} {} records ({} bytes): {}".format(f.__name__, phones, len_, average)
    print(line)
# The serialized fixtures are written under data/ for the C++ benchmark;
# create the directory up front so the open() below cannot fail on a fresh
# checkout after the timings have already been spent.
os.makedirs("data", exist_ok=True)

# Run every (de)serializer against fixtures of increasing size.
for phones in (10, 100, 1000, 2000, 4000, 8000, 16000):
    data = build_data(phones)
    json_data = json.dumps(data)
    # Build the equivalent protobuf message from the same JSON payload.
    proto = json_format.Parse(json_data, Person())
    proto_data = proto.SerializeToString()

    # (function to time, name of the module-level variable passed to it,
    #  payload whose length is printed alongside the timing)
    benchmarks = (
        (json_deserialize, "json_data", json_data),
        (ujson_deserialize, "json_data", json_data),
        (proto_deserialize, "proto_data", proto_data),
        (json_serialize, "data", json_data),
        (ujson_serialize, "data", json_data),
        (proto_serialize, "proto", proto_data),
    )
    for func, arg_name, payload in benchmarks:
        # timeit evaluates the statement in its own namespace, so both the
        # function and its argument are imported from __main__ by name.
        elapsed = timeit(
            "{}({})".format(func.__name__, arg_name),
            number=SAMPLES,
            setup="from __main__ import {}, {}".format(func.__name__, arg_name),
        )
        report(func, phones, len(payload), elapsed)
    print("-"*50)

    # Persist the protobuf fixture; the file is only written, never read back
    # here, so plain "wb" is sufficient.
    with open("data/proto_{}".format(phones), "wb") as fd:
        fd.write(proto_data)
#include <iostream>
#include <fstream>
#include <string>
#include <cstdlib>
#include "address_book.pb.h"
using namespace std;
// Benchmark driver: reads one serialized tutorial::Person from
// ADDRESS_BOOK_FILE into memory, then parses it ITERATIONS times so that the
// parse cost can be measured from outside (e.g. with `time`).
// Returns 0 on success and -1 on a usage, I/O or parse error.
int main(int argc, char* argv[]) {
  // Verify that the version of the library that we linked against is
  // compatible with the version of the headers we compiled against.
  GOOGLE_PROTOBUF_VERIFY_VERSION;

  if (argc != 3) {
    cerr << "Usage: " << argv[0] << " ADDRESS_BOOK_FILE ITERATIONS" << endl;
    return -1;
  }

  // Validate ITERATIONS explicitly: atoi() would turn garbage into 0 and the
  // program would exit successfully without having measured anything.
  char* end = nullptr;
  const long cnt = strtol(argv[2], &end, 10);
  if (end == argv[2] || *end != '\0' || cnt < 0) {
    cerr << "Usage: " << argv[0] << " ADDRESS_BOOK_FILE ITERATIONS" << endl;
    return -1;
  }

  tutorial::Person person;
  {
    // Load the whole serialized message up front so that file I/O is not
    // part of the timed parse loop.
    fstream input(argv[1], ios::in | ios::binary);
    if (!input) {
      // This program never writes the file, so a missing input is fatal.
      cerr << argv[1] << ": File not found." << endl;
      return -1;
    }

    std::string data;
    char buffer[4096];
    while (input.read(buffer, sizeof(buffer))) {
      data.append(buffer, sizeof(buffer));
    }
    // The final read() fails on a short chunk; gcount() tells how many bytes
    // of it were actually placed in the buffer.
    data.append(buffer, input.gcount());

    for (long i = 0; i < cnt; i++) {
      if (!person.ParseFromString(data)) {
        cerr << "Failed to parse address book." << endl;
        return -1;
      }
    }
  }

  // Optional: Delete all global objects allocated by libprotobuf.
  google::protobuf::ShutdownProtobufLibrary();
  return 0;
}
@pfreixes
Copy link
Author

ujson_deserialize 10 records (440 bytes): 8.017208892852068e-06
proto_deserialize 10 records (190 bytes): 7.543331012129784e-06
json_serialize 10 records (440 bytes): 1.394373131915927e-05
ujson_serialize 10 records (440 bytes): 4.64295968413353e-06
proto_serialize 10 records (190 bytes): 9.073750115931034e-06
--------------------------------------------------
json_deserialize 100 records (3860 bytes): 6.394404917955398e-05
ujson_deserialize 100 records (3860 bytes): 6.478365976363421e-05
proto_deserialize 100 records (1720 bytes): 0.0001041777990758419
json_serialize 100 records (3860 bytes): 0.00011640744982287288
ujson_serialize 100 records (3860 bytes): 4.963275976479053e-05
proto_serialize 100 records (1720 bytes): 0.00010579047957435251
--------------------------------------------------
json_deserialize 1000 records (38060 bytes): 0.0008191897999495268
ujson_deserialize 1000 records (38060 bytes): 0.00060686751967296
proto_deserialize 1000 records (17020 bytes): 0.0007077015889808536
json_serialize 1000 records (38060 bytes): 0.0010706773586571217
ujson_serialize 1000 records (38060 bytes): 0.0004153703199699521
proto_serialize 1000 records (17020 bytes): 0.0008657670207321644
--------------------------------------------------
json_deserialize 2000 records (76060 bytes): 0.0013717662007547914
ujson_deserialize 2000 records (76060 bytes): 0.001152788051404059
proto_deserialize 2000 records (34020 bytes): 0.0012685389397665858
json_serialize 2000 records (76060 bytes): 0.002137752859853208
ujson_serialize 2000 records (76060 bytes): 0.0007581304898485542
proto_serialize 2000 records (34020 bytes): 0.0016305385110899806
--------------------------------------------------
json_deserialize 4000 records (152060 bytes): 0.002768501581158489
ujson_deserialize 4000 records (152060 bytes): 0.0023405916406773032
proto_deserialize 4000 records (68020 bytes): 0.0024065101705491544
json_serialize 4000 records (152060 bytes): 0.004116422380320728
ujson_serialize 4000 records (152060 bytes): 0.0016698998515494168
proto_serialize 4000 records (68020 bytes): 0.003445039901416749
--------------------------------------------------
json_deserialize 8000 records (304060 bytes): 0.005839942158199846
ujson_deserialize 8000 records (304060 bytes): 0.005278962450101971
proto_deserialize 8000 records (136020 bytes): 0.0048038902413100005
json_serialize 8000 records (304060 bytes): 0.008382113680709154
ujson_serialize 8000 records (304060 bytes): 0.003303151289001107
proto_serialize 8000 records (136020 bytes): 0.006377647898625582
--------------------------------------------------
json_deserialize 16000 records (608060 bytes): 0.013284061218146235
ujson_deserialize 16000 records (608060 bytes): 0.011920544640161098
proto_deserialize 16000 records (272020 bytes): 0.009432364178355784
json_serialize 16000 records (608060 bytes): 0.01749424057081342
ujson_serialize 16000 records (608060 bytes): 0.006446549200918525
proto_serialize 16000 records (272020 bytes): 0.012395728619303555
--------------------------------------------------

@pfreixes
Copy link
Author

$ clang -std=c++11 -o load_addrbook reading.cpp address_book.pb.cc -lprotobuf -lc++
$ time ./load_addrbook data/proto_16000 100

real	0m0.450s
user	0m0.438s
sys	0m0.005s

The cost of deserialization in C++ is approximately 4 ms per parse for the 16000-record fixture (about 0.44 s of user time for 100 iterations).

So the raw deserialization times for that fixture alone, across the different technologies, are:

  • Proto C++ 4 ms
  • Proto Python 9 ms **
  • JSON ujson 11 ms
  • JSON json 13 ms

** The Python protobuf implementation uses the C++ library under the hood.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment