Last active
July 18, 2018 21:00
-
-
Save pfreixes/881db840e36d844fc2f8f278ed0e0766 to your computer and use it in GitHub Desktop.
Comparing Proto vs JSON with Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Schema for the tutorial.Person message exercised by the benchmark.
syntax = "proto2";

package tutorial;

// A person record: a name, a numeric id, an optional email address and any
// number of phone entries.
message Person {
  required string name = 1;
  required int32 id = 2;
  optional string email = 3;

  // Kind of line a PhoneNumber refers to.
  enum PhoneType {
    MOBILE = 0;
    HOME = 1;
    WORK = 2;
  }

  // One phone entry; `type` reads as HOME when the field is not set.
  message PhoneNumber {
    required string number = 1;
    optional PhoneType type = 2 [default = HOME];
  }

  repeated PhoneNumber phones = 4;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
from timeit import timeit

import ujson
from google.protobuf import json_format

from address_book_pb2 import Person
# Number of calls timed per measurement; report() divides the total elapsed
# time by this value to print the mean time per call.
SAMPLES = 100
def proto_deserialize(data):
    """Parse serialized protobuf bytes into a ``Person`` message.

    Args:
        data: Bytes to hand to ``Person.FromString``.

    Returns:
        The parsed ``Person``. The benchmark ignores the value; it is
        returned rather than bound to an unused local.
    """
    return Person.FromString(data)
def json_deserialize(data):
    """Decode a JSON document with the standard-library ``json`` module.

    Args:
        data: JSON text to decode.

    Returns:
        The decoded Python object. The benchmark ignores the value; it is
        returned rather than bound to an unused local.
    """
    return json.loads(data)
def ujson_deserialize(data):
    """Decode a JSON document with ``ujson``.

    Args:
        data: JSON text to decode.

    Returns:
        The decoded Python object. The benchmark ignores the value; it is
        returned rather than bound to an unused local.
    """
    return ujson.loads(data)
def proto_serialize(proto):
    """Serialize a protobuf message via its ``SerializeToString`` method.

    Args:
        proto: Message object exposing ``SerializeToString()``.

    Returns:
        Whatever ``SerializeToString()`` returns. The benchmark ignores the
        value; it is returned rather than bound to an unused local.
    """
    return proto.SerializeToString()
def json_serialize(data):
    """Encode a Python object as JSON with the standard-library ``json`` module.

    Args:
        data: JSON-serializable object.

    Returns:
        The JSON text. The benchmark ignores the value; it is returned
        rather than bound to an unused local.
    """
    return json.dumps(data)
def ujson_serialize(data):
    """Encode a Python object as JSON with ``ujson``.

    Args:
        data: JSON-serializable object.

    Returns:
        The JSON text. The benchmark ignores the value; it is returned
        rather than bound to an unused local.
    """
    return ujson.dumps(data)
def build_data(cnt):
    """Build the benchmark payload: one person with ``cnt`` phone entries.

    Args:
        cnt: Number of phone entries to generate; ``0`` yields an empty
            ``phones`` list.

    Returns:
        A dict with the keys ``name``, ``id``, ``email`` and ``phones``.
        Every phone entry is a distinct dict with the same ``number`` and
        ``type`` values.
    """
    # NOTE(review): the email value looks like a placeholder left by e-mail
    # obfuscation rather than a real address -- confirm before relying on it.
    return {
        'name': 'foo',
        'id': 1,
        'email': '[email protected]',
        'phones': [
            {'number': '666 666 666', 'type': 0}
            for _ in range(cnt)
        ],
    }
def report(f, phones, len_, time):
    """Print one result line with the mean time per call.

    Args:
        f: The benchmarked function; only its ``__name__`` is printed.
        phones: Number of phone records in the payload.
        len_: Payload size, printed with the label "bytes".
        time: Total elapsed time for ``SAMPLES`` calls; it is divided by the
            module-level ``SAMPLES`` before printing.
    """
    print("{} {} records ({} bytes): {}".format(
        f.__name__,
        phones,
        len_,
        time / SAMPLES,
    ))
# The serialized fixtures are written under data/; create the directory up
# front so the open() at the end of the first iteration cannot fail with a
# missing-directory error after the measurements have already been taken.
if not os.path.isdir("data"):
    os.makedirs("data")

for phones in (10, 100, 1000, 2000, 4000, 8000, 16000):
    # Build the same record in every representation being compared.
    data = build_data(phones)
    json_data = json.dumps(data)
    proto = json_format.Parse(json_data, Person())
    proto_data = proto.SerializeToString()

    # (function under test, name of its module-level argument, reported size).
    # timeit runs the statement in its own namespace, so both the function
    # and its argument are imported from __main__ by name in `setup`.
    benchmarks = (
        (json_deserialize, "json_data", len(json_data)),
        (ujson_deserialize, "json_data", len(json_data)),
        (proto_deserialize, "proto_data", len(proto_data)),
        (json_serialize, "data", len(json_data)),
        (ujson_serialize, "data", len(json_data)),
        (proto_serialize, "proto", len(proto_data)),
    )
    for func, arg_name, size in benchmarks:
        elapsed = timeit(
            "{}({})".format(func.__name__, arg_name),
            number=SAMPLES,
            setup="from __main__ import {}, {}".format(func.__name__, arg_name),
        )
        report(func, phones, size, elapsed)
    print("-"*50)

    # Keep the protobuf payload on disk, one file per record count.
    # Write-only binary mode is enough; the file is never read back here.
    with open("data/proto_{}".format(phones), "wb") as fd:
        fd.write(proto_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <fstream> | |
#include <string> | |
#include <cstdlib> | |
#include "address_book.pb.h" | |
using namespace std; | |
// Main function: Reads the entire address book from a file, | |
// adds one person based on user input, then writes it back out to the same | |
// file. | |
int main(int argc, char* argv[]) { | |
// Verify that the version of the library that we linked against is | |
// compatible with the version of the headers we compiled against. | |
GOOGLE_PROTOBUF_VERIFY_VERSION; | |
if (argc != 3) { | |
cerr << "Usage: " << argv[0] << " ADDRESS_BOOK_FILE ITERATIONS" << endl; | |
return -1; | |
} | |
tutorial::Person person; | |
{ | |
// Read the existing address book. | |
fstream input(argv[1], ios::in | ios::binary); | |
if (!input) { | |
cout << argv[1] << ": File not found. Creating a new file." << endl; | |
} | |
std::string data; | |
char buffer[4096]; | |
while (input.read(buffer, sizeof(buffer))) | |
{ | |
data.append(buffer, sizeof(buffer)); | |
} | |
data.append(buffer, input.gcount()); | |
int cnt = atoi(argv[2]); | |
for(int i=0;i<cnt;i++) | |
{ | |
if (!person.ParseFromString(data)) { | |
cerr << "Failed to parse address book." << endl; | |
return -1; | |
} | |
} | |
} | |
// Optional: Delete all global objects allocated by libprotobuf. | |
google::protobuf::ShutdownProtobufLibrary(); | |
return 0; |
Author
pfreixes
commented
Jul 18, 2018
$ clang -std=c++11 -o load_addrbook reading.cpp address_book.pb.cc -lprotobuf -lc++
$ time ./load_addrbook data/proto_16000 100
real 0m0.450s
user 0m0.438s
sys 0m0.005s
The cost of deserialization in C++ is approximately 4 ms per parse for the 16000-record fixture (0.438 s of user time over 100 iterations).
The raw per-call times for that fixture alone, by technology, are:
- Proto C++ 4 ms
- Proto Python 9 ms **
- JSON ujson 11 ms
- JSON json 13 ms
** Uses the C++ library under the hood.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment