Created
January 31, 2022 20:58
-
-
Save jcrist/782c463f9eb52211e255caa96f57ab97 to your computer and use it in GitHub Desktop.
A (naive) benchmark comparing pydantic & msgspec performance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This benchmark is a modified version of the benchmark available at
https://github.com/samuelcolvin/pydantic/tree/master/benchmarks to support
benchmarking msgspec.
The benchmark measures the time to JSON encode/decode `n` random objects
matching a specific schema. It compares the time required for both
serialization _and_ schema validation.
""" | |
import argparse | |
import random | |
import string | |
import time | |
from functools import partial | |
from datetime import datetime | |
from typing import List, Optional | |
import orjson | |
import pydantic | |
import msgspec | |
# Whitespace and ASCII punctuation characters that may appear in random strings.
PUNCTUATION = " \t\n!\"#$%&'()*+,-./"
# Upper- and lower-case ASCII letters.
LETTERS = string.ascii_letters
# Non-ASCII characters, so that generated strings are not pure ASCII.
UNICODE = "\xa0\xad¡¢£¤¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
# Default sampling corpus. Punctuation is repeated 5x and letters 20x so that
# a uniform pick from this string favours them over the non-ASCII characters.
ALL = "".join((PUNCTUATION * 5, LETTERS * 20, UNICODE))
def rand_string(min_length, max_length, corpus=ALL):
    """Return a random string whose characters are drawn from `corpus`.

    The length is chosen with ``random.randrange(min_length, max_length)``,
    so `max_length` itself is never produced. Characters are sampled with
    replacement.
    """
    length = random.randrange(min_length, max_length)
    chars = random.choices(corpus, k=length)
    return "".join(chars)
# Sentinel for "this field should be absent", as opposed to present-but-None.
MISSING = object()


def null_missing_v(f, null_chance=0.2, missing_chance=None):
    """Return ``None``, ``MISSING`` or ``f()``, chosen at random.

    Args:
        f: Zero-argument callable that produces the real value.
        null_chance: Probability of returning ``None``.
        missing_chance: Probability of returning ``MISSING``. Defaults to
            `null_chance` when left as ``None``.

    Returns:
        ``None`` with probability `null_chance`, ``MISSING`` with probability
        `missing_chance`, and otherwise the result of calling ``f()``.
    """
    if missing_chance is None:
        missing_chance = null_chance
    # Use ONE draw and split [0, 1) into three bands: [0, null) -> None,
    # [null, null + missing) -> MISSING, the rest -> real value. Drawing a
    # second random number for the null check would skew the MISSING
    # probability away from `missing_chance`.
    r = random.random()
    if r < null_chance:
        return None
    if r < null_chance + missing_chance:
        return MISSING
    return f()
def null_missing_string(*args, **kwargs):
    """Produce a random string that may instead come back null or missing.

    Positional arguments are forwarded to `rand_string`; keyword arguments
    are forwarded to `null_missing_v`.
    """

    def make_string():
        return rand_string(*args)

    return null_missing_v(make_string, **kwargs)
def rand_date():
    """Return a random timestamp string such as ``1987-03-09T14:05:59Z``.

    The year is in 1900-2019, the month in 1-12 and the day in 1-28; capping
    the day at 28 keeps every generated date valid for every month.
    """
    r = random.randrange
    # randrange excludes its stop value, so the stops are 13 and 29 to make
    # December and the 28th reachable.
    return f"{r(1900, 2020):04}-{r(1, 13):02}-{r(1, 29):02}T{r(0, 24):02}:{r(0, 60):02}:{r(0, 60):02}Z"
def remove_missing(d):
    """Return a copy of `d` with ``MISSING``-valued dict entries removed.

    Dicts and lists are walked recursively and rebuilt; any other value is
    returned unchanged. Only dict *values* are tested against ``MISSING``.
    """
    if isinstance(d, dict):
        cleaned = {}
        for key, value in d.items():
            if value is MISSING:
                continue
            cleaned[key] = remove_missing(value)
        return cleaned
    if isinstance(d, list):
        return [remove_missing(item) for item in d]
    return d
def generate_case():
    """Build one random client record as a plain dict.

    Some optional string fields may come back as ``None`` or be left out
    altogether; omitted fields are stripped by `remove_missing` before the
    record is returned. Each record carries between one and four skills.
    """
    record = {
        "id": random.randrange(1, 2000),
        "client_name": rand_string(10, 280),
        "sort_index": random.random() * 200,
        "client_phone": null_missing_string(5, 15),
        "location": {
            "latitude": random.random() * 180 - 90,
            "longitude": random.random() * 180,
        },
        "contractor": random.randrange(-100, 2000),
        "upstream_http_referrer": null_missing_string(10, 1050),
        "grecaptcha_response": null_missing_string(
            10, 1050, null_chance=0.05, missing_chance=0.05
        ),
        "last_updated": rand_date(),
        "skills": [
            {
                "subject": rand_string(5, 20),
                "subject_id": index,
                "category": rand_string(5, 20),
                "qual_level": rand_string(5, 20),
                "qual_level_id": random.randrange(2000),
                "qual_level_ranking": random.random() * 20,
            }
            for index in range(random.randrange(1, 5))
        ],
    }
    return remove_missing(record)
class BenchPydantic:
    """Benchmark case pairing orjson (JSON parsing/dumping) with pydantic models."""

    name = "pydantic + orjson"

    def __init__(self):
        # The schema is declared inside the constructor, so the model classes
        # are only created when this benchmark case is instantiated.

        # Coordinates nested under a client.
        class Location(pydantic.BaseModel):
            latitude: float
            longitude: float

        # One entry of a client's `skills` list.
        class Skill(pydantic.BaseModel):
            subject: str
            subject_id: int
            category: str
            qual_level: str
            qual_level_id: int
            qual_level_ranking: float = 0

        # A single client record; only the first three fields are required.
        class Client(pydantic.BaseModel):
            id: int
            client_name: str
            sort_index: float
            client_phone: Optional[str] = None
            location: Optional[Location] = None
            contractor: Optional[int] = None
            upstream_http_referrer: Optional[str] = None
            grecaptcha_response: Optional[str] = None
            last_updated: Optional[datetime] = None
            skills: List[Skill] = []

        # Top-level payload: {"clients": [...]}.
        class Model(pydantic.BaseModel):
            clients: List[Client]

        self.model = Model

    def decode(self, msg):
        """Parse the JSON message with orjson, then validate it into the model."""
        return self.model.parse_obj(orjson.loads(msg))

    def encode(self, obj):
        """Convert the model instance to plain dicts and dump them with orjson."""
        return orjson.dumps(obj.dict())
class BenchMsgspec:
    """Benchmark case using msgspec for both JSON handling and schema validation."""

    name = "msgspec"

    def __init__(self):
        # The schema is declared inside the constructor, so the struct classes
        # are only created when this benchmark case is instantiated.

        # Coordinates nested under a client.
        class Location(msgspec.Struct):
            latitude: float
            longitude: float

        # One entry of a client's `skills` list.
        class Skill(msgspec.Struct):
            subject: str
            subject_id: int
            category: str
            qual_level: str
            qual_level_id: int
            qual_level_ranking: float = 0

        # A single client record; only the first three fields are required.
        class Client(msgspec.Struct):
            id: int
            client_name: str
            sort_index: float
            client_phone: Optional[str] = None
            location: Optional[Location] = None
            contractor: Optional[int] = None
            upstream_http_referrer: Optional[str] = None
            grecaptcha_response: Optional[str] = None
            last_updated: Optional[datetime] = None
            skills: List[Skill] = []

        # Top-level payload: {"clients": [...]}.
        class Model(msgspec.Struct):
            clients: List[Client]

        # Create the encoder and the typed decoder once, so the timed calls
        # in decode()/encode() reuse them.
        self.decoder = msgspec.json.Decoder(Model)
        self.encoder = msgspec.json.Encoder()

    def decode(self, msg):
        """Decode the JSON message into the struct types with the typed decoder."""
        decoded = self.decoder.decode(msg)
        return decoded

    def encode(self, obj):
        """Encode the struct instance to JSON with the shared encoder."""
        encoded = self.encoder.encode(obj)
        return encoded
def main(n):
    """Benchmark JSON decoding and encoding of a payload of `n` random records.

    One payload is generated and serialized up front. Each benchmark case then
    decodes it and re-encodes the decoded object for a number of rounds, and
    the mean time per round is printed in milliseconds.

    Args:
        n: Number of random client records in the payload.
    """
    # Estimate a good number of rounds: fewer rounds for larger payloads.
    # Clamp to at least one round: for n > 10000 the floor division gives 0
    # (and n == 0 cannot be divided by at all), which would otherwise raise
    # ZeroDivisionError when averaging the timings below.
    rounds = max(1, min(1000, 10000 // max(n, 1)))

    print(f"Benchmarking JSON encoding/decoding performance ({n} objects)")

    data = orjson.dumps({"clients": [generate_case() for _ in range(n)]})

    for bench in [BenchPydantic(), BenchMsgspec()]:
        print(f"* {bench.name}:")

        start = time.perf_counter()
        for _ in range(rounds):
            bench.decode(data)
        dec_time = (time.perf_counter() - start) / rounds
        print(f" - Decoding: {(dec_time * 1e3):.3f} ms")

        # Decode once outside the timed region to get an object to encode.
        obj = bench.decode(data)
        start = time.perf_counter()
        for _ in range(rounds):
            bench.encode(obj)
        enc_time = (time.perf_counter() - start) / rounds
        print(f" - Encoding: {(enc_time * 1e3):.3f} ms")

        total = enc_time + dec_time
        print(f" - Total: {(total * 1e3):.3f} ms")
if __name__ == "__main__":
    # Command-line entry point: read the object count and run the benchmark.
    cli = argparse.ArgumentParser("bench-pydantic", description=__doc__)
    cli.add_argument("-n", type=int, default=1000, help="How many objects to bench")
    main(cli.parse_args().n)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results on my machine: