Last active
November 28, 2025 12:30
-
-
Save jcrist/d62f450594164d284fbea957fd48b743 to your computer and use it in GitHub Desktop.
A quick benchmark comparing msgspec (https://github.com/jcrist/msgspec), pydantic v1, and pydantic v2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """A quick benchmark comparing the performance of: | |
| - msgspec: https://github.com/jcrist/msgspec | |
| - pydantic V1: https://docs.pydantic.dev/1.10/ | |
| - pydantic V2: https://docs.pydantic.dev/dev-v2/ | |
| The benchmark is modified from the one in the msgspec repo here: | |
| https://github.com/jcrist/msgspec/blob/main/benchmarks/bench_validation.py | |
| I make no claims that it's illustrative of all use cases. I wrote this up | |
| mostly to get an understanding of how msgspec's performance compares with that | |
| of pydantic V2. | |
| """ | |
| from __future__ import annotations | |
| import datetime | |
| import random | |
| import string | |
| import timeit | |
| import uuid | |
| from typing import List, Literal, Union, Annotated | |
| import msgspec | |
| import pydantic | |
| import pydantic.v1 | |
| def make_filesystem_data(capacity): | |
| """Generate a tree structure representing a fake filesystem""" | |
| UTC = datetime.timezone.utc | |
| DATE_2018 = datetime.datetime(2018, 1, 1, tzinfo=UTC) | |
| DATE_2023 = datetime.datetime(2023, 1, 1, tzinfo=UTC) | |
| UUIDS = [str(uuid.uuid4()) for _ in range(30)] | |
| rand = random.Random(42) | |
| def randdt(min, max): | |
| ts = rand.randint(min.timestamp(), max.timestamp()) | |
| return datetime.datetime.fromtimestamp(ts).replace(tzinfo=UTC) | |
| def randstr(min=None, max=None): | |
| if max is not None: | |
| min = rand.randint(min, max) | |
| return "".join(rand.choices(string.ascii_letters, k=min)) | |
| def make_node(is_dir): | |
| nonlocal capacity | |
| name = randstr(4, 30) | |
| created_by = rand.choice(UUIDS) | |
| created_at = randdt(DATE_2018, DATE_2023) | |
| updated_at = randdt(created_at, DATE_2023) | |
| data = { | |
| "type": "directory" if is_dir else "file", | |
| "name": name, | |
| "created_by": created_by, | |
| "created_at": created_at.isoformat(), | |
| "updated_at": updated_at.isoformat(), | |
| } | |
| if is_dir: | |
| n = min(rand.randint(0, 30), capacity) | |
| capacity -= n | |
| data["contents"] = [make_node(rand.random() > 0.9) for _ in range(n)] | |
| else: | |
| data["nbytes"] = rand.randint(0, 1000000) | |
| return data | |
| capacity -= 1 | |
| out = make_node(True) | |
| while capacity: | |
| capacity -= 1 | |
| out["contents"].append(make_node(rand.random() > 0.9)) | |
| return out | |
| def bench(raw_data, dumps, loads, convert): | |
| msg = convert(raw_data) | |
| json_data = dumps(msg) | |
| msg2 = loads(json_data) | |
| assert msg == msg2 | |
| del msg2 | |
| timer = timeit.Timer("func(data)", setup="", globals={"func": dumps, "data": msg}) | |
| n, t = timer.autorange() | |
| dumps_time = t / n | |
| timer = timeit.Timer( | |
| "func(data)", setup="", globals={"func": loads, "data": json_data} | |
| ) | |
| n, t = timer.autorange() | |
| loads_time = t / n | |
| return dumps_time, loads_time | |
| ############################################################################# | |
| # msgspec # | |
| ############################################################################# | |
| class File(msgspec.Struct, tag="file"): | |
| name: Annotated[str, msgspec.Meta(min_length=1)] | |
| created_by: uuid.UUID | |
| created_at: datetime.datetime | |
| updated_at: datetime.datetime | |
| nbytes: Annotated[int, msgspec.Meta(ge=0)] | |
| class Directory(msgspec.Struct, tag="directory"): | |
| name: Annotated[str, msgspec.Meta(min_length=1)] | |
| created_by: uuid.UUID | |
| created_at: datetime.datetime | |
| updated_at: datetime.datetime | |
| contents: List[Union[File, Directory]] | |
| def bench_msgspec(data): | |
| enc = msgspec.json.Encoder() | |
| dec = msgspec.json.Decoder(Directory) | |
| def convert(data): | |
| return msgspec.convert(data, Directory) | |
| return bench(data, enc.encode, dec.decode, convert) | |
| ############################################################################# | |
| # pydantic V2 # | |
| ############################################################################# | |
| class FileModel(pydantic.BaseModel): | |
| type: Literal["file"] = "file" | |
| name: str = pydantic.Field(min_length=1) | |
| created_by: uuid.UUID | |
| created_at: datetime.datetime | |
| updated_at: datetime.datetime | |
| nbytes: pydantic.NonNegativeInt | |
| class DirectoryModel(pydantic.BaseModel): | |
| type: Literal["directory"] = "directory" | |
| name: str = pydantic.Field(min_length=1) | |
| created_by: uuid.UUID | |
| created_at: datetime.datetime | |
| updated_at: datetime.datetime | |
| contents: List[Union[DirectoryModel, FileModel]] | |
| def bench_pydantic_v2(data): | |
| return bench( | |
| data, | |
| lambda p: p.model_dump_json(), | |
| DirectoryModel.model_validate_json, | |
| lambda data: DirectoryModel(**data), | |
| ) | |
| ############################################################################# | |
| # pydantic V1 # | |
| ############################################################################# | |
| class FileModelV1(pydantic.v1.BaseModel): | |
| type: Literal["file"] = "file" | |
| name: str = pydantic.v1.Field(min_length=1) | |
| created_by: uuid.UUID | |
| created_at: datetime.datetime | |
| updated_at: datetime.datetime | |
| nbytes: pydantic.v1.NonNegativeInt | |
| class DirectoryModelV1(pydantic.v1.BaseModel): | |
| type: Literal["directory"] = "directory" | |
| name: str = pydantic.v1.Field(min_length=1) | |
| created_by: uuid.UUID | |
| created_at: datetime.datetime | |
| updated_at: datetime.datetime | |
| contents: List[Union[DirectoryModelV1, FileModelV1]] | |
| def bench_pydantic_v1(data): | |
| return bench( | |
| data, | |
| lambda p: p.json(), | |
| DirectoryModelV1.parse_raw, | |
| lambda data: DirectoryModelV1(**data), | |
| ) | |
| if __name__ == "__main__": | |
| N = 1000 | |
| data = make_filesystem_data(N) | |
| ms_dumps, ms_loads = bench_msgspec(data) | |
| ms_total = ms_dumps + ms_loads | |
| title = f"msgspec {msgspec.__version__}" | |
| print(title) | |
| print("-" * len(title)) | |
| print(f"dumps: {ms_dumps * 1e6:.1f} us") | |
| print(f"loads: {ms_loads * 1e6:.1f} us") | |
| print(f"total: {ms_total * 1e6:.1f} us") | |
| for title, func in [ | |
| (f"pydantic {pydantic.__version__}", bench_pydantic_v2), | |
| (f"pydantic {pydantic.v1.__version__}", bench_pydantic_v1) | |
| ]: | |
| print() | |
| print(title) | |
| print("-" * len(title)) | |
| dumps, loads = func(data) | |
| total = dumps + loads | |
| print(f"dumps: {dumps * 1e6:.1f} us ({dumps / ms_dumps:.1f}x slower)") | |
| print(f"loads: {loads * 1e6:.1f} us ({loads / ms_loads:.1f}x slower)") | |
| print(f"total: {total * 1e6:.1f} us ({total / ms_total:.1f}x slower)") |
Building on top of the previous comment:
msgspec 0.20.0
--------------
dumps: 190.1 us
loads: 551.2 us
total: 741.3 us
pydantic 2.12.5
---------------
dumps: 2509.5 us (13.2x slower)
loads: 8589.7 us (15.6x slower)
total: 11099.3 us (15.0x slower)
pydantic 1.10.21
----------------
dumps: 14967.6 us (78.7x slower)
loads: 64642.1 us (117.3x slower)
total: 79609.6 us (107.4x slower)
duct tapeed py dataclass
------------------------
dumps: 28349.2 us (149.1x slower)
loads: 3176.0 us (5.8x slower)
total: 31525.3 us (42.5x slower)

"""A quick benchmark comparing the performance of:
- msgspec: https://github.com/jcrist/msgspec
- pydantic V1: https://docs.pydantic.dev/1.10/
- pydantic V2: https://docs.pydantic.dev/dev-v2/
- python dataclasses: https://docs.python.org/3.14/library/dataclasses.html
The benchmark is modified from the one in the msgspec repo here:
https://github.com/jcrist/msgspec/blob/main/benchmarks/bench_validation.py
I make no claims that it's illustrative of all use cases. I wrote this up
mostly to get an understanding of how msgspec's performance compares with that
of pydantic V2.
"""
import datetime
import json
import random
import string
import timeit
import uuid
from dataclasses import dataclass, field, asdict
from typing import List, Literal, Union, Annotated
import msgspec
import pydantic
import pydantic.v1
def make_filesystem_data(capacity):
    """Generate a tree of nested dicts representing a fake filesystem.

    Parameters
    ----------
    capacity : int
        Total number of nodes (files + directories) to generate; the
        root directory counts as one node.

    Returns
    -------
    dict
        The root directory node. Directory nodes carry a "contents"
        list of child nodes; file nodes carry an "nbytes" size.
    """
    UTC = datetime.timezone.utc
    DATE_2018 = datetime.datetime(2018, 1, 1, tzinfo=UTC)
    DATE_2023 = datetime.datetime(2023, 1, 1, tzinfo=UTC)
    UUIDS = [str(uuid.uuid4()) for _ in range(30)]
    # Seeded RNG so the tree *structure* is reproducible across runs
    # (the UUIDs above are not seeded, so "created_by" values differ).
    rand = random.Random(42)

    def randdt(lo, hi):
        """Return a random timezone-aware datetime in [lo, hi]."""
        ts = rand.randint(int(lo.timestamp()), int(hi.timestamp()))
        # Bug fix: the original `fromtimestamp(ts).replace(tzinfo=UTC)`
        # built a *local* wall-clock time and then relabeled it as UTC,
        # shifting the instant (and possibly leaving [lo, hi]) on any
        # machine whose local timezone is not UTC. Constructing the
        # datetime directly in UTC is correct everywhere.
        return datetime.datetime.fromtimestamp(ts, tz=UTC)

    def randstr(lo=None, hi=None):
        """Return a random ASCII-letter string of length lo (or lo..hi)."""
        if hi is not None:
            lo = rand.randint(lo, hi)
        return "".join(rand.choices(string.ascii_letters, k=lo))

    def make_node(is_dir):
        # Draws from the shared `capacity` budget when creating children.
        nonlocal capacity
        name = randstr(4, 30)
        created_by = rand.choice(UUIDS)
        created_at = randdt(DATE_2018, DATE_2023)
        updated_at = randdt(created_at, DATE_2023)
        data = {
            "type": "directory" if is_dir else "file",
            "name": name,
            "created_by": created_by,
            "created_at": created_at.isoformat(),
            "updated_at": updated_at.isoformat(),
        }
        if is_dir:
            n = min(rand.randint(0, 30), capacity)
            capacity -= n
            data["contents"] = [make_node(rand.random() > 0.9) for _ in range(n)]
        else:
            data["nbytes"] = rand.randint(0, 1000000)
        return data

    capacity -= 1  # the root directory itself
    out = make_node(True)
    # Top up the root's contents until the full node budget is spent.
    while capacity:
        capacity -= 1
        out["contents"].append(make_node(rand.random() > 0.9))
    return out
def bench(raw_data, dumps, loads, convert):
    """Time a serialize/deserialize pair on ``raw_data``.

    ``convert`` turns the raw dict tree into the library's native
    message type; ``dumps``/``loads`` round-trip that message through
    JSON.  Returns ``(dumps_seconds, loads_seconds)`` per-call averages.
    """
    message = convert(raw_data)
    payload = dumps(message)
    # Sanity check: the encode/decode pair must round-trip losslessly
    # before we bother timing it.
    assert loads(payload) == message

    def _per_call(func, arg):
        # timeit.autorange picks an iteration count and returns
        # (count, total_seconds); we report the per-call average.
        count, total = timeit.Timer(
            "func(data)", globals={"func": func, "data": arg}
        ).autorange()
        return total / count

    return _per_call(dumps, message), _per_call(loads, payload)
#############################################################################
# msgspec #
#############################################################################
# A single file node. `tag="file"` tells msgspec to emit/expect a "type"
# discriminator field with the value "file" when this struct appears in
# the tagged Union below.
class File(msgspec.Struct, tag="file"):
    name: Annotated[str, msgspec.Meta(min_length=1)]  # non-empty file name
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    nbytes: Annotated[int, msgspec.Meta(ge=0)]  # file size in bytes, >= 0
# A directory node; recursive via the "Directory" forward reference in
# `contents`. The "directory" tag discriminates it from File on decode.
class Directory(msgspec.Struct, tag="directory"):
    name: Annotated[str, msgspec.Meta(min_length=1)]  # non-empty name
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    contents: List[Union[File, "Directory"]]  # children (files or subdirs)
def bench_msgspec(data):
    """Benchmark msgspec encode/decode/convert on ``data``."""
    decoder = msgspec.json.Decoder(Directory)
    encoder = msgspec.json.Encoder()
    return bench(
        data,
        encoder.encode,
        decoder.decode,
        lambda raw: msgspec.convert(raw, Directory),
    )
#############################################################################
# pydantic V2 #
#############################################################################
class FileModel(pydantic.BaseModel):
    """A file node (pydantic v2)."""

    type: Literal["file"] = "file"  # discriminates file vs directory
    name: str = pydantic.Field(min_length=1)  # non-empty file name
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    nbytes: pydantic.NonNegativeInt  # file size in bytes, >= 0
class DirectoryModel(pydantic.BaseModel):
    """A directory node (pydantic v2); recursive via ``contents``."""

    type: Literal["directory"] = "directory"  # discriminator value
    name: str = pydantic.Field(min_length=1)  # non-empty name
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Self-referential through the "DirectoryModel" string forward ref.
    contents: List[Union["DirectoryModel", FileModel]]
# Rebuild the model to resolve forward references
DirectoryModel.model_rebuild()
def bench_pydantic_v2(data):
    """Benchmark pydantic v2 serialize/validate on ``data``."""
    def encode(model):
        return model.model_dump_json()

    def convert(raw):
        return DirectoryModel(**raw)

    return bench(data, encode, DirectoryModel.model_validate_json, convert)
#############################################################################
# pydantic V1 #
#############################################################################
class FileModelV1(pydantic.v1.BaseModel):
    """A file node (pydantic v1 compatibility API)."""

    type: Literal["file"] = "file"  # discriminates file vs directory
    name: str = pydantic.v1.Field(min_length=1)  # non-empty file name
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    nbytes: pydantic.v1.NonNegativeInt  # file size in bytes, >= 0
class DirectoryModelV1(pydantic.v1.BaseModel):
    """A directory node (pydantic v1); recursive via ``contents``."""

    type: Literal["directory"] = "directory"  # discriminator value
    name: str = pydantic.v1.Field(min_length=1)  # non-empty name
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Self-referential through the "DirectoryModelV1" string forward ref.
    contents: List[Union["DirectoryModelV1", FileModelV1]]
# Update forward references for pydantic V1
DirectoryModelV1.update_forward_refs()
def bench_pydantic_v1(data):
    """Benchmark pydantic v1 serialize/validate on ``data``."""
    def encode(model):
        return model.json()

    def convert(raw):
        return DirectoryModelV1(**raw)

    return bench(data, encode, DirectoryModelV1.parse_raw, convert)
#############################################################################
# Python dataclass #
#############################################################################
@dataclass
class FileDataclass:
    """A file node as a plain dataclass with hand-rolled validation."""

    name: str
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    nbytes: int
    type: str = "file"

    def __post_init__(self):
        # Mirror the constraints the other libraries enforce declaratively.
        if len(self.name) < 1:
            raise ValueError("name must have min_length=1")
        if self.nbytes < 0:
            raise ValueError("nbytes must be >= 0")


@dataclass
class DirectoryDataclass:
    """A directory node as a plain dataclass; recursive via ``contents``."""

    name: str
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    contents: List[Union["DirectoryDataclass", FileDataclass]] = field(
        default_factory=list
    )
    type: str = "directory"

    def __post_init__(self):
        if len(self.name) < 1:
            raise ValueError("name must have min_length=1")
def _json_serializer(obj):
"""JSON serializer for objects not serializable by default json code"""
if isinstance(obj, (datetime.datetime, datetime.date)):
return obj.isoformat()
elif isinstance(obj, uuid.UUID):
return str(obj)
raise TypeError(f"Type {type(obj)} not serializable")
def _coerce_uuid(value):
    """Accept either a ``uuid.UUID`` or its string form."""
    return uuid.UUID(value) if isinstance(value, str) else value


def _coerce_datetime(value):
    """Accept either a ``datetime`` or its ISO-8601 string form."""
    return datetime.datetime.fromisoformat(value) if isinstance(value, str) else value


def _dict_to_dataclass(data):
    """Recursively convert a plain dict tree into dataclass nodes.

    ``data`` is a node produced by ``make_filesystem_data`` (or parsed
    back from JSON): a "file" node becomes a ``FileDataclass``, anything
    else is treated as a directory and becomes a ``DirectoryDataclass``
    with its children converted recursively.

    The original body repeated the three string-coercion expressions in
    both branches; the shared fields are now built once.
    """
    common = dict(
        type=data["type"],
        name=data["name"],
        created_by=_coerce_uuid(data["created_by"]),
        created_at=_coerce_datetime(data["created_at"]),
        updated_at=_coerce_datetime(data["updated_at"]),
    )
    if data["type"] == "file":
        return FileDataclass(nbytes=data["nbytes"], **common)
    # directory
    return DirectoryDataclass(
        contents=[_dict_to_dataclass(item) for item in data["contents"]],
        **common,
    )
def bench_dataclass(data):
    """Benchmark plain-dataclass + stdlib ``json`` handling of ``data``."""
    def encode(obj):
        return json.dumps(asdict(obj), default=_json_serializer)

    def decode(payload):
        return _dict_to_dataclass(json.loads(payload))

    return bench(data, encode, decode, _dict_to_dataclass)
if __name__ == "__main__":
    N = 1000  # number of nodes in the generated filesystem tree
    data = make_filesystem_data(N)

    # msgspec is the baseline the other libraries are compared against.
    ms_dumps, ms_loads = bench_msgspec(data)
    ms_total = ms_dumps + ms_loads
    title = f"msgspec {msgspec.__version__}"
    print(title)
    print("-" * len(title))
    print(f"dumps: {ms_dumps * 1e6:.1f} us")
    print(f"loads: {ms_loads * 1e6:.1f} us")
    print(f"total: {ms_total * 1e6:.1f} us")

    for title, func in [
        (f"pydantic {pydantic.__version__}", bench_pydantic_v2),
        (f"pydantic {pydantic.v1.__version__}", bench_pydantic_v1),
        # Typo fix in the printed label: "duct tapeed" -> "duct taped".
        ("duct taped py dataclass", bench_dataclass),
    ]:
        print()
        print(title)
        print("-" * len(title))
        dumps, loads = func(data)
        total = dumps + loads
        print(f"dumps: {dumps * 1e6:.1f} us ({dumps / ms_dumps:.1f}x slower)")
        print(f"loads: {loads * 1e6:.1f} us ({loads / ms_loads:.1f}x slower)")
        print(f"total: {total * 1e6:.1f} us ({total / ms_total:.1f}x slower)")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I get an error, maybe