jcrist/bench.py

samuelcolvin · 2023-06-28T05:35:30Z

Definitely worth either removing pydantic v1 or using the actual package - the pydantic.v1 code is not compiled with Cython, unlike what you get by installing pydantic==1.10.9.

jcrist · 2023-06-28T05:46:29Z

Thanks for the feedback Samuel! That's fair, although using the compiled version for V1 seems to have a minimal improvement on this benchmark:

msgspec vs pydantic V1 benchmark (using cython compiled pydantic V1 package)

from __future__ import annotations

import datetime
import random
import string
import timeit
import uuid
from typing import List, Literal, Union, Annotated

import msgspec
import pydantic


def make_filesystem_data(capacity):
    """Generate a tree structure representing a fake filesystem.

    Args:
        capacity: Total number of nodes (files and directories, the root
            directory included) to generate. For values below 1 only the
            empty root directory is returned.

    Returns:
        A nested ``dict`` describing the root directory. Every node has the
        keys ``type``, ``name``, ``created_by``, ``created_at`` and
        ``updated_at`` (the timestamps are ISO 8601 strings in UTC);
        directories also carry ``contents`` (a list of child nodes) and files
        carry ``nbytes``.
    """
    UTC = datetime.timezone.utc
    DATE_2018 = datetime.datetime(2018, 1, 1, tzinfo=UTC)
    DATE_2023 = datetime.datetime(2023, 1, 1, tzinfo=UTC)
    # uuid4() is not driven by `rand`, so the owner ids differ between runs.
    UUIDS = [str(uuid.uuid4()) for _ in range(30)]

    # Fixed seed: tree shape, names, sizes and timestamps are reproducible.
    rand = random.Random(42)

    def randdt(lo, hi):
        # timestamp() returns a float; randint() needs real ints (float
        # bounds raise TypeError on Python 3.12+).
        ts = rand.randint(int(lo.timestamp()), int(hi.timestamp()))
        # Convert straight to UTC. Going through naive local time and then
        # relabelling it as UTC shifts the value by the machine's UTC offset
        # and can push `created_at` past DATE_2023, leaving an empty range
        # for the subsequent `updated_at` draw.
        return datetime.datetime.fromtimestamp(ts, tz=UTC)

    def randstr(lo=None, hi=None):
        # With both bounds given, pick a random length in [lo, hi];
        # otherwise `lo` is used as the exact length.
        if hi is not None:
            lo = rand.randint(lo, hi)
        return "".join(rand.choices(string.ascii_letters, k=lo))

    def make_node(is_dir):
        nonlocal capacity

        name = randstr(4, 30)
        created_by = rand.choice(UUIDS)
        created_at = randdt(DATE_2018, DATE_2023)
        updated_at = randdt(created_at, DATE_2023)
        data = {
            "type": "directory" if is_dir else "file",
            "name": name,
            "created_by": created_by,
            "created_at": created_at.isoformat(),
            "updated_at": updated_at.isoformat(),
        }
        if is_dir:
            # Reserve the children from the shared budget before recursing so
            # nested directories can never overdraw it.
            n = min(rand.randint(0, 30), capacity)
            capacity -= n
            data["contents"] = [make_node(rand.random() > 0.9) for _ in range(n)]
        else:
            data["nbytes"] = rand.randint(0, 1000000)
        return data

    # The root directory itself consumes one unit of capacity.
    capacity -= 1
    out = make_node(True)
    # Spend whatever budget is left on extra top-level entries.
    while capacity:
        capacity -= 1
        out["contents"].append(make_node(rand.random() > 0.9))
    return out


def bench(raw_data, dumps, loads, convert):
    """Time one library's JSON encode and decode functions.

    Args:
        raw_data: Plain Python data handed to ``convert``.
        dumps: Callable turning the converted message into JSON.
        loads: Callable turning that JSON back into a message.
        convert: Callable building the library's typed message from
            ``raw_data``.

    Returns:
        A ``(dumps_time, loads_time)`` tuple of seconds per call.
    """
    typed = convert(raw_data)
    encoded = dumps(typed)
    # Round-trip sanity check before any timing is taken.
    assert typed == loads(encoded)

    def seconds_per_call(func, argument):
        # autorange() picks the loop count itself and reports the total time.
        timer = timeit.Timer(
            "func(data)", setup="", globals={"func": func, "data": argument}
        )
        loops, elapsed = timer.autorange()
        return elapsed / loops

    dumps_time = seconds_per_call(dumps, typed)
    loads_time = seconds_per_call(loads, encoded)
    return dumps_time, loads_time


#############################################################################
#  msgspec                                                                  #
#############################################################################


class File(msgspec.Struct, tag="file"):
    """msgspec schema for a file node in the generated filesystem tree.

    Declared with ``tag="file"`` so it can be told apart from ``Directory``
    inside the ``Union`` used by ``Directory.contents``.
    """

    # Must contain at least one character.
    name: Annotated[str, msgspec.Meta(min_length=1)]
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Must be zero or greater.
    nbytes: Annotated[int, msgspec.Meta(ge=0)]


class Directory(msgspec.Struct, tag="directory"):
    """msgspec schema for a directory node; the root of the benchmark data."""

    # Must contain at least one character.
    name: Annotated[str, msgspec.Meta(min_length=1)]
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Child nodes: files or nested directories (recursive type).
    contents: List[Union[File, Directory]]


def bench_msgspec(data):
    """Benchmark msgspec on ``data``; returns ``bench``'s (dumps, loads) times."""
    # Encoder/decoder are created once, outside the timed calls.
    encoder = msgspec.json.Encoder()
    decoder = msgspec.json.Decoder(Directory)
    return bench(
        data,
        encoder.encode,
        decoder.decode,
        lambda raw: msgspec.convert(raw, Directory),
    )


#############################################################################
#  pydantic V1                                                              #
#############################################################################


class FileModel(pydantic.BaseModel):
    """pydantic model for a file node in the generated filesystem tree."""

    # Fixed literal that distinguishes files from directories.
    type: Literal["file"] = "file"
    # Must contain at least one character.
    name: str = pydantic.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Integer constrained to be non-negative.
    nbytes: pydantic.NonNegativeInt


class DirectoryModel(pydantic.BaseModel):
    """pydantic model for a directory node; the root of the benchmark data."""

    # Fixed literal that distinguishes directories from files.
    type: Literal["directory"] = "directory"
    # Must contain at least one character.
    name: str = pydantic.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Child nodes: nested directories or files (recursive type).
    contents: List[Union[DirectoryModel, FileModel]]


def bench_pydantic_v1(data):
    """Benchmark ``DirectoryModel`` through the ``.json()`` / ``parse_raw`` API.

    Returns ``bench``'s ``(dumps_time, loads_time)`` tuple.
    """

    def dumps(model):
        return model.json()

    def convert(raw):
        return DirectoryModel(**raw)

    return bench(data, dumps, DirectoryModel.parse_raw, convert)


if __name__ == "__main__":
    # msgspec runs first: its timings are the baseline for the "x slower"
    # ratios printed for every other library.
    N = 1000
    data = make_filesystem_data(N)
    ms_dumps, ms_loads = bench_msgspec(data)
    ms_total = ms_dumps + ms_loads

    title = f"msgspec {msgspec.__version__}"
    print(title, "-" * len(title), sep="\n")
    print(f"dumps: {ms_dumps * 1e6:.1f} us")
    print(f"loads: {ms_loads * 1e6:.1f} us")
    print(f"total: {ms_total * 1e6:.1f} us")

    contenders = ((f"pydantic {pydantic.__version__}", bench_pydantic_v1),)
    for title, func in contenders:
        # The heading goes out before the (slow) benchmark starts.
        print("", title, "-" * len(title), sep="\n")
        dumps, loads = func(data)
        total = dumps + loads
        print(f"dumps: {dumps * 1e6:.1f} us ({dumps / ms_dumps:.1f}x slower)")
        print(f"loads: {loads * 1e6:.1f} us ({loads / ms_loads:.1f}x slower)")
        print(f"total: {total * 1e6:.1f} us ({total / ms_total:.1f}x slower)")

Output:

msgspec 0.16.0
--------------
dumps: 178.6 us
loads: 497.8 us
total: 676.3 us

pydantic 1.10.9
---------------
dumps: 18206.4 us (102.0x slower)
loads: 55122.1 us (110.7x slower)
total: 73328.5 us (108.4x slower)

Either way, the main point of the benchmark in this gist was to compare pydantic V2 and msgspec, happy to remove v1 if it's a distraction.

samuelcolvin · 2023-06-28T06:20:01Z

Up to you, just making the observation really.

legraphista · 2023-07-05T07:52:10Z

quick update on the numbers as pydantic v2 became stable:

msgspec 0.16.0
--------------
dumps: 179.3 us
loads: 477.0 us
total: 656.3 us

pydantic 2.0.1
--------------
dumps: 4292.0 us (23.9x slower)
loads: 6666.6 us (14.0x slower)
total: 10958.6 us (16.7x slower)

pydantic 1.10.11
----------------
dumps: 24176.3 us (134.8x slower)
loads: 73471.1 us (154.0x slower)
total: 97647.4 us (148.8x slower)

nrbnlulu · 2024-06-18T04:46:14Z

Fix python 3.12
https://gist.github.com/jcrist/d62f450594164d284fbea957fd48b743#file-bench-py-L38
should be

ts = rand.randint(int(min.timestamp()), int(max.timestamp()))

nrbnlulu · 2024-06-18T04:47:46Z

BTW @samuelcolvin You said that

Although msgspec and pydantic have different aims and features

What are the different aims if I may ask?

nrbnlulu · 2024-06-18T08:21:38Z

Leaving here my benchmark results

import json
import timeit
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Iterator, TypedDict

import mimesis
import msgspec
import pydantic
from pydantic.type_adapter import TypeAdapter

provider = mimesis.Generic()


def create_user() -> dict:
    """Build one fake user record, possibly with a nested ``friend`` record.

    The ``friend`` value is either ``None`` or another record produced by a
    recursive call, decided by ``provider.development.boolean()``.
    """
    person = provider.person
    flags = provider.development
    user = {
        "id": person.identifier(),
        "username": person.username(),
        "password": person.password(),
        "email": person.email(),
        "blog": provider.internet.url(),
        "first_name": person.name(),
        "last_name": person.last_name(),
        "is_active": flags.boolean(),
        "is_staff": flags.boolean(),
        "is_superuser": flags.boolean(),
        "date_joined": person.birthdate(),
        "last_login": person.birthdate(),
    }
    # Decided last, after every other field has been generated.
    if flags.boolean():
        user["friend"] = create_user()
    else:
        user["friend"] = None
    return user



# Benchmark fixture: 100000 top-level user records, encoded to JSON once so
# that both decoders below are fed exactly the same bytes.
data = [create_user() for _ in range(100000)]
data_raw = msgspec.json.encode(data)


class MsgSpecUser(msgspec.Struct):
    """msgspec schema mirroring the keys of the dict built by ``create_user``."""

    id: str
    username: str
    password: str
    email: str
    blog: str
    first_name: str
    last_name: str
    is_active: bool
    is_staff: bool
    is_superuser: bool
    date_joined: str
    last_login: str
    # Optional nested user (recursive type, hence the string annotation).
    friend: "MsgSpecUser | None"

class PydanticUser(pydantic.BaseModel):
    """pydantic model with the same fields as ``MsgSpecUser``."""

    id: str
    username: str
    password: str
    email: str
    blog: str
    first_name: str
    last_name: str
    is_active: bool
    is_staff: bool
    is_superuser: bool
    date_joined: str
    last_login: str
    # Optional nested user (recursive type, hence the string annotation).
    friend: "PydanticUser | None"

@dataclass
class TimeitResult:
    """Label and elapsed time of one timed task."""

    # Name of the task being timed.
    task: str
    # Elapsed seconds; None until a measurement has been stored.
    seconds: float | None = None

@contextmanager
def time_it(task: str) -> Iterator[TimeitResult]:
    """Time the body of a ``with`` block.

    Yields a ``TimeitResult`` for ``task``. When the block finishes normally
    the elapsed time is printed and then stored in ``seconds``; if the block
    raises, nothing is printed or stored.
    """
    started = timeit.default_timer()
    outcome = TimeitResult(task=task)
    yield outcome
    elapsed = timeit.default_timer() - started
    print(f"{task} took {elapsed:1f} seconds")
    outcome.seconds = elapsed

def match_precentage(pydantic: float, msgspec: float) -> str:
    """Describe which library was faster and by what percentage.

    Args:
        pydantic: Elapsed seconds measured for pydantic.
        msgspec: Elapsed seconds measured for msgspec.

    Returns:
        A sentence naming the faster library; the percentage is relative to
        the faster (smaller) time. A tie is reported for MsgSpec.
    """
    pydantic_wins = pydantic < msgspec
    if not pydantic_wins:
        return f"MsgSpec is faster by %{((pydantic - msgspec) / msgspec) * 100:1f}"
    return f"Pydantic is faster by %{((msgspec - pydantic) / pydantic) * 100:1f}"

# ------------ decode ------------
# The decoder and the TypeAdapter are built outside the timed blocks, so only
# the decode/validate call itself is measured.
msgspec_decoder = msgspec.json.Decoder(list[MsgSpecUser])

with time_it("msgspec_decode") as msgspec_res:
    msgspec_data = msgspec_decoder.decode(data_raw)

users_ta = TypeAdapter(list[PydanticUser])

with time_it("pydantic_decode") as pydantic_res:
    pydantic_data = users_ta.validate_json(data_raw)

print(f"DECODE: {match_precentage(pydantic_res.seconds, msgspec_res.seconds)}")

# ------------ encode ------------

msgspec_encoder = msgspec.json.Encoder()


# Each library encodes the objects it produced in the decode step above;
# `msgspec_res` / `pydantic_res` are rebound to the encode measurements.
with time_it("msgspec_encode") as msgspec_res:
    msgspec_data_raw = msgspec_encoder.encode(msgspec_data)

with time_it("pydantic_encode") as pydantic_res:
    pydantic_data_raw = users_ta.dump_json(pydantic_data)


print(f"ENCODE: {match_precentage(pydantic_res.seconds, msgspec_res.seconds)}")

msgspec_decode took 0.162186 seconds
pydantic_decode took 1.120969 seconds
DECODE: MsgSpec is faster by %591.163625
msgspec_encode took 0.044265 seconds
pydantic_encode took 0.223537 seconds
ENCODE: MsgSpec is faster by %404.997775

nrbnlulu · 2024-06-18T08:46:08Z

Created another benchmark that uses custom types
https://gist.github.com/nrbnlulu/e983ab23bed5806cff5bb8ba97434d6d

results are quite surprising

msgspec_decode took 0.050580 seconds
pydantic_decode took 0.150948 seconds
DECODE: MsgSpec is faster by %198.433165
msgspec_encode took 0.015060 seconds
pydantic_encode took 0.060530 seconds
ENCODE: MsgSpec is faster by %301.920586

MSDehghan · 2024-10-01T08:54:00Z

Updated results with python 3.12 and latest available versions of pydantic and msgspec:

msgspec 0.18.6
--------------
dumps: 178.8 us
loads: 509.6 us
total: 688.4 us

pydantic 2.9.2
--------------
dumps: 9064.2 us (50.7x slower)
loads: 10563.7 us (20.7x slower)
total: 19627.9 us (28.5x slower)

pydantic 1.10.18
----------------
dumps: 13753.4 us (76.9x slower)
loads: 53922.3 us (105.8x slower)
total: 67675.7 us (98.3x slower)

raceychan · 2025-01-27T20:47:29Z

I tested this myself and did not see a 10x+ difference.
In my test, msgspec is about 70% faster than pydantic.

    @dataclass
    class Item:
        """One line item of an ``Order``."""

        product_id: int
        name: str
        quantity: int
        price: float

    @dataclass
    class Order:
        """Order payload decoded by both libraries in this comparison."""

        order_id: str
        customer_name: str
        customer_email: str
        # Nested dataclass instances, one per purchased product.
        items: List[Item]
        shipping_address: str
        payment_status: str
        total_amount: float
        # Only field with a default; may be omitted from the payload.
        discount: Optional[float] = 0.0

    # Step 2: Example body data in dictionary format
    order_data = {
        "order_id": "ORD12345",
        "customer_name": "Jane Doe",
        "customer_email": "[email protected]",
        "items": [
            {"product_id": 101, "name": "Laptop", "quantity": 1, "price": 1200.00},
            {"product_id": 102, "name": "Mouse", "quantity": 2, "price": 25.50},
            {"product_id": 103, "name": "Keyboard", "quantity": 1, "price": 75.75},
        ],
        "shipping_address": "1234 Elm Street, Springfield, IL",
        "payment_status": "Paid",
        "total_amount": 1400.75,
        "discount": 100.0,  # optional discount
    }

    # Encode once up front so both libraries decode exactly the same bytes.
    data = json.dumps(order_data).encode()

    # Number of decode calls timed per library.
    rounds = 10000

    # pydantic: the TypeAdapter is built outside the timed loop.
    order_adapter = TypeAdapter(Order)
    p1 = perf_counter()

    for _ in range(rounds):
        porder = order_adapter.validate_json(data)
    p2 = perf_counter()

    # Total seconds spent in the pydantic loop.
    r1 = round(p2 - p1, 6)

    # msgspec: `decode` is presumably msgspec.json.decode; its import is not
    # shown in this snippet — TODO confirm.
    p1 = perf_counter()

    for _ in range(rounds):
        morder = decode(data, type=Order)
    p2 = perf_counter()

    # Total seconds spent in the msgspec loop.
    r2 = round(p2 - p1, 6)

    print(f"pydantic costs {r1} seconds")
    print(f"msgspec costs {r2} seconds")
    print(f"pydantic is {round(r1/r2,3)}x slower")

pydantic costs 0.023508 seconds
msgspec costs 0.013425 seconds
pydantic is 1.751x slower

PYDANTIC_VERSION = '2.10.6'
MSGSPEC_VERSION = '0.19.0'

========== Update =========

When msgspec.Struct and pydantic.BaseModel are used to define the models instead of dataclasses, msgspec is much more performant than
pydantic.

Interestingly, it is even faster to use a TypeAdapter(list[dataclasses.dataclass]) than a TypeAdapter(list[pydantic.BaseModel])

TKaluza · 2025-06-07T22:33:40Z

I get an error, maybe

"""A quick benchmark comparing the performance of:

- msgspec: https://github.com/jcrist/msgspec
- pydantic V1: https://docs.pydantic.dev/1.10/
- pydantic V2: https://docs.pydantic.dev/dev-v2/

The benchmark is modified from the one in the msgspec repo here:
https://github.com/jcrist/msgspec/blob/main/benchmarks/bench_validation.py

I make no claims that it's illustrative of all use cases. I wrote this up
mostly to get an understanding of how msgspec's performance compares with that
of pydantic V2.
"""

import datetime
import random
import string
import timeit
import uuid
from typing import List, Literal, Union, Annotated

import msgspec
import pydantic
import pydantic.v1


def make_filesystem_data(capacity):
    """Generate a tree structure representing a fake filesystem.

    Args:
        capacity: Total number of nodes (files and directories, the root
            directory included) to generate. For values below 1 only the
            empty root directory is returned.

    Returns:
        A nested ``dict`` describing the root directory. Every node has the
        keys ``type``, ``name``, ``created_by``, ``created_at`` and
        ``updated_at`` (the timestamps are ISO 8601 strings in UTC);
        directories also carry ``contents`` (a list of child nodes) and files
        carry ``nbytes``.
    """
    UTC = datetime.timezone.utc
    DATE_2018 = datetime.datetime(2018, 1, 1, tzinfo=UTC)
    DATE_2023 = datetime.datetime(2023, 1, 1, tzinfo=UTC)
    # uuid4() is not driven by `rand`, so the owner ids differ between runs.
    UUIDS = [str(uuid.uuid4()) for _ in range(30)]

    # Fixed seed: tree shape, names, sizes and timestamps are reproducible.
    rand = random.Random(42)

    def randdt(lo, hi):
        # randint() needs real ints, hence the explicit conversion.
        ts = rand.randint(int(lo.timestamp()), int(hi.timestamp()))
        # Convert straight to UTC. Going through naive local time and then
        # relabelling it as UTC shifts the value by the machine's UTC offset
        # and can push `created_at` past DATE_2023, leaving an empty range
        # for the subsequent `updated_at` draw.
        return datetime.datetime.fromtimestamp(ts, tz=UTC)

    def randstr(lo=None, hi=None):
        # With both bounds given, pick a random length in [lo, hi];
        # otherwise `lo` is used as the exact length.
        if hi is not None:
            lo = rand.randint(lo, hi)
        return "".join(rand.choices(string.ascii_letters, k=lo))

    def make_node(is_dir):
        nonlocal capacity

        name = randstr(4, 30)
        created_by = rand.choice(UUIDS)
        created_at = randdt(DATE_2018, DATE_2023)
        updated_at = randdt(created_at, DATE_2023)
        data = {
            "type": "directory" if is_dir else "file",
            "name": name,
            "created_by": created_by,
            "created_at": created_at.isoformat(),
            "updated_at": updated_at.isoformat(),
        }
        if is_dir:
            # Reserve the children from the shared budget before recursing so
            # nested directories can never overdraw it.
            n = min(rand.randint(0, 30), capacity)
            capacity -= n
            data["contents"] = [make_node(rand.random() > 0.9) for _ in range(n)]
        else:
            data["nbytes"] = rand.randint(0, 1000000)
        return data

    # The root directory itself consumes one unit of capacity.
    capacity -= 1
    out = make_node(True)
    # Spend whatever budget is left on extra top-level entries.
    while capacity:
        capacity -= 1
        out["contents"].append(make_node(rand.random() > 0.9))
    return out


def bench(raw_data, dumps, loads, convert):
    """Time one library's JSON encode and decode functions.

    Args:
        raw_data: Plain Python data handed to ``convert``.
        dumps: Callable turning the converted message into JSON.
        loads: Callable turning that JSON back into a message.
        convert: Callable building the library's typed message from
            ``raw_data``.

    Returns:
        A ``(dumps_time, loads_time)`` tuple of seconds per call.
    """
    typed = convert(raw_data)
    encoded = dumps(typed)
    # Round-trip sanity check before any timing is taken.
    assert typed == loads(encoded)

    def seconds_per_call(func, argument):
        # autorange() picks the loop count itself and reports the total time.
        timer = timeit.Timer(
            "func(data)", setup="", globals={"func": func, "data": argument}
        )
        loops, elapsed = timer.autorange()
        return elapsed / loops

    dumps_time = seconds_per_call(dumps, typed)
    loads_time = seconds_per_call(loads, encoded)
    return dumps_time, loads_time


#############################################################################
#  msgspec                                                                  #
#############################################################################


class File(msgspec.Struct, tag="file"):
    """msgspec schema for a file node in the generated filesystem tree.

    Declared with ``tag="file"`` so it can be told apart from ``Directory``
    inside the ``Union`` used by ``Directory.contents``.
    """

    # Must contain at least one character.
    name: Annotated[str, msgspec.Meta(min_length=1)]
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Must be zero or greater.
    nbytes: Annotated[int, msgspec.Meta(ge=0)]


class Directory(msgspec.Struct, tag="directory"):
    """msgspec schema for a directory node; the root of the benchmark data."""

    # Must contain at least one character.
    name: Annotated[str, msgspec.Meta(min_length=1)]
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Child nodes: files or nested directories. "Directory" is quoted because
    # the class is still being defined at this point.
    contents: List[Union[File, "Directory"]]


def bench_msgspec(data):
    """Benchmark msgspec on ``data``; returns ``bench``'s (dumps, loads) times."""
    # Encoder/decoder are created once, outside the timed calls.
    encoder = msgspec.json.Encoder()
    decoder = msgspec.json.Decoder(Directory)
    return bench(
        data,
        encoder.encode,
        decoder.decode,
        lambda raw: msgspec.convert(raw, Directory),
    )


#############################################################################
#  pydantic V2                                                              #
#############################################################################


class FileModel(pydantic.BaseModel):
    """pydantic V2 model for a file node in the generated filesystem tree."""

    # Fixed literal that distinguishes files from directories.
    type: Literal["file"] = "file"
    # Must contain at least one character.
    name: str = pydantic.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Integer constrained to be non-negative.
    nbytes: pydantic.NonNegativeInt


class DirectoryModel(pydantic.BaseModel):
    """pydantic V2 model for a directory node; the root of the benchmark data."""

    # Fixed literal that distinguishes directories from files.
    type: Literal["directory"] = "directory"
    # Must contain at least one character.
    name: str = pydantic.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Child nodes: nested directories or files. "DirectoryModel" is quoted
    # because the class is still being defined at this point.
    contents: List[Union["DirectoryModel", FileModel]]


# Rebuild the model to resolve forward references
DirectoryModel.model_rebuild()


def bench_pydantic_v2(data):
    """Benchmark ``DirectoryModel`` via ``model_dump_json`` / ``model_validate_json``.

    Returns ``bench``'s ``(dumps_time, loads_time)`` tuple.
    """

    def dumps(model):
        return model.model_dump_json()

    def convert(raw):
        return DirectoryModel(**raw)

    return bench(data, dumps, DirectoryModel.model_validate_json, convert)

#############################################################################
#  pydantic V1                                                              #
#############################################################################


class FileModelV1(pydantic.v1.BaseModel):
    """File node model built on the ``pydantic.v1`` compatibility API."""

    # Fixed literal that distinguishes files from directories.
    type: Literal["file"] = "file"
    # Must contain at least one character.
    name: str = pydantic.v1.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Integer constrained to be non-negative.
    nbytes: pydantic.v1.NonNegativeInt


class DirectoryModelV1(pydantic.v1.BaseModel):
    """Directory node model built on the ``pydantic.v1`` compatibility API."""

    # Fixed literal that distinguishes directories from files.
    type: Literal["directory"] = "directory"
    # Must contain at least one character.
    name: str = pydantic.v1.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Child nodes: nested directories or files. "DirectoryModelV1" is quoted
    # because the class is still being defined at this point.
    contents: List[Union["DirectoryModelV1", FileModelV1]]


# Update forward references for pydantic V1
DirectoryModelV1.update_forward_refs()


def bench_pydantic_v1(data):
    """Benchmark ``DirectoryModelV1`` through the ``.json()`` / ``parse_raw`` API.

    Returns ``bench``'s ``(dumps_time, loads_time)`` tuple.
    """

    def dumps(model):
        return model.json()

    def convert(raw):
        return DirectoryModelV1(**raw)

    return bench(data, dumps, DirectoryModelV1.parse_raw, convert)


if __name__ == "__main__":
    # msgspec runs first: its timings are the baseline for the "x slower"
    # ratios printed for every other library.
    N = 1000
    data = make_filesystem_data(N)
    ms_dumps, ms_loads = bench_msgspec(data)
    ms_total = ms_dumps + ms_loads

    title = f"msgspec {msgspec.__version__}"
    print(title, "-" * len(title), sep="\n")
    print(f"dumps: {ms_dumps * 1e6:.1f} us")
    print(f"loads: {ms_loads * 1e6:.1f} us")
    print(f"total: {ms_total * 1e6:.1f} us")

    contenders = (
        (f"pydantic {pydantic.__version__}", bench_pydantic_v2),
        (f"pydantic {pydantic.v1.__version__}", bench_pydantic_v1),
    )
    for title, func in contenders:
        # The heading goes out before the (slow) benchmark starts.
        print("", title, "-" * len(title), sep="\n")
        dumps, loads = func(data)
        total = dumps + loads
        print(f"dumps: {dumps * 1e6:.1f} us ({dumps / ms_dumps:.1f}x slower)")
        print(f"loads: {loads * 1e6:.1f} us ({loads / ms_loads:.1f}x slower)")
        print(f"total: {total * 1e6:.1f} us ({total / ms_total:.1f}x slower)")

idrissbellil · 2025-11-28T12:30:02Z

Building on top of the previous comment:

msgspec 0.20.0
--------------
dumps: 190.1 us
loads: 551.2 us
total: 741.3 us

pydantic 2.12.5
---------------
dumps: 2509.5 us (13.2x slower)
loads: 8589.7 us (15.6x slower)
total: 11099.3 us (15.0x slower)

pydantic 1.10.21
----------------
dumps: 14967.6 us (78.7x slower)
loads: 64642.1 us (117.3x slower)
total: 79609.6 us (107.4x slower)

duct tapeed py dataclass
------------------------
dumps: 28349.2 us (149.1x slower)
loads: 3176.0 us (5.8x slower)
total: 31525.3 us (42.5x slower)

"""A quick benchmark comparing the performance of:

- msgspec: https://github.com/jcrist/msgspec
- pydantic V1: https://docs.pydantic.dev/1.10/
- pydantic V2: https://docs.pydantic.dev/dev-v2/
- python dataclasses: https://docs.python.org/3.14/library/dataclasses.html

The benchmark is modified from the one in the msgspec repo here:
https://github.com/jcrist/msgspec/blob/main/benchmarks/bench_validation.py

I make no claims that it's illustrative of all use cases. I wrote this up
mostly to get an understanding of how msgspec's performance compares with that
of pydantic V2.
"""

import datetime
import json
import random
import string
import timeit
import uuid
from dataclasses import dataclass, field, asdict
from typing import List, Literal, Union, Annotated

import msgspec
import pydantic
import pydantic.v1


def make_filesystem_data(capacity):
    """Generate a tree structure representing a fake filesystem.

    Args:
        capacity: Total number of nodes (files and directories, the root
            directory included) to generate. For values below 1 only the
            empty root directory is returned.

    Returns:
        A nested ``dict`` describing the root directory. Every node has the
        keys ``type``, ``name``, ``created_by``, ``created_at`` and
        ``updated_at`` (the timestamps are ISO 8601 strings in UTC);
        directories also carry ``contents`` (a list of child nodes) and files
        carry ``nbytes``.
    """
    UTC = datetime.timezone.utc
    DATE_2018 = datetime.datetime(2018, 1, 1, tzinfo=UTC)
    DATE_2023 = datetime.datetime(2023, 1, 1, tzinfo=UTC)
    # uuid4() is not driven by `rand`, so the owner ids differ between runs.
    UUIDS = [str(uuid.uuid4()) for _ in range(30)]

    # Fixed seed: tree shape, names, sizes and timestamps are reproducible.
    rand = random.Random(42)

    def randdt(lo, hi):
        # randint() needs real ints, hence the explicit conversion.
        ts = rand.randint(int(lo.timestamp()), int(hi.timestamp()))
        # Convert straight to UTC. Going through naive local time and then
        # relabelling it as UTC shifts the value by the machine's UTC offset
        # and can push `created_at` past DATE_2023, leaving an empty range
        # for the subsequent `updated_at` draw.
        return datetime.datetime.fromtimestamp(ts, tz=UTC)

    def randstr(lo=None, hi=None):
        # With both bounds given, pick a random length in [lo, hi];
        # otherwise `lo` is used as the exact length.
        if hi is not None:
            lo = rand.randint(lo, hi)
        return "".join(rand.choices(string.ascii_letters, k=lo))

    def make_node(is_dir):
        nonlocal capacity

        name = randstr(4, 30)
        created_by = rand.choice(UUIDS)
        created_at = randdt(DATE_2018, DATE_2023)
        updated_at = randdt(created_at, DATE_2023)
        data = {
            "type": "directory" if is_dir else "file",
            "name": name,
            "created_by": created_by,
            "created_at": created_at.isoformat(),
            "updated_at": updated_at.isoformat(),
        }
        if is_dir:
            # Reserve the children from the shared budget before recursing so
            # nested directories can never overdraw it.
            n = min(rand.randint(0, 30), capacity)
            capacity -= n
            data["contents"] = [make_node(rand.random() > 0.9) for _ in range(n)]
        else:
            data["nbytes"] = rand.randint(0, 1000000)
        return data

    # The root directory itself consumes one unit of capacity.
    capacity -= 1
    out = make_node(True)
    # Spend whatever budget is left on extra top-level entries.
    while capacity:
        capacity -= 1
        out["contents"].append(make_node(rand.random() > 0.9))
    return out


def bench(raw_data, dumps, loads, convert):
    """Time one library's JSON encode and decode functions.

    Args:
        raw_data: Plain Python data handed to ``convert``.
        dumps: Callable turning the converted message into JSON.
        loads: Callable turning that JSON back into a message.
        convert: Callable building the library's typed message from
            ``raw_data``.

    Returns:
        A ``(dumps_time, loads_time)`` tuple of seconds per call.
    """
    typed = convert(raw_data)
    encoded = dumps(typed)
    # Round-trip sanity check before any timing is taken.
    assert typed == loads(encoded)

    def seconds_per_call(func, argument):
        # autorange() picks the loop count itself and reports the total time.
        timer = timeit.Timer(
            "func(data)", setup="", globals={"func": func, "data": argument}
        )
        loops, elapsed = timer.autorange()
        return elapsed / loops

    dumps_time = seconds_per_call(dumps, typed)
    loads_time = seconds_per_call(loads, encoded)
    return dumps_time, loads_time


#############################################################################
#  msgspec                                                                  #
#############################################################################


class File(msgspec.Struct, tag="file"):
    """msgspec schema for a file node in the generated filesystem tree.

    Declared with ``tag="file"`` so it can be told apart from ``Directory``
    inside the ``Union`` used by ``Directory.contents``.
    """

    # Must contain at least one character.
    name: Annotated[str, msgspec.Meta(min_length=1)]
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Must be zero or greater.
    nbytes: Annotated[int, msgspec.Meta(ge=0)]


class Directory(msgspec.Struct, tag="directory"):
    """msgspec schema for a directory node; the root of the benchmark data."""

    # Must contain at least one character.
    name: Annotated[str, msgspec.Meta(min_length=1)]
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Child nodes: files or nested directories. "Directory" is quoted because
    # the class is still being defined at this point.
    contents: List[Union[File, "Directory"]]


def bench_msgspec(data):
    """Benchmark msgspec on ``data``; returns ``bench``'s (dumps, loads) times."""
    # Encoder/decoder are created once, outside the timed calls.
    encoder = msgspec.json.Encoder()
    decoder = msgspec.json.Decoder(Directory)
    return bench(
        data,
        encoder.encode,
        decoder.decode,
        lambda raw: msgspec.convert(raw, Directory),
    )


#############################################################################
#  pydantic V2                                                              #
#############################################################################


class FileModel(pydantic.BaseModel):
    """pydantic V2 model for a file node in the generated filesystem tree."""

    # Fixed literal that distinguishes files from directories.
    type: Literal["file"] = "file"
    # Must contain at least one character.
    name: str = pydantic.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Integer constrained to be non-negative.
    nbytes: pydantic.NonNegativeInt


class DirectoryModel(pydantic.BaseModel):
    """pydantic V2 model for a directory node; the root of the benchmark data."""

    # Fixed literal that distinguishes directories from files.
    type: Literal["directory"] = "directory"
    # Must contain at least one character.
    name: str = pydantic.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Child nodes: nested directories or files. "DirectoryModel" is quoted
    # because the class is still being defined at this point.
    contents: List[Union["DirectoryModel", FileModel]]


# Rebuild the model to resolve forward references
DirectoryModel.model_rebuild()


def bench_pydantic_v2(data):
    """Benchmark ``DirectoryModel`` via ``model_dump_json`` / ``model_validate_json``.

    Returns ``bench``'s ``(dumps_time, loads_time)`` tuple.
    """

    def dumps(model):
        return model.model_dump_json()

    def convert(raw):
        return DirectoryModel(**raw)

    return bench(data, dumps, DirectoryModel.model_validate_json, convert)


#############################################################################
#  pydantic V1                                                              #
#############################################################################


class FileModelV1(pydantic.v1.BaseModel):
    """File node model built on the ``pydantic.v1`` compatibility API."""

    # Fixed literal that distinguishes files from directories.
    type: Literal["file"] = "file"
    # Must contain at least one character.
    name: str = pydantic.v1.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Integer constrained to be non-negative.
    nbytes: pydantic.v1.NonNegativeInt


class DirectoryModelV1(pydantic.v1.BaseModel):
    """Directory node model built on the ``pydantic.v1`` compatibility API."""

    # Fixed literal that distinguishes directories from files.
    type: Literal["directory"] = "directory"
    # Must contain at least one character.
    name: str = pydantic.v1.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Child nodes: nested directories or files. "DirectoryModelV1" is quoted
    # because the class is still being defined at this point.
    contents: List[Union["DirectoryModelV1", FileModelV1]]


# Update forward references for pydantic V1
DirectoryModelV1.update_forward_refs()


def bench_pydantic_v1(data):
    """Benchmark ``DirectoryModelV1`` through the ``.json()`` / ``parse_raw`` API.

    Returns ``bench``'s ``(dumps_time, loads_time)`` tuple.
    """

    def dumps(model):
        return model.json()

    def convert(raw):
        return DirectoryModelV1(**raw)

    return bench(data, dumps, DirectoryModelV1.parse_raw, convert)


#############################################################################
#  Python dataclass                                                         #
#############################################################################


@dataclass
class FileDataclass:
    """Plain-dataclass counterpart of the file schemas used by the other libraries.

    Field types are not checked; only the two value constraints below are
    enforced, after ``__init__`` has run.
    """

    name: str
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    nbytes: int
    type: str = "file"

    def __post_init__(self):
        """Raise ``ValueError`` for an empty name or a negative size."""
        if len(self.name) == 0:
            raise ValueError("name must have min_length=1")
        if self.nbytes < 0:
            raise ValueError("nbytes must be >= 0")


@dataclass
class DirectoryDataclass:
    """Plain-dataclass counterpart of the directory schemas used elsewhere.

    Field types are not checked; only the non-empty ``name`` constraint is
    enforced, after ``__init__`` has run.
    """

    name: str
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Each instance gets its own fresh list when `contents` is omitted.
    contents: List[Union["DirectoryDataclass", FileDataclass]] = field(default_factory=list)
    type: str = "directory"

    def __post_init__(self):
        """Raise ``ValueError`` for an empty name."""
        if len(self.name) == 0:
            raise ValueError("name must have min_length=1")


def _json_serializer(obj):
    """Fallback encoder for ``json.dumps(default=...)``.

    Dates and datetimes become ISO 8601 strings and UUIDs their string form;
    any other type raises ``TypeError``.
    """
    encoders = (
        ((datetime.datetime, datetime.date), lambda value: value.isoformat()),
        (uuid.UUID, str),
    )
    for kinds, encode in encoders:
        if isinstance(obj, kinds):
            return encode(obj)
    raise TypeError(f"Type {type(obj)} not serializable")


def _dict_to_dataclass(data):
    """Convert a node dict (and, recursively, its children) to dataclasses.

    A node whose ``type`` is ``"file"`` becomes a ``FileDataclass``; any other
    node is treated as a directory. String values for ``created_by``,
    ``created_at`` and ``updated_at`` are parsed into ``uuid.UUID`` /
    ``datetime.datetime``; non-string values are passed through untouched.
    """
    kind = data["type"]
    name = data["name"]

    created_by = data["created_by"]
    if isinstance(created_by, str):
        created_by = uuid.UUID(created_by)

    created_at = data["created_at"]
    if isinstance(created_at, str):
        created_at = datetime.datetime.fromisoformat(created_at)

    updated_at = data["updated_at"]
    if isinstance(updated_at, str):
        updated_at = datetime.datetime.fromisoformat(updated_at)

    if kind == "file":
        return FileDataclass(
            type=kind,
            name=name,
            created_by=created_by,
            created_at=created_at,
            updated_at=updated_at,
            nbytes=data["nbytes"],
        )

    # directory
    children = [_dict_to_dataclass(item) for item in data["contents"]]
    return DirectoryDataclass(
        type=kind,
        name=name,
        created_by=created_by,
        created_at=created_at,
        updated_at=updated_at,
        contents=children,
    )


def bench_dataclass(data):
    """Benchmark hand-written dataclass conversion on top of stdlib ``json``.

    Encoding is ``asdict`` + ``json.dumps`` (with ``_json_serializer`` as the
    fallback encoder); decoding is ``json.loads`` + ``_dict_to_dataclass``.
    Returns whatever ``bench`` returns for those callables.
    """

    def to_json(node):
        return json.dumps(asdict(node), default=_json_serializer)

    def from_json(payload):
        return _dict_to_dataclass(json.loads(payload))

    # The raw-dict -> object conversion is exactly _dict_to_dataclass.
    return bench(data, to_json, from_json, _dict_to_dataclass)


if __name__ == "__main__":
    # One shared dataset, so every library is timed on identical input.
    N = 1000
    data = make_filesystem_data(N)

    # msgspec runs first; its timings are the baseline for the ratios below.
    ms_dumps, ms_loads = bench_msgspec(data)
    ms_total = ms_dumps + ms_loads
    title = f"msgspec {msgspec.__version__}"
    underline = "-" * len(title)
    print(title)
    print(underline)
    print(f"dumps: {ms_dumps * 1e6:.1f} us")
    print(f"loads: {ms_loads * 1e6:.1f} us")
    print(f"total: {ms_total * 1e6:.1f} us")

    competitors = (
        (f"pydantic {pydantic.__version__}", bench_pydantic_v2),
        (f"pydantic {pydantic.v1.__version__}", bench_pydantic_v1),
        ("duct tapeed py dataclass", bench_dataclass),
    )
    for title, func in competitors:
        # The header is printed before the (slow) benchmark starts.
        underline = "-" * len(title)
        print()
        print(title)
        print(underline)
        dumps, loads = func(data)
        total = dumps + loads
        print(f"dumps: {dumps * 1e6:.1f} us ({dumps / ms_dumps:.1f}x slower)")
        print(f"loads: {loads * 1e6:.1f} us ({loads / ms_loads:.1f}x slower)")
        print(f"total: {total * 1e6:.1f} us ({total / ms_total:.1f}x slower)")

jcrist/bench.py

Select an option

No results found

Select an option

No results found

samuelcolvin commented Jun 28, 2023

Uh oh!

jcrist commented Jun 28, 2023

Uh oh!

samuelcolvin commented Jun 28, 2023

Uh oh!

legraphista commented Jul 5, 2023

Uh oh!

nrbnlulu commented Jun 18, 2024

Uh oh!

nrbnlulu commented Jun 18, 2024

Uh oh!

nrbnlulu commented Jun 18, 2024

Uh oh!

nrbnlulu commented Jun 18, 2024 •

edited

Loading

Uh oh!

MSDehghan commented Oct 1, 2024

Uh oh!

raceychan commented Jan 27, 2025 •

edited

Loading

Uh oh!

TKaluza commented Jun 7, 2025

Uh oh!

idrissbellil commented Nov 28, 2025

Uh oh!

	"""A quick benchmark comparing the performance of:

	- msgspec: https://github.com/jcrist/msgspec
	- pydantic V1: https://docs.pydantic.dev/1.10/
	- pydantic V2: https://docs.pydantic.dev/dev-v2/

	The benchmark is modified from the one in the msgspec repo here:
	https://github.com/jcrist/msgspec/blob/main/benchmarks/bench_validation.py

	I make no claims that it's illustrative of all use cases. I wrote this up
	mostly to get an understanding of how msgspec's performance compares with that
	of pydantic V2.
	"""
	from __future__ import annotations

	import datetime
	import random
	import string
	import timeit
	import uuid
	from typing import List, Literal, Union, Annotated

	import msgspec
	import pydantic
	import pydantic.v1


	def make_filesystem_data(capacity):
	    """Generate a tree structure representing a fake filesystem.

	    Args:
	        capacity: Total number of nodes (files plus directories, root
	            included) to place in the tree.

	    Returns:
	        The root directory as a nested dict. Every node has ``type``,
	        ``name``, ``created_by``, ``created_at`` and ``updated_at`` keys;
	        directories add ``contents`` (a list of child nodes) and files add
	        ``nbytes``. Timestamps are ISO 8601 strings in UTC.

	    Names, timestamps, sizes and the tree shape are drawn from
	    ``random.Random(42)``; the ``created_by`` pool comes from
	    ``uuid.uuid4()`` and therefore differs from run to run.
	    """
	    UTC = datetime.timezone.utc
	    DATE_2018 = datetime.datetime(2018, 1, 1, tzinfo=UTC)
	    DATE_2023 = datetime.datetime(2023, 1, 1, tzinfo=UTC)
	    UUIDS = [str(uuid.uuid4()) for _ in range(30)]

	    rand = random.Random(42)

	    def randdt(lo, hi):
	        # randint() rejects float bounds on Python 3.12+, so truncate the
	        # timestamps to whole seconds first.
	        ts = rand.randint(int(lo.timestamp()), int(hi.timestamp()))
	        # Build the datetime directly in UTC; a naive fromtimestamp() goes
	        # through the machine's local timezone and would shift the result.
	        return datetime.datetime.fromtimestamp(ts, tz=UTC)

	    def randstr(length=None, max_length=None):
	        # With two arguments, pick a random length in [length, max_length].
	        if max_length is not None:
	            length = rand.randint(length, max_length)
	        return "".join(rand.choices(string.ascii_letters, k=length))

	    def make_node(is_dir):
	        nonlocal capacity

	        name = randstr(4, 30)
	        created_by = rand.choice(UUIDS)
	        created_at = randdt(DATE_2018, DATE_2023)
	        updated_at = randdt(created_at, DATE_2023)
	        data = {
	            "type": "directory" if is_dir else "file",
	            "name": name,
	            "created_by": created_by,
	            "created_at": created_at.isoformat(),
	            "updated_at": updated_at.isoformat(),
	        }
	        if is_dir:
	            # Reserve the children's share of the budget before recursing so
	            # nested directories cannot overspend it.
	            n = min(rand.randint(0, 30), capacity)
	            capacity -= n
	            data["contents"] = [make_node(rand.random() > 0.9) for _ in range(n)]
	        else:
	            data["nbytes"] = rand.randint(0, 1000000)
	        return data

	    # The root directory itself consumes one unit of capacity.
	    capacity -= 1
	    out = make_node(True)
	    # Spend whatever budget is left on extra top-level entries.
	    while capacity:
	        capacity -= 1
	        out["contents"].append(make_node(rand.random() > 0.9))
	    return out


	def bench(raw_data, dumps, loads, convert):
	    """Time one library's JSON encode and decode functions.

	    Args:
	        raw_data: Plain input data handed to ``convert``.
	        dumps: Callable turning the converted object into JSON.
	        loads: Callable turning that JSON back into an object.
	        convert: Callable building the library's object from ``raw_data``.

	    Returns:
	        ``(dumps_time, loads_time)``: average seconds per call, measured
	        with ``timeit.Timer.autorange``.

	    Raises:
	        AssertionError: if ``loads(dumps(msg))`` does not compare equal to
	            ``msg``.
	    """
	    msg = convert(raw_data)
	    json_data = dumps(msg)

	    # Check the round trip once up front. An explicit raise (rather than an
	    # ``assert`` statement) keeps the check alive under ``python -O``.
	    roundtripped = loads(json_data)
	    if not msg == roundtripped:
	        raise AssertionError("loads(dumps(msg)) did not round-trip to an equal object")
	    # Drop the extra decoded copy before timing starts.
	    del roundtripped

	    def seconds_per_call(func, arg):
	        timer = timeit.Timer("func(data)", globals={"func": func, "data": arg})
	        loops, elapsed = timer.autorange()
	        return elapsed / loops

	    dumps_time = seconds_per_call(dumps, msg)
	    loads_time = seconds_per_call(loads, json_data)
	    return dumps_time, loads_time


	#############################################################################
	# msgspec #
	#############################################################################


	class File(msgspec.Struct, tag="file"):
	    """msgspec model of a file node, tagged ``"file"``."""

	    # Must be a non-empty string.
	    name: Annotated[str, msgspec.Meta(min_length=1)]
	    created_by: uuid.UUID
	    created_at: datetime.datetime
	    updated_at: datetime.datetime
	    # Must be >= 0.
	    nbytes: Annotated[int, msgspec.Meta(ge=0)]


	class Directory(msgspec.Struct, tag="directory"):
	    """msgspec model of a directory node, tagged ``"directory"``."""

	    # Must be a non-empty string.
	    name: Annotated[str, msgspec.Meta(min_length=1)]
	    created_by: uuid.UUID
	    created_at: datetime.datetime
	    updated_at: datetime.datetime
	    # Children are files or nested directories. The self-reference relies on
	    # the module's ``from __future__ import annotations``.
	    contents: List[Union[File, Directory]]


	def bench_msgspec(data):
	    """Benchmark msgspec JSON encoding/decoding of ``Directory`` trees.

	    The encoder and the typed decoder are created once, outside the timed
	    calls. Returns whatever ``bench`` returns for them.
	    """
	    enc = msgspec.json.Encoder()
	    dec = msgspec.json.Decoder(Directory)

	    def convert(data):
	        return msgspec.convert(data, Directory)

	    return bench(data, enc.encode, dec.decode, convert)


	#############################################################################
	# pydantic V2 #
	#############################################################################


	class FileModel(pydantic.BaseModel):
	    """pydantic V2 model of a file node."""

	    # Literal discriminator; defaults to "file".
	    type: Literal["file"] = "file"
	    name: str = pydantic.Field(min_length=1)
	    created_by: uuid.UUID
	    created_at: datetime.datetime
	    updated_at: datetime.datetime
	    nbytes: pydantic.NonNegativeInt


	class DirectoryModel(pydantic.BaseModel):
	    """pydantic V2 model of a directory node."""

	    # Literal discriminator; defaults to "directory".
	    type: Literal["directory"] = "directory"
	    name: str = pydantic.Field(min_length=1)
	    created_by: uuid.UUID
	    created_at: datetime.datetime
	    updated_at: datetime.datetime
	    # Children are nested directories or files.
	    contents: List[Union[DirectoryModel, FileModel]]


	def bench_pydantic_v2(data):
	    """Benchmark pydantic V2 on ``DirectoryModel`` trees.

	    Encoding uses ``model_dump_json`` and decoding uses
	    ``model_validate_json``; the initial object is built with
	    ``DirectoryModel(**data)``. Returns whatever ``bench`` returns.
	    """
	    return bench(
	        data,
	        lambda p: p.model_dump_json(),
	        DirectoryModel.model_validate_json,
	        lambda data: DirectoryModel(**data),
	    )

	#############################################################################
	# pydantic V1 #
	#############################################################################


	class FileModelV1(pydantic.v1.BaseModel):
	    """pydantic V1 (``pydantic.v1``) model of a file node."""

	    # Literal discriminator; defaults to "file".
	    type: Literal["file"] = "file"
	    name: str = pydantic.v1.Field(min_length=1)
	    created_by: uuid.UUID
	    created_at: datetime.datetime
	    updated_at: datetime.datetime
	    nbytes: pydantic.v1.NonNegativeInt


	class DirectoryModelV1(pydantic.v1.BaseModel):
	    """pydantic V1 (``pydantic.v1``) model of a directory node."""

	    # Literal discriminator; defaults to "directory".
	    type: Literal["directory"] = "directory"
	    name: str = pydantic.v1.Field(min_length=1)
	    created_by: uuid.UUID
	    created_at: datetime.datetime
	    updated_at: datetime.datetime
	    # Children are nested directories or files.
	    contents: List[Union[DirectoryModelV1, FileModelV1]]


	def bench_pydantic_v1(data):
	    """Benchmark pydantic V1 (``pydantic.v1``) on ``DirectoryModelV1`` trees.

	    Encoding uses ``.json()`` and decoding uses ``parse_raw``; the initial
	    object is built with ``DirectoryModelV1(**data)``. Returns whatever
	    ``bench`` returns.
	    """
	    return bench(
	        data,
	        lambda p: p.json(),
	        DirectoryModelV1.parse_raw,
	        lambda data: DirectoryModelV1(**data),
	    )


	if __name__ == "__main__":
	    # One shared dataset, so every library is timed on identical input.
	    N = 1000
	    data = make_filesystem_data(N)

	    # msgspec runs first; its timings are the baseline for the ratios below.
	    ms_dumps, ms_loads = bench_msgspec(data)
	    ms_total = ms_dumps + ms_loads
	    title = f"msgspec {msgspec.__version__}"
	    print(title)
	    print("-" * len(title))
	    print(f"dumps: {ms_dumps * 1e6:.1f} us")
	    print(f"loads: {ms_loads * 1e6:.1f} us")
	    print(f"total: {ms_total * 1e6:.1f} us")

	    for title, func in [
	        (f"pydantic {pydantic.__version__}", bench_pydantic_v2),
	        (f"pydantic {pydantic.v1.__version__}", bench_pydantic_v1),
	    ]:
	        # The header is printed before the (slow) benchmark starts.
	        print()
	        print(title)
	        print("-" * len(title))
	        dumps, loads = func(data)
	        total = dumps + loads
	        # Ratios are relative to the msgspec timings measured above.
	        print(f"dumps: {dumps * 1e6:.1f} us ({dumps / ms_dumps:.1f}x slower)")
	        print(f"loads: {loads * 1e6:.1f} us ({loads / ms_loads:.1f}x slower)")
	        print(f"total: {total * 1e6:.1f} us ({total / ms_total:.1f}x slower)")

jcrist/bench.py

samuelcolvin commented Jun 28, 2023

Uh oh!

jcrist commented Jun 28, 2023

Uh oh!

samuelcolvin commented Jun 28, 2023

Uh oh!

legraphista commented Jul 5, 2023

Uh oh!

nrbnlulu commented Jun 18, 2024

Uh oh!

nrbnlulu commented Jun 18, 2024

Uh oh!

nrbnlulu commented Jun 18, 2024

Uh oh!

nrbnlulu commented Jun 18, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

MSDehghan commented Oct 1, 2024

Uh oh!

raceychan commented Jan 27, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

========== Update =========

Uh oh!

TKaluza commented Jun 7, 2025

Uh oh!

idrissbellil commented Nov 28, 2025

Uh oh!

nrbnlulu commented Jun 18, 2024 •

edited

Loading

raceychan commented Jan 27, 2025 •

edited

Loading