Skip to content

Instantly share code, notes, and snippets.

@pdmtt
Forked from imankulov/sqlalchemy_with_pydantic.py
Last active June 23, 2025 22:38
Show Gist options
  • Save pdmtt/a6dc62f051c5597a8cdeeb8271c1e079 to your computer and use it in GitHub Desktop.
Save pdmtt/a6dc62f051c5597a8cdeeb8271c1e079 to your computer and use it in GitHub Desktop.
Using pydantic models as SQLAlchemy JSON fields (convert beween JSON and pydantic.BaseModel subclasses) with modern syntax
import datetime
from typing import TYPE_CHECKING, Any, Optional, final
import sqlalchemy as sa
from pydantic import BaseModel, Field
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, declarative_base, mapped_column, sessionmaker
from typing_extensions import override
if TYPE_CHECKING:
from typing import Any
from sqlalchemy import Dialect
from sqlalchemy.sql.type_api import TypeEngine
# --------------------------------------------------------------------------------------
# Define pydantic-alchemy specific types (once per application)
# --------------------------------------------------------------------------------------
@final
class PydanticType(sa.types.TypeDecorator["BaseModel"]):
"""Pydantic type.
SAVING:
- Uses SQLAlchemy JSON type under the hood.
- Acceps the pydantic model and converts it to a dict on save.
- SQLAlchemy engine JSON-encodes the dict to a string.
RETRIEVING:
- Pulls the string from the database.
- SQLAlchemy engine JSON-decodes the string to a dict.
- Uses the dict to create a pydantic model.
"""
# If you intend to use this class with one dialect only,
# you could pick a type from the specific dialect for
# simplicity sake.
#
# E.g., if you work with PostgreSQL, you can consider using
# sqlalchemy.dialects.postgresql.JSONB instead of a
# generic sa.types.JSON
# Ref: https://www.postgresql.org/docs/13/datatype-json.html
#
# Otherwise, you should implement the `load_dialect_impl`
# method to handle different dialects. In this case, the
# impl variable can reference TypeEngine as a placeholder.
impl = sa.types.JSON
def __init__(self, pydantic_type: type["BaseModel"]) -> None:
super().__init__()
self.pydantic_type = pydantic_type
@override
def load_dialect_impl(self, dialect: "Dialect") -> "TypeEngine[JSONB | sa.JSON]":
# You should implement this method to handle different dialects
# if you intend to use this class with more than one.
# E.g., use JSONB for PostgreSQL and the generic JSON type for
# other databases.
if dialect.name == "postgresql":
return dialect.type_descriptor(JSONB())
else:
return dialect.type_descriptor(sa.JSON())
@override
def process_bind_param(
self,
value: "BaseModel | None",
dialect: "Dialect",
) -> "dict[str, Any] | None":
if value is None:
return None
if not isinstance(value, BaseModel): # dynamic typing.
raise TypeError(f'The value "{value!r}" is not a pydantic model')
# Method .dict() is deprecated as of pydantic V2.
#
# You should think before setting exclude_unset to True. This can reduce the size
# of the dump, but unset values might mean something depending on your context.
#
# Setting mode to "json" entails that you won't need to define a custom json
# serializer ahead.
return value.model_dump(mode="json", exclude_unset=True)
@override
def process_result_value(
self,
value: "dict[str, Any] | None",
dialect: "Dialect",
) -> "BaseModel | None":
# `parse_obj_as` is deprecated as of pydantic V2.
# We're assuming that the value will be a dictionary here.
return self.pydantic_type(**value) if value else None
# --------------------------------------------------------------------------------------
# Configure SQLAlchemy engine, session and declarative base (once per application)
# --------------------------------------------------------------------------------------
engine = sa.create_engine("sqlite:///:memory:")
Session = sessionmaker(bind=engine, expire_on_commit=False)
Base = declarative_base()
# --------------------------------------------------------------------------------------
# Define your Pydantic and SQLAlchemy models (as many as needed)
# --------------------------------------------------------------------------------------
class UserSettings(BaseModel):
notify_at: datetime.datetime = Field(default_factory=datetime.datetime.now)
@final
class User(Base):
__tablename__ = "users"
# Using Annotated Declarative Table.
# docs.sqlalchemy.org/en/20/orm/declarative_tables.html#using-annotated-declarative-table-type-annotated-forms-for-mapped-column
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column(doc="User name", comment="User name")
settings: Mapped[Optional[UserSettings]] = mapped_column(
PydanticType(UserSettings), nullable=True
)
# --------------------------------------------------------------------------------------
# Create tables (once per application)
# --------------------------------------------------------------------------------------
Base.metadata.create_all(engine)
# --------------------------------------------------------------------------------------
# Usage example (we use 2.0 querying style with selects)
# Ref: https://docs.sqlalchemy.org/en/20/orm/session_basics.htm
# --------------------------------------------------------------------------------------
session = Session()
user = User(name="user", settings=UserSettings())
session.add(user)
session.commit()
same_user = session.execute(sa.select(User)).scalars().first()
@AdamBryantLaunchWindow
Copy link

What is the purpose of optionally picking between JSON and JSONB here:
impl = sa.types.JSON

If the override block forces the impl depending on the caller anyway?

    @override
    def load_dialect_impl(self, dialect: "Dialect") -> "TypeEngine[JSONB | sa.JSON]":
        # Use JSONB for PostgreSQL and JSON for other databases.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(JSONB())
        else:
            return dialect.type_descriptor(sa.JSON())

Am I misinterpreting?

@pdmtt
Copy link
Author

pdmtt commented May 5, 2025

What is the purpose of optionally picking between JSON and JSONB here: impl = sa.types.JSON

If the override block forces the impl depending on the caller anyway?

    @override
    def load_dialect_impl(self, dialect: "Dialect") -> "TypeEngine[JSONB | sa.JSON]":
        # Use JSONB for PostgreSQL and JSON for other databases.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(JSONB())
        else:
            return dialect.type_descriptor(sa.JSON())

Am I misinterpreting?

No, you aren't misinterpreting.

Method load_dialect_impl is useful only if you intend to use the class with more than one dialect. If you'll be using it with only one dialect, you could pick a type from the specific dialect for simplicity sake as the value of impl.

Anyway, impl is a required attribute, so it should always be defined. If you're defining the load_dialect_impl method, "the impl variable can reference TypeEngine as a placeholder", as stated by the docs.

I'll edit the code to improve clarity.

Thanks for the feedback!

@AdamBryantLaunchWindow
Copy link

Thank you very much for the clarification! :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment