Created
November 25, 2024 19:49
-
-
Save kurtbrose/8a40c2f430e87c4af1f67dac05db43ad to your computer and use it in GitHub Desktop.
Globally unique public ids in sqlalchemy + postgres
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import enum | |
from datetime import datetime | |
from sqlalchemy import Column | |
from sqlalchemy import DateTime | |
from sqlalchemy import Enum | |
from sqlalchemy import event | |
from sqlalchemy import func | |
from sqlalchemy import select | |
from sqlalchemy import String | |
from sqlalchemy.dialects.postgresql import array | |
from sqlalchemy.orm import Mapped | |
from somewhere import get_public_id | |
class PublicIDType(enum.Enum): | |
# one entry per table that has a public id... | |
class PublicID(base.BaseModel): | |
"""A globally unique public id.""" | |
__tablename__ = "public_ids" | |
public_id: Mapped[str] = Column(String, primary_key=True) | |
public_id_type: Mapped[PublicIDType] = Column(Enum(PublicIDType), nullable=False) | |
created: Mapped[datetime] = Column(DateTime, server_default=func.now()) | |
@classmethod | |
def create(cls, session, public_id_type: PublicIDType, public_id: str | None = None) -> "PublicID": | |
self = cls(public_id=public_id, public_id_type=public_id_type) | |
session.add(self) | |
session.flush([self]) | |
return self | |
@event.listens_for(PublicID, "before_insert") | |
def _before_insert(_mapper, _connection, target): | |
"""Inject an automatic public-id-generating sub-select when a new PublicID row is created if one isn't specfied.""" | |
if target.public_id is not None: | |
return | |
# Generate many candidate public-ids and have postgres pick one that isn't used yet. | |
# As long as at least one candidate is unused, this will succeed. | |
# The odds of them all being used goes down exponentially with the number of candidates. | |
# 200 candidates gives a >99.9999999% (1 - 0.9^200) chance of success, even if 90% of possible ids are taken. | |
# on M2 macbook this takes ~0.390ms, so this is a reasonable overhead | |
candidates = [get_public_id() for _ in range(200)] | |
# convert into an unnset that we can select from | |
cte = select(func.unnest(array(candidates)).label("candidate")).cte("candidates") | |
candidate_col = cte.columns["candidate"] | |
# construct the sub-select for the first unused candidate public id | |
unused = ( | |
select(candidate_col) | |
.where(~select(PublicID.public_id).where(PublicID.public_id == candidate_col).exists()) | |
.limit(1) | |
) | |
# assign this sub-select to the public_id column | |
target.public_id = unused |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment