Skip to content

Instantly share code, notes, and snippets.

@kurtbrose
Created November 25, 2024 19:49
Show Gist options
  • Save kurtbrose/8a40c2f430e87c4af1f67dac05db43ad to your computer and use it in GitHub Desktop.
Save kurtbrose/8a40c2f430e87c4af1f67dac05db43ad to your computer and use it in GitHub Desktop.
Globally unique public ids in sqlalchemy + postgres
import enum
from datetime import datetime
from sqlalchemy import Column
from sqlalchemy import DateTime
from sqlalchemy import Enum
from sqlalchemy import event
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy import String
from sqlalchemy.dialects.postgresql import array
from sqlalchemy.orm import Mapped
from somewhere import get_public_id
class PublicIDType(enum.Enum):
# one entry per table that has a public id...
class PublicID(base.BaseModel):
"""A globally unique public id."""
__tablename__ = "public_ids"
public_id: Mapped[str] = Column(String, primary_key=True)
public_id_type: Mapped[PublicIDType] = Column(Enum(PublicIDType), nullable=False)
created: Mapped[datetime] = Column(DateTime, server_default=func.now())
@classmethod
def create(cls, session, public_id_type: PublicIDType, public_id: str | None = None) -> "PublicID":
self = cls(public_id=public_id, public_id_type=public_id_type)
session.add(self)
session.flush([self])
return self
@event.listens_for(PublicID, "before_insert")
def _before_insert(_mapper, _connection, target):
"""Inject an automatic public-id-generating sub-select when a new PublicID row is created if one isn't specfied."""
if target.public_id is not None:
return
# Generate many candidate public-ids and have postgres pick one that isn't used yet.
# As long as at least one candidate is unused, this will succeed.
# The odds of them all being used goes down exponentially with the number of candidates.
# 200 candidates gives a >99.9999999% (1 - 0.9^200) chance of success, even if 90% of possible ids are taken.
# on M2 macbook this takes ~0.390ms, so this is a reasonable overhead
candidates = [get_public_id() for _ in range(200)]
# convert into an unnset that we can select from
cte = select(func.unnest(array(candidates)).label("candidate")).cte("candidates")
candidate_col = cte.columns["candidate"]
# construct the sub-select for the first unused candidate public id
unused = (
select(candidate_col)
.where(~select(PublicID.public_id).where(PublicID.public_id == candidate_col).exists())
.limit(1)
)
# assign this sub-select to the public_id column
target.public_id = unused
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment