kurtbrose · November 25, 2024 19:49
diff --git a/auto_public_id.py b/auto_public_id.py
 import enum
 from datetime import datetime

 from sqlalchemy import Column
 from sqlalchemy import DateTime
 from sqlalchemy import Enum
 from sqlalchemy import event
 from sqlalchemy import func
 from sqlalchemy import select
 from sqlalchemy import String
 from sqlalchemy.dialects.postgresql import array
 from sqlalchemy.orm import Mapped

 from somewhere import get_public_id


 class PublicIDType(enum.Enum):
    # one entry per table that has a public id...


 class PublicID(base.BaseModel):
    """A globally unique public id."""

    __tablename__ = "public_ids"

    public_id: Mapped[str] = Column(String, primary_key=True)
    public_id_type: Mapped[PublicIDType] = Column(Enum(PublicIDType), nullable=False)
    created: Mapped[datetime] = Column(DateTime, server_default=func.now())

    @classmethod
    def create(cls, session, public_id_type: PublicIDType, public_id: str | None = None) -> "PublicID":
        self = cls(public_id=public_id, public_id_type=public_id_type)
        session.add(self)
        session.flush([self])
        return self


 @event.listens_for(PublicID, "before_insert")
 def _before_insert(_mapper, _connection, target):
    """Inject an automatic public-id-generating sub-select when a new PublicID row is created if one isn't specfied."""
    if target.public_id is not None:
        return

    # Generate many candidate public-ids and have postgres pick one that isn't used yet.
    # As long as at least one candidate is unused, this will succeed.
    # The odds of them all being used goes down exponentially with the number of candidates.
    # 200 candidates gives a >99.9999999% (1 - 0.9^200) chance of success, even if 90% of possible ids are taken.

    # on M2 macbook this takes ~0.390ms, so this is a reasonable overhead
    candidates = [get_public_id() for _ in range(200)]
    # convert into an unnset that we can select from
    cte = select(func.unnest(array(candidates)).label("candidate")).cte("candidates")
    candidate_col = cte.columns["candidate"]

    # construct the sub-select for the first unused candidate public id
    unused = (
        select(candidate_col)
        .where(~select(PublicID.public_id).where(PublicID.public_id == candidate_col).exists())
        .limit(1)
    )
    # assign this sub-select to the public_id column
    target.public_id = unused
	import enum
	from datetime import datetime

	from sqlalchemy import Column
	from sqlalchemy import DateTime
	from sqlalchemy import Enum
	from sqlalchemy import event
	from sqlalchemy import func
	from sqlalchemy import select
	from sqlalchemy import String
	from sqlalchemy.dialects.postgresql import array
	from sqlalchemy.orm import Mapped

	from somewhere import get_public_id


	class PublicIDType(enum.Enum):
	# one entry per table that has a public id...


	class PublicID(base.BaseModel):
	"""A globally unique public id."""

	__tablename__ = "public_ids"

	public_id: Mapped[str] = Column(String, primary_key=True)
	public_id_type: Mapped[PublicIDType] = Column(Enum(PublicIDType), nullable=False)
	created: Mapped[datetime] = Column(DateTime, server_default=func.now())

	@classmethod
	def create(cls, session, public_id_type: PublicIDType, public_id: str \| None = None) -> "PublicID":
	self = cls(public_id=public_id, public_id_type=public_id_type)
	session.add(self)
	session.flush([self])
	return self


	@event.listens_for(PublicID, "before_insert")
	def _before_insert(_mapper, _connection, target):
	"""Inject an automatic public-id-generating sub-select when a new PublicID row is created if one isn't specfied."""
	if target.public_id is not None:
	return

	# Generate many candidate public-ids and have postgres pick one that isn't used yet.
	# As long as at least one candidate is unused, this will succeed.
	# The odds of them all being used goes down exponentially with the number of candidates.
	# 200 candidates gives a >99.9999999% (1 - 0.9^200) chance of success, even if 90% of possible ids are taken.

	# on M2 macbook this takes ~0.390ms, so this is a reasonable overhead
	candidates = [get_public_id() for _ in range(200)]
	# convert into an unnset that we can select from
	cte = select(func.unnest(array(candidates)).label("candidate")).cte("candidates")
	candidate_col = cte.columns["candidate"]

	# construct the sub-select for the first unused candidate public id
	unused = (
	select(candidate_col)
	.where(~select(PublicID.public_id).where(PublicID.public_id == candidate_col).exists())
	.limit(1)
	)
	# assign this sub-select to the public_id column
	target.public_id = unused