Skip to content

Instantly share code, notes, and snippets.

@densumesh
Created July 1, 2024 17:19
Show Gist options
  • Save densumesh/1083357e7cb33be68134abc2eb6f3426 to your computer and use it in GitHub Desktop.
Save densumesh/1083357e7cb33be68134abc2eb6f3426 to your computer and use it in GitHub Desktop.
import os
import uuid
import asyncio
import asyncpg
# Connection target handed to asyncpg.connect(); None when the variable is unset.
DATABASE_URL = os.environ.get("DATABASE_URL")
# Maximum number of rows selected and deleted per batch (defaults to 5000).
DELETE_CHUNK_BATCH_SIZE = int(os.environ.get("DELETE_CHUNK_BATCH_SIZE", "5000"))
async def delete_chunks():
    """Delete all ``chunk_metadata`` rows whose ``qdrant_point_id`` is NULL.

    Rows are removed in batches of at most ``DELETE_CHUNK_BATCH_SIZE``. Each
    batch is selected in ascending ``id`` order, starting strictly after the
    last id handled by the previous batch, and the loop ends when a SELECT
    returns no rows.

    The connection is opened from ``DATABASE_URL`` and is always closed on
    exit, including when a query raises; the exception then propagates to
    the caller unchanged.
    """
    conn = await asyncpg.connect(DATABASE_URL)
    try:
        # Start from the all-zero UUID. The comparison below is strict, so a
        # row whose id is exactly the all-zero UUID would never be selected.
        last_offset_id = uuid.UUID(int=0)
        while True:
            chunk_and_qdrant_ids = await conn.fetch(
                """
                SELECT id, qdrant_point_id
                FROM chunk_metadata
                WHERE qdrant_point_id IS NULL AND id > $1
                ORDER BY id
                LIMIT $2
                """,
                last_offset_id,
                DELETE_CHUNK_BATCH_SIZE,
            )
            chunk_ids = [record["id"] for record in chunk_and_qdrant_ids]
            if not chunk_ids:
                # Nothing left past the current offset: the cleanup is done.
                break

            async with conn.transaction():
                await conn.execute(
                    """
                    DELETE FROM chunk_metadata
                    WHERE id = ANY($1::uuid[])
                    """,
                    chunk_ids,
                )

            # Rows were ordered by id, so the last one is the largest id seen;
            # the next SELECT resumes after it.
            last_offset_id = chunk_ids[-1]
    finally:
        # Release the connection even if a fetch or delete failed part-way.
        await conn.close()
async def main():
    """Entry-point coroutine: run the chunk cleanup to completion."""
    await delete_chunks()
if __name__ == "__main__":
    # Only start the event loop when run as a script, not when imported.
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment