Skip to content

Instantly share code, notes, and snippets.

@densumesh
Created June 2, 2024 03:28
Show Gist options
  • Save densumesh/51665497e7d4939045656ab26b4731cc to your computer and use it in GitHub Desktop.
Save densumesh/51665497e7d4939045656ab26b4731cc to your computer and use it in GitHub Desktop.
import psycopg2
import os
import dotenv
import uuid
dotenv.load_dotenv()
# Connect to the PostgreSQL database
# Get the PostgreSQL connection details from environment variables
conn = psycopg2.connect(
os.getenv("DATABASE_URL"),
)
# Create a cursor object to interact with the database
cur = conn.cursor()
# Retrieve the tag_set columns from chunk_metadata, chunk_group, and files tables
tables = ["chunk_metadata", "chunk_group", "files"]
for table in tables:
# Execute the query to retrieve the tag_set column
import psycopg2.extensions
lastBusiness_id = uuid.UUID(int=0)
while True:
# Execute the query to retrieve the tag_set column
cur.execute(
f"SELECT id, tag_set FROM {table} WHERE id > (%s)::uuid AND tag_set IS NOT NULL AND array_length(tag_set_array, 1) IS NULL ORDER BY id LIMIT 1000",
(str(lastBusiness_id),),
)
# Fetch the first 10000 rows from the result set
rows = cur.fetchall()
if not rows:
break
# Convert the tag_set field from a comma-separated string to an array within PostgreSQL
for row in rows:
tag_set = row[1]
if tag_set:
tag_set_array = tag_set.split(",")
tag_set = tag_set.replace("'", "\\'")
cur.execute(
f"UPDATE {table} SET tag_set_array = %s WHERE id = (%s)::uuid",
(
tag_set_array,
row[0],
),
)
# Fetch the next 10000 rows from the result set
lastRecord = rows[-1]
lastBusiness_id = lastRecord[0]
# Commit the changes to the database
conn.commit()
# Close the cursor and connection
cur.close()
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment