Skip to content

Instantly share code, notes, and snippets.

@anandtripathi5
Created December 22, 2024 18:12
Show Gist options
  • Save anandtripathi5/5d3a18084470d06d776f009f97140124 to your computer and use it in GitHub Desktop.
Save anandtripathi5/5d3a18084470d06d776f009f97140124 to your computer and use it in GitHub Desktop.
Analyze a PostgreSQL database in parallel, using a given number of processes, to save time
import argparse
import os
import time
import psycopg2
import multiprocessing
# Command-line interface for the script. All options are read once here and
# shared by every function below via the module-level ``args`` object.
_parser = argparse.ArgumentParser(
    description='Analyze all tables in a PostgreSQL database in parallel.')
_parser.add_argument('--host', type=str, required=True,
                     help='The hostname of the PostgreSQL server')
_parser.add_argument('--port', type=int, default=5432,
                     help='The port number of the PostgreSQL server (default: 5432)')
_parser.add_argument('--user', type=str, required=True,
                     help='The username to use when connecting to the PostgreSQL server')
_parser.add_argument('--password', type=str, required=True,
                     help='The password to use when connecting to the PostgreSQL server')
_parser.add_argument('--database', type=str, required=True,
                     help='The name of the PostgreSQL database')
_parser.add_argument('--processes', type=int,
                     help='The number of parallel jobs to run')
_parser.add_argument('--verbose', action='store_true', help='Output logs to stdout')

# Parsed options; treated as read-only for the rest of the script.
args = _parser.parse_args()
# Define function to analyze a table
def analyze_table(table):
    """Run ANALYZE VERBOSE on one table (executed in a worker process).

    ``table`` is a schema-qualified name such as ``public.users``.
    A fresh connection is opened per call because psycopg2 connections
    cannot be shared across worker processes.
    """
    conn = psycopg2.connect(host=args.host, port=args.port, user=args.user,
                            password=args.password, database=args.database)
    try:
        # ``with`` closes the cursor; the original leaked both cursor and
        # connection whenever ANALYZE raised.
        with conn.cursor() as cur:
            # NOTE(review): the identifier is interpolated unquoted. Names come
            # from pg_tables so injection risk is low, but mixed-case or
            # special-character table names would need psycopg2.sql.Identifier.
            cur.execute(f'ANALYZE VERBOSE {table}')
        conn.commit()
    finally:
        # Always release the connection, even if ANALYZE fails.
        conn.close()
    print(f'Analyzed table {table} by {os.getpid()}')
# Discover every user table up front. NOTE(review): this runs at import time,
# so under the 'spawn' start method (Windows/macOS default) each worker
# re-imports the module and re-runs this query — harmless but wasteful for a
# one-shot maintenance script; confirm if that matters for your platform.
conn = psycopg2.connect(host=args.host, port=args.port, user=args.user,
                        password=args.password, database=args.database)
try:
    # ``with`` closes the cursor; try/finally closes the connection even when
    # the query fails (the original leaked it on any error).
    with conn.cursor() as cur:
        cur.execute(
            "SELECT schemaname, tablename FROM pg_tables WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'")
        # Schema-qualify each name so ANALYZE targets the right table
        # regardless of search_path.
        tables = [f'{schema}.{table}' for schema, table in cur.fetchall()]
finally:
    conn.close()
print(f'Found {len(tables)} tables in database')
if __name__ == '__main__':
    print("Analyze tables in parallel")
    start_time = time.time()
    # processes=None lets multiprocessing default to os.cpu_count().
    with multiprocessing.Pool(processes=args.processes) as pool:
        pool.map(analyze_table, tables)
    # Report minutes *and* seconds: the original integer-minute division
    # printed "0 minutes" for any run shorter than a minute.
    minutes, seconds = divmod(int(time.time() - start_time), 60)
    print(f"Analyze completed in {minutes} minutes {seconds} seconds")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment