Skip to content

Instantly share code, notes, and snippets.

@snopoke
Last active March 15, 2019 11:25
Show Gist options
  • Save snopoke/c3923f67480d8975224386ac29483d67 to your computer and use it in GitHub Desktop.
Save snopoke/c3923f67480d8975224386ac29483d67 to your computer and use it in GitHub Desktop.
Script to drop child tables of intermediate dashboard tables (ICDS)
# Drop child tables of intermediate aggregation tables.
# Each table must have a 'month' column.
# If tables have usage as reported by pg_stat_user_tables
# they will not be dropped.
import itertools
from datetime import date
from collections import defaultdict
from corehq.sql_db.connections import connection_manager
from sqlalchemy.exc import ProgrammingError
from six.moves import input
def get_child_tables(engine, parent_table):
with engine.begin() as conn:
res = conn.execute(
"""
SELECT c.relname AS child
FROM
pg_inherits JOIN pg_class AS c ON (inhrelid=c.oid)
JOIN pg_class as p ON (inhparent=p.oid)
where p.relname = %s;
""",
parent_table
)
return [row.child for row in res]
def categorise_tables_by_date(engine, tables):
tables_by_date = defaultdict(list)
with engine.begin() as c:
for t in tables:
res = list(c.execute("select month from {} limit 1".format(t)))
if res:
tables_by_date[res[0].month].append(t)
return tables_by_date
def unlink_tables_from_parent(engine, parent_table, tables):
tables_in_use = []
unlinked = []
not_exists = []
for table in tables:
try:
with engine.begin() as conn:
seq = conn.execute('select seq_tup_read from pg_stat_user_tables where relname = %s', table)
reads = list(seq)[0].seq_tup_read
if reads > 1: # we just read from the table so this won't be 0
tables_in_use.append((table, reads))
continue
conn.execute('alter table "{}" no inherit "{}"'.format(table, parent_table))
unlinked.append(table)
except ProgrammingError as ex:
if 'does not exist' not in str(ex):
raise
not_exists.append(table)
return unlinked, tables_in_use, not_exists
def drop_tables(engine, tables):
for table in tables:
with engine.begin() as conn:
conn.execute('drop table if exists "{}"'.format(table))
def prune_child_tables(engine, parent_table, till_date):
child_tables = get_child_tables(engine, parent_table)
tables_by_date = categorise_tables_by_date(engine, child_tables)
tables_to_prune = {
date: tables
for date, tables in tables_by_date.items()
if date < till_date
}
tables_in_order = itertools.chain(*[
tables_to_prune[month]
for month in sorted(tables_to_prune.keys())
])
unlinked, tables_in_use, not_exists = unlink_tables_from_parent(engine, parent_table, tables_in_order)
if tables_in_use:
print("Some tables still in use:")
for table, reads in tables_in_use:
print("\t{} ({} reads)".format(table, reads))
if not_exists:
print("Tables that don't exist:")
for table in not_exists:
print("\t{}".format(table))
ok = input("Type 'drop' to continue: ")
if ok != "drop":
print('Abort')
return
drop_tables(engine, unlinked)
engine = connection_manager.get_engine('<engine ID>')
parent_table = '<parent table>'
prune_child_tables(engine, parent_table, date(2018, 6, 1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment