Last active
March 15, 2019 11:25
-
-
Save snopoke/c3923f67480d8975224386ac29483d67 to your computer and use it in GitHub Desktop.
Script to drop child tables of intermediate dashboard tables (ICDS)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Drop child tables of intermediate aggregation tables. | |
# Each table must have a 'month' column. | |
# If tables have usage as reported by pg_stat_user_tables | |
# they will not be dropped. | |
import itertools | |
from datetime import date | |
from collections import defaultdict | |
from corehq.sql_db.connections import connection_manager | |
from sqlalchemy.exc import ProgrammingError | |
from six.moves import input | |
def get_child_tables(engine, parent_table): | |
with engine.begin() as conn: | |
res = conn.execute( | |
""" | |
SELECT c.relname AS child | |
FROM | |
pg_inherits JOIN pg_class AS c ON (inhrelid=c.oid) | |
JOIN pg_class as p ON (inhparent=p.oid) | |
where p.relname = %s; | |
""", | |
parent_table | |
) | |
return [row.child for row in res] | |
def categorise_tables_by_date(engine, tables): | |
tables_by_date = defaultdict(list) | |
with engine.begin() as c: | |
for t in tables: | |
res = list(c.execute("select month from {} limit 1".format(t))) | |
if res: | |
tables_by_date[res[0].month].append(t) | |
return tables_by_date | |
def unlink_tables_from_parent(engine, parent_table, tables): | |
tables_in_use = [] | |
unlinked = [] | |
not_exists = [] | |
for table in tables: | |
try: | |
with engine.begin() as conn: | |
seq = conn.execute('select seq_tup_read from pg_stat_user_tables where relname = %s', table) | |
reads = list(seq)[0].seq_tup_read | |
if reads > 1: # we just read from the table so this won't be 0 | |
tables_in_use.append((table, reads)) | |
continue | |
conn.execute('alter table "{}" no inherit "{}"'.format(table, parent_table)) | |
unlinked.append(table) | |
except ProgrammingError as ex: | |
if 'does not exist' not in str(ex): | |
raise | |
not_exists.append(table) | |
return unlinked, tables_in_use, not_exists | |
def drop_tables(engine, tables): | |
for table in tables: | |
with engine.begin() as conn: | |
conn.execute('drop table if exists "{}"'.format(table)) | |
def prune_child_tables(engine, parent_table, till_date): | |
child_tables = get_child_tables(engine, parent_table) | |
tables_by_date = categorise_tables_by_date(engine, child_tables) | |
tables_to_prune = { | |
date: tables | |
for date, tables in tables_by_date.items() | |
if date < till_date | |
} | |
tables_in_order = itertools.chain(*[ | |
tables_to_prune[month] | |
for month in sorted(tables_to_prune.keys()) | |
]) | |
unlinked, tables_in_use, not_exists = unlink_tables_from_parent(engine, parent_table, tables_in_order) | |
if tables_in_use: | |
print("Some tables still in use:") | |
for table, reads in tables_in_use: | |
print("\t{} ({} reads)".format(table, reads)) | |
if not_exists: | |
print("Tables that don't exist:") | |
for table in not_exists: | |
print("\t{}".format(table)) | |
ok = input("Type 'drop' to continue: ") | |
if ok != "drop": | |
print('Abort') | |
return | |
drop_tables(engine, unlinked) | |
engine = connection_manager.get_engine('<engine ID>') | |
parent_table = '<parent table>' | |
prune_child_tables(engine, parent_table, date(2018, 6, 1)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment