Created
November 27, 2018 23:51
-
-
Save ryantuck/14931c852d399d7d41f1dc09787a6dfc to your computer and use it in GitHub Desktop.
Configurable python code to generate random SQL tables and indexes using pure SQL.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import random | |
def _gen_ids(b,e): | |
return f'generate_series({b}, {e})::bigint' | |
def _gen_float(): | |
return 'random()' | |
def _gen_int(b=0,e=100000): | |
return f'(random()*{e})::int + {b}' | |
def _gen_str(): | |
return 'substring(md5(random()::text), (random()*4)::int+1, (random()*20)::int+5)' | |
def _gen_date(): | |
return "('2000-01-01'::timestamp + random() * '20 years'::interval)::date" | |
def _gen_timestamp(): | |
return "'2000-01-01'::timestamp + random() * '20 years'::interval" | |
def _gen_bool(): | |
return '(random() < 0.5)' | |
def _gen_select_as(field_name, fn): | |
return f'{fn()} as {field_name}' | |
def _gen_set_f(field_name, fn): | |
return f'{field_name} = {fn()}' | |
def _gen_create_index(table_name, field_name): | |
return f'create index on junk.{table_name} ({field_name});' | |
def _gen_on_conflict_do_update(ff_map): | |
updates = [ | |
_gen_set_f(field_name, fn) | |
for field_name, fn in ff_map.items() | |
] | |
updates_str = ',\n\t'.join(updates) | |
on_conflict_str = f'on conflict (id) do update\nset\n\t{updates_str}' | |
return on_conflict_str | |
def _gen_selects(ff_map, b, e): | |
id_select = f'\t{_gen_ids(b, e)} as id' | |
selects = [id_select] + [ | |
_gen_select_as(field_name, fn) | |
for field_name, fn in ff_map.items() | |
] | |
selects_str = ',\n\t'.join(selects) | |
return f'select \n{selects_str}' | |
def _pick_fn(): | |
fns = [ | |
_gen_float, | |
_gen_int, | |
_gen_str, | |
_gen_date, | |
_gen_timestamp, | |
_gen_bool, | |
] | |
return random.choice(fns) | |
def gen_random_field_fn_map(): | |
max_fields = 10 | |
return {f'f_{i}': _pick_fn() for i in range(random.randint(1,max_fields))} | |
def pick_random_fields_from_field_fn_map(ff_map): | |
fields = list(ff_map.keys()) | |
random.shuffle(fields) | |
return fields[:random.randint(0, len(fields)-1)] | |
def gen_create_table_as_stmt(table_name, ff_map, n_rows): | |
selects = _gen_selects(ff_map, 1, n_rows) | |
create_stmt = f'create table junk.{table_name} as\n{selects}\n;' | |
pkey_stmt = f'alter table junk.{table_name} add primary key (id);' | |
owner_stmt = f'alter table junk.{table_name} owner to reporting;' | |
stmts = [create_stmt, pkey_stmt, owner_stmt] | |
return '\n'.join(stmts) | |
def gen_create_index_stmts(table_name, index_fields): | |
index_stmts = [_gen_create_index(table_name, f) for f in index_fields] | |
return '\n'.join(index_stmts) | |
def gen_insert_into_stmt(table_name, ff_map, b, e): | |
selects = _gen_selects(ff_map, b, e) | |
on_conflict_str = _gen_on_conflict_do_update(ff_map) | |
return f'insert into junk.{table_name}\n{selects}\n{on_conflict_str}\n;' | |
def gen_recreate_schema_stmt(): | |
stmts = [ | |
'drop schema if exists junk cascade;', | |
'create schema junk;', | |
'alter schema junk owner to reporting;', | |
] | |
return '\n'.join(stmts) | |
def generate_config(n_tables, min_rows, max_rows): | |
table_configs = [] | |
for i in range(n_tables): | |
ff_map = gen_random_field_fn_map() | |
idx_fields = pick_random_fields_from_field_fn_map(ff_map) | |
table_name = f't_{i}' | |
n_rows = random.randint(min_rows, max_rows) | |
table_configs.append({ | |
'field_fn_map': ff_map, | |
'index_fields': idx_fields, | |
'table_name': table_name, | |
'n_rows': n_rows, | |
}) | |
return table_configs | |
def write_config_to_file(config, filepath): | |
with open(filepath, 'w') as f: | |
json.dump(config, f, indent=4, sort_keys=True, default=lambda x:str(x)) | |
def read_config_from_file(filepath): | |
with open(filepath) as f: | |
return json.load(f) | |
def mini_test(): | |
ff_map = { | |
'd': _gen_date, | |
'e': _gen_bool, | |
'f': _gen_timestamp, | |
'g': _gen_str, | |
} | |
ss = gen_recreate_schema_stmt() | |
crs = gen_create_table_as_stmt('my_table', ff_map, 1000000) | |
print(ss) | |
print(crs) | |
def generate_create_stmts_from_config(config): | |
stmts = [] | |
for cfg in config: | |
stmt = gen_create_table_as_stmt( | |
table_name=cfg['table_name'], | |
ff_map=cfg['field_fn_map'], | |
n_rows=cfg['n_rows'], | |
) | |
stmts.append(stmt) | |
return '\n\n'.join(stmts) | |
def generate_create_index_stmts_from_config(config): | |
stmts = [] | |
for cfg in config: | |
stmt = gen_create_index_stmts( | |
table_name=cfg['table_name'], | |
index_fields=cfg['index_fields'], | |
) | |
stmts.append(stmt) | |
return '\n\n'.join(stmts) | |
def generate_insert_stmts_from_config(config): | |
stmts = [] | |
for cfg in config: | |
stmt = gen_insert_into_stmt( | |
table_name=cfg['table_name'], | |
ff_map=cfg['field_fn_map'], | |
b=1, | |
e=random.randint(1, cfg['n_rows']), | |
) | |
stmts.append(stmt) | |
return '\n\n'.join(stmts) | |
def main(): | |
config = generate_config(50, 100, 1000000) | |
write_config_to_file(config, 'config.json') | |
cr = generate_create_stmts_from_config(config) | |
idx = generate_create_index_stmts_from_config(config) | |
ins = generate_insert_stmts_from_config(config) | |
print(cr) | |
print(idx) | |
print(ins) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment