Last active
June 25, 2018 08:35
-
-
Save Ladsgroup/28471578ca710b2b6e9e650a4657d327 to your computer and use it in GitHub Desktop.
db_checker
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
import time | |
import subprocess | |
shard_mapping = { | |
's1': [ | |
'db1052', | |
'db1067', | |
'db1080', | |
'db1083', | |
'db1089', | |
'db1099:3311', | |
'db1105:3311', | |
'db1106', | |
'db1114', | |
'db1119', | |
], | |
's2': [ | |
'db1066', | |
# 'db1054', | |
'db1074', | |
'db1076', | |
'db1090:3312', | |
'db1122', | |
'db1103:3312', | |
'db1105:3312', | |
], | |
's3': [ | |
'db1075', | |
'db1077', | |
'db1078', | |
'db1123', | |
], | |
's4': [ | |
'db1068', | |
'db1081', | |
'db1084', | |
'db1091', | |
'db1097:3314', | |
'db1103:3314', | |
'db1121', | |
], | |
's5': [ | |
'db1070', | |
'db1082', | |
'db1096:3315', | |
'db1097:3315', | |
'db1100', | |
'db1110', | |
'db1113:3315', | |
], | |
's6': [ | |
'db1061', | |
'db1085', | |
'db1088', | |
'db1093', | |
'db1096:3316', | |
'db1098:3316', | |
'db1113:3316', | |
], | |
's7': [ | |
'db1062', | |
'db1079', | |
'db1086', | |
'db1090:3317', | |
'db1094', | |
'db1098:3317', | |
'db1101:3317', | |
], | |
's8': [ | |
'db1071', | |
'db1087', | |
'db1092', | |
'db1099:3318', | |
'db1101:3318', | |
'db1104', | |
'db1109', | |
], | |
} | |
# https://stackoverflow.com/questions/5903720/recursive-diff-of-two-python-dictionaries-keys-and-values | |
def dd(d1, d2, ctx=""): | |
for k in d1: | |
if k not in d2: | |
print(k + " removed from " + ctx) | |
for k in d2: | |
if k not in d1: | |
print(k + " added in " + ctx) | |
continue | |
if d2[k] != d1[k]: | |
if type(d2[k]) not in (dict, list): | |
print(k + " changed in " + ctx + " to " + str(d2[k])) | |
else: | |
if type(d1[k]) != type(d2[k]): | |
print(k + " changed in " + ctx + " to " + str(d2[k])) | |
continue | |
else: | |
if type(d2[k]) == dict: | |
dd(d1[k], d2[k], k + ' in ' + ctx) | |
continue | |
return | |
def parse_sql(sql): | |
result = {} | |
sql = sql.replace('IF NOT EXISTS ', '') | |
for table_chunk in sql.split('CREATE TABLE '): | |
table_chunk = table_chunk.lower() | |
table_chunk = re.sub(r'/\*.+?\*/', '', table_chunk) | |
table_chunk = re.sub(r'\n\s*\-\-.*', '', table_chunk) | |
table_chunk = re.sub(r'\n\s*\n', '\n', table_chunk) | |
table_name = table_chunk.split('(')[0].strip() | |
if not table_name or '\n' in table_name: | |
continue | |
if '(' not in table_chunk: | |
continue | |
indexes = {} | |
for res in re.findall(r'create( +unique|) +index +(\S+?) +on +%s +\((.+?)\)\;' % table_name, table_chunk): | |
indexes[res[1]] = {'unique': bool(res[0]), 'columns': res[2]} | |
table_structure = re.split(r'create( +unique|) +index', '('.join(table_chunk.split('(')[1:]))[0] | |
table_structure_real = {} | |
pk = None | |
for line in table_structure.split('\n'): | |
line = line.strip() | |
if not line or line.endswith(';'): | |
continue | |
# Why strip(',') doesn't work? | |
if line.endswith(','): | |
line = line[:-1] | |
if line.startswith('primary key'): | |
pk = line.split('(')[1].split(')')[0].replace(' ', '') | |
continue | |
line = re.sub(r' +', ' ', line).split('--')[0] | |
if line.split(' ')[1].startswith('enum'): | |
real_type = ' '.join(line.split(')')[0].split(' ')[1:]) + ')' | |
real_type = real_type.replace('"', '\'').replace(' ','') | |
else: | |
real_type = line.split(' ')[1] | |
if ' unsigned ' in line: | |
line = line.replace(' unsigned ', ' ') | |
real_type += ' unsigned' | |
table_structure_real[line.split(' ')[0]] = {'type': real_type, 'config': ' '.join(line.split(' ')[2:])} | |
result[table_name] = {'structure': table_structure_real, 'indexes': indexes} | |
return result | |
def compare_table_with_prod(host, table_name, expected_table_structure): | |
verbose = '-v' in sys.argv | |
port = None | |
if host != 'localhost': | |
if ':' in host: | |
port = host.split(':')[1] | |
host = host.split(':')[0] | |
host += '.eqiad.wmnet' | |
if verbose: | |
print('Checking table' + table_name) | |
sql_command = sys.argv[2] | |
if port: | |
sql_command += ' -P ' + port | |
p = subprocess.Popen(sql_command + ' -h %s -e "DESC %s;"' % (host, table_name), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None,shell=True) | |
(output, _) = p.communicate(input='') | |
res = output.decode('utf-8') | |
fields_in_prod = [] | |
return_result = {'fields': {}} | |
for line in res.split('\n'): | |
if line.startswith('ERROR'): | |
return return_result | |
if not line or line.startswith('Field'): | |
continue | |
field_structure = line.lower().split('\t') | |
fields_in_prod.append(field_structure[0]) | |
name = field_structure[0] | |
if name not in expected_table_structure['structure']: | |
print(host, table_name, name, 'field-mismatch-prod-extra') | |
continue | |
return_result['fields'][field_structure[0]] = field_structure[1] | |
if '--important-only' in sys.argv: | |
continue | |
expected_type = expected_table_structure['structure'][name]['type'].replace('varchar', 'varbinary').replace('integer', 'int') | |
if expected_type != field_structure[1].replace('varchar', 'varbinary'): | |
actual_size = None | |
if '(' in field_structure[1]: | |
actual_size = field_structure[1].split('(')[1].split(')')[0] | |
expected_size = None | |
if '(' in expected_type: | |
expected_size = expected_type.split('(')[1].split(')')[0] | |
if actual_size and expected_size and actual_size != expected_size: | |
print(host, table_name, name, 'field-size-mismatch', expected_size + ' ' + actual_size) | |
if (field_structure[1] + expected_type).count(' unsigned') == 1: | |
print(host, table_name, name, 'field-unsigned-mismatch', field_structure[1] + ' ' + expected_type) | |
actual_type = field_structure[1].split('(')[0].split(' ')[0] | |
expected_type = expected_type.split('(')[0].split(' ')[0] | |
if actual_type != expected_type: | |
print(host, table_name, name, 'field-type-mismatch', expected_type + ' ' + actual_type) | |
expected_config = expected_table_structure['structure'][name]['config'] | |
if (field_structure[2] == 'no' and 'not null' not in expected_config) or (field_structure[2] == 'yes' and 'not null' in expected_config): | |
print(host, table_name, name, 'field-null-mismatch') | |
# if len(field_structure[4]) < 4: | |
# default = '' | |
#else: | |
# default = field_structure[4] | |
#if default == 'null' and field_structure[2] == 'no': | |
# continue | |
#print(default, expected_config) | |
#if (default and 'default ' + default not in expected_config) or (not default and 'default ' in expected_config): | |
# print(host, table_name, name, 'field-default-mismatch') | |
#print(expected_config) | |
for field in expected_table_structure['structure']: | |
if field not in fields_in_prod: | |
print(host, table_name, field, 'field-mismatch-codebase-extra') | |
p = subprocess.Popen(sql_command + ' -h %s -e "SHOW INDEX FROM %s;"' % (host, table_name), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None,shell=True) | |
(output, _) = p.communicate(input='') | |
res = output.decode('utf-8') | |
return_result['indexes'] = {} | |
indexes = {} | |
for line in res.split('\n'): | |
if line.startswith('ERROR'): | |
return return_result | |
if not line or line.startswith('Table'): | |
continue | |
index_structure = line.lower().split('\t') | |
if index_structure[2] not in indexes: | |
indexes[index_structure[2]] = {'unique': index_structure[1] == '0', 'columns': [index_structure[4]]} | |
else: | |
indexes[index_structure[2]]['columns'].append(index_structure[4]) | |
return_result['indexes'] = indexes | |
expected_indexes = expected_table_structure['indexes'] | |
for index in indexes: | |
#clean up primaries later | |
if index == 'primary': | |
continue | |
if index not in expected_indexes: | |
if index == 'tmp1': | |
print('wtf') | |
print(host, table_name, index, 'index-mismatch-prod-extra') | |
continue | |
if indexes[index]['unique'] != expected_indexes[index]['unique']: | |
print(host, table_name, index, 'index-uniqueness-mismatch') | |
expected_columns = expected_indexes[index]['columns'].replace(' ', '') | |
expected_columns = re.sub(r'\(.+?\)', '', expected_columns) | |
if ','.join(indexes[index]['columns']) != expected_columns: | |
print(host, table_name, index, 'index-columns-mismatch', ','.join(indexes[index]['columns']) + ' ' + expected_columns) | |
for index in expected_indexes: | |
if index not in indexes: | |
print(host, table_name, index, 'index-mismatch-code-extra') | |
return return_result | |
def main(): | |
with open(sys.argv[1], 'r') as f: | |
sql = f.read() | |
sql_data = parse_sql(sql) | |
final_result = {} | |
hosts = ['localhost'] | |
if '-prod' in sys.argv: | |
hosts = shard_mapping[sys.argv[3]] | |
for host in hosts: | |
final_result[host] = {} | |
for table in sql_data: | |
sys.stderr.write(host + ' ' + table + '\n') | |
sys.stderr.flush() | |
final_result[host][table] = compare_table_with_prod(host, table, sql_data[table]) | |
time.sleep(2) | |
for host in hosts: | |
for table in final_result[hosts[0]]: | |
if final_result[host][table] != final_result[hosts[0]][table]: | |
dd(final_result[host][table], final_result[hosts[0]][table], table + ':' + host) | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment