Skip to content

Instantly share code, notes, and snippets.

@Ladsgroup
Last active June 25, 2018 08:35
Show Gist options
  • Save Ladsgroup/28471578ca710b2b6e9e650a4657d327 to your computer and use it in GitHub Desktop.
Save Ladsgroup/28471578ca710b2b6e9e650a4657d327 to your computer and use it in GitHub Desktop.
db_checker
import sys
import re
import time
import subprocess
shard_mapping = {
's1': [
'db1052',
'db1067',
'db1080',
'db1083',
'db1089',
'db1099:3311',
'db1105:3311',
'db1106',
'db1114',
'db1119',
],
's2': [
'db1066',
# 'db1054',
'db1074',
'db1076',
'db1090:3312',
'db1122',
'db1103:3312',
'db1105:3312',
],
's3': [
'db1075',
'db1077',
'db1078',
'db1123',
],
's4': [
'db1068',
'db1081',
'db1084',
'db1091',
'db1097:3314',
'db1103:3314',
'db1121',
],
's5': [
'db1070',
'db1082',
'db1096:3315',
'db1097:3315',
'db1100',
'db1110',
'db1113:3315',
],
's6': [
'db1061',
'db1085',
'db1088',
'db1093',
'db1096:3316',
'db1098:3316',
'db1113:3316',
],
's7': [
'db1062',
'db1079',
'db1086',
'db1090:3317',
'db1094',
'db1098:3317',
'db1101:3317',
],
's8': [
'db1071',
'db1087',
'db1092',
'db1099:3318',
'db1101:3318',
'db1104',
'db1109',
],
}
# https://stackoverflow.com/questions/5903720/recursive-diff-of-two-python-dictionaries-keys-and-values
def dd(d1, d2, ctx=""):
for k in d1:
if k not in d2:
print(k + " removed from " + ctx)
for k in d2:
if k not in d1:
print(k + " added in " + ctx)
continue
if d2[k] != d1[k]:
if type(d2[k]) not in (dict, list):
print(k + " changed in " + ctx + " to " + str(d2[k]))
else:
if type(d1[k]) != type(d2[k]):
print(k + " changed in " + ctx + " to " + str(d2[k]))
continue
else:
if type(d2[k]) == dict:
dd(d1[k], d2[k], k + ' in ' + ctx)
continue
return
def parse_sql(sql):
result = {}
sql = sql.replace('IF NOT EXISTS ', '')
for table_chunk in sql.split('CREATE TABLE '):
table_chunk = table_chunk.lower()
table_chunk = re.sub(r'/\*.+?\*/', '', table_chunk)
table_chunk = re.sub(r'\n\s*\-\-.*', '', table_chunk)
table_chunk = re.sub(r'\n\s*\n', '\n', table_chunk)
table_name = table_chunk.split('(')[0].strip()
if not table_name or '\n' in table_name:
continue
if '(' not in table_chunk:
continue
indexes = {}
for res in re.findall(r'create( +unique|) +index +(\S+?) +on +%s +\((.+?)\)\;' % table_name, table_chunk):
indexes[res[1]] = {'unique': bool(res[0]), 'columns': res[2]}
table_structure = re.split(r'create( +unique|) +index', '('.join(table_chunk.split('(')[1:]))[0]
table_structure_real = {}
pk = None
for line in table_structure.split('\n'):
line = line.strip()
if not line or line.endswith(';'):
continue
# Why strip(',') doesn't work?
if line.endswith(','):
line = line[:-1]
if line.startswith('primary key'):
pk = line.split('(')[1].split(')')[0].replace(' ', '')
continue
line = re.sub(r' +', ' ', line).split('--')[0]
if line.split(' ')[1].startswith('enum'):
real_type = ' '.join(line.split(')')[0].split(' ')[1:]) + ')'
real_type = real_type.replace('"', '\'').replace(' ','')
else:
real_type = line.split(' ')[1]
if ' unsigned ' in line:
line = line.replace(' unsigned ', ' ')
real_type += ' unsigned'
table_structure_real[line.split(' ')[0]] = {'type': real_type, 'config': ' '.join(line.split(' ')[2:])}
result[table_name] = {'structure': table_structure_real, 'indexes': indexes}
return result
def compare_table_with_prod(host, table_name, expected_table_structure):
verbose = '-v' in sys.argv
port = None
if host != 'localhost':
if ':' in host:
port = host.split(':')[1]
host = host.split(':')[0]
host += '.eqiad.wmnet'
if verbose:
print('Checking table' + table_name)
sql_command = sys.argv[2]
if port:
sql_command += ' -P ' + port
p = subprocess.Popen(sql_command + ' -h %s -e "DESC %s;"' % (host, table_name), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None,shell=True)
(output, _) = p.communicate(input='')
res = output.decode('utf-8')
fields_in_prod = []
return_result = {'fields': {}}
for line in res.split('\n'):
if line.startswith('ERROR'):
return return_result
if not line or line.startswith('Field'):
continue
field_structure = line.lower().split('\t')
fields_in_prod.append(field_structure[0])
name = field_structure[0]
if name not in expected_table_structure['structure']:
print(host, table_name, name, 'field-mismatch-prod-extra')
continue
return_result['fields'][field_structure[0]] = field_structure[1]
if '--important-only' in sys.argv:
continue
expected_type = expected_table_structure['structure'][name]['type'].replace('varchar', 'varbinary').replace('integer', 'int')
if expected_type != field_structure[1].replace('varchar', 'varbinary'):
actual_size = None
if '(' in field_structure[1]:
actual_size = field_structure[1].split('(')[1].split(')')[0]
expected_size = None
if '(' in expected_type:
expected_size = expected_type.split('(')[1].split(')')[0]
if actual_size and expected_size and actual_size != expected_size:
print(host, table_name, name, 'field-size-mismatch', expected_size + ' ' + actual_size)
if (field_structure[1] + expected_type).count(' unsigned') == 1:
print(host, table_name, name, 'field-unsigned-mismatch', field_structure[1] + ' ' + expected_type)
actual_type = field_structure[1].split('(')[0].split(' ')[0]
expected_type = expected_type.split('(')[0].split(' ')[0]
if actual_type != expected_type:
print(host, table_name, name, 'field-type-mismatch', expected_type + ' ' + actual_type)
expected_config = expected_table_structure['structure'][name]['config']
if (field_structure[2] == 'no' and 'not null' not in expected_config) or (field_structure[2] == 'yes' and 'not null' in expected_config):
print(host, table_name, name, 'field-null-mismatch')
# if len(field_structure[4]) < 4:
# default = ''
#else:
# default = field_structure[4]
#if default == 'null' and field_structure[2] == 'no':
# continue
#print(default, expected_config)
#if (default and 'default ' + default not in expected_config) or (not default and 'default ' in expected_config):
# print(host, table_name, name, 'field-default-mismatch')
#print(expected_config)
for field in expected_table_structure['structure']:
if field not in fields_in_prod:
print(host, table_name, field, 'field-mismatch-codebase-extra')
p = subprocess.Popen(sql_command + ' -h %s -e "SHOW INDEX FROM %s;"' % (host, table_name), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None,shell=True)
(output, _) = p.communicate(input='')
res = output.decode('utf-8')
return_result['indexes'] = {}
indexes = {}
for line in res.split('\n'):
if line.startswith('ERROR'):
return return_result
if not line or line.startswith('Table'):
continue
index_structure = line.lower().split('\t')
if index_structure[2] not in indexes:
indexes[index_structure[2]] = {'unique': index_structure[1] == '0', 'columns': [index_structure[4]]}
else:
indexes[index_structure[2]]['columns'].append(index_structure[4])
return_result['indexes'] = indexes
expected_indexes = expected_table_structure['indexes']
for index in indexes:
#clean up primaries later
if index == 'primary':
continue
if index not in expected_indexes:
if index == 'tmp1':
print('wtf')
print(host, table_name, index, 'index-mismatch-prod-extra')
continue
if indexes[index]['unique'] != expected_indexes[index]['unique']:
print(host, table_name, index, 'index-uniqueness-mismatch')
expected_columns = expected_indexes[index]['columns'].replace(' ', '')
expected_columns = re.sub(r'\(.+?\)', '', expected_columns)
if ','.join(indexes[index]['columns']) != expected_columns:
print(host, table_name, index, 'index-columns-mismatch', ','.join(indexes[index]['columns']) + ' ' + expected_columns)
for index in expected_indexes:
if index not in indexes:
print(host, table_name, index, 'index-mismatch-code-extra')
return return_result
def main():
with open(sys.argv[1], 'r') as f:
sql = f.read()
sql_data = parse_sql(sql)
final_result = {}
hosts = ['localhost']
if '-prod' in sys.argv:
hosts = shard_mapping[sys.argv[3]]
for host in hosts:
final_result[host] = {}
for table in sql_data:
sys.stderr.write(host + ' ' + table + '\n')
sys.stderr.flush()
final_result[host][table] = compare_table_with_prod(host, table, sql_data[table])
time.sleep(2)
for host in hosts:
for table in final_result[hosts[0]]:
if final_result[host][table] != final_result[hosts[0]][table]:
dd(final_result[host][table], final_result[hosts[0]][table], table + ':' + host)
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment