Skip to content

Instantly share code, notes, and snippets.

@meeuw
Created February 26, 2014 19:45
Show Gist options
  • Save meeuw/9236957 to your computer and use it in GitHub Desktop.
Save meeuw/9236957 to your computer and use it in GitHub Desktop.
script to fix a messed up mysql database with invalid collation / charset and invalid use of utf8encode / utf8decode in latin1 tables.
import MySQLdb
import unicodedata
mysql_cred = {'host':'localhost', 'user':'root', 'passwd':'new-password', 'db':'test'}
conn_latin1 = MySQLdb.connect(**mysql_cred)
c_latin1 = conn_latin1.cursor(MySQLdb.cursors.DictCursor)
c_latin1.execute("SELECT id FROM texts")
ids = []
while 1:
row = c_latin1.fetchone()
if not row: break
ids.append(row['id'])
mysql_cred['charset'] = 'utf8'
conn_utf8 = MySQLdb.connect(**mysql_cred)
c_utf8 = conn_utf8.cursor(MySQLdb.cursors.DictCursor)
c_utf8.execute("SELECT id FROM texts")
f = open('output.sql', 'wb')
for id in ids:
c_utf8.execute("SELECT * FROM texts WHERE id = %s", id)
row_utf8 = c_utf8.fetchone()
c_latin1.execute("SELECT * FROM texts WHERE id = %s", id)
row_latin1 = c_latin1.fetchone()
if row_latin1['thevalue'] != '':
thevalues = []
try:
thevalues.append(row_latin1['thevalue'].decode('utf8', 'strict'))
except UnicodeDecodeError: pass
thevalues.append(row_utf8['thevalue'])
best = ''
bestscore = -99999
for thevalue in thevalues:
score = -len(thevalue)
for c in thevalue:
if c == u'?': score -= 1
if bestscore < score:
best = thevalue
bestscore = score
c_utf8.execute("UPDATE texts SET thevalue = %s WHERE id = %s", (best, id))
f.write(c_utf8._last_executed+';\n')
f.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment