-
-
Save geocodinglife/121472a66c2ac7b9ce13f2122138f2be to your computer and use it in GitHub Desktop.
A ruby script to help automate getting a Rails application out of MySQL's character encoding hell.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The below script attempts to automate the character encoding challenges | |
# encountered with mysql, latin1 and UTF8 | |
# http://www.bluebox.net/news/2009/07/mysql_encoding/ | |
# ============================================================================== | |
# Copyright © 2013 University of Notre Dame | |
# Additional copyright may be held by others, as reflected in the commit history. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ============================================================================== | |
DB_CONFIG = YAML.load_file(Rails.root.join('config/database.yml').to_s)[Rails.env] | |
DB_CONFIG['username'] ||= 'root' | |
MYSQL_PARAMS = "-u#{DB_CONFIG['username']} #{'-p' << DB_CONFIG['password'] if DB_CONFIG['password']} #{'-h' << DB_CONFIG['host'] if DB_CONFIG['host']} #{DB_CONFIG['database']}".freeze | |
def load_new_database | |
sql_filename = Rails.root.join('db/db.sql') | |
system("mysql #{MYSQL_PARAMS} < #{sql_filename}") | |
end | |
def attempt_conversion | |
ActiveRecord::Base.connection.select_all(%(show table status where collation like '%latin1%')).each do |options| | |
table_name = options['Name'] | |
puts "Started Processing\t#{table_name}" | |
FileUtils.mkdir_p(Rails.root.join('tmp/db-conversion')) | |
filename_pre_transform = Rails.root.join("tmp/db-conversion/conductor.#{table_name}.pre.sql").to_s | |
commands = [] | |
commands << %(mysqldump #{MYSQL_PARAMS} --opt --skip-set-charset --default-character-set=latin1 --skip-extended-insert --tables #{table_name} > #{filename_pre_transform}) | |
commands << %(perl -i -pe 's/DEFAULT CHARSET=latin1/DEFAULT CHARSET=utf8/' #{filename_pre_transform}) | |
filename_to_load = filename_pre_transform | |
commands << %(mysql #{MYSQL_PARAMS} < #{filename_to_load}) | |
system(commands.join(';')) | |
puts "Finished Processing\t#{table_name}" | |
end | |
end | |
def verify_character_encoding_at_table_level | |
schema_query = %( | |
SELECT TABLE_SCHEMA, | |
TABLE_NAME, | |
CCSA.CHARACTER_SET_NAME AS DEFAULT_CHAR_SET, | |
COLUMN_NAME, | |
COLUMN_TYPE, | |
C.CHARACTER_SET_NAME | |
FROM information_schema.TABLES AS T | |
JOIN information_schema.COLUMNS AS C USING (TABLE_SCHEMA, TABLE_NAME) | |
JOIN information_schema.COLLATION_CHARACTER_SET_APPLICABILITY AS CCSA | |
ON (T.TABLE_COLLATION = CCSA.COLLATION_NAME) | |
WHERE TABLE_SCHEMA=SCHEMA() | |
AND C.DATA_TYPE IN ('enum', 'varchar', 'char', 'text', 'mediumtext', 'longtext') | |
ORDER BY TABLE_SCHEMA, | |
TABLE_NAME, | |
COLUMN_NAME | |
) | |
ActiveRecord::Base.connection.select_all(schema_query).each do |options| | |
table_name = options['TABLE_NAME'] | |
column_name = options['COLUMN_NAME'] | |
sub_query = "SELECT count(`#{column_name}`) AS count FROM `#{table_name}` WHERE LENGTH(`#{column_name}`) != CHAR_LENGTH(`#{column_name}`);" | |
response = ActiveRecord::Base.connection.select_one(sub_query) | |
if response['count'].to_i > 0 | |
puts "Found `#{table_name}`.`#{column_name}`\t#{response['count']}" | |
convert_double_encoded(table_name, column_name, options['COLUMN_TYPE']) | |
end | |
end | |
end | |
def convert_double_encoded(table_name, column_name, column_type) | |
puts "Processing `#{table_name}`.`#{column_name}`" | |
# normalized_column_type = column_type.sub(/\([^\)]\)/,'').strip | |
temp_table_name = "temp_#{table_name}_#{column_name}" | |
ActiveRecord::Base.connection.execute( | |
"DROP TABLE IF EXISTS `#{temp_table_name}`" | |
) | |
ActiveRecord::Base.connection.execute( | |
"CREATE TABLE `#{temp_table_name}` | |
( | |
SELECT * FROM `#{table_name}` | |
WHERE LENGTH(`#{column_name}`) != CHAR_LENGTH(`#{column_name}`) | |
)" | |
) | |
ActiveRecord::Base.connection.execute( | |
"ALTER TABLE `#{temp_table_name}` | |
MODIFY `#{temp_table_name}`.`#{column_name}` #{column_type} CHARACTER SET latin1" | |
) | |
ActiveRecord::Base.connection.execute( | |
"ALTER TABLE `#{temp_table_name}` | |
MODIFY `#{temp_table_name}`.`#{column_name}` BLOB" | |
) | |
ActiveRecord::Base.connection.execute( | |
"ALTER TABLE `#{temp_table_name}` | |
MODIFY `#{temp_table_name}`.`#{column_name}` #{column_type} CHARACTER SET UTF8" | |
) | |
ActiveRecord::Base.connection.execute( | |
"DELETE FROM `#{temp_table_name}` | |
WHERE LENGTH(`#{column_name}`) = CHAR_LENGTH(`#{column_name}`)" | |
) | |
ActiveRecord::Base.connection.execute( | |
"REPLACE INTO `#{table_name}` (SELECT * FROM `#{temp_table_name}`)" | |
) | |
ActiveRecord::Base.connection.execute( | |
"DROP TABLE IF EXISTS `#{temp_table_name}`" | |
) | |
end | |
# load_new_database | |
attempt_conversion | |
verify_character_encoding_at_table_level |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment