Created
November 19, 2014 20:42
-
-
Save ludo/3676e121fb7e4634b57d to your computer and use it in GitHub Desktop.
Source: @kadkins @ https://github.com/brianmario/mysql2/issues/367. Usage: Copy and paste into utf8mb4helper.rb and alter the settings in top of the script and then run ruby utf8mb4helper.rb > utf8mb4helper.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ruby script to generate ddl statements to convert utf8 to utf8mb4 | |
# you will need ruby, rubygems and the mysql2 gem to run this script | |
# => gem install mysql2 --no-rdoc --no-ri | |
require 'rubygems' | |
require 'mysql2' | |
### settings | |
dbhost='localhost' | |
dbuser='root' | |
dbuserpwd='password' | |
database='testdbname' | |
charset='utf8mb4' | |
collation='utf8mb4_unicode_ci' | |
### these will store the statements | |
no_index_and_data_less_than_191="" | |
alter_tables="" | |
alter_column_sizes="" | |
needs_index_resized="" | |
no_index_but_data_greater_than_191="" | |
has_index_but_less_than_191 = "" | |
### connect to database | |
con = Mysql2::Client.new(:host=>dbhost,:username=>dbuser,:password=>dbuserpwd) | |
# For each table in database | |
tables = con.query("SELECT table_name from information_schema.tables where table_schema = '" + database + "';") | |
tables.each do |h| | |
# check and update columns | |
columns = con.query("SHOW FULL COLUMNS FROM `" + database + "`.`" + h["table_name"] + "`;") | |
columns.each do |c| | |
# if the column has an index, set the max field size to 191 | |
checkindex=con.query("SELECT * FROM information_schema.statistics WHERE table_schema = '#{database}' AND table_name = '#{h["table_name"]}' AND column_name = '#{c["Field"]}';") | |
if c["Type"] =~ /^varchar/ | |
# varchar.. check the size | |
maxsize = con.query("SELECT MAX(LENGTH(`#{c["Field"]}`)) FROM `#{database}`.`#{h["table_name"]}`;") | |
size = maxsize.first.each_value.collect.first || 0 | |
if checkindex.size > 0 | |
# varchar with an index | |
if size > 191 | |
needs_index_resized << "/* ALTER TABLE `#{h["table_name"]}` CHANGE `#{c["Field"]}` `#{c["Field"]}` varchar(191); */\n" | |
else | |
has_index_but_less_than_191 << "ALTER TABLE `#{h["table_name"]}` CHANGE `#{c["Field"]}` `#{c["Field"]}` varchar(191);\n" | |
end | |
else # no index | |
utf8 = con.query("SELECT character_set_name FROM information_schema.`COLUMNS` C WHERE table_schema = '#{database}' AND table_name = '#{h["table_name"]}' AND column_name = '#{c["Field"]}' AND character_set_name='utf8';") | |
if utf8.size > 0 # utf8 varchar | |
if size > 191 | |
no_index_but_data_greater_than_191 << "ALTER TABLE `#{h["table_name"]}` CHANGE `#{c["Field"]}` `#{c["Field"]}` #{c["Type"]} CHARACTER SET '#{charset}' COLLATE '#{collation}';\n" | |
else | |
no_index_and_data_less_than_191 << "ALTER TABLE `#{h["table_name"]}` CHANGE `#{c["Field"]}` `#{c["Field"]}` #{c["Type"]} CHARACTER SET '#{charset}' COLLATE '#{collation}';\n" | |
end | |
end | |
end | |
end | |
end | |
# check and update table | |
alter_tables << "ALTER TABLE `#{h["table_name"]}` CONVERT TO CHARACTER SET '#{charset}' COLLATE '#{collation}';\n" | |
end | |
puts "use `#{database}`;" | |
puts "/* ############ THESE COLUMNS HAVE INDEXES BUT NO DATA > THAN 191-- MAY NEED TO REBUILD INDEX ############ */" | |
puts has_index_but_less_than_191 | |
puts "\n" | |
puts "/* ############ THESE COLUMNS HAVE INDEXES AND DATA > 191.. ALTERING THEM WOULD TRUNCATE DATA!!! RESIZE INDEX */" | |
puts needs_index_resized | |
puts "\n" | |
puts "/* ############ THESE COLUMNS DO NOT HAVE AN INDEX AND BUT HAVE DATA > 191 -- BE AWARE OF FUTURE INDICES ############ */" | |
puts no_index_but_data_greater_than_191 | |
puts "\n" | |
puts "/* ############ THESE COLUMNS DO NOT HAVE AN INDEX AND CURRENTLY DO NOT HAVE DATA > 191 ############ */" | |
puts no_index_and_data_less_than_191 | |
puts "\n" | |
puts "/* ############ CONVERT TABLES TO UTF8MB4 ############ */" | |
puts alter_tables | |
puts "\n" | |
puts "/* ############ CONVERT DATABASE ########### */" | |
puts "ALTER DATABASE `#{database}` CHARACTER SET = utf8mb4 COLLATE = `#{collation}`;" | |
puts "\n" | |
puts "/* ############ SOME HELPFULL COMMANDS TO RUN BEFORE AND/OR AFTER ########### */" | |
puts "# use `#{database}`; show session variables like 'character%';" | |
puts "# set names utf8mb4; use `#{database}`; show session variables like 'character%';" | |
puts "# select `SOMECOLUMN` from `#{database}` where NOT HEX(`SOMECOLUMN`) REGEXP '^([0-7][0-9A-F])*$'" | |
con.close |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment