Created
June 20, 2011 08:32
-
-
Save michaelcontento/1035306 to your computer and use it in GitHub Desktop.
MySQL: Insert utf8 via latin1-connection into utf8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | |
-- First we create the table with charset=utf8 | |
-- | |
mysql> create table utf8 (msg varchar(10)) default charset=utf8; | |
Query OK, 0 rows affected (0.00 sec) | |
-- | |
-- This is out demo-character (remember the hex value!) | |
-- | |
mysql> select hex('ç'); | |
+-----------+ | |
| hex('ç') | | |
+-----------+ | |
| C3A7 | | |
+-----------+ | |
1 row in set (0.00 sec) | |
-- | |
-- Now we set the connection charset to latin1 (mysql default) | |
-- and insert a valid utf8 character | |
-- | |
mysql> set names latin1; | |
Query OK, 0 rows affected (0.00 sec) | |
mysql> insert into utf8 values ('ç'); | |
Query OK, 1 row affected (0.00 sec) | |
mysql> select msg, hex(msg) from utf8; | |
+------+----------+ | |
| msg | hex(msg) | | |
+------+----------+ | |
| ç | C383C2A7 | | |
+------+----------+ | |
1 row in set (0.00 sec) | |
-- | |
-- The value inside the table is broken, but mysql can convert | |
-- it briefly so everything is fine until we change the | |
-- connection charset to utf8 | |
-- | |
mysql> set names utf8; | |
Query OK, 0 rows affected (0.00 sec) | |
mysql> select msg, hex(msg) from utf8; | |
+------+----------+ | |
| msg | hex(msg) | | |
+------+----------+ | |
| ç | C383C2A7 | | |
+------+----------+ | |
1 row in set (0.00 sec) | |
-- | |
-- Wuhu! Now we can see the mess! | |
-- | |
mysql> select convert(cast(convert(msg using latin1) as binary) using utf8) from utf8; | |
+---------------------------------------------------------------+ | |
| convert(cast(convert(msg using latin1) as binary) using utf8) | | |
+---------------------------------------------------------------+ | |
| ç | | |
+---------------------------------------------------------------+ | |
1 row in set (0.00 sec) | |
-- | |
-- Tada! Everthing is fine, because we tricked the mysql (convert mess into | |
-- latin1, treat it as binary and convert it into utf8). And with this we | |
-- can easily fix the mess inside the table. | |
-- | |
mysql> update utf8 set msg = convert(cast(convert(msg using latin1) as binary) using utf8); | |
Query OK, 1 row affected (0.00 sec) | |
Rows matched: 1 Changed: 1 Warnings: 0 | |
mysql> select msg from utf8; | |
+------+ | |
| msg | | |
+------+ | |
| ç | | |
+------+ | |
1 row in set (0.00 sec) | |
-- | |
-- Done! | |
-- | |
-- | |
-- And finally: Let's have fun with ENUM() | |
-- | |
mysql> set names latin1; | |
Query OK, 0 rows affected (0.00 sec) | |
mysql> alter table utf8 add column bla enum('ç'); | |
Query OK, 1 row affected (0.02 sec) | |
Records: 1 Duplicates: 0 Warnings: 0 | |
mysql> truncate utf8; | |
Query OK, 0 rows affected (0.00 sec) | |
mysql> insert into utf8 values ('ç', 'ç'); | |
Query OK, 1 row affected (0.00 sec) | |
mysql> select * from utf8; | |
+------+------+ | |
| msg | bla | | |
+------+------+ | |
| ç | ç | | |
+------+------+ | |
1 row in set (0.00 sec) | |
mysql> show create table utf8; | |
+-------+------------------------------------------------+ | |
| Table | Create Table | |
+-------+------------------------------------------------+ | |
| utf8 | CREATE TABLE `utf8` ( | |
`msg` varchar(10) DEFAULT NULL, | |
`bla` enum('ç') DEFAULT NULL | |
) ENGINE=MyISAM DEFAULT CHARSET=utf8 | | |
+-------+------------------------------------------------+ | |
1 row in set (0.00 sec) | |
-- | |
-- Everything fine until ... | |
-- | |
mysql> set names utf8; | |
Query OK, 0 rows affected (0.00 sec) | |
mysql> select * from utf8; | |
+------+------+ | |
| msg | bla | | |
+------+------+ | |
| ç | ç | | |
+------+------+ | |
1 row in set (0.00 sec) | |
mysql> insert into utf8 values ('ç', 'ç'); | |
Query OK, 1 row affected, 1 warning (0.00 sec) | |
mysql> select * from utf8; | |
+------+------+ | |
| msg | bla | | |
+------+------+ | |
| ç | ç | | |
| ç | | | |
+------+------+ | |
2 rows in set (0.00 sec) | |
mysql> show create table utf8; | |
+-------+------------------------------------------------+ | |
| Table | Create Table | |
+-------+------------------------------------------------+ | |
| utf8 | CREATE TABLE `utf8` ( | |
`msg` varchar(10) DEFAULT NULL, | |
`bla` enum('ç') DEFAULT NULL | |
) ENGINE=MyISAM DEFAULT CHARSET=utf8 | | |
+-------+------------------------------------------------+ | |
1 row in set (0.00 sec) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment