Created
December 9, 2014 13:36
-
-
Save raindev/440f99ce98c056e19068 to your computer and use it in GitHub Desktop.
Data loss from using the wrong charset (CP1251 bytes decoded as UTF-8)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package test; | |
| import org.junit.Test; | |
| import java.io.UnsupportedEncodingException; | |
| import java.util.Arrays; | |
| import static org.junit.Assert.assertNotEquals; | |
| public class EncodingTest { | |
| @Test | |
| public void dataLoss() throws UnsupportedEncodingException { | |
| String utf8 = "іиы"; | |
| System.out.println( Arrays.toString( utf8.getBytes( "utf-8" ) ) ); | |
| System.out.println( Arrays.toString( utf8.getBytes( "cp1251" ) ) ); | |
| String cp1251 = new String( utf8.getBytes( "cp1251" ), "utf-8" ); | |
| System.out.println( Arrays.toString( cp1251.getBytes( "utf-8" ) ) ); | |
| System.out.println( Arrays.toString( cp1251.getBytes( "cp1251" ) ) ); | |
| String original = new String( cp1251.getBytes( "cp1251" ), "utf-8" ); | |
| System.out.println( Arrays.toString( original.getBytes( "utf-8" ) ) ); | |
| System.out.println( Arrays.toString( original.getBytes( "cp1251" ) ) ); | |
| assertNotEquals( utf8, original ); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment