Skip to content

Instantly share code, notes, and snippets.

@raindev
Created December 9, 2014 13:36
Show Gist options
  • Select an option

  • Save raindev/440f99ce98c056e19068 to your computer and use it in GitHub Desktop.

Select an option

Save raindev/440f99ce98c056e19068 to your computer and use it in GitHub Desktop.
Data loss caused by a wrong charset (CP1251 bytes decoded as UTF-8)
package test;
import org.junit.Test;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import static org.junit.Assert.assertNotEquals;
/**
 * Demonstrates what happens to text when bytes produced with one charset
 * (cp1251) are decoded with another (utf-8).
 */
public class EncodingTest {

    /**
     * Encodes a short Cyrillic string as cp1251, decodes those bytes as utf-8,
     * then repeats the same encode/decode step on the result. The byte
     * representation of every intermediate string is printed to standard
     * output, and the test passes when the final string is not equal to the
     * starting one.
     *
     * @throws UnsupportedEncodingException if the runtime does not know the
     *         "utf-8" or "cp1251" charset names
     */
    @Test
    public void dataLoss() throws UnsupportedEncodingException {
        final String source = "іиы";
        dumpBytes( source );

        // Deliberate mismatch: bytes are produced as cp1251 but read back as utf-8.
        final byte[] sourceAsCp1251 = source.getBytes( "cp1251" );
        final String misdecoded = new String( sourceAsCp1251, "utf-8" );
        dumpBytes( misdecoded );

        // Apply the same mismatched step a second time to the already-damaged text.
        final byte[] misdecodedAsCp1251 = misdecoded.getBytes( "cp1251" );
        final String roundTripped = new String( misdecodedAsCp1251, "utf-8" );
        dumpBytes( roundTripped );

        // The test expects the round trip NOT to restore the starting string.
        assertNotEquals( source, roundTripped );
    }

    /**
     * Prints the utf-8 bytes of {@code text} and then its cp1251 bytes,
     * one array per line.
     */
    private static void dumpBytes( String text ) throws UnsupportedEncodingException {
        byte[] asUtf8 = text.getBytes( "utf-8" );
        System.out.println( Arrays.toString( asUtf8 ) );
        byte[] asCp1251 = text.getBytes( "cp1251" );
        System.out.println( Arrays.toString( asCp1251 ) );
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment