Created
November 21, 2018 15:25
-
-
Save koduki/35e7b6aebd9d7fc3b18d641a9ce40636 to your computer and use it in GitHub Desktop.
UTF8から文字コード変換するときにSJISの範囲外の文字を任意の文字に置き換えるサンプル
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Sample: replace every character that cannot be represented in the target
// charset (Windows-31J) with a substitute character. The input is compared,
// char by char, against its own encode/decode round trip; any position where
// the two differ is treated as unrepresentable.
String str = "a吉野屋𠮷野\r\n屋ア緣イ?";
// Substitute written in place of each character that does not survive the round trip.
char alt = '□';
final String charset = "Windows-31j";
// Resolve the charset once. String.getBytes(Charset) is specified to replace
// unmappable characters with the charset's replacement bytes, whereas
// getBytes(String) leaves that case unspecified and throws a checked
// UnsupportedEncodingException.
Charset targetCharset = Charset.forName(charset);
// filterLowSurrogate is defined outside this snippet.
// NOTE(review): presumably it drops low surrogates so that each supplementary
// code point occupies a single char, matching the single replacement char the
// round trip yields for an unmappable surrogate pair - confirm.
String filteredStr = filterLowSurrogate(str);
// The round trip deliberately starts from the original str, not filteredStr.
String translatedStr = new String(str.getBytes(targetCharset), targetCharset);
StringBuilder sb = new StringBuilder(filteredStr.length());
// Assumes filteredStr and translatedStr have the same length (see note above);
// charAt throws StringIndexOutOfBoundsException if translatedStr is shorter.
for (int i = 0; i < filteredStr.length(); i++) {
    char c1 = filteredStr.charAt(i);
    char c2 = translatedStr.charAt(i);
    // Keep the character when it survived the round trip, otherwise substitute.
    sb.append(c1 == c2 ? c1 : alt);
    // Debug trace: UTF-16 code unit of the input vs. the round-tripped value.
    String x1 = Integer.toHexString(c1).toUpperCase();
    String x2 = Integer.toHexString(c2).toUpperCase();
    System.out.println(x1 + ":" + x2);
}
String str2 = sb.toString();
System.out.println("---");
System.out.println(str);
System.out.println("---");
System.out.println(filteredStr);
System.out.println("---");
System.out.println(str2);
System.out.println("---");
// Round-trip the substituted result to show how it comes back from the target charset.
System.out.println(new String(str2.getBytes(targetCharset), targetCharset));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment