Last active
May 28, 2023 15:14
-
-
Save benigumocom/8fd25b1ada3518fbd05ad2ec5091011a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Prints a breakdown of [data] to standard output.
 *
 * Output, in order:
 * 1. the string itself,
 * 2. the list of substrings matched by the regex `\X`,
 * 3. the number of those substrings,
 * 4. a blank line,
 * 5. for every matched substring: the substring with its UTF-16 escape
 *    sequence, followed by one line per code point showing that code point's
 *    UTF-16 escape sequence, its hexadecimal value and the character itself.
 *
 * @param data the text to analyse
 */
fun dump(data: String) {
    println(data)

    // `\X` is matched repeatedly; every match becomes one entry of the list.
    val clusters = Regex("\\X").findAll(data).map { it.value }.toList()
    println(clusters)
    println(clusters.size)
    println()

    for (cluster in clusters) {
        println("$cluster ${cluster.toUtf16EscapeSequence()}")
        // Materialise the code points so a plain for-loop can walk them in order.
        for (codePoint in cluster.codePoints().toArray()) {
            val hex = "0x%X".format(codePoint)
            val rendered = String(Character.toChars(codePoint))
            println(" ${codePoint.toUtf16EscapeSequence()} $hex ($rendered)")
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Renders this string as a run of `\uXXXX` escapes, one per UTF-16 code unit.
 *
 * `String.chars()` yields the individual `char` values (each below 0x10000),
 * so a character outside the Basic Multilingual Plane is emitted as its two
 * surrogate code units and needs no special handling here.
 *
 * @return the concatenated escapes, or an empty string for an empty receiver
 */
fun String.toUtf16EscapeSequence(): String = buildString {
    for (codeUnit in this@toUtf16EscapeSequence.chars().toArray()) {
        // Four upper-case hex digits, zero padded.
        append("\\u%04X".format(codeUnit))
    }
}
/**
 * Treats this integer as a Unicode code point and renders it as `\uXXXX`
 * escapes by converting it to its UTF-16 `char`s with [Character.toChars] and
 * delegating to the `String` overload of `toUtf16EscapeSequence`.
 *
 * @return the escape sequence for the UTF-16 encoding of this code point
 */
fun Int.toUtf16EscapeSequence(): String {
    val utf16Units = Character.toChars(this)
    return String(utf16Units).toUtf16EscapeSequence()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
【Kotlin】絵文字を含む Unicode 文字列の文字数をカウントする方法と文字ごとの構成
👉 https://android.benigumo.com/20230529/kotlin-unicode/