Skip to content

Instantly share code, notes, and snippets.

@ChristopherDavenport
Created September 20, 2021 23:13
Show Gist options
  • Save ChristopherDavenport/e9dd40b81005f000aeef2e0557524721 to your computer and use it in GitHub Desktop.
Save ChristopherDavenport/e9dd40b81005f000aeef2e0557524721 to your computer and use it in GitHub Desktop.
Encode Emoji with Custom Embedding
import cats.syntax.all._
/** Converts emoji code points in a string to and from a plain-ASCII `U+XXXX` notation.
  *
  * Encoding replaces every code point that falls in one of the configured ranges with
  * `U+` followed by its upper-case hexadecimal value, zero-padded to at least four
  * digits. Decoding replaces every `U+` followed by 4 or 5 hex digits with the
  * corresponding code point.
  *
  * Known limitations of the notation (there is no terminator after the hex digits):
  *  - Text that already contains something shaped like `U+1F600` is left as-is by the
  *    encoder but WILL be turned into a code point by the decoder.
  *  - A 4-digit token immediately followed by a hex-digit character (e.g. an encoded
  *    U+2764 followed by the letter `a`) is read by the decoder as one 5-digit token.
  */
object EmojiEncoding {
  import scala.util.matching.Regex

  // Inclusive (min, max) code point ranges that this codec treats as emoji.
  private val emojiRanges: List[(Int, Int)] = List(
    (0x1F300, 0x1FAF6), // 127744 to 129782
    (0x1F004, 0x1F251), // 126980 to 127569
    (0x00A9, 0x00AE),   // 169 to 174
    (0x200D, 0x3299)    // 8205 to 12953
  )

  /** Returns true when `char_code` lies inside any of the configured ranges (bounds inclusive). */
  def isEmojiCharCode(char_code: Int): Boolean =
    emojiRanges.exists { case (min, max) => min <= char_code && char_code <= max }

  /** Replaces each emoji code point in `s` with its `U+XXXX` token.
    *
    * Iterates by code point (not by `Char`) so supplementary characters stored as
    * surrogate pairs are handled as a single unit. All other code points are copied
    * through unchanged.
    *
    * @param s text that may contain emoji
    * @return `s` with every code point accepted by [[isEmojiCharCode]] rewritten as `U+` + hex
    */
  def encodeUnicodeFromEmoji(s: String): String =
    s.codePoints.toArray.map { codePoint =>
      // Pad to four digits: the decoder pattern requires at least four hex digits, so an
      // unpadded token such as "U+A9" (code point 169) could never be decoded again.
      if (isEmojiCharCode(codePoint)) f"U+$codePoint%04X"
      else codePointToString(codePoint)
    }.mkString

  /** Matches one encoded token; group 1 holds the 4 or 5 hex digits after `U+`. */
  val EmojiUnicode: Regex = "U\\+([0-9a-fA-F]{4,5})".r

  // Parses the hex digits captured by EmojiUnicode into a code point.
  private def codePointForHex(hexString: String): Int =
    Integer.parseInt(hexString, 16)

  // Builds a String holding exactly the one given code point.
  private def codePointToString(i: Int): String = new String(Array(i), 0, 1)

  // Turns the hex digits of one token into the character(s) for that code point.
  private def encodeEmoji(hexString: String): String =
    codePointToString(codePointForHex(hexString))

  /** Replaces every `U+XXXX` / `U+XXXXX` token in `base` with the code point it names.
    *
    * Every token matching [[EmojiUnicode]] is decoded, whether or not its value lies in
    * one of the emoji ranges.
    *
    * @param base text containing zero or more `U+` tokens
    * @return `base` with each token replaced by its code point
    */
  def encodeEmojiFromUCodes(base: String): String =
    EmojiUnicode.replaceAllIn(
      base,
      // The replacer's result is interpreted as a replacement pattern in which `$` and `\`
      // are special; quoting keeps tokens such as U+0024 or U+005C from throwing.
      (m: Regex.Match) => Regex.quoteReplacement(encodeEmoji(m.group(1)))
    )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment