Created
September 20, 2021 23:13
-
-
Save ChristopherDavenport/e9dd40b81005f000aeef2e0557524721 to your computer and use it in GitHub Desktop.
Encode Emoji with Custom Embedding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cats.syntax.all._ | |
/**
 * Converts between emoji characters and a plain-ASCII `U+XXXX` notation.
 *
 * `encodeUnicodeFromEmoji` replaces every code point that `isEmojiCharCode`
 * accepts with `U+` followed by its upper-case hexadecimal value, and
 * `encodeEmojiFromUCodes` performs the reverse substitution.
 */
object EmojiEncoding {
  import scala.util.matching.Regex

  /**
   * Inclusive code-point ranges that are treated as "emoji".
   *
   * The ranges are coarse: the last one, for example, also contains
   * non-emoji symbols such as U+2013 (EN DASH).
   */
  private val emojiRanges: List[(Int, Int)] = List(
    (127744, 129782), // U+1F300 .. U+1FAF6
    (126980, 127569), // U+1F004 .. U+1F251
    (169, 174),       // U+00A9  .. U+00AE
    (8205, 12953)     // U+200D  .. U+3299
  )

  /**
   * Tells whether a code point falls inside one of the emoji ranges.
   *
   * @param char_code the Unicode code point to test
   * @return `true` if the code point lies in any of the configured ranges
   */
  def isEmojiCharCode(char_code: Int): Boolean =
    emojiRanges.exists { case (low, high) => low <= char_code && char_code <= high }

  /**
   * Replaces each emoji code point in `s` with its `U+XXXX` notation and
   * leaves every other code point untouched.
   *
   * The hexadecimal value is zero-padded to at least four digits so that the
   * output always matches [[EmojiUnicode]] and can be decoded again by
   * [[encodeEmojiFromUCodes]] (e.g. code point 169 becomes `U+00A9`, not `U+A9`).
   *
   * @param s the text to encode
   * @return the text with emoji code points written as `U+XXXX`
   */
  def encodeUnicodeFromEmoji(s: String): String =
    s.codePoints.toArray.map { codePoint =>
      if (isEmojiCharCode(codePoint)) "U+%04X".format(codePoint)
      else codePointToString(codePoint)
    }.mkString

  /**
   * Matches `U+` followed by four or five hexadecimal digits; group 1 is the
   * hexadecimal value.
   *
   * Matching is greedy, so a four-digit code immediately followed by text
   * that starts with a hexadecimal digit is read as a five-digit code.
   */
  val EmojiUnicode: Regex = "U\\+([0-9a-fA-F]{4,5})".r

  /** Parses a hexadecimal string into the code point it denotes. */
  private def codePointForHex(hexString: String): Int =
    Integer.parseInt(hexString, 16)

  /** Builds the string consisting of the single given code point. */
  private def codePointToString(i: Int): String = new String(Array(i), 0, 1)

  /** Turns the hexadecimal value of a code point into the character itself. */
  private def encodeEmoji(hexString: String): String =
    codePointToString(codePointForHex(hexString))

  /**
   * Replaces every `U+XXXX` / `U+XXXXX` sequence in `base` with the character
   * for that code point.
   *
   * @param base text that may contain `U+` codes
   * @return the text with every matched code replaced by its character
   */
  def encodeEmojiFromUCodes(base: String): String =
    EmojiUnicode.replaceAllIn(
      base,
      // The replacement must be quoted: replaceAllIn gives `$` and `\` a special
      // meaning, so decoding e.g. U+0024 or U+005C would otherwise throw.
      (m: Regex.Match) => Regex.quoteReplacement(encodeEmoji(m.group(1)))
    )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment