Skip to content

Instantly share code, notes, and snippets.

@geotheory
Last active October 3, 2025 11:11
Show Gist options
  • Save geotheory/5af18b19681754a90323fda12807ee89 to your computer and use it in GitHub Desktop.
Save geotheory/5af18b19681754a90323fda12807ee89 to your computer and use it in GitHub Desktop.
emojis-dataframe.R
require(tidyverse)
require(emo)
# Build a tibble with one row per emoji line of the Unicode emoji-test file.
# Data lines are expected to look like:
#   <code points> ; <status> # <emoji> E<version> <description>[: <qualifier>]
emojis <- readLines(
  "http://www.unicode.org/Public/emoji/13.0/emoji-test.txt"
) %>%
  # keep only the lines in which emo detects an emoji
  .[ji_detect(.)] %>%
  # collapse runs of spaces so the literal separators below match
  str_replace_all("[ ]+", " ") %>%
  # turn the first '#' into ';' so the whole line splits on ' ; '
  str_replace("#", ";") %>%
  enframe(name = NULL) %>%
  # extra = "merge": keep any surplus text in the last column instead of
  # silently discarding it
  separate(value, c("code", "status", "desc"), sep = " ; ", extra = "merge") %>%
  mutate(
    # all emojis found in the remainder, pasted back into a single string
    unicode = ji_extract_all(desc) %>% map_chr(~ paste(.x, collapse = "")),
    # drop everything up to and including the first space (the emoji itself)
    desc = str_remove(desc, ".*? ")
  ) %>%
  # split only at the first space that follows a digit (end of the version)
  separate(
    desc, c("version", "desc"),
    sep = "(?<=[0-9]) ", extra = "merge"
  ) %>%
  # split only at the first ': '; rows without a qualifier get NA (fill = "right")
  separate(
    desc, c("desc", "desc_qual"),
    sep = ": ", extra = "merge", fill = "right"
  ) %>%
  mutate(
    # UTF-8 bytes rendered as <xx> escapes
    bytecode = iconv(unicode, from = "UTF-8", to = "ASCII", sub = "byte"),
    # for flags the qualifier is the useful label
    label = case_when(desc == "flag" ~ desc_qual, TRUE ~ desc),
    label = str_replace(label, "telephone receiver", "telephone")
  ) %>%
  select(unicode, label, desc, desc_qual, version, code, bytecode, status)
# Show a reproducible random sample of 15 rows.
set.seed(3)
emojis %>%
  sample_n(15) %>%
  print()
#> # A tibble: 15 x 8
#> unicode label desc desc_qual version code bytecode status
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 "\U0001… lobster lobster <NA> E11.0 1F99E <f0><9f><a6><9… fully-…
#> 2 "🇷🇺" Russia flag Russia E0.6 1F1F7 1… <f0><9f><87><b… fully-…
#> 3 "🙆🏼" person… person… medium-lig… E1.0 1F646 1… <f0><9f><99><8… fully-…
#> 4 "👩🏾‍🍳" woman … woman … medium-dar… E4.0 1F469 1… <f0><9f><91><a… fully-…
#> 5 "👩‍👩‍👧‍👦" family family woman, wom… E2.0 1F469 2… <f0><9f><91><a… fully-…
#> 6 "🍇" grapes grapes <NA> E0.6 1F347 <f0><9f><8d><8… fully-…
#> 7 "🚆" train train <NA> E1.0 1F686 <f0><9f><9a><8… fully-…
#> 8 "👨🏼‍🍳" man co… man co… medium-lig… E4.0 1F468 1… <f0><9f><91><a… fully-…
#> 9 "👶" baby baby <NA> E0.6 1F476 <f0><9f><91><b… fully-…
#> 10 "🙇🏻" woman … woman … light skin… E4.0 1F647 1… <f0><9f><99><8… minima…
#> 11 "🙏🏿" folded… folded… dark skin … E1.0 1F64F 1… <f0><9f><99><8… fully-…
#> 12 "🏋🏼" woman … woman … medium-lig… E4.0 1F3CB 1… <f0><9f><8f><8… minima…
#> 13 "🇵🇦" Panama flag Panama E2.0 1F1F5 1… <f0><9f><87><b… fully-…
#> 14 "🇩🇪" Germany flag Germany E0.6 1F1E9 1… <f0><9f><87><a… fully-…
#> 15 "🧑" pilot pilot <NA> E12.1 1F9D1 2… <f0><9f><a7><9… minima…
# Example usage: replace emojis in a string with their labels via textclean.
# textclean is optional, so only check that it is installed; the call below is
# namespace-qualified and does not need the package attached.
if (requireNamespace("textclean", quietly = TRUE)) {
  txt <- "Tomorrow I \U0002708\U000FE0F to \U0001f1e6\U0001f1fa for vacation."
  cat(txt)
  # the lookup table passed here has columns x (byte string) and y (label)
  textclean::replace_emoji(txt, select(emojis, x = bytecode, y = label))
}
#> Tomorrow I ✈️ to 🇦🇺 for vacation.
#> [1] "Tomorrow I airplane to Australia for vacation."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment