Skip to content

Instantly share code, notes, and snippets.

@Black-Milk
Created March 9, 2017 22:02
Show Gist options
  • Save Black-Milk/d1354a0dd2c5e518bfd76a7d968dbf68 to your computer and use it in GitHub Desktop.
Save Black-Milk/d1354a0dd2c5e518bfd76a7d968dbf68 to your computer and use it in GitHub Desktop.
Emoticon Lookup Table in R
library(rvest)
library(magrittr)
library(dplyr)
# Page that hosts the emoji reference tables.
url <- "http://apps.timwhitlock.info/emoji/tables/unicode"

# CSS selectors for the tables to scrape, one per table. Every table sits at
# the same place in the page and differs only in its nth-child position, so
# the selectors are generated from those positions. The result is an unnamed
# list of selector strings, in the order given below.
table_css_selectors <- lapply(
  c(
    7L,  # emoticons
    17L, # additional emoticons
    9L,  # dingbats
    11L, # transport and map symbols
    13L, # enclosed characters
    15L, # uncategorized
    19L, # additional transport and map symbols
    21L  # other additional symbols
  ),
  function(child_index) {
    sprintf(
      "body > div.container > div > div > table:nth-child(%d)",
      child_index
    )
  }
)
# Scrape a single HTML table from a web page and return it as a tibble.
#
# Args:
#   url:      Location of the page; handed unchanged to read_html(). The page
#             is read again on every call.
#   selector: CSS selector identifying the table node to extract.
#
# Returns:
#   A tibble with the table contents. Column names are run through
#   make.names(unique = TRUE), e.g. "Bytes (UTF-8)" becomes "Bytes..UTF.8.".
#
# Raises:
#   An error naming `selector` when nothing on the page matches it.
create_table_from_selector <- function(url, selector) {
  table_node <- url %>%
    read_html() %>%
    html_node(selector)

  # html_node() yields an "xml_missing" placeholder instead of failing when
  # the selector matches nothing; stop here with a message that says which
  # selector was at fault rather than letting html_table() fail obscurely.
  if (inherits(table_node, "xml_missing")) {
    stop(
      "No node matches CSS selector '", selector, "' at ", url,
      call. = FALSE
    )
  }

  df <- table_node %>%
    html_table() %>%
    as.data.frame(stringsAsFactors = FALSE)
  names(df) <- make.names(names = names(df), unique = TRUE, allow_ = TRUE)

  # as_tibble() is the replacement for the deprecated tbl_df().
  as_tibble(df)
}
# Keep only the named columns of a data frame.
#
# Args:
#   table_df:     Data frame (or tibble) to take columns from.
#   column_names: Character vector with the names of the columns to keep.
#
# Returns:
#   The result of dplyr::select() restricted to the columns that one_of()
#   matches against `column_names`.
select_columns <- function(table_df, column_names) {
  select(table_df, one_of(column_names))
}
# Replace all column names of a data frame.
#
# Args:
#   table_df:         Data frame (or tibble) whose columns are renamed.
#   new_column_names: Character vector of replacement names, one per column,
#                     in column order.
#
# Returns:
#   `table_df` with its names set to `new_column_names`. The caller's object
#   is not modified.
#
# Raises:
#   An error when the number of names differs from the number of columns.
rename_columns <- function(table_df, new_column_names) {
  # Assigning too few names to a base data frame silently pads the remainder
  # with NA column names, so reject a length mismatch up front.
  if (length(new_column_names) != length(table_df)) {
    stop(
      "Expected ", length(table_df), " column name(s) but got ",
      length(new_column_names), ".",
      call. = FALSE
    )
  }
  names(table_df) <- new_column_names
  table_df
}
# Build one tidy table per selector: scrape it, keep the three columns of
# interest, and give them short names. The page is downloaded once per
# selector because create_table_from_selector() reads `url` on every call.
emoticon_df_list <- lapply(table_css_selectors, function(selector) {
  scraped <- create_table_from_selector(url = url, selector = selector)
  trimmed <- select_columns(
    scraped,
    column_names = c("Native..1.", "Bytes..UTF.8.", "Description")
  )
  rename_columns(
    trimmed,
    new_column_names = c("Native", "Bytes", "Description")
  )
})

# Stack the per-table data frames into a single lookup table.
emoticon_df <- bind_rows(emoticon_df_list)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment