Created
March 9, 2017 22:02
-
-
Save Black-Milk/d1354a0dd2c5e518bfd76a7d968dbf68 to your computer and use it in GitHub Desktop.
Emoticon Lookup Table in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(magrittr) | |
library(dplyr) | |
# Page that hosts the emoji reference tables.
url <- "http://apps.timwhitlock.info/emoji/tables/unicode"

# CSS selectors for the tables to scrape. Every selector has the same shape and
# differs only in the nth-child position of the table, so the list is built
# from one template plus the positions (order matters: it is the bind order).
table_css_selectors <- as.list(sprintf(
  "body > div.container > div > div > table:nth-child(%d)",
  c(
    7L,  # emoticons
    17L, # additional emoticons
    9L,  # dingbats
    11L, # transport and map symbols
    13L, # enclosed characters
    15L, # uncategorized
    19L, # additional transport and map symbols
    21L  # other additional symbols
  )
))
#' Scrape a single HTML table from a web page.
#'
#' @param url Location of the page; handed to `read_html()`.
#' @param selector CSS selector identifying the table element to extract.
#' @return A tibble holding the table contents. Column names are passed
#'   through `make.names(unique = TRUE)`, so they are syntactically valid and
#'   unique (e.g. spaces and parentheses become dots).
create_table_from_selector <- function(url, selector) {
  table_node <- url %>%
    read_html() %>%
    html_node(selector)

  # html_node() yields an "xml_missing" object when nothing matches; fail here
  # with a message that names the selector instead of erroring inside
  # html_table().
  if (inherits(table_node, "xml_missing")) {
    stop(
      "No element matches selector '", selector, "' at ", url,
      call. = FALSE
    )
  }

  df <- table_node %>%
    html_table() %>%
    as.data.frame(stringsAsFactors = FALSE)

  names(df) <- make.names(names = names(df), unique = TRUE, allow_ = TRUE)

  # as_tibble() replaces the deprecated tbl_df().
  as_tibble(df)
}
#' Keep a subset of columns of a data frame.
#'
#' @param table_df Data frame (or tibble) to subset.
#' @param column_names Character vector naming the columns to keep.
#' @return `table_df` restricted to the columns named in `column_names`.
select_columns <- function(table_df, column_names) {
  select(table_df, one_of(column_names))
}
#' Replace all column names of a data frame.
#'
#' @param table_df Data frame (or tibble) whose columns are renamed.
#' @param new_column_names Character vector of replacement names, given in
#'   column order.
#' @return A copy of `table_df` carrying `new_column_names` as its names.
rename_columns <- function(table_df, new_column_names) {
  setNames(table_df, new_column_names)
}
# Build one cleaned lookup table per selector: scrape the table, keep the
# native glyph / UTF-8 bytes / description columns, and give them short names.
emoticon_df_list <- lapply(table_css_selectors, function(selector) {
  scraped <- create_table_from_selector(url, selector)
  trimmed <- select_columns(
    scraped,
    column_names = c("Native..1.", "Bytes..UTF.8.", "Description")
  )
  rename_columns(
    trimmed,
    new_column_names = c("Native", "Bytes", "Description")
  )
})

# Stack the per-table data frames into a single lookup table.
emoticon_df <- bind_rows(emoticon_df_list)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment