Skip to content

Instantly share code, notes, and snippets.

@arbelt
Created October 9, 2014 17:17
Show Gist options
  • Save arbelt/62d2c753adb0e61a5cc1 to your computer and use it in GitHub Desktop.
Save arbelt/62d2c753adb0e61a5cc1 to your computer and use it in GitHub Desktop.
## ----, include=FALSE-----------------------------------------------------
# Attach knitr (this chunk is flagged include=FALSE in its chunk header).
library(knitr)
## ------------------------------------------------------------------------
# Run the preprocessing script.
# NOTE(review): presumably this is what defines `courses` and attaches
# dplyr/magrittr (`%>%`, `filter`, `anti_join`), since none of the code
# visible here does -- confirm against pre_azw.R.
source("pre_azw.R")
## ------------------------------------------------------------------------
# Language names: the first column of a header-less CSV, read as character
# (stringsAsFactors = FALSE) and extracted as a plain vector with `[[`.
languages <- read.csv(
  "languages.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)[[1]]
## ------------------------------------------------------------------------
# Alternation over every entry of `languages`, wrapped in one capture group:
# "(lang1|lang2|...)". The entries are inserted verbatim, so any regex
# metacharacters they contain are interpreted as regex syntax.
lang_regex <- paste0("(", paste(languages, collapse = "|"), ")")

# A proficiency level, at least one whitespace character, then a language.
lang_level_regex <- paste0(
  "(Elementary|Intermediate|Advanced)\\s+",
  lang_regex
)

# TRUE where the course title contains any of the language names.
lang_in_title <- grepl(lang_regex, courses$course_title)

# TRUE where "<level> <language>" occurs in the title or in the sub-title.
lang_level_in_title <- with(
  courses,
  grepl(lang_level_regex, course_title) | grepl(lang_level_regex, sub_title)
)

# TRUE where the title starts with a word, a single space, a capital A-C
# optionally followed by lowercase letters, and then a literal period.
has_lang_code <- grepl(
  "^\\w+ [A-C][a-z]*\\.",
  courses$course_title,
  perl = TRUE
)

# TRUE where the description mentions "language course" (case-insensitive).
lang_in_desc <- grepl(
  "language course",
  courses$course_description,
  ignore.case = TRUE
)
## ------------------------------------------------------------------------
# Rows of `courses` flagged by at least one of the three signals: the
# description mentions "language course", the title matches the code
# pattern, or a "<level> <language>" phrase occurs in the title/sub-title.
matched <- courses %>%
  filter(lang_in_desc | has_lang_code | lang_level_in_title)

# Rows whose title names a language but which are not in `matched`.
# The join keys are spelled out (every column of `matched`, which has the same
# columns as `courses`); these are the keys the implicit natural join would
# pick, so the result is the same without the "Joining with `by = ...`" message.
unmatched <- courses %>%
  filter(lang_in_title) %>%
  anti_join(matched, by = names(matched))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment