Created
August 15, 2014 21:49
-
-
Save Ironholds/ff3f18b628cf0a0fd1aa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Flag rows of a data frame whose column value matches a regex
#'
#' Runs `grepl()` over one column and stores the logical result in a new
#' `matched` column.
#'
#' @param data_frame A data frame.
#' @param column A single column name or index identifying the column to
#'   search.
#' @param regex The pattern handed to `grepl()`.
#' @param ... Further arguments forwarded unchanged to `grepl()` (for example
#'   `perl`, `ignore.case`, `fixed`, `useBytes`).
#' @return `data_frame` with an added (or overwritten) logical column
#'   `matched`.
foo <- function(data_frame, column, regex, ...) {
  stopifnot(is.data.frame(data_frame), length(column) == 1L)
  if (is.character(column) && !column %in% names(data_frame)) {
    stop("Column '", column, "' not found in data_frame", call. = FALSE)
  }

  # `[[` always extracts the column as a vector. `[, column]` only does so for
  # base data frames (drop = TRUE); other data-frame classes may hand grepl()
  # a one-column table instead.
  data_frame$matched <- grepl(pattern = regex, x = data_frame[[column]], ...)

  data_frame
}
# NOTE(review): `x` is not defined in this snippet; presumably a data frame
# with a "vegetables" column -- confirm against the caller.

# Call. Works like a charm!
foo(data_frame = x, column = "vegetables", regex = "turnips")

# Hmm. Except you need PCRE for certain types of regex. "perl" is a parameter
# in grepl.
foo(data_frame = x, column = "vegetables", regex = "turnip(s)?", perl = TRUE)

# Some people are assholes. Those people submit data. Asshole-submitted data
# does not respect CAPS LOCK.
foo(
  data_frame = x, column = "vegetables", regex = "turnip(s)?",
  perl = TRUE, ignore.case = TRUE
)

# And some people might find it funny to literally insert "turnip(s)?" to try
# and blow something up in the system. fixed = TRUE matches the pattern as a
# literal string; grepl() ignores perl = TRUE and ignore.case = TRUE (with a
# warning) when fixed = TRUE, so they are not passed here.
foo(data_frame = x, column = "vegetables", regex = "turnip(s)?", fixed = TRUE)

# And matching can be slow, so we probably want it to match byte-by-byte and
# ignore multi-byte characters.
foo(
  data_frame = x, column = "vegetables", regex = "turnip(s)?",
  fixed = TRUE, useBytes = TRUE
)

# "..." allows me to pass whatever arguments I need for a particular regex or
# a particular object through. The alternative would be defining the function
# as...
#' Flag rows of a data frame whose column value matches a regex
#'
#' Variant that spells out every `grepl()` option as its own argument instead
#' of forwarding them through `...`.
#'
#' @param data_frame A data frame.
#' @param column A single column name or index identifying the column to
#'   search.
#' @param regex The pattern handed to `grepl()`.
#' @param perl,ignore.case,fixed,useBytes Passed to the `grepl()` arguments of
#'   the same name; all default to `FALSE`.
#' @return `data_frame` with an added (or overwritten) logical column
#'   `matched`.
foo <- function(data_frame, column, regex, perl = FALSE, ignore.case = FALSE,
                fixed = FALSE, useBytes = FALSE) {
  stopifnot(is.data.frame(data_frame), length(column) == 1L)
  if (is.character(column) && !column %in% names(data_frame)) {
    stop("Column '", column, "' not found in data_frame", call. = FALSE)
  }

  # `[[` always extracts the column as a vector, whereas `[, column]` depends
  # on the data-frame class dropping to a vector.
  data_frame$matched <- grepl(
    pattern = regex,
    x = data_frame[[column]],
    perl = perl,
    ignore.case = ignore.case,
    fixed = fixed,
    useBytes = useBytes
  )

  data_frame
}
# ...and that's just ugly.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment