Created
January 1, 2018 18:57
-
-
Save sellorm/b1458b1a6a32a58b62e57aecbde6d43f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# run unique baby names from the babynames package through the sorting hat
# used in http://blog.sellorm.com/2017/12/21/command-line-utilities-in-r-pt-4/
# to check the distribution of results
# house lookup ------------------------------------------------------------
# Maps each possible hex digit (the names "0"-"9", "a"-"f") to a house.
# The four houses repeat in a fixed cycle, so each one owns four digits.
houses <- c(
  "0" = "Hufflepuff",
  "1" = "Gryffindor",
  "2" = "Ravenclaw",
  "3" = "Slytherin",
  "4" = "Hufflepuff",
  "5" = "Gryffindor",
  "6" = "Ravenclaw",
  "7" = "Slytherin",
  "8" = "Hufflepuff",
  "9" = "Gryffindor",
  "a" = "Ravenclaw",
  "b" = "Slytherin",
  "c" = "Hufflepuff",
  "d" = "Gryffindor",
  "e" = "Ravenclaw",
  "f" = "Slytherin"
)
# get unique names --------------------------------------------------------
# De-duplicate the name column of the babynames dataset so each distinct
# name is sorted exactly once.
student_names <- unique(babynames::babynames$name)
# Get house name ----------------------------------------------------------
# Sort one student into a house.
#
# student_name: the name to sort; it is lower-cased before hashing, so the
#   result does not depend on capitalisation.
# Returns the entry of the file-level `houses` lookup whose name matches the
# first character of the SHA-1 hash of the lower-cased name. Because `[` is
# used, the returned element keeps that character as its name.
get_house <- function(student_name) {
  name_hash <- digest::sha1(tolower(student_name))
  # Only the first character of the hash is used as the lookup key.
  house_index <- substr(name_hash, 1, 1)
  houses[house_index]
}
# main --------------------------------------------------------------------
# Run every unique name through get_house(); lapply() yields one result per
# name.
house_results <- lapply(student_names, get_house)
# Wrap the results in a data frame so dplyr::count() can tally them.
housedf <- data.frame(cbind(house_results), stringsAsFactors = TRUE)
# Flatten the column with unlist() and count the names in each house.
dplyr::count(housedf, unlist(housedf$house_results))
Thank you very much for the informative survey of the command-line argument parser packages!
Re: this code, I think converting the hex digit to decimal and taking it mod 4 might be less tedious.
# run unique baby names from the babynames package through the sorting hat
# used in http://blog.sellorm.com/2017/12/21/command-line-utilities-in-r-pt-4/
# to check the distribution of results
# house lookup ------------------------------------------------------------
# The four houses, in the order get_house() indexes them (positions 1-4).
houses <- c(
  "Hufflepuff",
  "Gryffindor",
  "Ravenclaw",
  "Slytherin"
)
# get unique names --------------------------------------------------------
# Distinct names from the babynames dataset; each is sorted exactly once.
student_names <- unique(babynames::babynames[["name"]])
# Get house name ----------------------------------------------------------
get_house <- function(student_name) {
  # Hash the lower-cased name so capitalisation does not affect the result.
  digest_hex <- digest::sha1(tolower(student_name))
  # Read the first hash character as a base-16 digit.
  leading_digit <- strtoi(substring(digest_hex, 1, 1), base = 16)
  # Fold the digit onto positions 1-4 of the file-level `houses` vector.
  houses[leading_digit %% 4 + 1]
}
# main --------------------------------------------------------------------
# Run every unique name through get_house(); lapply() yields one result per
# name.
house_results <- lapply(student_names, get_house)
# Wrap the results in a data frame via cbind(); presumably this leaves a
# single list-column named house_results -- confirm with str(housedf).
housedf <- data.frame(cbind(house_results), stringsAsFactors = TRUE)
# Flatten the column with unlist() and count the names in each house. The
# expression text becomes the column label in the printed result below.
dplyr::count(housedf, unlist(housedf$house_results))
#> # A tibble: 4 x 2
#> `unlist(housedf$house_results)` n
#> <chr> <int>
#> 1 Gryffindor 23783
#> 2 Hufflepuff 23587
#> 3 Ravenclaw 23913
#> 4 Slytherin 23742
Created on 2018-04-15 by the reprex package (v0.2.0).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
My results look like this...