Skip to content

Instantly share code, notes, and snippets.

@jmcurran
Last active August 26, 2016 03:40
Show Gist options
  • Save jmcurran/36734b7d62d4995e0b16ee4b2b7b4075 to your computer and use it in GitHub Desktop.
Save jmcurran/36734b7d62d4995e0b16ee4b2b7b4075 to your computer and use it in GitHub Desktop.
Script for converting imperial height measurements (of humans) into millimetres
raw clean ft in mm
"5 5'0 5 0 1524
5' 5'0 5 0 1524
5'5 5'5 5 5 1651
5'7 1/2 5'7 1/2 5 7.5 1714.5
5'7 5'7 5 7 1701.8
5'8.5 5'8.5 5 8.5 1739.9
5'612 5'6 1/2 5 6.5 1689.1
6'0 1/2 6'0 1/2 6 0.5 1841.5
5'4 1/2 ? 5'4 1/2 ? 5 4.5 1638.3
5'6.25 5'6.25 5 6.25 1682.75
4'0 4'0 4 0 1219.2
5-7.5 5'7.5 5 7.5 1714.5
5'91/2\ Lt" 5'9 1/2 5 9.5 1765.3
5'5-7 1/2 5'7 1/2 5 7.5 1714.5
5'7 1/2l 5'7 1/2 5 7.5 1714.5
5'9' 5'9 5 9 1752.6
5'109 5'10 5 10 1778
6'- 6'0 6 0 1828.8
5 ' 5'0 5 0 1524
5'8+ 5'8 5 8 1727.2
5'+ 5'0 5 0 1524
5' 5\ (2)" 5'5 5 5 1651
5'? 5'0 5 0 1524
5 ft. 6in. 5'6 5 6 1676.4
5ft' 7 in. 5'7 5 7 1701.8
5 ft. 8 in 5'8 5 8 1727.2
5 ft 4 in. 5'4 5 4 1625.6
5 ft. 4 in. 5'4' 5 4 1625.6
5'4'5 5'4.5 5 4.5 1638.3
5,' 0 5'0 5 0 1524
5'8 5'8 5 8 1727.2
5'5t 1/4 5'5 1/4 5 5.25 1657.35
5''11 1/2 5'11 1/2 5 11.5 1816.1
6 ' 11/2 6'1 1/2 6 1.5 1866.9
5-8.5 5'8.5 5 8.5 1739.9
6; 1/2 6'0 1/2 6 0.5 1841.5
#crew = read.csv("crewlist.csv", stringsAsFactors = FALSE)
#nrow(crew)
#names(crew)
library(stringr)
## Convert free-text imperial heights (e.g. "5'7 1/2", "5 ft. 6in.", "5'8.5")
## into millimetres.
##
## h: character vector of raw height strings.
##
## Returns a data.frame with one row per element of h:
##   input  - the string after the initial clean-up substitutions
##   output - the height in millimetres, or NA when the string does not match
##            the height pattern
im_your_father_luke = function(h){
  mmPerInch = 25.4

  ## clean out a bunch of initial guff
  ## strip double quotes and hyphens
  h = gsub("[\"\\-]", "", h)
  ## collapse strings consisting only of spaces to the empty string
  h = gsub("^ *$", "", h)
  ## a fraction glued straight onto the inches (e.g. 5'71/2) gets a separating
  ## space, and anything after the fraction is dropped
  h = gsub("^(5|6)'([1-9]|10|11)*([13]\\/[248]).*$", "\\1'\\2 \\3", h)
  ## drop a stray 5 between the feet mark and a following 7 or 9
  h = gsub("^5'5(7|9)(.*$)", "5'\\1\\2", h)

  ## This is the workhorse regexp
  pattern = "^ *([1-7]) *(ft[.']?|'{1,2}|,'?|;|\\.)? *([0-9]{1,2})? *(['t]|in\\.*)?([ ]+(1\\/2|([12])\\/3|(1|3)\\/4|([15])\\/6|([1-7])\\/8|1\\/12)|(\\.?[0-9]{1,3}))?[ +\\?`l (2)]*$"

  ## The pattern has 11 capture groups, so str_match produces a matrix with 12
  ## columns (column 1 is the whole match). Column 2 has the feet, column 4 the
  ## inches, column 7 the fraction (e.g. "1/2") if there is one and column 12
  ## the decimal (e.g. ".5") if there is one. Optional groups that did not
  ## take part in the match come back as NA.
  m = stringr::str_match(h, pattern)

  ## The fraction and the decimal are alternatives within the same group, so
  ## at most one of them is present for any given input.
  partial = m[, 7]
  useDecimal = is.na(partial)
  partial[useDecimal] = m[useDecimal, 12]

  ## helper function to evaluate "a/b" fractions and plain decimals without
  ## eval(parse()); NA in gives NA out
  toInches = function(x){
    vapply(strsplit(x, "/", fixed = TRUE), function(p){
      if(length(p) == 2){
        as.numeric(p[1]) / as.numeric(p[2])
      }else{
        as.numeric(p[1])
      }
    }, numeric(1))
  }

  ## convert the feet, inches and fraction into millimetres
  feetMM = as.numeric(m[, 2]) * 12 * mmPerInch
  inchMM = as.numeric(m[, 4]) * mmPerInch
  partialMM = toInches(partial) * mmPerInch

  ## Missing inches or a missing fraction contribute nothing. The feet are
  ## mandatory in the pattern, so feetMM is NA exactly when the input did not
  ## match, and that NA carries through to the total.
  inchMM[is.na(inchMM)] = 0
  partialMM[is.na(partialMM)] = 0

  return(data.frame(input = h, output = feetMM + inchMM + partialMM))
}
## Run the converter over the test cases and collect every row whose result
## is missing or differs from the expected mm value by more than 0.001.
DLMTestCases = read.csv("DLMTest.csv", stringsAsFactors = FALSE)
mine = im_your_father_luke(DLMTestCases$raw)
## An NA output makes the comparison NA, and indexing a data frame with NA
## yields an all-NA row rather than the failing test case, so count NA
## discrepancies as failures explicitly.
discrepancy = abs(DLMTestCases$mm - mine$output)
fails = DLMTestCases[is.na(discrepancy) | discrepancy > 0.001,]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment