Last active
April 17, 2017 19:29
-
-
Save benmarwick/4983803 to your computer and use it in GitHub Desktop.
A collection of (mostly other people's) methods for reproducing Excel's VLOOKUP function in R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Methods for doing Excel's VLOOKUP with R | |
# sample data | |
x <- data.frame(id = c(1, 2, 3, 4), name = c('foo', 'bar', 'bob', 'joe')) | |
y <- data.frame(idblah = c(5, 2, 4, 3, 1), sex = c('m', 'f', 'f', 'm', 'm')) | |
z <- data.frame(id = c(1, 2, 3, 4, 5), sex = c('g', 'b', 'b', 'g', 'g')) | |
# function for find a single value | |
vlookup <- function(val, df, col){ | |
df[df[1] == val, col][1] } | |
vlookup(3, x, 2) # bob | |
# | |
lookv <- function(x,ltab,rcol=2) ltab[max(which(ltab[,1]<=x)),rcol] | |
lookv(3, y, 2) # f | |
# find values and merge to data frame | |
# based on https://stat.ethz.ch/pipermail/r-help/2008-March/157899.html | |
data.frame(sex=y$sex, name=x$name[match(y$idblah, x$id)]) | |
# using merge | |
merge(x, y, by.x = 'id', by.y='idblah') # If the matching column(s) have same name (say, 'foo'), then by='foo' is all that is needed. | |
merge(x, y, by.x = 'id', by.y='idblah', all = TRUE) # Keep all records. This is same as all.y = TRUE; all.x = TRUE gives you above results | |
# using sqldf, same process | |
library(sqldf) | |
sqldf("SELECT | |
xa.* | |
, ya.sex | |
FROM | |
x xa | |
INNER JOIN | |
y ya | |
ON xa.id = ya.idblah;") | |
# simplified | |
sqldf("SELECT x.name, y.sex | |
FROM x | |
INNER JOIN y | |
ON x.id = y.idblah") | |
# using plyr, the the join-by column must have the same name in both dfs | |
library(plyr) | |
join(x, z, by = "id") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment