Last active
December 3, 2016 20:27
-
-
Save MattSandy/9112e88b8a7f103233472f5e05d73415 to your computer and use it in GitHub Desktop.
Update and Append Multiple Files in R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
setwd("~/R/Merge Stuff") | |
install.packages("openxlsx") | |
library("openxlsx") | |
file <- list() | |
#base file is the original you are working from | |
#update file is the file with new information which updates base cells | |
#error file contains information in new columns which are appended | |
file$base <- read.xlsx("base.xlsx") | |
file$update <- read.xlsx("update.xlsx") | |
file$error <- read.xlsx("error.xlsx") | |
#set up empty columns for columns in update file which aren't in base file | |
for(column in names(file$update)[!names(file$update) %in% names(file$base)]) { | |
print(column) | |
file$base[,column] <- NA | |
} | |
#set up empty columns for columns in error file which aren't in base file | |
for(column in names(file$error)[!names(file$error) %in% names(file$base)]) { | |
print(column) | |
file$base[,column] <- NA | |
} | |
#loops through file$base looking for matches by FIRSTNAME and LASTNAME in both datasets | |
for(i in 1:nrow(file$base)) { | |
#look for rows in the update dataset which match current looped row in base dataset | |
match <- file$update[which(file$update$FIRSTNAME==file$base[i,"FIRSTNAME"] & file$update$LASTNAME==file$base[i,"LASTNAME"]),] | |
#more than one row returned in the update file | |
if(nrow(match)>1) { | |
print("Collision in file$update") | |
#print the row which caused collision | |
print(file$base[i,]) | |
} else if(nrow(match)==1) { | |
#update base dataset with column info from update dataset for current row | |
#uses all column names from update dataset | |
file$base[i,names(file$update)] <- match[1,] | |
} | |
#look for rows in the error dataset which match current looped row in base dataset | |
match <- file$error[which(file$error$FIRSTNAME==file$base[i,"FIRSTNAME"] & file$error$LASTNAME==file$base[i,"LASTNAME"]),] | |
#more than one row returned | |
if(nrow(match)>1) { | |
print("Collision in file$error") | |
#print the row which caused collision | |
print(file$base[i,]) | |
} else if(nrow(match)==1) { | |
#append column information from error dataset to base dataset for current row | |
#uses all column names from error dataset which aren't in the update dataset | |
file$base[i,names(file$error)[!names(file$error) %in% names(file$update)]] <- match[1,names(file$error)[!names(file$error) %in% names(file$update)]] | |
} | |
} | |
#export modified base file by creating excel worksheet inside a workbook | |
wb <- createWorkbook() | |
addWorksheet(wb, "Export") | |
writeData(wb, 1, file$base) | |
saveWorkbook(wb, file = "./export.xlsx", overwrite = TRUE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment