Skip to content

Instantly share code, notes, and snippets.

@Xachriel
Created March 24, 2013 08:24
Show Gist options
  • Save Xachriel/5231033 to your computer and use it in GitHub Desktop.
Save Xachriel/5231033 to your computer and use it in GitHub Desktop.
Parallel fast.merging
# "mc" stands for multicore. The snow package is used because the multicore package is not available on Windows.
require(snow)
# Merge every data frame in `chunk` into a single one, left to right, using a
# full outer join (all = TRUE) without re-sorting the rows.
# Kept at top level on purpose: a closure created inside mc.fast.merging()
# would drag that call's frame (including the complete data.list) along when
# it is serialized and sent to the workers.
.mc.merge.chunk <- function(chunk) {
  Reduce(function(acc, df) merge(acc, df, all = TRUE, sort = FALSE), chunk)
}

#' Merge a list of data frames, optionally in parallel
#'
#' The list is cut into consecutive chunks of at most `nparts` elements. Each
#' chunk is merged into one data frame with
#' `merge(..., all = TRUE, sort = FALSE)`, and the procedure is repeated on
#' the list of partial results until a single data frame is left.
#'
#' @param data.list A non-empty list of objects that `merge()` can combine.
#' @param nparts Maximum number of list elements merged together in one
#'   chunk. Must be a single finite number >= 2; fractional values are
#'   rounded down. (A value of 1 could never shrink the list.)
#' @param cluster A cluster object accepted by `parLapply()`. It is only used
#'   in rounds that have more than one chunk. If `NULL`, all chunks are merged
#'   sequentially in the current process.
#' @return The result of merging all elements of `data.list`. A list of
#'   length one is returned as its single element, unchanged.
mc.fast.merging <- function(data.list, nparts, cluster = NULL) {
  if (!is.list(data.list)) stop("data.list isn't a list")
  if (length(data.list) == 0L) stop("data.list is empty")
  if (!is.numeric(nparts) || length(nparts) != 1L ||
      !is.finite(nparts) || nparts < 2) {
    stop("nparts must be a single finite number >= 2")
  }
  nparts <- floor(nparts)

  # Every round shrinks the list to ceiling(n / nparts) elements, so with
  # nparts >= 2 the loop is guaranteed to terminate.
  while (length(data.list) > 1L) {
    n <- length(data.list)
    starts <- seq(1, n, by = nparts)
    # Cut out the chunks up front so each worker only receives the data
    # frames it actually has to merge; the last chunk may be shorter.
    chunks <- lapply(starts, function(s) data.list[s:min(s + nparts - 1, n)])

    if (length(chunks) > 1L && !is.null(cluster)) {
      data.list <- parLapply(cluster, chunks, .mc.merge.chunk)
    } else {
      # A single chunk (or no cluster): not worth a round trip to workers.
      data.list <- lapply(chunks, .mc.merge.chunk)
    }
  }

  data.list[[1]]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment