Skip to content

Instantly share code, notes, and snippets.

@wush978
Last active December 17, 2015 14:39
Show Gist options
  • Save wush978/5626223 to your computer and use it in GitHub Desktop.
Save wush978/5626223 to your computer and use it in GitHub Desktop.
change tb.name to query for generalization
batch_fetch.PostgreSQLConnection <- function(db, query, n, m) {
require(data.table)
start.index <- 0
pb <- txtProgressBar(max = n)
dbSendQuery(db, "START TRANSACTION READ ONLY")
dbSendQuery(db, sprintf('DECLARE res CURSOR FOR %s', query))
res <- dbSendQuery(db, sprintf("fetch %d from res", m))
tb.part <- fetch(res, m)
tb.class <- sapply(tb.part, class)
tb <- list()
for(col.name in names(tb.class)) {
tb[[col.name]] <- vector(tb.class[col.name], n)
}
tb <- as.data.table(tb);gc()
while(nrow(tb.part) > 0) {
for(col.name in names(tb.class)) {
tb[seq(start.index + 1, start.index + nrow(tb.part)), eval(col.name) := tb.part[[col.name]]]
}
start.index <- start.index + nrow(tb.part)
res <- dbSendQuery(db, sprintf("fetch %d from res", m))
tb.part <- fetch(res, m)
setTxtProgressBar(pb, start.index)
}
close(pb)
tb
}
#'@param db DBI connection
#'@param sql character for selection
#'@param n integer total row number
#'@param m integer a tuning parameter: large m will run faster but consume more memory
batch_fetch <- function(db, sql, n, m) {
require(data.table)
res <- dbSendQuery(db, sql)
retval.part <- fetch(res, m)
#-- create retval
retval.class <- sapply(retval.part, class)
retval <- list()
for(name in names(retval.class)) {
retval[[name]] <- vector(retval.class[name], n)
}
retval <- as.data.table(retval)
pb <- txtProgressBar(max = n)
#-- insert
while(nrow(retval.part) > 0) {
index <- seq(from = getTxtProgressBar(pb) + 1, by = 1, length = nrow(retval.part))
for(name in names(retval.class)) {
retval[index, eval(name) := retval.part[[name]]]
}
setTxtProgressBar(pb, index[length(index)])
retval.part <- fetch(res, m)
}
retval
}
#'@description For RSQLite
#'
#'@param db DBI connection
#'@param sql character for insertion
#'@param n integer total row number
#'@param m integer a tuning parameter: large m will run faster but consume more memory
#'@param data bind data
batch_query <- function(db, sql, n, m, data) {
pb <- txtProgressBar(max = n, style=3)
index.start <- getTxtProgressBar(pb) + 1
index.end <- index.start + m - 1
while(index.start <= n) {
if (index.end > n) {
index.end <- n
}
# print(system.time({
dbBeginTransaction(db)
dbSendPreparedQuery(db, sql, bind.data=data[index.start:index.end,])
dbCommit(db)
# }))
setTxtProgressBar(pb, index.end)
index.start <- getTxtProgressBar(pb) + 1
index.end <- index.start + m - 1
}
close(pb)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment