Created
June 11, 2018 19:58
-
-
Save AdamSpannbauer/a3097414d013e6cb0a6b920a9490b33a to your computer and use it in GitHub Desktop.
an R6 class for chunking large sql queries (written for use with SQL Server). this is a translation of https://gist.github.com/AdamSpannbauer/b04c1f6243ce07a5d2e0c9eb78502a55
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(R6) | |
QueryChunker = R6Class('QueryChunker', | |
public = list( | |
query = NULL, | |
chunk_query = NULL, | |
connection = NULL, | |
post_process_func = NULL, | |
chunk_size = 1000L, | |
offset_size = 0, | |
initialize = function(query, | |
connection, | |
post_process_func=NULL, | |
chunk_size = 1000L) { | |
self$query = query | |
self$connection = connection | |
self$post_process_func = post_process_func | |
self$chunk_size = chunk_size | |
# append chunking sql bits to users query | |
self$chunk_query = paste0(query, '\n', | |
'OFFSET {self$offset_size} ROWS\n', | |
'FETCH NEXT {self$chunk_size} ROWS ONLY') | |
}, | |
fetch_chunk = function(debug_print=FALSE) { | |
# fill in offset and chunk size | |
query_i = glue::glue(self$chunk_query) | |
# print generated query for debug purposes | |
if (debug_print) cat(glue::glue('\n\r\n\rQUERY:', | |
'\n-------------\n\r', | |
'{query_i}', | |
'\n-------------\n\r\n\r')) | |
# exec query and store results | |
query_result = DBI::dbGetQuery(conn = self$connection, | |
statement = query_i) | |
# apply post processing func if provided | |
if (is.function(self$post_process_func)) { | |
query_result = self$post_process_func(query_result) | |
} | |
# increment chunk counter | |
self$offset_size = self$offset_size + self$chunk_size | |
return(query_result) | |
} | |
)) | |
# define dbi connection | |
connection_string = "my connection string" | |
con = DBI::dbConnect(drv = odbc::odbc(), | |
.connection_string = connection_string) | |
query = " | |
SELECT * | |
FROM my_table | |
ORDER BY my_column" | |
# init a query chunker | |
query_chunker = QueryChunker$new(query, | |
connection = con, | |
chunk_size = 100L) | |
query_chunker$fetch_chunk() #fetch rows 1-100 | |
query_chunker$fetch_chunk() #fetch rows 101-200 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment