Last active
August 29, 2015 14:18
-
-
Save eibanez/794021626bdbb7061ae3 to your computer and use it in GitHub Desktop.
Debug issue in https://github.com/NREL/rplexos/issues/29
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load contents of test.R first | |
library(RSQLite) | |
library(data.table) | |
library(dplyr) | |
test <- query_test(db, "interval", "Generator", "Generation") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
query_test <- function(db, time, col, prop, columns = "name", time.range = NULL, filter = NULL, phase = 4) { | |
# Open connection | |
thesql <- src_sqlite(db$filename) | |
# Query interval data | |
# Get the table names that store the data | |
if (identical(prop, "*")) { | |
t.name <- db$properties[[1]] | |
} else if(length(prop) == 1L) { | |
t.name <- db$properties[[1]] %>% filter(property == prop) | |
} else { | |
t.name <- db$properties[[1]] %>% filter(property %in% prop) | |
} | |
t.name <- t.name %>% | |
filter(collection == col, phase_id == phase, is_summary == 0) %>% | |
select(collection, property, table_name) %>% | |
mutate(table_name = gsub("data_interval_", "", table_name)) | |
# Get max/min time existing in the table to be queried | |
# In case time table has more time stamps than those in the dataset | |
time.limit <- t.name %>% | |
group_by(collection, property) %>% | |
do( | |
tbl(thesql, .$table_name) %>% | |
filter(phase_id == phase) %>% | |
summarize(time_from = min(time_from), time_to = max(time_to)) %>% | |
collect | |
) | |
min.time.data <- min(time.limit$time_from) | |
max.time.data <- max(time.limit$time_to) | |
message("min.time.data: ", min.time.data) | |
message("max.time.data: ", max.time.data) | |
# Collect time data | |
time.data <- tbl(thesql, "time") %>% | |
filter(phase_id == phase) %>% | |
filter(between(time, min.time.data, max.time.data)) %>% | |
rplexos:::filter_rplexos_time(time.range) %>% | |
select(time) %>% | |
collect | |
write.csv(time.data, "test-timedata.csv", row.names = FALSE) | |
# If time data is empty, return an empty data frame | |
if (nrow(time.data) == 0L) { | |
DBI::dbDisconnect(thesql$con) | |
return(data.frame()) | |
} | |
# Convert into R time-data format | |
time.data$time <- lubridate::ymd_hms(time.data$time) | |
# Get interval data | |
out <- t.name %>% | |
group_by(collection, property) %>% | |
do(tbl(thesql, .$table_name) %>% | |
filter(phase_id == phase) %>% | |
rplexos:::filter_rplexos(filter) %>% | |
rplexos:::filter_rplexos_time(time.range, modified = TRUE) %>% | |
select(-time_to) %>% | |
rename(time = time_from) %>% | |
rplexos:::select_rplexos(columns, add.key = TRUE) %>% | |
collect | |
) %>% | |
ungroup %>% | |
mutate(time = lubridate::ymd_hms(time)) | |
# Mask out data | |
out <- out %>% | |
mutate(name = name %>% factor %>% as.numeric, | |
value = value %>% factor %>% as.numeric) | |
write.csv(out, "test-out.csv", row.names = FALSE) | |
# Expand data | |
# This will be easier when dplyr supports rolling joins | |
out2 <- data.table(out, key = "key,time") | |
cj2 <- CJ(key = unique(out$key), time = time.data$time) | |
write.csv(cj2, "test-cj2.csv", row.names = FALSE) | |
message("key: \t", length(unique(out$key))) | |
message("time:\t", length(time.data$time)) | |
message("time2:\t", length(unique(time.data$time))) | |
message("out2:\t", nrow(out2)) | |
message("cj2: \t", nrow(cj2)) | |
out3 <- out2[cj2, roll = TRUE, allow.cartesian = TRUE] | |
out <- out3 %>% | |
as.data.frame(stringsAsFactors = FALSE) %>% | |
select(-key) | |
write.csv(out3, "test-out3.csv", row.names = FALSE) | |
message("out3:\t", nrow(out3)) | |
# Restore time zone | |
attributes(out$time) <- attributes(time.data$time) | |
# Disconnect database | |
DBI::dbDisconnect(thesql$con) | |
# Return value | |
return(out) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment