Created
January 15, 2015 21:29
-
-
Save djhocking/a9cf393edce6274ec899 to your computer and use it in GitHub Desktop.
R code to derive metrics for all catchments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get list of unique catchments with daymet data in our database | |
drv <- dbDriver("PostgreSQL") | |
con <- dbConnect(drv, dbname=...) | |
# con <- dbConnect(drv, dbname=...) | |
qry <- "SELECT DISTINCT featureid FROM daymet;" | |
result <- dbSendQuery(con, qry) | |
catchments <- fetch(result, n=-1) | |
catchments <- as.character(catchments$featureid) | |
# get daymet data for a subset of catchments | |
# Make the code below into a function then loop through all catchments to create derived metrics in chucks of 100-1000 | |
catchmentod = c("730076", "730077") | |
# retreiveDaymet <- function(catchmentid) { | |
# connect to database source | |
db <- src_postgres(dbname='conte_dev',...) | |
# db <- src_postgres(dbname='conte_dev',...) | |
n.catches <- length(catchmentid) | |
if(n.catches == 1) { | |
tbl_daymet <- tbl(db, 'daymet') %>% | |
dplyr::filter(featureid == catchmentid) %>% | |
dplyr::mutate(airTemp = (tmax + tmin)/2) %>% | |
} else { | |
tbl_daymet <- tbl(db, 'daymet') %>% | |
dplyr::filter(featureid %in% catchmentid) %>% | |
dplyr::mutate(airTemp = (tmax + tmin)/2) %>% | |
} | |
# df$site <- paste(df$agency_name, df$location_name, sep='_') | |
springFallBPs$site <- as.character(springFallBPs$site) | |
# temp hack | |
observedData$site <- as.character(observedData$site) | |
tempData <- left_join(climateData, select(covariateData, -Latitude, -Longitude), by=c('site')) | |
tempData <- left_join(tempData, dplyr::select(.data = observedData, agency, date, AgencyID, site, temp), by = c("site", "date")) | |
tempDataBP <- left_join(tempData, springFallBPs, by=c('site', 'year')) | |
# Clip to syncronized season | |
# temp hack - eventually need to adjust Kyle's code to substitute huc or other mean breakpoint in when NA | |
fullDataSync <- tempDataBP %>% | |
filter(dOY >= finalSpringBP & dOY <= finalFallBP | is.na(finalSpringBP) | is.na(finalFallBP)) %>% | |
filter(dOY >= mean(finalSpringBP, na.rm = T) & dOY <= mean(finalFallBP, na.rm = T)) | |
rm(covariateData) # save some memory | |
# Order by group and date | |
fullDataSync <- fullDataSync[order(fullDataSync$site,fullDataSync$year,fullDataSync$dOY),] | |
# For checking the order of fullDataSync | |
fullDataSync$count <- 1:length(fullDataSync$year) | |
fullDataSync <- fullDataSync[order(fullDataSync$count),] # just to make sure fullDataSync is ordered for the slide function | |
# airTemp | |
fullDataSync <- slide(fullDataSync, Var = "airTemp", GroupVar = "site", slideBy = -1, NewVar='airTempLagged1') | |
fullDataSync <- slide(fullDataSync, Var = "airTemp", GroupVar = "site", slideBy = -2, NewVar='airTempLagged2') | |
# prcp | |
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -1, NewVar='prcpLagged1') | |
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -2, NewVar='prcpLagged2') | |
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -3, NewVar='prcpLagged3') | |
fullDataSync <- fullDataSync[ , c("agency", "date", "AgencyID", "year", "site", "date", "finalSpringBP", "finalFallBP", "FEATUREID", "HUC4", "HUC8", "HUC12", "temp", "Latitude", "Longitude", "airTemp", "airTempLagged1", "airTempLagged2", "prcp", "prcpLagged1", "prcpLagged2", "prcpLagged3", "dOY", "Forest", "Herbacious", "Agriculture", "Developed", "TotDASqKM", "ReachElevationM", "ImpoundmentsAllSqKM", "HydrologicGroupAB", "SurficialCoarseC", "CONUSWetland", "ReachSlopePCNT", "srad", "dayl", "swe")] # | |
# Standardize and Add Covariates and Indices for Analysis | |
fullDataSyncS <- stdCovs(x = fullDataSync, y = tempDataSync, var.names = var.names) | |
fullDataSyncS <- addInteractions(fullDataSyncS) | |
fullDataSyncS <- indexDeployments(fullDataSyncS, regional = TRUE) | |
#firstObsRowsFull <- createFirstRows(fullDataSyncS) | |
#evalRowsFull <- createEvalRows(fullDataSyncS) | |
# Make predictions | |
fullDataSyncS <- mutate(fullDataSyncS, huc = HUC8) | |
fullDataSyncS <- predictTemp(data = fullDataSyncS, coef.list = coef.list, cov.list = cov.list) | |
# Add predictions back to original (unscaled) dataframe | |
fullDataSync <- left_join(fullDataSync, select(fullDataSyncS, site, date, tempPredicted), by = c("site", "date")) | |
###Derived metrics | |
derived.site.metrics <- deriveMetrics(fullDataSync) | |
write.table(derived.site.metrics, file = 'reports/MADEP/derived_site_metrics.csv', sep = ',', row.names = F) | |
# } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks. This looks like it should work for the question I asked. Unfortunately, I realized that I need this for every catchment in the daymet record rather than every catchment (site) in the observed locations table. I can just reverse the order of the
left_join
but then handle theNA
for site in the predict function (which it should already do). I just changed the order of theleft_join
and added a filter so I can do it in chunks.