# fetch temperature data
# (tbl_series, tbl_variables, tbl_values, tbl_locations, tbl_agencies are assumed to be
#  dplyr references to the corresponding database tables, created elsewhere)
library(dplyr)
library(lubridate)  # for with_tz()

# note: the right-hand side is evaluated before tbl_values is reassigned, so the
# left_join(tbl_values, ...) below still refers to the original values table
tbl_values <- left_join(tbl_series,
                        select(tbl_variables, variable_id, variable_name),
                        by=c('variable_id'='variable_id')) %>%
  select(-file_id) %>%
  filter(location_id %in% df_locations$location_id,
         variable_name=="TEMP") %>%
  left_join(tbl_values,
            by=c('series_id'='series_id')) %>%
  left_join(select(tbl_locations, location_id, location_name, latitude, longitude, featureid=catchment_id),
            by=c('location_id'='location_id')) %>%
  left_join(tbl_agencies,
            by=c('agency_id'='agency_id')) %>%
  mutate(year = date_part('year', datetime))
df_values <- collect(tbl_values)
df_values <- df_values %>%
  mutate(datetime=with_tz(datetime, tzone='EST'))
summary(df_values)
# create climateData input dataset (too big without a pre-filter or smaller join)
# tried to do the year filter within the postgres tbl query but was getting errors with recognizing
climate <- tbl_daymet %>%
  mutate(year = date_part('year', date)) %>%
  filter(featureid %in% df_locations$featureid)
tbl_climate <- climate %>%
  filter(year %in% unique(df_values$year))  # distinct() doesn't work on numeric values
climateData <- collect(tbl_climate)
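As a quick sanity check, you could confirm that the collected table only covers the target catchments and years (a sketch, assuming the featureid and year columns created above survive collect()):
# sanity check (sketch): climateData should only contain the pre-filtered catchments and years
stopifnot(all(climateData$featureid %in% df_locations$featureid))
stopifnot(all(climateData$year %in% unique(df_values$year)))
dim(climateData)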
Ok I think this will work for getting the daily daymet values associated with only the catchments and years that have data. The first part (WITH loc_year AS (...)) is called a Common Table Expression (CTE) and basically creates a temporary table named loc_year that can then be used in the join with the daymet table. Note this table is not saved in the database, it is only used by the query when it is run.
The loc_year CTE will be a table containing the unique combinations of featureid, year, variable, and number of values (n). But it is also filtered to only include water temperature (so the variable column is kind of moot). The query below also filters for only two featureids ('740020', '746011') just for testing. You can remove the last line of the WHERE clause (AND l.catchment_id IN ('740020', '746011')) to retrieve all the featureids with data.
WITH loc_year AS (
SELECT l.catchment_id AS featureid,
DATE_PART('year', v.datetime) AS year,
var.name as variable, count(v.value) AS N
FROM series s, values v, locations l, variables var
WHERE s.id = v.series_id
AND s.location_id=l.id
AND s.variable_id=var.id
AND var.name='TEMP'
AND l.catchment_id IN ('740020', '746011')
GROUP BY var.name, featureid, year
)
SELECT date_part('year', d.date) as year, d.featureid, ly.n as n_values,
d.date, d.tmax, d.tmin, d.prcp, d.dayl, d.srad, d.vp, d.swe
FROM daymet d
INNER JOIN loc_year ly
ON d.featureid=ly.featureid
AND date_part('year', d.date)=ly.year
ORDER BY d.featureid, d.date;
The multicolumn index (featureid, year) on daymet is still being created, which might take a few hours, but once it is ready this query should hopefully be at least somewhat faster.
Here's code to try running in R:
library(RPostgreSQL)
drv <- dbDriver("PostgreSQL")
# create connection
con <- dbConnect(drv, dbname="conte_dev", host="127.0.0.1", user="conte", password="conte")
# create sql query string
qry <- "WITH loc_year AS (
SELECT l.catchment_id AS featureid,
DATE_PART('year', v.datetime) AS year,
var.name as variable, count(v.value) AS N
FROM series s, values v, locations l, variables var
WHERE s.id = v.series_id
AND s.location_id=l.id
AND s.variable_id=var.id
AND var.name='TEMP'
AND l.catchment_id IN ('740020', '746011')
GROUP BY var.name, featureid, year
)
SELECT date_part('year', d.date) as year, d.featureid, ly.n as n_values,
d.date, d.tmax, d.tmin, d.prcp, d.dayl, d.srad, d.vp, d.swe
FROM daymet d
INNER JOIN loc_year ly
ON d.featureid=ly.featureid
AND date_part('year', d.date)=ly.year
ORDER BY d.featureid, d.date;"
# submit query
result <- dbSendQuery(con, qry)
# fetch results (n=-1 means return all rows, use n=5 to return just first 5 rows, for example)
df <- fetch(result, n=-1)
dbClearResult(result)  # release the result set once all rows have been fetched
# check that each featureid has one or more complete years of daily daymet values
table(df$year, df$featureid)
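# (optional sketch, not part of the original workflow) tally daymet rows per featureid and year;
# Daymet uses a 365-day calendar, so a complete year should have 365 rows
library(dplyr)
df %>%
  group_by(featureid, year) %>%
  summarise(n_days = n())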
# plot
library(ggplot2)
theme_set(theme_bw())
ggplot(df, aes(date, tmax)) +
geom_line() +
facet_wrap(~featureid)
FYI, I'm building a new index on the daymet table that is based on both featureid and date_part('year', daymet.date). This should drastically speed up these queries since most daymet queries will involve filtering by both featureid and year. Cross your fingers...
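For reference, the statement behind that index is presumably something along these lines (a sketch; the index name is made up and the actual command being run may differ):
CREATE INDEX daymet_featureid_year_idx
  ON daymet (featureid, date_part('year', date));
Because the query joins on the same date_part('year', d.date) expression, the planner should be able to use this expression index for the year condition as well as the featureid match.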