title | output | ||||
---|---|---|---|---|---|
Simple SQL and dplyr |
|
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts -------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::is_null() masks testthat::is_null()
## x dplyr::lag() masks stats::lag()
## x dplyr::matches() masks testthat::matches()
library(tidyhydat)
library(DBI)
conn <- hy_src()
Basic select a tbl
tbl(conn, "DLY_FLOWS") %>%
show_query()
## <SQL>
## SELECT *
## FROM `DLY_FLOWS`
Now select a table and filter
tbl(conn, "DLY_FLOWS") %>%
filter(STATION_NUMBER == "08MF005") %>%
show_query()
## <SQL>
## SELECT *
## FROM `DLY_FLOWS`
## WHERE (`STATION_NUMBER` = '08MF005')
Two filters
tbl(conn, "DLY_FLOWS") %>%
filter(STATION_NUMBER == "08MF005" & YEAR > 2013) %>%
show_query()
## <SQL>
## SELECT *
## FROM `DLY_FLOWS`
## WHERE (`STATION_NUMBER` = '08MF005' AND `YEAR` > 2013.0)
Let's create a group and take the mean of year
tbl(conn, "DLY_FLOWS") %>%
group_by(STATION_NUMBER) %>%
summarise(year_mean = mean(YEAR, na.rm = TRUE)) %>%
show_query()
## <SQL>
## SELECT `STATION_NUMBER`, AVG(`YEAR`) AS `year_mean`
## FROM `DLY_FLOWS`
## GROUP BY `STATION_NUMBER`
How selecting just two columns from a table
tbl(conn, "DLY_FLOWS") %>%
select(STATION_NUMBER, YEAR) %>%
show_query()
## <SQL>
## SELECT `STATION_NUMBER`, `YEAR`
## FROM `DLY_FLOWS`
Mutating a column
tbl(conn, "DLY_FLOWS") %>%
select(YEAR) %>%
mutate(Year_plus_one = YEAR + 1) %>%
show_query()
## <SQL>
## SELECT `YEAR`, `YEAR` + 1.0 AS `Year_plus_one`
## FROM (SELECT `YEAR`
## FROM `DLY_FLOWS`)
Let's try a join
tbl(conn, "STN_DATUM_CONVERSION") %>%
select(STATION_NUMBER, cf = CONVERSION_FACTOR) %>%
left_join(tbl(conn, "SED_SAMPLES") %>%
select(STATION_NUMBER, fl = FLOW),
by = c("STATION_NUMBER")) %>%
show_query()
## <SQL>
## SELECT `TBL_LEFT`.`STATION_NUMBER` AS `STATION_NUMBER`, `TBL_LEFT`.`cf` AS `cf`, `TBL_RIGHT`.`fl` AS `fl`
## FROM (SELECT `STATION_NUMBER`, `CONVERSION_FACTOR` AS `cf`
## FROM `STN_DATUM_CONVERSION`) AS `TBL_LEFT`
## LEFT JOIN (SELECT `STATION_NUMBER`, `FLOW` AS `fl`
## FROM `SED_SAMPLES`) AS `TBL_RIGHT`
## ON (`TBL_LEFT`.`STATION_NUMBER` = `TBL_RIGHT`.`STATION_NUMBER`)
hy_src_disconnect(conn)