git clone https://gist.github.com/cb33c735c7cf3f3cf8e8.git r-and-sql-demo
cd r-and-sql-demo
pwd
Open RStudio, set working directory to whatever pwd
reported, and open the demo.R
script to follow along.
# 1. Show how to run a shell command from inside R | |
# 2. Refresher on selecting things from a dataframe | |
# 3. Show how to use boolean indexing to extract parts of a dataframe | |
# Run sqlite3 using the survey.db database, send it the "test.sql" | |
# file containing commands to run, | |
# and redirect the output to "out.csv" | |
system("sqlite3 survey.db < test.sql > out.csv") | |
# Now we can read it in to R | |
df <- read.csv('out.csv') | |
# see what we're working with | |
head(df) | |
# first row of df | |
df[1,] | |
# first column of df | |
df[,1] | |
# "quant" column of df | |
df$quant | |
# a big list of TRUE and FALSE. It's exactly as long as df is, and it's | |
# TRUE wherever the quantity is salinity | |
df$quant == 'sal' | |
# We can use that to select things from the dataframe. This is | |
# equivalent to "select * from df where quant = 'sal'" | |
df[df$quant == 'sal',] | |
# same thing, but rad | |
df[df$quant == 'rad',] | |
# how to get all the salinity readings from DR-1? | |
df[(df$name == 'DR-1') & (df$quant == 'sal'), 'reading'] | |
-- turn on CSV output mode | |
.mode csv | |
-- print headers, so importing into R is easier | |
.headers on | |
-- query goes here...this particular query selects the lat, lon, date, site | |
-- name, quantity, and value for all sites that had a valid date. | |
-- | |
SELECT Site.lat, Site.long, Site.name, Visited.dated, Survey.quant, Survey.reading | |
FROM Site | |
JOIN Visited ON Site.name=Visited.site | |
JOIN Survey ON Visited.ident=Survey.taken | |
WHERE Visited.dated IS NOT NULL; |