Last active
January 29, 2018 19:18
-
-
Save cbare/8370e56ebe03f1f02a779845c4855346 to your computer and use it in GitHub Desktop.
Travels in the Tidyverse: R code to count reviews by external physicians
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# compile stats on external clinical reviews | |
library('RPostgreSQL') | |
library('dplyr') | |
library('lubridate') | |
library('tidyr') | |
# Get external reviews with UTC timestamps from the DB | |
# \copy (select distinct o.observation_id, o.collection_date, o.vendor_id, v.name, reviewed_at_utc, reviewed_by from ops.review r join analysis.observation o on r.observation_id=o.observation_id join analysis.vendor v on o.vendor_id=v.vendor_id where reviewed_at_utc >= '2017-05-01' order by reviewed_at_utc) TO '/Users/cbare/Desktop/external-reviews.tsv' WITH NULL AS ''; | |
# get a table of addresses from user-management | |
env.vars <- Sys.getenv(c('DB_USER', 'DB_PW', 'DB_HOST')) | |
con <- dbConnect(PostgreSQL(), | |
user=env.vars['DB_USER'], | |
password=env.vars['DB_PW'], | |
host=env.vars['DB_HOST'], | |
dbname="user-management") | |
q <- paste( | |
"select u.public_client_id, a.city, a.region, a.postal_code, a.country", | |
"from users.user u", | |
"join users.address a on u.shipping_address_id=a.address_id", | |
"where u.public_client_id not like '99%'", | |
"order by public_client_id;") | |
rs <- dbSendQuery(con, q) | |
addresses <- fetch(rs, n=-1) | |
dbDisconnect(con) | |
# get reviews | |
con <- dbConnect(PostgreSQL(), | |
user=env.vars['DB_USER'], | |
password=env.vars['DB_PW'], | |
host=env.vars['DB_HOST'], | |
dbname="arivale") | |
q <- paste( | |
"select distinct o.observation_id, o.collection_date, c.public_client_id, o.vendor_id, v.name, reviewed_at_utc, reviewed_by", | |
"from ops.review r", | |
"join analysis.observation o on r.observation_id=o.observation_id", | |
"join analysis.client c on o.client_id=c.client_id", | |
"join analysis.vendor v on o.vendor_id=v.vendor_id", | |
"where reviewed_at_utc >= '2017-05-01'", | |
"order by reviewed_at_utc") | |
rs <- dbSendQuery(con, q) | |
df <- fetch(rs, n=-1) | |
dbDisconnect(con) | |
# merge reviews with address info yielding a data.frame with addresses | |
dfwa <- left_join(df, addresses, by='public_client_id') | |
# convert timestamps to pacific time | |
dfwa <- dfwa %>% mutate( reviewed_at_pacific_time=with_tz(as_datetime(reviewed_at_utc), 'America/Los_angeles') ) | |
# trim rows prior to may 1st pacific time | |
dfwa <- dfwa[ dfwa$reviewed_at_pacific_time >= as_datetime('2017-05-01 00:00:00 -0800'), ] | |
# we going to group by date, week or month | |
dfwa <- dfwa %>% mutate( pacific_date=as_date(reviewed_at_pacific_time) ) | |
dfwa <- dfwa %>% mutate( pacific_week=week(pacific_date)+52*(year(pacific_date)-2017) ) | |
dfwa <- dfwa %>% mutate( pacific_month=month(pacific_date)+12*(year(pacific_date)-2017) ) | |
# counts will count reviews performed by each physician on each date for each vendor | |
counts <- dfwa %>% group_by( name, pacific_date, reviewed_by, region ) %>% summarise( count=n() ) %>% spread(name, count, fill=0L) %>% arrange(reviewed_by, region, pacific_date) | |
counts <- dfwa %>% group_by( name, pacific_week, reviewed_by, region ) %>% summarise( count=n() ) %>% spread(name, count, fill=0L) %>% arrange(reviewed_by, pacific_week, region) | |
write.table(counts, '/Users/cbare/Desktop/external-review-counts-by-week.tsv', sep='\t', row.names=F ) | |
counts <- dfwa %>% group_by( name, pacific_month, reviewed_by, region ) %>% summarise( count=n() ) %>% spread(name, count, fill=0L) %>% arrange(reviewed_by, pacific_month, region) | |
write.table(counts, '/Users/cbare/Desktop/external-review-counts-by-month.tsv', sep='\t', row.names=F ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment