Last active
December 13, 2016 20:56
-
-
Save technickle/67c3cebb687a3b370d0ea3435012b941 to your computer and use it in GitHub Desktop.
Processes and visualizes compatible government service request data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Process and streamgraph-visualize a service request data file compatible with the Open311 GeoReport bulk specification.
# by @technickle.
# This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
# (see https://creativecommons.org/licenses/by-sa/4.0/ for more)
# note: you may have issues installing the streamgraph library.
# For help, see comments at https://gist.github.com/technickle/67c3cebb687a3b370d0ea3435012b941
library(readr)
library(dplyr)
library(streamgraph)

# Filename to import; configure your file/path here.
ImportFileName <- "YOUR_PATH_AND_FILENAME_HERE"
# ImportFileName <- "~/DevProjects/Open311-bulk-cruncher/Bloomington311-20161115.csv"
# ImportFileName <- "~/DevProjects/Open311-bulk-cruncher/Louisville311-20161114.csv"

# Set this to TRUE if you are loading data from Louisville. It is always
# defined (FALSE by default) so the check further down cannot fail with
# "object 'PatchLouisville' not found".
PatchLouisville <- FALSE

# Load the source data file, reading service_request_id as character.
Bulk311_Imported <- read_csv(
  ImportFileName,
  col_types = cols(service_request_id = col_character())
)

# Patch for Louisville: their dates aren't in ISO 8601, so read_csv doesn't
# convert them to date-times. Parse the column explicitly and assign the
# result back; mutate() (rather than transmute()) keeps every other column,
# including the service_name column the later steps select.
if (isTRUE(PatchLouisville)) {
  Bulk311_Imported <- Bulk311_Imported %>%
    mutate(
      requested_datetime = parse_datetime(
        requested_datetime,
        format = "%Y-%m-%d %H:%M:%S"
      )
    )
}
# Since this can be running on an arbitrary date, report on the 12 complete
# calendar months that precede the current month.
# range_end is the last day of the previous month.
range_end <- as.Date(format(Sys.Date(), "%Y-%m-01")) - 1
# range_start is the first day of the month one year before the current month.
# Subtracting a fixed 365 days from range_end instead can land on the last day
# of a 13th month, which would show up as a one-day sliver in the monthly
# aggregation.
range_start <- seq(range_end + 1, by = "-1 year", length.out = 2)[2]
# Build the working data set for the reporting window:
#   * keep only the requested_datetime and service_name columns;
#   * truncate requested_datetime so it only has dates;
#   * keep rows dated from range_start through range_end inclusive, which
#     also removes any "future" records;
#   * add year_month and day label columns, plus first_of_month, the date of
#     the first day of each request's month (used as the monthly bucket).
# first_of_month is derived from the numeric year and month so it does not
# depend on the locale's month names being parsed back into a date.
Bulk311_Last365Days <- Bulk311_Imported %>%
  select(requested_datetime, service_name) %>%
  mutate(requested_datetime = as.Date(requested_datetime)) %>%
  filter(requested_datetime >= range_start, requested_datetime <= range_end) %>%
  mutate(
    year_month = format(requested_datetime, "%Y-%B"),
    day = format(requested_datetime, "%d"),
    first_of_month = as.Date(format(requested_datetime, "%Y-%m-01"))
  )
# Count the requests per service_name over the whole window (most frequent
# first), then keep the 10 service names with the highest counts.
Bulk311_TopTenServiceNames <- top_n(
  count(Bulk311_Last365Days, service_name, sort = TRUE),
  10
)
# Restrict the windowed data to rows whose service_name is one of the
# top 10 most frequent services.
Bulk311_Last365Days_of_TopTenServiceNames <- filter(
  Bulk311_Last365Days,
  service_name %in% Bulk311_TopTenServiceNames[["service_name"]]
)
# Count the requests for every (service_name, first_of_month) pair, giving
# one row per service per month with the count in column n.
Bulk311_MonthAggregation_of_TopTenServices <- tally(
  group_by(
    Bulk311_Last365Days_of_TopTenServiceNames,
    service_name,
    first_of_month
  )
)
# Generate the graph: one stream per service_name, sized by the monthly
# count n and positioned by first_of_month. The x-axis ticks use the "%b"
# format; the final step suppresses the Y-axis labels.
Bulk311_MonthAggregation_of_TopTenServices %>%
  streamgraph(
    key = "service_name",
    value = "n",
    date = "first_of_month",
    offset = "zero"
  ) %>%
  sg_axis_x(tick_format = "%b") %>%
  sg_axis_y(tick_count = 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Had an issue where the streamgraph package wasn't installing. The following devtools workaround seemed to fix it.
#Streamgraph doesn't always install correctly. So you have to do a devtools dance to get it to work.
library(devtools)
#Was having an issue installing streamgraph because the zoo package was not installed. Installing the zoo package via package manager resolved that issue, and then the following devtools function worked fine.
devtools::install_github("hrbrmstr/streamgraph")
#load remaining packages
library(readr)
library(dplyr)
library(streamgraph)
library(RSocrata)