arthurgailes’s gists

arthurgailes / sum_wide.R

Created April 3, 2024 18:17

duckplyr: summarizing wide data

	# Load necessary library
	pacman::p_load(
	stringi, data.table, duckplyr, readr, dplyr, collapse, duckdb, dbplyr, bench,
	ggplot2
	)

	# Function to generate a dataframe chunk
	generate_data_chunk <- function(num_rows = 1000, num_cols = 100) {
	# Generate numeric columns
	numeric_cols <- replicate(n = num_cols/2, expr = runif(num_rows, 1, 10000), simplify = FALSE)

arthurgailes / duckplyr_read_csv.R

Created April 3, 2024 16:13

duckplyr CSV reading benchmark

	# Load necessary library
	pacman::p_load(
	stringi, data.table, duckplyr, readr, dplyr, collapse, duckdb, dbplyr, bench,
	ggplot2
	)

	# Function to generate a dataframe chunk
	generate_data_chunk <- function(num_rows = 1000, num_cols = 100) {
	# Generate numeric columns
	numeric_cols <- replicate(n = num_cols/2, expr = runif(num_rows, 1, 10000), simplify = FALSE)

arthurgailes / fix_census_2022_blocks.R

Created December 21, 2023 16:28

Fix Census 2022 blocks with tigris

	#' Fix broken Census Connecticut blocks post-2022
	#' See: https://github.com/walkerke/tigris/issues/178
	library(tigris)
	library(sf)
	library(dplyr)
	library(readr)
	library(testthat)

	# load official CT crosswalk
	# missing a few block ids, but tracts are a consistent 1:1 merge so use those

arthurgailes / r_package_downloads.R

Created August 14, 2023 12:57

Quickly show time series for R package downloads

	# quick R script for seeing the trends in downloads for a package
	library(cranlogs)
	library(ggplot2)
	library(collapse)
	library(lubridate)

	# Name of the CRAN package whose download history we want to inspect.
	package <- "shiny"
	# Get downloads for `package` over a fixed date range.
	# Pass the `package` variable, not the bare symbol `shiny`: no object named
	# `shiny` is defined here, so the original call failed when the argument
	# was evaluated.
	x <- cran_downloads(
	  packages = package,
	  from = "2015-06-01",
	  to = "2023-08-14"
	)
	# Preview the first rows of the result.
	head(x)

arthurgailes / collapse_sf.R

Created July 28, 2023 13:24

R: collapse and sf

	# list and unlist an sf object
	library(collapse)
	library(sf)
	library(testthat)

	nc <- st_read(system.file("shape/nc.shp", package="sf"))

	nc_list <- rsplit(nc, by = seq_len(nrow(nc)))

	nc_collapse <- unlist2d(nc_list, idcols = FALSE, recursive = FALSE) \|>

arthurgailes / postgres_helpers.R

Created June 7, 2023 13:02

R PostgreSQL Helpers

	# Get all indexes, then drop and add before and after writing to table
	# Speeds up writes
	get_index_table <- function(con, tablename, schemaname) {
	index_df <- dbGetQuery(con, paste0(
	"SELECT indexname, indexdef FROM pg_indexes
	WHERE tablename = '", tablename, "' AND schemaname = '", schemaname, "'"))

	index_df <- subset(index_df, !grepl("[pf]key", indexname))

	index_df$indexname <- paste0(schemaname, ".", index_df$indexname)

arthurgailes / furrr_maps.R

Last active May 14, 2024 16:20 — forked from walkerke/purrr_maps.R

comparing purrr to furrr

	# Install pacman if it cannot be loaded, then use it to load the packages
	# this script relies on.
	if(!require(pacman)) install.packages('pacman')
	pacman::p_load(tigris, tidycensus, furrr, purrr, tictoc, ggplot2)

	# Turn off the tigris cache so neither timed run benefits from files
	# downloaded by the other.
	options(tigris_use_cache = FALSE) # make things equal between runs

	# State names plus DC; each element is named after its own value.
	# NOTE(review): presumably so that results mapped over this vector come back
	# keyed by state name -- confirm against the mapping code further down.
	state_names <- c(state.name, "District of Columbia")
	names(state_names) <- state_names

	# purrr: start the timer for this run
	tictoc::tic()

arthurgailes / census_commute.R

Last active September 24, 2022 01:02

Census Commute Shares in TidyCensus

	# Create census commute share in Portland
	# Install pacman if it cannot be loaded, then use it to load the packages
	# this script relies on.
	if(!require(pacman)) install.packages('pacman')
	pacman::p_load(dplyr, tidycensus)

	# Census table ID; matches the `tid` of the source table linked here:
	# https://data.census.gov/cedsci/table?q=means%20transportation&g=1600000US4159000&tid=ACSDT1Y2021.B08006
	trans_table <- 'B08006'

	# Note: there is no data for 2020, and the 2005 data is corrupt (the
	# non-auto counts are off), so neither year is included below.
	years <- c(2006:2019, 2021)

arthurgailes / .block

Created May 1, 2020 22:37

dc.js interactive example

license: mit

arthurgailes / .block

Last active May 1, 2020 22:37

dc.js interactive example

license: mit

Arthur Gailes arthurgailes