Skip to content

Instantly share code, notes, and snippets.

View geotheory's full-sized avatar

Robin Edwards geotheory

View GitHub Profile
require(tidyverse)
# Ratio of the arithmetic mean of `x` to its median.
mm_ratio <- function(x) {
  mean(x) / median(x)
}
# For each of 200 sample sizes spaced evenly on a log10 scale from 1e2 to 1e6,
# draw one uniform, one exponential and one log-normal sample.
x <- map(
  10^seq(2, 6, length.out = 200),
  function(size) {
    list(unif = runif(size), exp = rexp(size), log = rlnorm(size))
  }
)
# One row per sample set: the realised sample length and the mean/median
# ratio of each of the three samples.
d <- map_df(x, function(smp) {
  tibble(
    n = length(smp$unif),
    unif = mm_ratio(smp$unif),
    exp = mm_ratio(smp$exp),
    log = mm_ratio(smp$log)
  )
})
require(robotstxt)
#> Loading required package: robotstxt
# Fetch the robots.txt content for bbc.com once and keep it in `rt`.
rt = robotstxt::get_robotstxt('bbc.com')
# Ask whether the given URL may be crawled.
# NOTE(review): `rt` (the fetched robots.txt content) is passed as
# `rt_robotstxt_http_getter`; confirm this is the intended argument, since
# the package also has a `robotstxt_list` argument for pre-fetched content.
paths_allowed('https://bbc.com/notexists', rt_robotstxt_http_getter = rt)
#> bbc.com
#>
#> [1] TRUE
@geotheory
geotheory / hex-words.txt
Last active October 3, 2025 11:07
hex-words.txt
#acad1a
#a55151
#be551e
#b0bb1e
#ca551e
#debb1e
#0de55a
#aba5ed
#acac1a
#accede
library(bit64)
require(tidyverse)
# Constants stored as 64-bit integers via bit64.
# presumably a custom epoch offset in milliseconds — TODO confirm
twepoch <- as.integer64('1288834974657')
# integer64 base for the powers below (assumes bit64's `^` keeps integer64)
base <- as.integer64(2)
# Bit widths of the individual id fields.
datacenter_id_bits <- 5
worker_id_bits <- 5
sequence_id_bits <- 12
# NOTE(review): these evaluate to 2^bits (32 for 5 bits), i.e. the number of
# distinct ids rather than the largest id (2^bits - 1) — confirm against usage.
max_datacenter_id <- 1 * base^datacenter_id_bits
max_worker_id <- 1 * base^worker_id_bits
require(R6)
# depends: {purrr}, {tibble}, {tidy} for graph conversion
Step = R6Class("Step", list(
id = NA,
stage = NA,
state = NA,
parent = NA,
children = list(),
open = TRUE,
c1 = as.matrix(readr::read_csv("comp,x1,x2,y1,y2
1,0,1,2,2
2,0,1,1,1
3,0,1,2,1
4,0,1,1,2
5,0,1,2,2
5,0,1,1,2
6,1,1,1,2
7,1,1,1,2
We can't make this file beautiful and searchable because it's too large.
"user_id","status_id","created_at","screen_name","text","source","display_text_width","reply_to_status_id","reply_to_user_id","reply_to_screen_name","is_quote","is_retweet","favorite_count","retweet_count","quote_count","reply_count","hashtags","symbols","urls_url","urls_t.co","urls_expanded_url","media_url","media_t.co","media_expanded_url","media_type","ext_media_url","ext_media_t.co","ext_media_expanded_url","ext_media_type","mentions_user_id","mentions_screen_name","lang","quoted_status_id","quoted_text","quoted_created_at","quoted_source","quoted_favorite_count","quoted_retweet_count","quoted_user_id","quoted_screen_name","quoted_name","quoted_followers_count","quoted_friends_count","quoted_statuses_count","quoted_location","quoted_description","quoted_verified","retweet_status_id","retweet_text","retweet_created_at","retweet_source","retweet_favorite_count","retweet_retweet_count","retweet_user_id","retweet_screen_name","retweet_name","retweet_followers_count","retweet_friends_count","retweet_statuses_c
@geotheory
geotheory / recursive_split
Created November 1, 2020 01:36 — forked from zachary-waller/recursive_split
Recursive Split: Split a dataframe into a nested list and reassemble back into a dataframe
library(purrr)
library(rrapply)
# Split a data frame into a nested list using a different column for each level.
# This uses rrapply::rrapply() to avoid having to do any nested loops (map, lapply,
# for loop, whatever).
# Nested lists can be useful for avoiding searching through your data: the data
# has already been indexed in the list. This can be pretty handy for saving time
# if you need to do lots of filtering stuff.
@geotheory
geotheory / postgresql-datetime-histogram.R
Last active October 3, 2025 11:09
postgresql-datetime-histogram.R
require(tidyverse)
#> Loading required package: tidyverse
require(DBI)
#> Loading required package: DBI
require(RPostgres)
#> Loading required package: RPostgres
# Open a DBI connection to the PostgreSQL database named "xxx".
con <- dbConnect(
  RPostgres::Postgres(),
  dbname = "xxx"
)
# toy dataset with numeric and datetime fields
@geotheory
geotheory / PostgreSQL-histogram.R
Last active October 28, 2020 23:42
Value binning and histogram implementation in PostgreSQL/SQL with R comparison
require(tidyverse)
require(DBI)
require(RPostgres)
# Open a DBI connection to the PostgreSQL database named "xxx".
con <- dbConnect(
  RPostgres::Postgres(),
  dbname = "xxx"
)
# Write the `diamonds` data frame to a table called "diamonds", without a
# row-names column and without appending to an existing table.
dbWriteTable(
  con,
  "diamonds",
  diamonds,
  row.names = FALSE,
  append = FALSE
)
sql_method = dbGetQuery(con, "
with price_stats as (
select min(price) as min,