Skip to content

Instantly share code, notes, and snippets.

View MichaelChirico's full-sized avatar

Michael Chirico MichaelChirico

View GitHub Profile
@MichaelChirico
MichaelChirico / ca_santa_clara_covid_map.R
Created February 25, 2021 04:07
Generate the travel radius around Santa Clara County, California, for COVID travel restrictions
library(data.table)
library(sp)
library(rgdal)
library(rgeos)
# Via CA GIS Data site
ca = readOGR("~/Downloads/CA_Counties", "CA_Counties_TIGER2016")
travel_zone = gBuffer(
ca[ca$NAME == "Santa Clara", ],
@MichaelChirico
MichaelChirico / r_squared_1.R
Last active February 25, 2021 00:27
Get an R^2 of 1
library(data.table)
# Demonstration: an OLS fit can be pushed to R^2 = 1 by adding one pure-noise
# regressor per observation.
# Assumes the CSV contains a `target` column (it is used as the response below).
DT = fread("~/Downloads/spotifyclass.csv")
# add new columns. they're great predictors!
# One standard-normal noise column per row of DT, named V1, V2, ...
# seq_len() (rather than 1:nrow(DT)) keeps the name vector empty -- and so the
# same length as the replicate() list -- when DT has zero rows.
DT[ , paste0("V", seq_len(nrow(DT))) := replicate(.N, rnorm(.N), simplify = FALSE)]
# Regress `target` on the columns whose names start with "V" only, and report
# the R^2 of that fit.
summary(lm(DT$target ~ ., data = DT[ , .SD, .SDcols = patterns("^V")]))$r.squared
@MichaelChirico
MichaelChirico / spelling_bee
Created September 23, 2020 03:28
Cheating on NYT SpellingBee
# List candidate words for a Spelling Bee puzzle from the system word list.
# caveat -- the word list doesn't have 100% overlap with the puzzle's dictionary
# CENTER is the letter every word must contain; LETTERS is the full set of
# letters a word may be built from (the center letter plus the others).
CENTER=u
LETTERS=${CENTER}cfinot
# First keep words containing the center letter, then keep only those made up
# entirely of allowed letters and at least 4 characters long.
# "$CENTER" is quoted so a changed value is never word-split or glob-expanded.
grep "$CENTER" /usr/share/dict/words | grep -E "^[$LETTERS]{4,}$"
@MichaelChirico
MichaelChirico / flop_mirror_gif.sh
Last active September 22, 2020 03:44
flop (horizontal flip) + mirror a gif
#!/bin/sh
# Flop (horizontally flip) a gif and mirror it.
# built on ImageMagick tools via convert; constituent SO answers:
# https://askubuntu.com/a/101527/362864
# https://askubuntu.com/a/1052902/362864
# https://stackoverflow.com/a/20075227/3576984
# https://unix.stackexchange.com/a/24019/112834
# INPUT: foo.gif
# step 0: isolate foo to its own folder
# NOTE(review): TMPDIR is also the conventional environment variable naming the
# temp directory for other tools -- confirm that overriding it here is intended.
TMPDIR=/tmp/__flop_mirror__
@MichaelChirico
MichaelChirico / xml_read_efficiency.R
Created September 6, 2020 03:59
Efficiency of reading XML documents with relative/absolute addresses
test_xml = '
<div>
<div>
<div>
<div>
<p>1</p>
<p>2</p>
<p>3</p>
</div>
</div>
@MichaelChirico
MichaelChirico / presto_json_return.R
Created July 8, 2020 03:44
presto REST API & output
# NB: not fully reproducible since I've had to censor some stuff / obscure credentialling
library(data.table)
# Build a literal `select * from (values ...) t(...)` query out of mtcars.
# Promote the row names to a real `car` column so they are emitted as data.
mtcars = cbind(car = rownames(mtcars), mtcars)
# Convert to a plain list of columns; character columns get wrapped in ASCII
# single quotes so they read as SQL string literals, other columns pass through.
mtcars = lapply(mtcars, function(column) {
  if (!is.character(column)) return(column)
  sQuote(column, "'")
})
# Capture the column list now, before the helper entries below are appended.
colnames = toString(names(mtcars))
# These two extra entries are not data: do.call() hands them to paste() as its
# `sep` / `collapse` arguments.
mtcars[["sep"]] = ', '
# \n\t are bells&whistles to make cat(query) look nicer
mtcars[["collapse"]] = '),\n\t('
query = sprintf(
  "select * from (values\n\t(%s)\n)\nt(%s)",
  do.call(paste, mtcars),
  colnames
)
presto_rest_endpoint = file.path('/path/to/rest', 'statement')
@MichaelChirico
MichaelChirico / posix_java_switch.R
Created July 6, 2020 09:57
Translate between Java SimpleDateFormat and POSIX time format
# sources:
# https://prestodb.io/docs/current/functions/datetime.html
# https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html
# NB: when multiple matches are possible (e.g. %h=%I), the higher one is "preferred"
# NB: %r/%T are basically blocked out from being returned Java->Presto by putting
# them at the bottom, as a way of preferring the more verbose "full" form
# NB: the following supported formats don't have an exact equivalent: %w, %x, %v
time_fmt_mapping = fread('posix,java
%Y,yyyy
%y,yy
@MichaelChirico
MichaelChirico / loan_repayments_random_search
Last active May 31, 2020 13:56
A simple simulation of loan repayments
@MichaelChirico
MichaelChirico / psum_benchmark.R
Last active May 23, 2020 19:01
Benchmarking psum & friends
library(data.table)
library(microbenchmark)
get_reduce = function(FUN, ELEMENT) {
function(..., na.rm=FALSE) {
l = list(...)
if (length(l) == 1L && is.list(l[[1L]])) l = l[[1L]]
if (length(l) == 1L && (identical(FUN, `|`) || identical(FUN, `&`))) l[[1L]] = as.logical(l[[1L]])
if (na.rm) {
# TODO: nafill to support complex input, then use nafill here
@MichaelChirico
MichaelChirico / nyt_words.R
Created May 6, 2020 12:22
@NYT_first_said + rtweet + ggrepel
library(data.table)
library(rtweet)
library(ggplot2)
library(ggrepel)
nyt_words = get_timeline(
'NYT_first_said',
n = 900,
exclude_replies = TRUE,
include_rts = FALSE