Skip to content

Instantly share code, notes, and snippets.

@vankesteren
vankesteren / wine_plots.R
Last active April 21, 2021 09:58
Plots for a wine data visualisation
# script that outputs a graph
library(tidyverse)
library(firatheme)
wine <- read_delim("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
delim = ",",
col_names = c(
"Cultivar", "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium",
"Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins",
"Color intensity", "Hue", "OD280/OD315", "Proline"
)
library(tidyverse)
library(broom)
library(huxtable)
library(estimatr)
library(fabricatr)

set.seed(1234)
dat <- fabricate(
  N = 40,
library(tidyverse)
library(scales)
data(diamonds)
diamonds %>%
filter(str_detect(cut, "Fair|Ideal")) %>%
ggplot(aes(price, carat)) +
geom_point(color = "skyblue", alpha = 0.5) +
facet_wrap(~cut, strip.position = "bottom") +
scale_x_continuous(labels = comma) +
@statwonk
statwonk / rootograms.R
Last active September 28, 2019 07:11
A gist that shows how a rootogram helps find that the zero-inflated negative binomial was the data generating mechanism.
# install.packages("countreg", repos="http://R-Forge.R-project.org")
# https://www.fromthebottomoftheheap.net/2016/06/07/rootograms/
# https://channel9.msdn.com/Events/useR-international-R-User-conferences/useR-International-R-User-2017-Conference/countreg-Tools-for-count-data-regression
library(countreg)
# Draw 3,000 values from a zero-inflated negative binomial distribution
# (size = 4, mu = 20, zero-inflation probability pi = 0.05).
# The name `x` and the nested glm() call below are kept as-is on purpose:
# table() and hist() build their labels from the argument expression, and
# rootogram() may do the same for its title (not confirmed).
x <- rzinbinom(3000, size = 4, mu = 20, pi = 0.05)

# Frequency table of the simulated counts.
table(x)

# Histogram of the simulated counts.
hist(x, col = "orange")

# Rootogram of an intercept-only Poisson fit; per the original author's note,
# the zeros are under-fitted by this model.
rootogram(glm(x ~ 1, family = "poisson"))
@svmiller
svmiller / 1-dont-bring-boilerplate-centrist-talking-points-to-a-data-fight-claire.R
Created July 31, 2019 16:23
“fReE sTuFf fRoM tHe gOvErnMeNt dOeS nOt pLaY wElL iN tHe mIdWeSt.”
library(tidyverse)
library(stevemisc)
GSS <- readRDS("~/Dropbox/data/gss/GSS_spss-2018/gss7218.rds")
GSS %>%
mutate(regioncondensed = NA,
regioncondensed = ifelse(region == 8 | region == 9, "West", regioncondensed),
regioncondensed = ifelse(region == 3 | region == 4, "Midwest", regioncondensed),
regioncondensed = ifelse(region == 5 | region == 6 | region == 7, "South", regioncondensed),
library(data.table)
library(dplyr)
library(R.utils)
# JUST MASS
# Decompress the Massachusetts ARCOS extract (if needed) and load it.
#
# R.utils::gunzip() defaults to remove = TRUE and overwrite = FALSE, so the
# original call deleted the .gz archive after the first run and then failed on
# every re-run because the input no longer existed. Keep the archive and skip
# decompression when the .tsv is already on disk so the script is re-runnable.
mass_gz <- "arcos-ma-statewide-itemized.tsv.gz"
mass <- sub("[.]gz$", "", mass_gz) # path of the decompressed .tsv
if (!file.exists(mass)) {
  gunzip(mass_gz, destname = mass, remove = FALSE)
}

# Read from the same path that was just decompressed, rather than repeating
# the file name as a second literal.
mass_opioids <- fread(file = mass)
glimpse(mass_opioids) # 2,574,240 observations
summary(mass_opioids)
@benmarwick
benmarwick / gist:8cf22ecb74ac511f8ac1c70aef6038a7
Last active April 29, 2023 21:31
How to make diamond plots after Bergstrom and West (2018) "Why scatter plots suggest causality, and what we can do about it"
# https://arxiv.org/pdf/1809.09328.pdf & https://twitter.com/CT_Bergstrom/status/1035327464644333568
# they use Mathematica, boo! So let's make them with R
# starting with https://stackoverflow.com/q/33396168/1036500
library(ggplot2)
p <- ggplot() +
geom_point(data = anscombe,
library(gganimate) # thomasp85/gganimate
library(cartogram)
library(geogrid) # Need github version jbaileyh/geogrid
library(rnaturalearth)
library(sf)
library(scico)
# State-level polygons for the USA, returned as an sf object.
us <- ne_states("united states of america", returnclass = "sf")

# Drop the rows whose woe_name is Alaska or Hawaii.
excluded_states <- c("Alaska", "Hawaii")
us <- us[!(us$woe_name %in% excluded_states), ]

# Re-project to the equidistant conic (eqdc) projection described by this
# PROJ string.
eqdc_proj <- "+proj=eqdc +lat_0=39 +lon_0=-96 +lat_1=33 +lat_2=45 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs"
us <- st_transform(us, crs = eqdc_proj)
@backroot
backroot / pandoc_install.sh
Last active February 22, 2023 14:32
installing pandoc on CentOS 7
#!/bin/bash
# Install pandoc on CentOS 7 by building it with cabal.
#
# Abort on the first failing command (and on unset variables or failed pipeline
# stages) so that a failed yum step does not fall through to the cabal steps.
set -euo pipefail

# Enable the EPEL repository, which provides haskell-platform.
sudo yum -y install epel-release
sudo yum -y install haskell-platform --enablerepo=epel

# Refresh the cabal package index, then build and install pandoc.
cabal update
cabal install pandoc --force-reinstall
@mike-lawrence
mike-lawrence / gp_regression.stan
Last active April 7, 2020 18:15
GP Regression example
functions{
// GP: computes noiseless Gaussian Process
vector GP(real volatility, real amplitude, vector normal01, int n_x, real[] x ) {
matrix[n_x,n_x] cov_mat ;
real amplitude_sq_plus_jitter ;
amplitude_sq_plus_jitter = amplitude^2 + 1e-6 ;
cov_mat = cov_exp_quad(x, amplitude, 1/volatility) ;
for(i in 1:n_x){
cov_mat[i,i] = amplitude_sq_plus_jitter ;
}