Skip to content

Instantly share code, notes, and snippets.

View aammd's full-sized avatar

Andrew MacDonald aammd

  • Université de Sherbrooke
  • Montreal, Canada
View GitHub Profile
@aammd
aammd / matrix_to_data.frame.R
Last active January 4, 2017 01:31
cantrip to turn a matrix into a data.frame, assuming that the first row of the matrix contains a header row
matrix_to_df_firstline_header <- function(mat){
requireNamespace("purrr")
mat %>%
## cut columns into lists
apply(2, function(s) list(s)) %>%
flatten %>%
map(flatten_chr) %>%
## set names to the first element of the list
{set_names(x = map(., ~ .x[-1]),
@aammd
aammd / is_continuous_categories.R
Created December 5, 2016 14:16
are a vector of range edges, separated by _, continuous?
## this function will print "TRUE" if the categories defined by the vector are
## continuous, e.g. c("1500_20000", "20000_NA", "0_1500")
is_continuous_categories <- function(cat_vector){
cat_range <- cat_vector %>%
str_split("_") %>%
transpose %>%
map(unlist) %>%
{c(invoke(setdiff, .), invoke(setdiff, rev(.)))}
identical(cat_range, c("0", "NA"))
library(dplyr)
library(tidyr)
# Two character columns in which some (or all) of the values are missing.
# tibble() replaces the deprecated data_frame() constructor.
words_some_missing <- tibble(
  one_word = c(NA, "M", NA),
  second_word = c("F", NA, NA)
)

# unite() pastes both columns into a single `sex` column. With its defaults
# the pieces are joined by "_" and a missing value is pasted as the text "NA".
words_some_missing %>%
  unite(sex, one_word, second_word)
## that sticks the NAs together, which I think is something you don't want? In
library(purrr)    # map(), map_dbl() and the %>% pipe all come from purrr
library(rucrdtw)

# For each named row index, call ucred_mv() with every other row of
# `synthetic_control` as the data and the held-out row as the query, pull the
# "distance" element out of each result, and return the values as a two-column
# (name, value) tibble.
list(first = 33, second = 19, third = 7) %>%
  map(~ ucred_mv(synthetic_control[-.x, ], synthetic_control[.x, ],
                 byrow = TRUE)) %>%
  map_dbl("distance") %>%
  tibble::enframe()
# consider also adding this to that shiny app
# Time three ways of obtaining the residuals of the regression ys ~ xs,
# 500 runs each: lm(), fastLm(), and the hand-written homemade_residuals().
# NOTE(review): in this excerpt `testdf` is only created further down, and
# `indeps`, `deps` and the homemade_residuals*() helpers are not defined here
# -- confirm the ordering against the full gist before running.
microbenchmark::microbenchmark({lm(ys~xs, data = testdf)$residuals},
{fastLm(ys~xs, data = testdf)$residuals},
{homemade_residuals(indeps, deps)}, times = 500)
# Check that the two hand-written implementations return exactly the same
# object (identical() allows no numerical tolerance).
identical(homemade_residuals(indeps, deps), homemade_residuals2(indeps, deps))
# all.equal()
# Simulated data: 200 points with xs drawn uniformly from [0, 15] and
# ys = 5 * xs + 15 plus normal noise with sd = 3.
testdf <- data_frame(xs = runif(200, 0, 15),
ys = xs * 5 + 15 + rnorm(200, sd = 3))
@aammd
aammd / simple_tree_graph.R
Last active January 25, 2017 12:26
i just want a simple tree tho
library(dplyr)
library(igraph)
library(ggplot2)
library(ggraph)
ed <- frame_data(
~from, ~to,
"a", "b",
"a", "c",
"c", "e"
@aammd
aammd / fake.R
Created January 26, 2017 15:34
fake exponential growth with error
library(dplyr)
library(ggplot2)
# Simulate a noisy exponential growth curve and plot it as points joined by a
# line. tibble() replaces the deprecated data_frame() constructor; like
# data_frame(), it lets `y` refer to the `x` column defined just before it.
tibble(
  x = seq(2001, 2016, by = 2),
  # every year gets its own growth rate drawn around 0.3 (sd = 0.03), applied
  # to the number of years elapsed since 2000
  y = exp(rnorm(length(x), mean = 0.3, sd = 0.03) * (x - 2000))
) %>%
  ggplot(aes(x = x, y = y)) +
  geom_point() +
  geom_line() +
  labs(
    x = "Year",
    y = "Cumulative number of papers",
    title = "Number of papers with a 'Number of papers' figure"
  ) +
  theme_bw()
@aammd
aammd / reshaping.R
Created February 24, 2017 16:26
if you have species names in one column
library(dplyr)
library(tidyr)
library(stringr)
# Example input written row by row: one row per name, with all of its plant
# genera packed into a single space-separated string.
# tribble() replaces the deprecated frame_data() constructor.
too_wide <- tribble(
  ~common_name,            ~plant_genera,
  "Abagrotis apposita",    "Amelanchier Arbutus Cenanothus",
  "Abagrotis brunipennis", "Prunus Vaccinium"
)
@aammd
aammd / hosts.awk
Last active March 8, 2017 18:02
no longer needful as a script to clean the data
# Before any input is read: zero the state variables and emit the
# tab-separated header row of the output table.
BEGIN {
    fam = parasite_genus = host_genus = parasite_species = 0
    print "Host_fam\tHost_genus\tHost_sp\tParasite_genus\tParasite_sp\tk_i\tLocation\tParasite_fam"
}

# A record that starts with a capital letter in column one: remember its
# first field in `fam`.
/^[A-Z]/ { fam = $1 }
/^\t[A-Z]/ {
@aammd
aammd / gist:f566a3eafc6ee293d234b32812f3b2bb
Created March 10, 2017 09:30
plotting with a function factory
# Function factory for a saturating curve.
#
# rmax(rm, baserate) returns a function of `x` that computes
#   rm * x / (rm / baserate + x)
# which is 0 at x = 0, rises with initial slope `baserate`, and levels off
# towards `rm` as x grows.
#
# Both arguments are forced straight away so that the returned closure keeps
# the values supplied at creation time (this matters when factories are built
# in a loop or via lapply(), where lazy evaluation could otherwise pick up a
# later value).
rmax <- function(rm, baserate) {
  force(rm)
  force(baserate)

  function(x) {
    # x value at which the curve reaches half of `rm`
    half_saturation <- rm / baserate
    (rm * x) / (half_saturation + x)
  }
}