Noam Ross noamross

Computational Disease Ecologist, Executive Director of @ropensci, consulting in forecasting, One Health, #RStats + open-source science

noamross / angryscrape.R

Last active January 21, 2021 17:23

In which I scrape WAHIS out of rage

	library(httr)
	library(rvest)
	library(xml2)
	library(tidyverse)

	# Pretend we are a browser
	my_headers = c(Origin="http://www.oie.int",
	`Upgrade-Insecure-Requests`="1",
	`Content-Type`="application/x-www-form-urlencoded",
	`User-Agent`="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",

noamross / get_bat_wiki_pages.R

Last active April 1, 2018 17:39

Get the content of all bat Wikipedia pages

	library(tidyverse)
	library(xml2)
	library(rvest)
	library(WikipediR)
	library(urltools)

	# Get all species-level page titles from the Wikipedia list of bats
	bat_titles <- read_html("https://en.wikipedia.org/wiki/List_of_bats") %>%
	html_nodes(xpath="//ul/li[contains(., 'Genus')]/ul/li/a[starts-with(@href, '/wiki/')]") %>%
	xml_attr("href") %>%

noamross / ropensci_editor_workloads.R

Last active April 1, 2018 17:34

Checking editor workloads for rOpenSci onboarding

	library(tidyverse)
	library(gh)
	library(lubridate)

	# Query the GitHub API (via gh) for issues in ropensci/onboarding that carry
	# the "package" label, in any state (open or closed); .limit=1000 is passed
	# to gh() to cap how many records are requested.
	issues <- gh("/repos/ropensci/onboarding/issues?state=all&labels=package", .limit=1000)

	edits = map_df(issues,
	~data_frame(url = .$html_url,
	editor = .$assignee$login %\|\|% NA_character_,
	opened = as.Date(.$created_at))) %>%

noamross / pandoc-newpage-filter.R

Last active March 22, 2018 10:30

	#!/usr/bin/env Rscript

	# Open standard input as a read-only text connection
	json_in <- file('stdin', 'r')

	# JSON text for a RawBlock element in "latex" format containing a \newpage command
	lat_newp <- '{"t":"RawBlock","c":["latex","\\\\newpage"]}'

	# JSON text for a RawBlock element in "openxml" format: a paragraph whose
	# only run is a page break (<w:br w:type="page"/>)
	doc_newp <- '{"t":"RawBlock","c":["openxml","<w:p><w:r><w:br w:type=\\"page\\"/></w:r></w:p>"]}'

	# Read every line from stdin and join them into one newline-separated string
	# (presumably the pandoc JSON AST, given the variable name -- confirm against
	# the rest of the filter)
	ast <- paste(readLines(json_in, warn=FALSE), collapse="\n")

noamross / get_cran_marc_codes.R

Last active July 16, 2021 06:48

	library(desc)
	library(httr)
	library(purrr)
	library(stringi)
	library(methods)
	library(dplyr)
	library(tidyr)
	# Row names of the available.packages() matrix, i.e. the package names
	pkgs <- rownames(available.packages())

	# Create the DESCRIPTIONS directory on first run only
	if (!dir.exists("DESCRIPTIONS")) {
	  dir.create("DESCRIPTIONS")
	}

noamross / catch_per_unit_effort.R

Last active March 5, 2018 15:13

Catch per unit effort, a riff on https://rud.is/b/2018/03/02/comparing-2017-maine-lobster-landings-to-historical-landings/

	# Noam's take on Bob's lobster plot

	library(stringi)
	library(pdftools)
	library(hrbrthemes) #devtools::install_github("hrbrmstr/hrbrthemes"); hrbrthemes::import_roboto_condensed()
	library(tidyverse)

	# A different file provided by the state includes fishing effort.
	# lobster_tbl is the URL of that PDF; lobster_tbl_fil is just its final path
	# component ("lobster.table.pdf"), obtained with basename().
	lobster_tbl <- "https://www.maine.gov/dmr/commercial-fishing/landings/documents/lobster.table.pdf"
	lobster_tbl_fil <- basename(lobster_tbl)

noamross / RStudio__unititled_6986DF35

Created March 1, 2018 14:58

# Oh man, I'm sorry, my GITHUB_PAT didn't have 'gist' scope! Carry on.

noamross / RStudio__tt.R

Created March 1, 2018 14:57

	library(googledrive)

	gdoc_to_md <- function(file, path=NULL, markdown="markdown") {
	td <- tempdir()
	gf <- googledrive::drive_download(
	file, path = td,
	type = "application/epub+zip", overwrite = TRUE
	)
	if (is.null(path)) {
	path <- file.path(getwd(), paste0(gf$name, ".md"))

noamross / tt.R

Created March 1, 2018 14:56

	library(googledrive)

	gdoc_to_md <- function(file, path=NULL, markdown="markdown") {
	td <- tempdir()
	gf <- googledrive::drive_download(
	file, path = td,
	type = "application/epub+zip", overwrite = TRUE
	)
	if (is.null(path)) {
	path <- file.path(getwd(), paste0(gf$name, ".md"))

noamross / gh_file.R

Last active December 3, 2020 09:25

Function to download a file from github via API, including large files and private repos

	# TODO (maybe)
	# - Vectorize on URLs
	# - Allow for downloading whole directory contents if path is a directory
	# - Make a recursive = TRUE argument for this case
	# - Error messages/input checking

	#' Gets a file from a github repo, using the Data API blob endpoint
	#'
	#' This avoids the 1MB limit of the content API and uses [gh::gh] to deal with
	#' authorization and such. See https://developer.github.com/v3/git/blobs/