Last active
March 8, 2017 00:03
-
-
Save omsai/0a66e3b3747b4db847e06d3ee5817a54 to your computer and use it in GitHub Desktop.
Recent Data Carpentry Genomics workshops
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
suppressPackageStartupMessages({ | |
library(tidyverse) | |
library(rvest) # Webscraping | |
library(stringr) | |
}) | |
## Download the index of past Data Carpentry workshops and build one row per
## workshop with its date, display name and website URL.
## NOTE: despite its name, `workshops_url` holds the parsed HTML document
## returned by read_html(), not a URL string.
workshops_url <- read_html("http://www.datacarpentry.org/workshops-past/")
link_cells <- html_nodes(workshops_url, "td[class='link']")
workshops <- tibble(
  date = html_nodes(workshops_url, "td[class='date']") %>% html_text(),
  name = html_text(link_cells),
  ## html_node() (singular) yields exactly one result per link cell, so `url`
  ## always has the same length as `name` and stays aligned with it.  A cell
  ## without an anchor gives NA; a cell with several anchors contributes only
  ## its first one.  Selecting "td[class='link'] a" over the whole document
  ## instead would silently shift or break the columns in those cases.
  url = link_cells %>% html_node("a") %>% html_attr("href")
)
## Handle missing pages (HTTP 404 error) using `try()`.
## `try()` turns an error raised by read_html() into an object of class
## "try-error" instead of aborting the script, so one unreachable workshop
## site does not stop the remaining downloads.
try_read_html <- function(url) {
  try(read_html(url))
}

## Download every workshop page; each element of `pages` is either a parsed
## document or a "try-error" object.
pages <- lapply(workshops$url, try_read_html)

## TRUE when `x` is a failure captured by `try()`.
is_error <- function(x) {
  inherits(x, "try-error")
}

workshops$page <- pages

## Keep only the workshops whose page could be read.  vapply() guarantees a
## logical vector even when there are zero workshops, whereas sapply() would
## return an empty list in that case and make filter() fail.
workshops <- filter(workshops, !vapply(page, is_error, logical(1)))
## Label official genomics lessons
##
## url_grepl() reports whether a parsed page links to anything matching
## `pattern`.
##   x        parsed HTML document to search.
##   pattern  regular expression tested against each anchor's `href`
##            attribute, ignoring case.
##   ...      further arguments forwarded to grepl().
## Returns a single TRUE/FALSE: TRUE when at least one href matches.
url_grepl <- function(x, pattern, ...) {
  anchors <- html_nodes(x, "a")
  hrefs <- html_attr(anchors, "href")
  matched <- grepl(hrefs, pattern = pattern, ignore.case = TRUE, ...)
  any(matched)
}
## Add one logical column per official genomics lesson, TRUE when the
## workshop page links to that lesson.  The patterns are matched against the
## link targets by url_grepl(), case-insensitively.
## vapply() with FUN.VALUE = logical(1) pins every column to a logical vector;
## sapply() would produce an empty list for a zero-row table and turn these
## into list columns that cannot be summed afterwards.
official <- mutate(
  workshops,
  lesson_intro = vapply(
    page, url_grepl, FUN.VALUE = logical(1),
    pattern = "introduction-genomics"
  ),
  lesson_cloud = vapply(
    page, url_grepl, FUN.VALUE = logical(1),
    pattern = "cloud-genomics"
  ),
  lesson_shell = vapply(
    page, url_grepl, FUN.VALUE = logical(1),
    pattern = "shell-genomics"
  ),
  lesson_wrangling = vapply(
    page, url_grepl, FUN.VALUE = logical(1),
    pattern = "wrangling-genomics"
  ),
  lesson_r = vapply(
    page, url_grepl, FUN.VALUE = logical(1),
    pattern = "R-genomics"
  )
)
## Number of official genomics lessons linked from each workshop: the
## `lesson_*` flags are summed across each row and stored as an integer.
sums <- as.integer(rowSums(select(official, starts_with("lesson_"))))
official <- official %>% mutate(lessons = sums)

## Workshops that link to at least one lesson, with the highest lesson
## counts first, reduced to the count and the workshop URL.
genomics_official <- official %>%
  filter(lessons > 0) %>%
  arrange(desc(lessons)) %>%
  select(lessons, url)

## Print the resulting table.
genomics_official
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> genomics_official | |
# A tibble: 13 × 2 | |
lessons url | |
<int> <chr> | |
1 5 https://ryanpeek.github.io/2017-01-22-stanford/ | |
2 5 https://tracykteal.github.io/2016-11-21-genomics-unm/ | |
3 5 https://uio-carpentry.github.io/2016-03-15-Oslo-data-bio/ | |
4 4 https://nwu-eresearch.github.io/2016-09-26-nwu-genomics/ | |
5 4 https://datacarpentry.github.io/2016-05-26-NIH/ | |
6 4 https://iglpdc.github.io/2016-04-11-bu/ | |
7 4 http://www.datacarpentry.org/2016-01-19-sbu/ | |
8 4 http://www.datacarpentry.org/2015-09-22-UCDavis/ | |
9 4 http://www.datacarpentry.org/2015-07-30-ASPB/ | |
10 2 https://vlsci.github.io/datacarpentry_2015-11-23_VLSCI/ | |
11 1 https://markrobinsonuzh.github.io/2016-07-18-zurich/ | |
12 1 https://lmweber.github.io/2016-05-30-Zurich/ | |
13 1 http://www.datacarpentry.org/2015-08-24-ISU/ | |
> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment