Created
January 12, 2017 23:08
-
-
Save seanjtaylor/04fe14b3bdc39dff1fb737eef22b8ea6 to your computer and use it in GitHub Desktop.
Download the daily page views of an English Wikipedia article going back to 2008.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(RJSONIO) | |
#' Download daily page views for an English Wikipedia article.
#'
#' Combines two sources: monthly JSON dumps from stats.grok.se for 2008-2015
#' and the Wikimedia REST pageviews API for the recent window.
#'
#' @param page Article title as it appears in the URL path. It is pasted into
#'   the request URLs as-is, so it must already be URL-safe.
#' @param recent_start First timestamp (`YYYYMMDDHH`) requested from the
#'   Wikimedia API.
#' @param recent_end Last timestamp (`YYYYMMDDHH`) requested from the
#'   Wikimedia API.
#' @return A data frame with a Date column `ds` and a `views` column, sorted by
#'   `ds`, with rows whose date could not be parsed removed.
get_page_visits <- function(page,
                            recent_start = '2016010100',
                            recent_end = '2017011100') {
  # Legacy source: one request per calendar month, keyed by YYYYMM.
  months <- expand.grid(year = 2008:2015, month = 1:12)
  past_urls <- paste(
    'http://stats.grok.se/json/en',
    as.character(months$year * 100 + months$month),
    page,
    sep = '/'
  )

  past <- bind_rows(lapply(past_urls, function(url) {
    daily <- RJSONIO::fromJSON(url)$daily_views
    # as.character()/as.numeric() keep both columns present (zero rows) when a
    # month comes back with no daily_views at all.
    tibble(
      ds = as.character(names(daily)),
      views = as.numeric(unlist(daily))
    )
  }))

  # Recent source: Wikimedia REST API, one request for the whole window.
  recent_url <- paste(
    'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user',
    page,
    'daily',
    paste(recent_start, recent_end, sep = '/'),
    sep = '/'
  )
  recent <- bind_rows(RJSONIO::fromJSON(recent_url)$items) %>%
    # Base as.Date() instead of readr's parse_date(): readr is not loaded by
    # this file. The trailing "00" in the format matches the hour suffix.
    mutate(ds = as.Date(timestamp, format = '%Y%m%d00')) %>%
    select(ds, views)

  past %>%
    # Drop zero-view rows before converting to Date.
    filter(views > 0) %>%
    mutate(ds = as.Date(ds)) %>%
    select(ds, views) %>%
    bind_rows(recent) %>%
    arrange(ds) %>%
    filter(!is.na(ds))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment