Skip to content

Instantly share code, notes, and snippets.

@mcanouil
Last active February 2, 2024 23:31
Show Gist options
  • Save mcanouil/6eb80bfead9b68d1c2272afd8c71d072 to your computer and use it in GitHub Desktop.
Save mcanouil/6eb80bfead9b68d1c2272afd8c71d072 to your computer and use it in GitHub Desktop.
R CRAN Packages
# MIT License
#
# Copyright (c) 2024 Mickaël Canouil
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
## inspired by https://gist.github.com/daroczig/3cf06d6db4be2bbe3368
# Keep strings as character vectors when data frames are built
# (already the default since R 4.0.0).
options(stringsAsFactors = FALSE)
# Scraping: read_html(), html_node(), html_text()
library(rvest)
# Data wrangling: filter(), mutate(), rename(), arrange(), full_join()
library(dplyr)
# In-place grouped updates: as.data.table(), `:=`, setnames(), setorder()
library(data.table)
# Plotting
library(ggplot2)
# Axis label formatting: comma
library(scales)
# Animation: animate(), transition_reveal(), gifski_renderer()
library(gganimate)
# Presumably defines theme_black(), which the plot at the end relies on.
# NOTE(review): this executes remote code on every run -- confirm the URL is
# trusted.
source('https://github.com/mcanouil/DEV/raw/master/R/theme_black.R')
#' Scrape a CRAN directory listing into a data frame
#'
#' Downloads the HTML index found at `base_url` (extended by the path
#' components given in `...`), takes the text of its first `<pre>` node and
#' splits every listing line into three fields.
#'
#' Errors raised while downloading or parsing the page are not caught here;
#' callers that need retries must wrap the call themselves.
#'
#' @param ... Path components appended to `base_url` with `file.path()`,
#'   e.g. `"Archive"` or `"Archive", "ggplot2"`.
#' @param base_url Root URL of the listing. Defaults to the `src/contrib`
#'   directory of the RStudio CRAN mirror.
#' @return A data frame with columns `name`, `modified` and `size`, one row
#'   per listing line that split into exactly three fields. When no line
#'   qualifies, a single all-`NA` row is returned instead.
parse_cran <- function(...,
                       base_url = "https://cran.rstudio.com/src/contrib") {
  listing_text <- file.path(base_url, ...) %>%
    read_html() %>%
    html_node("pre") %>%
    html_text()

  # One element per listing line.
  rows <- strsplit(listing_text, " \n")[[1]]

  # Normalise the separators to tabs: strip leading blanks, collapse runs of
  # two or more blanks, and split a tarball name from the date that follows it
  # after a single blank.
  rows <- gsub("^ *", "", rows)
  rows <- gsub(" {2,}", "\t", rows)
  rows <- gsub("gz ", "gz\t", rows)

  # Keep only the lines made of exactly three fields (name, date, size).
  fields <- strsplit(rows, "\t")
  fields <- fields[lengths(fields) == 3L]

  temp <- do.call("rbind.data.frame", fields)
  if (nrow(temp) == 0) {
    temp <- data.frame(name = NA, modified = NA, size = NA)
  }
  setNames(temp, c("name", "modified", "size"))
}
## Source tarballs listed at the top level of the repository
pkgs <- parse_cran() %>%
  # Accept any numeric size (plain bytes or a K/M/G suffix); rows without a
  # numeric size, such as "-", are dropped. Filtering on "K$" alone would
  # silently drop every tarball whose size is not listed with a K suffix.
  filter(grepl("[0-9][KMG]?$", size)) %>%
  filter(grepl("\\.tar\\.gz$", name)) %>%
  mutate(
    modified = as.POSIXct(strftime(modified, format = "%Y-%m-%d %H:%M:%S")),
    # Keep the leading run of letters, digits and dots (the part before the
    # first other character, e.g. "_").
    name = sub("^([a-zA-Z0-9\\.]*).*", "\\1", name)
  ) %>%
  rename(last_modified = modified)
## Entries of the "Archive" listing whose name ends in "/" (presumably one
## directory per package) and contains no "-"; the trailing slash is removed
## and the size column is dropped.
archives <- parse_cran("Archive") %>%
  filter(grepl("/$", name), !grepl("-", name)) %>%
  mutate(name = sub("/$", "", name)) %>%
  select(-size)
## Union of current and archived packages, sorted by name.
##   date     : modification time of the current tarball as text, blanked for
##              every package that also appears in `archives`
##   versions : 1 when the package has a current tarball, 0 otherwise
##   archived : TRUE when the package appears in `archives`
all_pkgs <- pkgs %>%
  full_join(archives, by = "name") %>%
  arrange(name) %>%
  mutate(date = as.character(last_modified)) %>%
  mutate(versions = as.numeric(!is.na(date))) %>%
  mutate(archived = name %in% archives$name) %>%
  mutate(date = ifelse(archived, NA, date))
## lookup release date of first version & number of releases
# Checkpoint the joined table before the slow per-package scraping starts.
# NOTE(review): "/tmp" is hard-coded and will not exist on every platform.
saveRDS(all_pkgs, "/tmp/all_pkgs.rds")
all_pkgs <- as.data.table(all_pkgs)

# For every package whose date is still missing, fetch its own Archive listing
# and update `date` and `versions` in place, one package (group) at a time.
all_pkgs[
  is.na(date),
  c("date", "versions") := {
    cat(name, "\n") # progress trace

    # Up to three attempts in total, pausing half a second between them.
    pkgarchive <- try(parse_cran("Archive", name), silent = TRUE)
    ntry <- 1L
    while (ntry < 3L && inherits(pkgarchive, "try-error")) {
      Sys.sleep(0.5)
      pkgarchive <- try(parse_cran("Archive", name), silent = TRUE)
      ntry <- ntry + 1L
    }

    if (inherits(pkgarchive, "try-error")) {
      # Every attempt failed: leave the row unchanged (date stays NA).
      list(as.character(date), versions)
    } else {
      # Smallest `modified` value of the archive listing becomes the date;
      # every row of the listing counts as one more version.
      list(
        as.character(min(pkgarchive$modified)),
        versions + nrow(pkgarchive)
      )
    }
  },
  by = name
]

# Drop the packages for which no date could be determined.
all_pkgs <- all_pkgs[!is.na(date), ]
setnames(all_pkgs, "date", "first_release")
setorder(all_pkgs, first_release, name)
# Row number after ordering by first release; used as the y value of the plot.
all_pkgs[, index := .I]
saveRDS(all_pkgs, "/tmp/all_pkgs.rds")
## Plot
# Animated line of `index` against first-release date, rendered to a GIF in
# the working directory.
gganimate::animate(
  plot = ggplot(all_pkgs, aes(x = as.Date(first_release), y = index)) +
    # `linewidth` replaces the deprecated `size` argument for lines.
    geom_path(
      linewidth = 1.5,
      lineend = "round",
      linejoin = "round",
      colour = "white"
    ) +
    scale_x_date(
      date_breaks = "2 year",
      date_labels = "%Y"
    ) +
    scale_y_continuous(
      # One break every 2,500 packages, up to the row count rounded up to the
      # next hundred.
      breaks = seq(0, ceiling(nrow(all_pkgs) / 100) * 100, 2500),
      # `expansion()` replaces the deprecated `expand_scale()`.
      expand = expansion(mult = c(0.01, 0.05)),
      labels = comma
    ) +
    labs(
      x = NULL,
      y = NULL,
      title = "Number of R packages ever published on CRAN",
      caption = paste("Compiled on", Sys.Date())
    ) +
    theme_black() +
    theme(panel.grid.minor = element_blank()) +
    transition_reveal(along = as.Date(first_release)),
  width = 6.3,
  height = 4.7,
  units = "in",
  res = 300,
  duration = 3,
  # NOTE(review): theme_get() returns the session's current theme, not the
  # theme_black() added to this plot -- confirm this yields the intended
  # background colour.
  bg = theme_get()$plot.background$colour,
  renderer = gifski_renderer("number-of-submitted-packages-to-CRAN.gif")
)
@mcanouil
Copy link
Author

mcanouil commented Feb 7, 2020

number-of-submitted-packages-to-CRAN

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment