In response to my blog post:
Great analysis! Nicely done! It would be nice to compare the number of commits with the number of released versions of each package; some packages make a lot of commits for only a few releases.
I don't know if there's a faster way to get the number of releases for a package, but here's one approach. It takes around 20-30 minutes.
library(tidyverse)
library(scales)
library(pkgsearch)
# Source data: GitHub contributors for CRAN packages, as published with my post
# https://tidytales.ca/posts/2023-05-10_r-developers-github/
contributors_data <- "https://raw.githubusercontent.com/mccarthy-m-g/tidytales/main/data/2023-05-10_r-developers-github.csv"
contributors_all <- contributors_data |> read_csv()
# Unique package names as a character vector named by its own values, so each
# element carries its package name along with it.
packages_all <- contributors_all$package |>
  unique() |>
  set_names()
# {pkgsearch} can get a package's history as a data frame where each row is a
# new CRAN release, so the number of rows in a package's history is its total
# number of releases.
#
# A failed history lookup yields a missing count for that package instead of
# aborting the whole (long-running) iteration. `NA_integer_` keeps `releases`
# an integer column no matter how many lookups fail.
#
# Result: a tibble with one row per package and the columns `package` (taken
# from the names of `packages_all`) and `releases`.
package_releases <- packages_all |>
  map_int(
    \(pkg) {
      tryCatch(
        nrow(cran_package_history(pkg)),
        error = function(e) NA_integer_
      )
    }
  ) |>
  enframe(name = "package", value = "releases")
# Get summary statistics for each package: the number of rows it has in the
# contributors data (presumably one row per contributor) and its total commits,
# ignoring missing commit counts. Then attach the CRAN release count.
packages_tidy <- contributors_all |>
  group_by(package) |>
  summarise(
    contributors = n(),
    commits = sum(n_commits, na.rm = TRUE)
  ) |>
  # Name the join key explicitly: this avoids the "Joining with" message and
  # prevents the join from silently keying on any other shared column.
  left_join(package_releases, by = "package")
# Releases vs. contributors per package: hex bins plus faint, jittered rug
# marks along the axes. Both axes use a log2 scale; the fill uses a log10 scale.
packages_tidy |>
  ggplot(aes(releases, contributors)) +
  geom_hex(bins = 30) +
  geom_rug(
    position = position_jitter(width = 0.1, height = 0.1),
    alpha = 0.01
  ) +
  scale_fill_viridis_c(trans = "log10") +
  scale_y_continuous(
    breaks = breaks_log(n = 7, base = 2),
    trans = "log2"
  ) +
  scale_x_continuous(
    breaks = breaks_log(n = 7, base = 2),
    trans = "log2"
  ) +
  theme(panel.grid.minor = element_blank())
# Releases vs. total commits per package: hex bins plus faint, jittered rug
# marks along the axes. The x axis uses a log2 scale, the y axis a log10 scale,
# and the fill a log2 scale.
packages_tidy |>
  ggplot(aes(releases, commits)) +
  geom_hex(bins = 30) +
  geom_rug(
    position = position_jitter(width = 0.1, height = 0.1),
    alpha = 0.01
  ) +
  scale_fill_viridis_c(trans = "log2") +
  scale_y_continuous(
    breaks = breaks_log(n = 7, base = 10),
    trans = "log10"
  ) +
  scale_x_continuous(
    breaks = breaks_log(n = 7, base = 2),
    trans = "log2"
  ) +
  theme(panel.grid.minor = element_blank())
Created on 2023-05-23 with reprex v2.0.2