Skip to content

Instantly share code, notes, and snippets.

@resulumit
Last active March 26, 2022 07:48
Show Gist options
  • Save resulumit/05f5186f2c7abd7efed0e05bc64a358e to your computer and use it in GitHub Desktop.
Save resulumit/05f5186f2c7abd7efed0e05bc64a358e to your computer and use it in GitHub Desktop.
# load libraries -------------------------------------------------------
library(tidyverse)
library(rvest)
library(robotstxt)
# get journal names and links from my website -----------------------------
# download and parse the page once, then reuse the matched links for both
# the names and the urls instead of requesting the same page twice
journal_links <-
  read_html("https://resulumit.com/blog/polisci-turnaround-acceptance/") |>
  html_elements(".table-striped a")
# fail with a clear message if the table links are missing; otherwise the
# column extraction further down stops with an opaque subscript error
stopifnot(
  "no journal links found under '.table-striped a'" = length(journal_links) > 0
)
# get names
journal_names <- html_text(journal_links)
# get links
journal_urls <- html_attr(journal_links, "href")
# merge and extract journal id
# the id is taken as the fourth "/"-separated piece of each link
journals_df <- data.frame(journal_names, journal_urls) |>
  mutate(id = str_split(journal_urls, fixed("/"), simplify = TRUE)[, 4])
# check if scraping metrics is allowed [it is] -------------------------
# print the permission rules parsed from the publisher's robots.txt
robotstxt(
  domain = "https://www.tandfonline.com"
)[["permissions"]]
# scrape related pages for metrics --------------------------------------
# builds one tibble per journal, with one row per item found under
# ".speed li" on its metrics page
# a journal whose page cannot be downloaded or parsed is reported with a
# warning and skipped, so a failure can neither abort the whole run nor
# silently reuse the result of the journal scraped just before it
temp_list <- lapply(seq_len(nrow(journals_df)), function(i) {
  metrics_url <- paste0(
    "https://www.tandfonline.com/action/journalInformation?show=journalMetrics&journalCode=",
    journals_df$id[i]
  )
  tryCatch(
    tibble(
      journal = journals_df$journal_names[i],
      url = journals_df$journal_urls[i],
      metrics = read_html(metrics_url) |>
        html_elements(".speed li") |>
        html_text()
    ),
    error = function(e) {
      warning(
        "Skipping ", journals_df$journal_names[i], ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
})
# keep only the journals that were scraped successfully
temp_list <- Filter(Negate(is.null), temp_list)
# create the metrics dataframe ------------------------------------------
# stack the per-journal tibbles, classify every scraped metric by the phrase
# it contains, and spread the result to one row per journal
df <- bind_rows(temp_list) |>
  mutate(
    # NOTE(review): Journal (an html link) is not among id_cols below, so
    # pivot_wider() drops it again -- confirm whether it is still needed
    Journal = paste0("<a href='", url, "'>", journal, "</a>"),
    # the first word of the scraped text is kept as the value
    statistic = word(metrics, 1),
    type = case_when(
      str_detect(metrics, "acceptance rate") ~ "acceptance_rate",
      str_detect(metrics, "first decision") ~ "first_decision",
      str_detect(metrics, "post-review") ~ "first_review",
      str_detect(metrics, "online publication") ~ "online_publication"
    )
  ) |>
  # metrics matching none of the phrases above get type NA; drop them so
  # they do not end up as a stray column named "NA" after pivoting
  filter(!is.na(type)) |>
  pivot_wider(
    id_cols = "journal",
    names_from = "type",
    values_from = "statistic"
  )
# shared look for both plots ----------------------------------------------
# theme_light() plus the background, axis, grid and border settings that the
# two plots below have in common
plot_background <- "#faf9f9"
theme_metrics <- theme_light() +
  theme(
    panel.background = element_rect(fill = plot_background),
    plot.background = element_rect(fill = plot_background),
    legend.background = element_rect(fill = plot_background),
    axis.line = element_line(colour = "black", size = 0.25),
    panel.grid.major = element_line(colour = "grey95", size = 0.25),
    panel.grid.minor = element_line(colour = "grey95", size = 0.125),
    panel.border = element_rect(
      fill = NA,
      colour = "grey95",
      size = 0.25
    )
  )
# plot turnaround statistics ----------------------------------------------
# the turnaround columns are selected by name rather than as a positional
# range, so the result does not depend on the order in which the metrics
# happened to be scraped
turnaround_cols <- c("first_decision", "first_review", "online_publication")
df |>
  select(-acceptance_rate) |>
  pivot_longer(
    cols = any_of(turnaround_cols),
    names_to = "type",
    values_to = "statistic"
  ) |>
  mutate(
    statistic = as.numeric(statistic),
    type = factor(
      type,
      levels = turnaround_cols,
      labels = c(
        "Including desk rejects",
        "Excluding desk rejects",
        "Acceptance to online"
      )
    )
  ) |>
  ggplot(aes(x = statistic, color = type, fill = type)) +
  geom_density(alpha = 0.3) +
  theme_metrics +
  labs(y = "Density\n", x = "\nMedian number of days") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  ) +
  scale_fill_discrete(name = "") +
  scale_color_discrete(name = "")
# plot acceptance rate ----------------------------------------------------
# strip the percent sign before converting; it is added back on the axis
# labels
df |>
  mutate(acceptance_rate = as.numeric(str_remove(acceptance_rate, "%"))) |>
  ggplot(aes(x = acceptance_rate)) +
  geom_density(alpha = 0.3) +
  theme_metrics +
  labs(y = "Density\n", x = "\nAcceptance Rate") +
  scale_x_continuous(
    labels = function(x) {
      paste0(x, "%")
    }
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment