-
-
Save r2evans/ed0d132166bfbd9473d99b4fac7d65db to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Convert an StackOverflow URL to a local filename | |
#' | |
#' @param url character string, typically starting with | |
#' "https://stackoverflow.com/..." | |
#' @param dir logical, whether this should be a directory or a file | |
#' @param ext character, file extension | |
#' @param create logical, whether to create it or just report on it | |
#' @param force logical, whether to resort to a question number if the | |
#' user cannot be found | |
#' @param base character, the path under which all others will be | |
#' based | |
#' @param clip logical, whether to attempt to write to the clipboard | |
#' with 'writeLines' | |
#' @return character string for the file for code, and the file has | |
#' been created and pre-populated with the URL on the first line | |
#' @export | |
SE <- function(url, dir = FALSE, ext = if (dir) "" else ".R", | |
create = TRUE, force = FALSE, base = "~/StackOverflow", | |
clip = interactive()) { | |
if (!requireNamespace("rvest")) { | |
stop("'rvest' package is not available") | |
} | |
if (missing(url)) { | |
# let's try the clipboard | |
if (clip) { | |
url <- suppressWarnings(readLines("clipboard")) | |
} | |
if (!is.character(url) || !nzchar(url) || !grepl("^http", url)) { | |
stop("argument url is missing, and the clipboard does not start with 'http'") | |
} | |
} | |
# remove sublink anchor refs | |
url <- gsub("#[^#]*$", "", url) | |
question_number <- gsub(".*/questions/(\\d+)/.*", "\\1", url) | |
if (!nzchar(question_number) || grepl("\\D", question_number)) { | |
stop("unable to find question number: ", url) | |
} | |
hand <- try(xml2::read_html(url), silent = TRUE) | |
if (inherits(hand, "try-error")) { | |
stop("unable to read the URL") | |
} | |
author <- try({ | |
rvest::html_attr( | |
rvest::html_nodes(rvest::html_nodes(hand, ".question .user-info .user-details"), "a"), | |
"href")}, silent = TRUE) | |
# I think these list editors, too ... | |
if (inherits(author, "try-error") || length(author) < 1 || !is.character(author) || !nzchar(author)) { | |
stop("unable to find the author: ", as.character(author)) | |
} | |
if (length(author) > 1L) { | |
warning("multiple authors: ", paste(author, collapse = ", ")) | |
author <- tail(author, n = 1) | |
} | |
author_number <- gsub("/users/(\\d+)/?.*", "\\1", author) | |
author_name <- gsub("/users/\\d+/", "", author) | |
if (grepl("\\D", author_number)) { | |
msg <- paste("malformed author:", author) | |
if (force) warning(msg) else stop(msg) | |
author <- "questions" | |
} | |
if (grepl("/", author_name) || !nzchar(author_name)) { | |
warning("malformed author name: ", sQuote(author_name)) | |
author_name <- "(unk)" | |
} | |
path <- file.path(base, author_number, question_number, fsep = "/") | |
if (dir) { | |
path <- file.path(path, "question", fsep = "/") | |
} | |
if (!anyNA(ext) && nzchar(ext)) path <- paste0(path, ext) | |
if (create) { | |
dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE) | |
} | |
if (clip) { | |
suppressWarnings(writeLines(path, "clipboard", sep = "")) | |
} | |
if (create) { | |
append <- TRUE | |
contents <- character(0) | |
if (file.exists(path)) { | |
partialurl <- gsub("^(.*/questions/[0-9]+/).*", "\\1", url) | |
# somewhat inefficient but hopefully sufficient | |
alllines <- gsub("^[# ]*", "", readLines(path)) | |
if (partialurl %in% substr(alllines, 1, nchar(partialurl))) { | |
append <- FALSE | |
warning("existing question, no change") | |
} else { | |
append <- TRUE | |
warning("user file exists, appending new question") | |
contents <- c(contents, "\n") | |
} | |
} | |
contents <- c(contents, paste("#", url), "") | |
if (append) write(contents, path, append = TRUE) | |
} | |
message("URL : ", url) | |
message("Author : ", author_number, ", ", author_name) | |
message("Question: ", question_number) | |
message("Path : ", path) | |
banned <- file.path(base, author_number, "banned") | |
if (file.exists(banned)) { | |
warning("\n### BANNED", call. = FALSE) | |
cat(paste("###", readLines(banned, warn = FALSE), collapse = "\n"), "\n") | |
} | |
invisible(path) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment