Skip to content

Instantly share code, notes, and snippets.

@RLesur
Created November 14, 2018 22:47
Show Gist options
  • Save RLesur/03370182c195d998be6e195d63999a26 to your computer and use it in GitHub Desktop.
Save RLesur/03370182c195d998be6e195d63999a26 to your computer and use it in GitHub Desktop.
PDF generation from R using Chrome and rstudio/websocket
# Packages ---------------------------------------------------------------
remotes::install_github('milesmcbain/chradle')
remotes::install_github('rlesur/pagedown@automation')
remotes::install_github('rstudio/websocket')
library(chradle)
library(pagedown)
library(websocket)
library(jsonlite)
# Chrome headless helpers -------------------------------------------------
# Thanks to Miles McBain
# Launch a headless Chrome with remote debugging enabled.
#
# debug_port: port passed to Chrome's --remote-debugging-port switch.
# bin:        name or path of the Chrome executable to launch.
#
# Returns a list with the processx handle (`debug_process`) and the port
# that was requested (`debug_port`).
browser_init <- function(debug_port = 9222, bin = "google-chrome") {
  # Command-line switches handed to the Chrome executable.
  chrome_args <- c(
    "--use-test-config",
    "--headless",
    "--no-first-run",
    "--new-window",
    "--user-data-dir=remote-profile",
    glue::glue("--remote-debugging-port={debug_port}")
  )

  chrome <- processx::process$new(bin, chrome_args)

  # Internal chradle helper; presumably checks that the debugging endpoint
  # answers on `debug_port` -- confirm against the chradle sources.
  chradle:::debugger_200_ok(debug_port)

  list(debug_process = chrome, debug_port = debug_port)
}
# Look up the WebSocket debugger URL(s) of the "page" targets exposed by a
# Chrome instance listening on `debug_port`.
#
# Reads http://localhost:<debug_port>/json and returns the
# `webSocketDebuggerUrl` entries whose `type` equals "page".
ws_addr <- function(debug_port = 9222) {
  targets_url <- glue::glue("http://localhost:{debug_port}/json")
  targets <- jsonlite::read_json(targets_url, simplifyVector = TRUE)

  is_page <- targets$type == "page"
  targets$webSocketDebuggerUrl[is_page]
}
# Generate and serve a paged document -------------------------------------
# Fetch the pagedown example document into the session's temporary directory.
rmd_file <- file.path(tempdir(), "index.Rmd")
download.file(
  url = "https://raw.githubusercontent.com/rstudio/pagedown/master/inst/examples/index.Rmd",
  destfile = rmd_file
)

# Render the R Markdown file and keep what rmarkdown::render() returns.
paged_document <- rmarkdown::render(input = rmd_file)

# Serve the temporary directory over HTTP on port 4321; the headless browser
# is later pointed at http://127.0.0.1:4321.
servr::httd(dir = tempdir(), port = 4321)
# Open a headless Chrome: it will render the paged document later ---------
# Start the headless browser; `instance` holds the process handle and the
# debugging port it was started with.
instance <- browser_init()
# The debugging endpoint may need a moment before it lists a page target; if
# the lookup below fails or returns nothing, wait briefly and run it again.
# Query the port the browser was actually started on instead of relying on
# ws_addr()'s default happening to match browser_init()'s default.
headless_address <- ws_addr(instance$debug_port)
# WebSocket connection ----------------------------------------------------
# Create the connection object without connecting yet, so every handler is
# registered before the first message can arrive.
ws <- WebSocket$new(headless_address, autoConnect = FALSE)

# Once connected, enable the Runtime and Page domains (requests 1 and 2).
ws$onOpen(function(event) {
  cat("Connection opened\n")
  event$target$send('{"id":1,"method":"Runtime.enable"}')
  event$target$send('{"id":2,"method":"Page.enable"}')
})

# Single dispatcher for every incoming message. Each message is parsed once
# and routed on its request id (responses) or its method (events):
#   id 2  (Page.enable answered)        -> register the "pagedownListener" binding
#   id 3  (Runtime.addBinding answered) -> navigate to the served document
#   Runtime.bindingCalled event         -> ask Chrome to print the page to PDF
#   id 99 (Page.printToPDF answered)    -> decode the payload and write book.pdf
ws$onMessage(function(event) {
  data <- fromJSON(event$data)
  # `[[` matches names exactly; `$` on a list would allow partial matching.
  msg_id <- data[["id"]]
  msg_method <- data[["method"]]

  # A response carrying an `error` member means the request failed: report it
  # and stop here rather than continuing the chain with a missing result.
  err <- data[["error"]]
  if (!is.null(err)) {
    err_text <- if (is.list(err)) err[["message"]] else err
    cat("Chrome returned an error",
        if (!is.null(msg_id)) paste0(" for request ", msg_id),
        ": ", err_text, "\n", sep = "")
    return(invisible(NULL))
  }

  if (!is.null(msg_id)) {
    if (msg_id == 2) {
      cat('Headless Chrome environment enabled\n')
      event$target$send('{"id":3,"method":"Runtime.addBinding","params":{"name":"pagedownListener"}}')
    } else if (msg_id == 3) {
      cat('Opening the html_paged document...\n')
      event$target$send('{"id":4,"method":"Page.navigate","params":{"url":"http://127.0.0.1:4321"}}')
    } else if (msg_id == 99) {
      pdf_data <- data[["result"]][["data"]]
      if (is.null(pdf_data)) {
        # Nothing to decode: avoid an obscure failure inside base64_dec().
        cat("Page.printToPDF returned no data; book.pdf was not written.\n")
      } else {
        writeBin(base64_dec(pdf_data), "book.pdf")
        cat("PDF saved to file: book.pdf\nYou can close the websocket connection.\n")
      }
    }
  }

  if (!is.null(msg_method)) {
    if (msg_method == "Runtime.bindingCalled") {
      cat('html_paged document ready.\nPrinting to PDF...\n')
      event$target$send('{"id":99,"method":"Page.printToPDF"}')
    }
  }
})

ws$onClose(function(event) {
  cat("Client disconnected with code ", event$code,
      " and reason ", event$reason, "\n", sep = "")
})

ws$onError(function(event) {
  # sep = "" keeps the output free of doubled spaces, matching onClose above.
  cat("Client failed to connect: ", event$message, "\n", sep = "")
})

# You can have a look at the running headless browser
# at http://localhost:9222 (use Chrome!) - Select the link about:blank
ws$connect()
# Wait until "PDF saved to file: book.pdf" is printed before running the lines below.
# Close and clean the headless browser -----------------------------------------
# Close the WebSocket connection to the headless browser.
ws$close()
# chradle helper, called with the list returned by browser_init();
# presumably terminates the Chrome process it holds -- confirm against chradle.
chr_kill(instance)
# chradle helper; presumably removes files left behind by the browser session
# (e.g. the "remote-profile" user data directory) -- TODO confirm.
chr_clean()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment