Skip to content

Instantly share code, notes, and snippets.

@cderv
Last active March 2, 2019 16:05
Show Gist options
  • Save cderv/67d7ad8998559f2ce14b4eb4bb852fd1 to your computer and use it in GitHub Desktop.
Save cderv/67d7ad8998559f2ce14b4eb4bb852fd1 to your computer and use it in GitHub Desktop.
CRRRI STUFF
# Based on issue https://github.com/RLesur/crrri/issues/30
# from Rlesur/crrri@feature/build-on-cdp-session
# tested with 59747341
## Pipe workflow: retrieve the outer HTML of a page ----
# Launches headless Chrome, opens a CDP session on a page, navigates to `url`
# and stores the payload handed to the last callback in `res`.
devtools::load_all()

# launch
work_dir <- chr_new_data_dir()
chrome <- chr_launch(work_dir = work_dir, headless = TRUE)

# get ws endpoint
ws_endpoint <- chr_get_ws_addr(debug_port = 9222, type = "page")

# create the connection to a page
page_session <- CDPSession$new(ws_endpoint)

# just check if connection ready - To wrap in a function
# Pump the later event loop while waiting instead of spinning in an empty
# loop: this is the loop the script already runs below to receive results,
# and a non-zero timeout keeps the wait from pegging a CPU core.
# NOTE(review): presumably readiness is driven by later callbacks when the
# script is sourced rather than typed line by line - confirm.
while (!page_session$is_ready()) {
  later::run_now(timeoutSecs = 0.1)
}

url <- "https://www.r-project.org/"

# Object required for the piped workflow
session <- list(CDPSession = page_session, method_to_send = NULL)

# Filled in by the last callback of the workflow; NULL means "not done yet".
res <- NULL

session %>%
  sendToSession("Page.enable", listener = "run") %>%
  sendToSession("DOM.enable") %>%
  sendToSession("Page.navigate", params = list(url = url)) %>%
  listenTo("Page.loadEventFired") %>%
  sendToSession("DOM.getDocument") %>%
  sendToSession(
    "DOM.getOuterHTML",
    callback = function(data) {
      # Ask for the outer HTML of the root node found in the payload
      # (presumably the DOM.getDocument response - verify in sendToSession).
      node_id <- data$result$root$nodeId
      session$CDPSession$sendCommand(
        "DOM.getOuterHTML",
        params = list(nodeId = node_id)
      )
    }
  ) %>%
  sendToSession("getres", callback = function(data) {
    # `<<-` is needed: the callback runs later and must update the
    # top-level `res` that the wait loop below polls.
    res <<- data$result
  })

# run the workflow
page_session$emit("run")

# Process pending callbacks until the result has been stored.
while (is.null(res)) {
  later::run_now(timeoutSecs = 0.1)
}
res

# close everything
page_session$close()
if (chrome$is_alive()) {
  chrome$kill()
}

# Remove only the objects created by this example rather than wiping the
# whole workspace with rm(list = ls()).
rm(work_dir, chrome, ws_endpoint, page_session, url, session, res)
## With Event Emitters API -----------------
# The previous example is a pipe workflow built on top of the event emitter.
# Here is how it looks with the event emitter used directly: each handler
# sends the next command when the event it is registered on is emitted.
work_dir <- chr_new_data_dir()
chrome <- chr_launch(work_dir = work_dir, headless = TRUE)

# get ws endpoint
ws_endpoint <- chr_get_ws_addr(debug_port = 9222, type = "page")

# create the connection to a page
page_session <- CDPSession$new(ws_endpoint)

# just check if connection ready - To wrap in a function
# Pump the later event loop while waiting instead of spinning in an empty
# loop; a non-zero timeout keeps the wait from pegging a CPU core.
# NOTE(review): presumably readiness is driven by later callbacks when the
# script is sourced rather than typed line by line - confirm.
while (!page_session$is_ready()) {
  later::run_now(timeoutSecs = 0.1)
}

url <- "https://www.r-project.org/"

# Filled in by the last handler; NULL means "not done yet".
res <- NULL

page_session$once(
  "run",
  ~ page_session$sendCommand("Page.enable")
)
page_session$once(
  "Page.enable",
  ~ page_session$sendCommand("DOM.enable")
)
page_session$once(
  "DOM.enable",
  ~ page_session$sendCommand("Page.navigate", list(url = url))
)
page_session$once(
  "Page.loadEventFired",
  ~ page_session$sendCommand("DOM.getDocument")
)
page_session$once("DOM.getDocument", function(data) {
  # Request the outer HTML of the root node carried by the event payload.
  root_id <- data$result$root$nodeId
  page_session$sendCommand("DOM.getOuterHTML", list(nodeId = root_id))
})
page_session$once("DOM.getOuterHTML", function(data) {
  # `<<-` is needed: the handler runs later and must update the top-level
  # `res` that the wait loop below polls.
  res <<- data$result
})

# run the workflow
page_session$emit("run")

# Process pending callbacks until the result has been stored.
while (is.null(res)) {
  later::run_now(timeoutSecs = 0.1)
}
res

# close everything
page_session$close()
if (chrome$is_alive()) {
  chrome$kill()
}

# Remove only the objects created by this example rather than wiping the
# whole workspace with rm(list = ls()).
rm(work_dir, chrome, ws_endpoint, page_session, url, res)
# from Rlesur/crrri@feature/build-on-cdp-session
# tested with faf9964d
### Classic workflow promises -------------
# Enables the Page and Network domains, turns on request interception for
# every URL pattern, navigates to `url` and disconnects once the
# `intercepted` promise resolves.
devtools::load_all()

chrome <- chr_connect()
url <- "https://www.rstudio.com"

# Session configured for interception; reused by the two branches below.
configured <- chrome %>%
  Page.enable() %>%
  Network.enable() %>%
  Network.setRequestInterception(patterns = list(list(urlPattern = "*")))

# Branch 1: wait for an intercepted request and report it.
intercepted <- configured %>%
  Network.requestIntercepted() %...T>% {
    print("intercepted")
  }

# Branch 2: trigger the navigation.
configured %>%
  Page.navigate(url) %...!% {
    # NOTE(review): a rejection of the navigation is deliberately ignored
    # here - presumably because the request is held by the interception and
    # the session is closed before it completes. Confirm this is intended.
  }

# Disconnect once the interception has been observed.
intercepted %...>% {
  chr_disconnect(chrome)
}
### Using Event Emitter No promises ------------------
# from Rlesur/crrri@feature/build-on-cdp-session
# Same interception scenario as the promise workflow, written with handlers
# registered directly on the session's event emitter.
devtools::load_all()

# launch
work_dir <- chr_new_data_dir()
chrome <- chr_launch(work_dir = work_dir, headless = TRUE)

# get ws endpoint
ws_endpoint <- chr_get_ws_addr(debug_port = 9222, type = "page")

# create the connection to a page
page_session <- CDPSession$new(ws_endpoint)

# just check if connection ready - To wrap in a function
# Pump the later event loop while waiting instead of spinning in an empty
# loop; a non-zero timeout keeps the wait from pegging a CPU core.
# NOTE(review): presumably readiness is driven by later callbacks when the
# script is sourced rather than typed line by line - confirm.
while (!page_session$is_ready()) {
  later::run_now(timeoutSecs = 0.1)
}

url <- "https://www.rstudio.com"

# Flag flipped by the interception handler; initialised before the handler
# is registered so the handler never writes to an undefined variable.
intercepted <- FALSE

page_session$once(
  "run",
  ~ page_session$sendCommand("Page.enable")
)
page_session$once(
  "Page.enable",
  ~ page_session$sendCommand("Network.enable")
)
page_session$once(
  "Network.enable",
  # configure interception
  ~ page_session$sendCommand(
    "Network.setRequestInterception",
    params = list(patterns = list(list(urlPattern = "*")))
  )
)
page_session$once(
  "Network.setRequestInterception",
  # Go to the url
  ~ page_session$sendCommand("Page.navigate", params = list(url = url))
)

# Get all the interceptions (`on`, not `once`: the handler stays registered)
page_session$on(
  "Network.requestIntercepted",
  ~ {
    print("intercepted")
    # `<<-` is needed to update the top-level flag polled below.
    intercepted <<- TRUE
  }
)

# launch the workflow
page_session$emit("run")

# Process pending callbacks until a request has been intercepted.
while (!intercepted) {
  later::run_now(timeoutSecs = 0.1)
}

# close everything
page_session$close()
if (chrome$is_alive()) {
  chrome$kill()
}

# Remove only the objects created by this example rather than wiping the
# whole workspace with rm(list = ls()).
rm(work_dir, chrome, ws_endpoint, page_session, url, intercepted)
## Event Emitter Pipe workflow ----------------
# Interception scenario written with the piped helpers (sendToSession /
# listenTo) instead of handlers registered by hand.
devtools::load_all()

# launch
work_dir <- chr_new_data_dir()
chrome <- chr_launch(work_dir = work_dir, headless = TRUE)

# get ws endpoint
ws_endpoint <- chr_get_ws_addr(debug_port = 9222, type = "page")

# create the connection to a page
page_session <- CDPSession$new(ws_endpoint)

# just check if connection ready - To wrap in a function
# Pump the later event loop while waiting instead of spinning in an empty
# loop; a non-zero timeout keeps the wait from pegging a CPU core.
# NOTE(review): presumably readiness is driven by later callbacks when the
# script is sourced rather than typed line by line - confirm.
while (!page_session$is_ready()) {
  later::run_now(timeoutSecs = 0.1)
}

url <- "https://www.r-project.org/"

# Object required for the piped workflow
session <- list(CDPSession = page_session)

# Flag flipped by the interception callback; initialised before the
# callback is registered so it never writes to an undefined variable.
intercepted <- FALSE

session %>%
  sendToSession("Page.enable", listener = "run") %>%
  sendToSession("Network.enable") %>%
  sendToSession(
    "Network.setRequestInterception",
    params = list(patterns = list(list(urlPattern = "*")))
  ) %>%
  sendToSession("Page.navigate", params = list(url = url)) %>%
  listenTo(
    "Network.requestIntercepted",
    callback = ~ {
      print("intercepted")
      # `<<-` is needed to update the top-level flag polled below.
      intercepted <<- TRUE
    }
  )

# launch the workflow
page_session$emit("run")

# Process pending callbacks until a request has been intercepted.
while (!intercepted) {
  later::run_now(timeoutSecs = 0.1)
}

# close everything
page_session$close()
if (chrome$is_alive()) {
  chrome$kill()
}

# Remove only the objects created by this example rather than wiping the
# whole workspace with rm(list = ls()).
rm(work_dir, chrome, ws_endpoint, page_session, url, session, intercepted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment