Last active
March 2, 2019 16:05
-
-
Save cderv/67d7ad8998559f2ce14b4eb4bb852fd1 to your computer and use it in GitHub Desktop.
CRRRI STUFF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on issue https://github.com/RLesur/crrri/issues/30
# from Rlesur/crrri@feature/build-on-cdp-session
# tested with 59747341
devtools::load_all()

# Piped workflow: navigate to a page and collect the answer of
# DOM.getOuterHTML into `res`.

# Upper bound, in seconds, on each wait below, so the script cannot hang
# forever when Chrome or the page never answers.
wait_timeout <- 60

# launch
work_dir <- chr_new_data_dir()
chrome <- chr_launch(work_dir = work_dir, headless = TRUE)
# get ws endpoint
ws_endpoint <- chr_get_ws_addr(debug_port = 9222, type = "page")
# create the connection to a page
page_session <- CDPSession$new(ws_endpoint)

# Wait until the connection is ready - To wrap in a function.
# run_now(timeoutSecs = ) blocks for at most that long, so the loop polls
# is_ready() about ten times per second instead of spinning the CPU.
ready_deadline <- Sys.time() + wait_timeout
while (!page_session$is_ready() && Sys.time() < ready_deadline) {
  later::run_now(timeoutSecs = 0.1)
}
if (!page_session$is_ready()) {
  # Do not leave a headless Chrome behind when giving up.
  if (chrome$is_alive()) chrome$kill()
  stop("CDP session was not ready after ", wait_timeout, " seconds",
       call. = FALSE)
}

url <- "https://www.r-project.org/"
# Object required for the piped workflow
session <- list(CDPSession = page_session, method_to_send = NULL)
# Filled by the last callback of the pipeline; NULL means "no answer yet".
res <- NULL
# NOTE(review): presumably each step is sent once the previous one has
# answered, the first one being triggered by the "run" event - confirm
# against sendToSession() / listenTo() in crrri.
session %>%
  sendToSession("Page.enable", listener = "run") %>%
  sendToSession("DOM.enable") %>%
  sendToSession("Page.navigate", params = list(url = url)) %>%
  listenTo("Page.loadEventFired") %>%
  sendToSession("DOM.getDocument") %>%
  sendToSession(
    "DOM.getOuterHTML",
    callback = function(data) {
      # `data` carries the root node id that DOM.getOuterHTML is asked for.
      node_id <- data$result$root$nodeId
      session$CDPSession$sendCommand(
        "DOM.getOuterHTML",
        params = list(nodeId = node_id)
      )
    }
  ) %>%
  sendToSession("getres", callback = function(data) {
    # `<<-` writes to the top-level `res` defined above.
    res <<- data$result
  })

# run the workflow
page_session$emit("run")
# Drive the event loop until the result arrives or the deadline passes.
result_deadline <- Sys.time() + wait_timeout
while (is.null(res) && Sys.time() < result_deadline) {
  later::run_now(timeoutSecs = 0.1)
}
if (is.null(res)) {
  warning("No result received after ", wait_timeout, " seconds",
          call. = FALSE)
}
res

# close everything
page_session$close()
if (chrome$is_alive()) chrome$kill()
# Remove only what this example created instead of wiping the workspace.
rm(
  work_dir, chrome, ws_endpoint, page_session, url, session, res,
  wait_timeout, ready_deadline, result_deadline
)
## With Event Emitters API -----------------
# The previous example is a pipe workflow built on top of event emitters.
# Here is how it looks with the event emitter used directly.

# Upper bound, in seconds, on each wait below, so the script cannot hang
# forever when Chrome or the page never answers.
wait_timeout <- 60

work_dir <- chr_new_data_dir()
chrome <- chr_launch(work_dir = work_dir, headless = TRUE)
# get ws endpoint
ws_endpoint <- chr_get_ws_addr(debug_port = 9222, type = "page")
# create the connection to a page
page_session <- CDPSession$new(ws_endpoint)

# Wait until the connection is ready - To wrap in a function.
# run_now(timeoutSecs = ) blocks for at most that long, so the loop polls
# is_ready() about ten times per second instead of spinning the CPU.
ready_deadline <- Sys.time() + wait_timeout
while (!page_session$is_ready() && Sys.time() < ready_deadline) {
  later::run_now(timeoutSecs = 0.1)
}
if (!page_session$is_ready()) {
  # Do not leave a headless Chrome behind when giving up.
  if (chrome$is_alive()) chrome$kill()
  stop("CDP session was not ready after ", wait_timeout, " seconds",
       call. = FALSE)
}

url <- "https://www.r-project.org/"
# Filled by the last listener; NULL means "no answer yet".
res <- NULL

# Each one-shot listener sends the next command when the event named in its
# first argument is emitted on the session.
page_session$once("run", ~ page_session$sendCommand("Page.enable"))
page_session$once("Page.enable", ~ page_session$sendCommand("DOM.enable"))
page_session$once(
  "DOM.enable",
  ~ page_session$sendCommand("Page.navigate", list(url = url))
)
page_session$once(
  "Page.loadEventFired",
  ~ page_session$sendCommand("DOM.getDocument")
)
page_session$once("DOM.getDocument", function(data) {
  # Ask for the outer HTML of the root node carried by `data`.
  page_session$sendCommand(
    "DOM.getOuterHTML",
    list(nodeId = data$result$root$nodeId)
  )
})
page_session$once("DOM.getOuterHTML", function(data) {
  # `<<-` writes to the top-level `res` defined above.
  res <<- data$result
})

# run the workflow
page_session$emit("run")
# Drive the event loop until the result arrives or the deadline passes.
result_deadline <- Sys.time() + wait_timeout
while (is.null(res) && Sys.time() < result_deadline) {
  later::run_now(timeoutSecs = 0.1)
}
if (is.null(res)) {
  warning("No result received after ", wait_timeout, " seconds",
          call. = FALSE)
}
res

page_session$close()
if (chrome$is_alive()) chrome$kill()
# Remove only what this example created instead of wiping the workspace.
rm(
  work_dir, chrome, ws_endpoint, page_session, url, res,
  wait_timeout, ready_deadline, result_deadline
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from Rlesur/crrri@feature/build-on-cdp-session
# tested with faf9964d
### Classic workflow promises -------------
# Promise-based version: enable request interception, navigate, and
# disconnect once a request has been intercepted.
devtools::load_all()
chrome <- chr_connect()
url <- "https://www.rstudio.com"

# Enable the Page and Network domains, then intercept every request
# (urlPattern "*").
configured <- chrome %>%
  Page.enable() %>%
  Network.enable() %>%
  Network.setRequestInterception(patterns = list(list(urlPattern = "*")))

# Wait for Network.requestIntercepted; the tee pipe runs print() for its
# side effect and passes the original value on.
intercepted <-
  configured %>%
  Network.requestIntercepted() %...T>% {
    print("intercepted")
  }

configured %>%
  Page.navigate(url) %...!% {
    # NOTE(review): the rejection of Page.navigate is swallowed on purpose,
    # it seems - presumably the navigation cannot complete while requests
    # are intercepted and the connection is closed below. Confirm before
    # reporting the error here.
    NULL
  }

# Disconnect once an interception has been observed.
intercepted %...>% {
  chr_disconnect(chrome)
}
### Using Event Emitter No promises ------------------
# from Rlesur/crrri@feature/build-on-cdp-session
# Same interception scenario, written with event listeners only.
devtools::load_all()

# Upper bound, in seconds, on each wait below, so the script cannot hang
# forever when Chrome or the page never answers.
wait_timeout <- 60

# launch
work_dir <- chr_new_data_dir()
chrome <- chr_launch(work_dir = work_dir, headless = TRUE)
# get ws endpoint
ws_endpoint <- chr_get_ws_addr(debug_port = 9222, type = "page")
# create the connection to a page
page_session <- CDPSession$new(ws_endpoint)

# Wait until the connection is ready - To wrap in a function.
# run_now(timeoutSecs = ) blocks for at most that long, so the loop polls
# is_ready() about ten times per second instead of spinning the CPU.
ready_deadline <- Sys.time() + wait_timeout
while (!page_session$is_ready() && Sys.time() < ready_deadline) {
  later::run_now(timeoutSecs = 0.1)
}
if (!page_session$is_ready()) {
  # Do not leave a headless Chrome behind when giving up.
  if (chrome$is_alive()) chrome$kill()
  stop("CDP session was not ready after ", wait_timeout, " seconds",
       call. = FALSE)
}

url <- "https://www.rstudio.com"
# Flag flipped by the interception listener; defined before the listener
# that writes to it with `<<-`.
intercepted <- FALSE

# Each one-shot listener sends the next command when the event named in its
# first argument is emitted on the session.
page_session$once("run", ~ page_session$sendCommand("Page.enable"))
page_session$once(
  "Page.enable",
  ~ page_session$sendCommand("Network.enable")
)
page_session$once(
  "Network.enable",
  # configure interception
  ~ page_session$sendCommand(
    "Network.setRequestInterception",
    params = list(patterns = list(list(urlPattern = "*")))
  )
)
page_session$once(
  "Network.setRequestInterception",
  # Go to the url
  ~ page_session$sendCommand("Page.navigate", params = list(url = url))
)
# Get all the interceptions: `on` (not `once`) is used for this listener.
page_session$on(
  "Network.requestIntercepted",
  ~ {
    print("intercepted")
    intercepted <<- TRUE
  }
)

# launch the workflow
page_session$emit("run")
# Drive the event loop until a request is intercepted or the deadline passes.
result_deadline <- Sys.time() + wait_timeout
while (!intercepted && Sys.time() < result_deadline) {
  later::run_now(timeoutSecs = 0.1)
}
if (!intercepted) {
  warning("No request intercepted after ", wait_timeout, " seconds",
          call. = FALSE)
}

page_session$close()
if (chrome$is_alive()) chrome$kill()
# Remove only what this example created instead of wiping the workspace.
rm(
  work_dir, chrome, ws_endpoint, page_session, url, intercepted,
  wait_timeout, ready_deadline, result_deadline
)
## Event Emitter Pipe workflow ----------------
# Same interception scenario, written with the piped helpers.
devtools::load_all()

# Upper bound, in seconds, on each wait below, so the script cannot hang
# forever when Chrome or the page never answers.
wait_timeout <- 60

# launch
work_dir <- chr_new_data_dir()
chrome <- chr_launch(work_dir = work_dir, headless = TRUE)
# get ws endpoint
ws_endpoint <- chr_get_ws_addr(debug_port = 9222, type = "page")
# create the connection to a page
page_session <- CDPSession$new(ws_endpoint)

# Wait until the connection is ready - To wrap in a function.
# run_now(timeoutSecs = ) blocks for at most that long, so the loop polls
# is_ready() about ten times per second instead of spinning the CPU.
ready_deadline <- Sys.time() + wait_timeout
while (!page_session$is_ready() && Sys.time() < ready_deadline) {
  later::run_now(timeoutSecs = 0.1)
}
if (!page_session$is_ready()) {
  # Do not leave a headless Chrome behind when giving up.
  if (chrome$is_alive()) chrome$kill()
  stop("CDP session was not ready after ", wait_timeout, " seconds",
       call. = FALSE)
}

url <- "https://www.r-project.org/"
# Flag flipped by the interception callback; defined before the callback
# that writes to it with `<<-`.
intercepted <- FALSE
# Object required for the piped workflow
session <- list(CDPSession = page_session)
# NOTE(review): presumably each step is sent once the previous one has
# answered, the first one being triggered by the "run" event - confirm
# against sendToSession() / listenTo() in crrri.
session %>%
  sendToSession("Page.enable", listener = "run") %>%
  sendToSession("Network.enable") %>%
  sendToSession(
    "Network.setRequestInterception",
    params = list(patterns = list(list(urlPattern = "*")))
  ) %>%
  sendToSession("Page.navigate", params = list(url = url)) %>%
  listenTo(
    "Network.requestIntercepted",
    callback = ~ {
      print("intercepted")
      intercepted <<- TRUE
    }
  )

# launch the workflow
page_session$emit("run")
# Drive the event loop until a request is intercepted or the deadline passes.
result_deadline <- Sys.time() + wait_timeout
while (!intercepted && Sys.time() < result_deadline) {
  later::run_now(timeoutSecs = 0.1)
}
if (!intercepted) {
  warning("No request intercepted after ", wait_timeout, " seconds",
          call. = FALSE)
}

page_session$close()
if (chrome$is_alive()) chrome$kill()
# Remove only what this example created instead of wiping the workspace.
rm(
  work_dir, chrome, ws_endpoint, page_session, url, session, intercepted,
  wait_timeout, ready_deadline, result_deadline
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment