Skip to content

Instantly share code, notes, and snippets.

@akabe
Last active January 11, 2018 04:45
Show Gist options
  • Save akabe/e46c72c3b5de1b87a647fc6692454973 to your computer and use it in GitHub Desktop.
Save akabe/e46c72c3b5de1b87a647fc6692454973 to your computer and use it in GitHub Desktop.
A short example of bindings of `chrome-launcher` and `chrome-remote-interface` in OCaml BuckleScript
(* This is a short example of bindings to `chrome-launcher` and `chrome-remote-interface`
(Node.js libraries for driving Google Chrome in headless mode), written in OCaml BuckleScript (https://bucklescript.github.io/).
Usage:
$ npm install -g chrome-launcher chrome-remote-interface
$ bsc -bs-main bucklescript_headless_chrome.ml
Headless browsers (such as PhantomJS, Chrome, Firefox) are useful for, e.g.,
- integration tests of JavaScript products on a real browser, or
- Web scraping for pages containing complex JavaScript.
This example is toy code. You can add functions, properties and methods that you need. *)
(** {2 Monadic operators for Js.Promise} *)

(** [p >>= next] is monadic bind: once [p] resolves, its value is passed to
    [next], and the result is the promise that [next] returns. *)
let (>>=) promise next = promise |> Js.Promise.then_ next

(** [p >>| transform] is map: once [p] resolves, [transform] is applied to its
    value and the outcome is wrapped in a resolved promise. *)
let (>>|) promise transform =
  promise
  |> Js.Promise.then_ (fun settled -> Js.Promise.resolve (transform settled))
(** {2 Binding to chrome-launcher} *)
module ChromeLauncher = struct
  (** JS object produced by {!launch}.  This binding exposes two integer
      properties, [pid] and [port], and a [kill] method that returns a
      promise. *)
  type t =
    < pid : int;
      port : int;
      kill : unit -> unit Js.Promise.t [@bs.meth];
    > Js.t

  (** Options object accepted by {!launch}: the URL to open first and the
      command-line flags handed to Chrome. *)
  type launch_options =
    < startingUrl : string;
      chromeFlags : string array;
    > Js.t

  (** Binds the [launch] function exported by the [chrome-launcher] module.
      The result is a promise of the launched-browser handle. *)
  external launch : launch_options -> t Js.Promise.t = "launch"
  [@@bs.module "chrome-launcher"]
end
(** {2 Binding to chrome-remote-interface} *)
module ChromeRemoteInterface = struct
  (** Event object handed to a [requestWillBeSent] listener.  Only the fields
      declared here are visible to OCaml code.  Labels such as [_type] and
      [_method] rely on the BuckleScript label-mangling rule that drops the
      leading underscore, so they read the JS properties [type] and
      [method]. *)
  type request_param =
    < requestId : string;
      loaderId : string;
      documentURL : string;
      timestamp : float;
      wallTime : float;
      initiator : < _type : string > Js.t;
      _type : string;
      frameId : string;
      request : < url : string; _method : string; > Js.t;
    > Js.t

  (** The [Network] member of a client: [enable] returns a promise, and
      [requestWillBeSent] registers a listener for outgoing requests. *)
  type network =
    < enable : unit -> unit Js.Promise.t [@bs.meth];
      requestWillBeSent : (request_param -> unit) -> unit [@bs.meth];
    > Js.t

  (** The [Page] member of a client.  As used in this file, [loadEventFired]
      is called without a listener and its promise is awaited. *)
  type page =
    < enable : unit -> unit Js.Promise.t [@bs.meth];
      navigate : < url : string > Js.t -> unit Js.Promise.t [@bs.meth];
      loadEventFired : unit -> unit Js.Promise.t [@bs.meth];
    > Js.t

  (** Result of [Runtime.evaluate].  [value] is typed as [string], which is
      only accurate when the evaluated expression yields a string. *)
  type eval_result =
    < result : < _type : string; value : string; > Js.t > Js.t

  (** The [Runtime] member of a client. *)
  type runtime =
    < enable : unit -> unit Js.Promise.t [@bs.meth];
      evaluate : < expression : string > Js.t -> eval_result Js.Promise.t [@bs.meth];
    > Js.t

  (** A connected client.  [_Network], [_Page] and [_Runtime] map to the JS
      properties [Network], [Page] and [Runtime]. *)
  type t =
    < close : unit -> unit Js.Promise.t [@bs.meth];
      _Network : network;
      _Page : page;
      _Runtime : runtime;
    > Js.t

  (** Legacy binding, kept only so existing callers keep compiling.  It binds
      the [call] property of the module export, so [call call opts cb] invokes
      the exported function with [opts] and [cb].
      NOTE(review): per the chrome-remote-interface documentation, the
      callback form returns an event emitter rather than a promise, so the
      declared result type is not accurate.  Prefer {!create}. *)
  external call :
    'a -> < port : int > Js.t -> (t -> unit Js.Promise.t) -> unit Js.Promise.t
    = "" [@@bs.module "chrome-remote-interface"]

  (** Binds the function exported by the module itself, in its promise form
      (no callback argument): the result is a promise of the connected
      client. *)
  external connect : < port : int > Js.t -> t Js.Promise.t
    = "chrome-remote-interface" [@@bs.module]

  (** [create ~port callback] connects to the Chrome instance listening on
      [port] and then runs [callback] with the connected client.  The returned
      promise settles with the promise returned by [callback]; a rejection from
      {!connect} is passed through to the caller. *)
  let create ~port callback =
    Js.Promise.then_ callback (connect [%bs.obj { port }])
end
(** {2 Main routine} *)

(* Launches headless Chrome, loads one page, logs every outgoing request and
   the page HTML, then closes the client and kills the browser.  Failures are
   logged instead of being left as unhandled rejections, and the browser is
   killed whether or not the page visit succeeded. *)
let () =
  (* Logs a rejection and turns it into a resolved promise so that the cleanup
     steps chained after it still run. *)
  let report_failure err =
    Js.log "Headless Chrome session failed:" ;
    Js.log err ;
    Js.Promise.resolve ()
  in
  (* Drives one page visit over an already-connected client.  It does not
     close the client; cleanup is done by the caller. *)
  let scrape (client : ChromeRemoteInterface.t) =
    (* Set an event listener (capturing HTTP requests) *)
    client##_Network##requestWillBeSent
      (fun params -> Js.log (params##request##_method ^ " " ^ params##request##url)) ;
    (* Wait until required functionality becomes available. *)
    Js.Promise.all [|
      client##_Network##enable ();
      client##_Page##enable ();
      client##_Runtime##enable ();
    |] >>= fun _ ->
    (* Open http://example.com/ *)
    client##_Page##navigate [%bs.obj { url = "http://example.com/" }] >>= fun () ->
    (* Wait until onLoad event is fired. *)
    client##_Page##loadEventFired () >>= fun () ->
    (* Obtain a raw HTML string and print it *)
    client##_Runtime##evaluate [%bs.obj { expression = "document.querySelector('html').innerHTML" }] >>| fun res ->
    Js.log res##result##value
  in
  let session =
    ChromeLauncher.launch [%bs.obj {
      startingUrl = "about:blank"; (* The URL that Chrome opens first *)
      chromeFlags = [|"--headless"; "--disable-gpu"|]; (* command-line options *)
    }] >>= fun chrome ->
    ChromeRemoteInterface.create
      ~port:(chrome##port)
      (fun client ->
         (* Cleanup stays inside the callback so that it runs after the visit
            regardless of what the promise returned by create tracks. *)
         Js.Promise.catch report_failure (scrape client)
         >>= (fun () -> Js.Promise.catch report_failure (client##close ()))
         (* Shutdown the headless browser. *)
         >>= fun () -> chrome##kill ())
  in
  (* Last-resort handler for rejections from the chain above, e.g. launch. *)
  ignore (Js.Promise.catch report_failure session : unit Js.Promise.t)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment