// Source: GitHub gist stdray/644776ff78eb8cf6efc3
// Author: @stdray. Created December 9, 2014 17:46.
open System
/// Helpers for scraping image URLs from paginated HTML pages.
module Grabber =
    open HtmlAgilityPack
    open System.Net.Http

    /// One client shared by every request. Creating and disposing an
    /// HttpClient per call can exhaust sockets under load; the instance is
    /// safe to use from concurrent requests and is kept for the lifetime of
    /// the process.
    let private http = new HttpClient()

    /// Runs every grabber against `page` in parallel and flattens the
    /// per-grabber results into a single sequence. Results appear in the
    /// same order as `grabbers`.
    let grab grabbers page = async {
        let! parts =
            grabbers
            |> Seq.map (fun grabber -> grabber page)
            |> Async.Parallel
        return Seq.concat parts
    }

    /// Downloads `uri` (resolved against `baseUri`), parses the response as
    /// HTML and returns the `src` attribute of every node matched by the
    /// XPath `selector`. A matched node without a `src` attribute contributes
    /// an empty string. Returns an empty list when nothing matches.
    let private load (baseUri : Uri) (uri : string) (selector : string) = async {
        // The client is shared, so the absolute address is built here instead
        // of mutating HttpClient.BaseAddress for each request.
        let target = Uri(baseUri, uri)
        use! stream = http.GetStreamAsync(target) |> Async.AwaitTask
        let doc = HtmlDocument()
        doc.Load(stream)
        return
            // SelectNodes yields null (not an empty collection) when the
            // XPath matches nothing; mapping over it would throw.
            match doc.DocumentNode.SelectNodes selector with
            | null -> []
            | nodes ->
                nodes
                |> Seq.map (fun n -> n.GetAttributeValue("src", String.Empty))
                |> List.ofSeq
    }

    /// Builds a grabber for one site: given a page number, it loads
    /// `baseUri` + page number and collects the `src` values selected by
    /// `selector`.
    let makeGrabber (baseUri : string) selector = fun (page : uint32) ->
        load (Uri(baseUri)) (string page) selector
// Sites to scrape, as (base URL, XPath selecting the image nodes) pairs,
// each turned into a page grabber.
let grabbers =
    [ @"http://1001mem.ru/best/", @"//section[@class='posts']//div[@class='image']//img"
      @"http://joyreactor.cc/", @"//div[@class='article post-normal']//div[@class='image']//img" ]
    |> List.map (fun (baseUri, selector) -> Grabber.makeGrabber baseUri selector)

// Grab page 0 from every site and print each collected `src` value.
for src in Grabber.grab grabbers 0u |> Async.RunSynchronously do
    printfn "%A" src
// (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")