Created
December 9, 2014 17:46
-
-
Save stdray/644776ff78eb8cf6efc3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| open System | |
/// Collects image `src` values from paginated HTML sites.
module Grabber =
    open HtmlAgilityPack
    open System.Net.Http

    /// Applies every grabber to `page`, runs the resulting async workflows in
    /// parallel, and concatenates their results into a single sequence.
    /// An exception raised by any grabber propagates out of the combined workflow.
    let grab grabbers page = async {
        let works = grabbers |> Seq.map (fun grabber -> grabber page)
        let! parts = Async.Parallel works
        return parts |> Seq.collect id
    }

    /// Downloads `uri` (resolved against `baseUri`), parses the response as HTML
    /// and returns the `src` attribute of every node matched by the XPath
    /// `selector`. Returns an empty list when the selector matches nothing.
    let private load baseUri (uri : string) selector = async {
        // NOTE(review): a new HttpClient is created per request; consider sharing
        // one instance if this is ever called at a high rate.
        use http = new HttpClient()
        http.BaseAddress <- baseUri
        use! stream = uri |> http.GetStreamAsync |> Async.AwaitTask
        let doc = HtmlDocument()
        doc.Load(stream)
        let sources =
            // SelectNodes yields null (not an empty collection) when no node
            // matches, and Seq.map would throw on it, so map null to [].
            match doc.DocumentNode.SelectNodes selector with
            | null -> []
            | nodes ->
                nodes
                // String.Empty is passed as the default for a missing "src".
                |> Seq.map (fun n -> n.GetAttributeValue("src", String.Empty))
                |> List.ofSeq
        return sources
    }

    /// Builds a grabber for one site: the returned function takes a page number
    /// and requests that number's string form relative to `baseUri`, extracting
    /// `src` values from the nodes matched by `selector`.
    let makeGrabber baseUri selector = fun (page : uint32) ->
        load (Uri(baseUri)) (page.ToString()) selector
// One grabber per site, built from a (base URL, XPath selecting the <img> nodes) pair.
let grabbers =
    [ (@"http://1001mem.ru/best/", @"//section[@class='posts']//div[@class='image']//img")
      (@"http://joyreactor.cc/", @"//div[@class='article post-normal']//div[@class='image']//img") ]
    |> List.map (fun (baseUri, selector) -> Grabber.makeGrabber baseUri selector)

// Grab page 0 from every site, block until all downloads finish, then print each result.
let grabbed = Grabber.grab grabbers 0u |> Async.RunSynchronously
for item in grabbed do
    printfn "%A" item
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment