Created
November 4, 2016 12:36
-
-
Save gusty/f8ac85f29b7a36096aba99abbd97690d to your computer and use it in GitHub Desktop.
Sample WebLinq query. All Queen songs from Wikipedia.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* Namespaces
TryParsers
WebLinq
WebLinq.Collections
WebLinq.Html
WebLinq.Sys
WebLinq.Text
WebLinq.Xml
WebLinq.Xsv
WebLinq.Zip
*)
// Sample WebLinq query: fetches the Queen discography page from Wikipedia,
// collects the linked album entries, then downloads each album page and
// yields one { Album, Title, Author, Duration } record per track-list row.
from html in HttpQuery.Http.Get(new Uri("https://en.wikipedia.org/wiki/Queen_discography")).Html().Content()
let albums =
    // Only the first table matching table[border='1'] is read; every row
    // header (th[scope=row]) that contains a link contributes one entry.
    from tr in html.Tables("table[border='1']").First().QuerySelectorAll("tr")
    from th in tr.QuerySelectorAll("th[scope=row]")
    let a = th.QuerySelector("a[href]")
    where a != null
    select new
    {
        title = a.GetAttributeValue("title")?.Trim(),
        // NOTE(review): TryBaseHref presumably resolves the href against the
        // page's base URL - confirm against the WebLinq API.
        link  = html.TryBaseHref(a.GetAttributeValue("href")?.Trim()),
    }
    into e
    select new
    {
        e.title,
        url = TryParse.Uri(e.link, UriKind.Absolute),
    }
    into e
    // Drop entries with no title or whose link did not yield an absolute URI.
    where !string.IsNullOrEmpty(e.title) && e.url != null
    select e
from album in albums.ToQuery()
// The "select ... into" continuation ends the scope of the outer range
// variables, which is what allows "html" to be declared again below.
select album
into album
from html in HttpQuery.Http.Get(album.url).Html()
// Only the first two ".tracklist" tables of each album page are examined.
from tb in html.Content.Tables(".tracklist").Take(2).ToQuery()
from tr in tb.QuerySelectorAll("tr").ToQuery()
// Keep rows with exactly 4 <td> cells; for the two albums named here the
// expected count is 3 instead.
where tr.QuerySelectorAll("td").Count() == (album.title == "Queen II" || album.title == "Innuendo (album)" ? 3 : 4)
// Cells are identified by their exact inline style attribute. A row missing
// the title cell is skipped rather than aborting the whole query, matching
// the null-tolerant handling of the author cell and the title filter below.
let titleTd = tr.QuerySelectorAll("td[style='text-align: left; vertical-align: top;']").SingleOrDefault()
where titleTd != null
let authr = tr.QuerySelectorAll("td[style='vertical-align: top;']").SingleOrDefault()?.InnerText
// A missing duration cell yields a null Duration instead of throwing.
let durat = tr.QuerySelectorAll("td[style='padding-right: 10px; text-align: right; vertical-align: top;']").LastOrDefault()?.InnerText
// When the title cell has child elements, the first child's "title"
// attribute is used; otherwise the cell's own text.
let title = titleTd.HasChildElements ? titleTd.ChildElements.First().GetAttributeValue("title") : titleTd.InnerText
where title != null
select new
{
    Album    = album.title,
    Title    = title,
    Author   = authr,
    Duration = durat
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment