Created
December 29, 2016 23:22
-
-
Save TeaDrivenDev/7978ea35093948bb683b14a8d6f3abc9 to your computer and use it in GitHub Desktop.
Raw script from http://teadrivendev.github.io/2016/12/29/FsAdvent-Rss-Feed
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r @"..\packages\FSharp.Data\lib\net40\FSharp.Data.dll" | |
open FSharp.Data | |
/// Address of the F# Advent Calendar 2016 overview page. It must be a literal
/// because it is passed to the HTML type provider as a static argument below.
let [<Literal>] OverviewUrl = @"https://sergeytihon.wordpress.com/2016/10/23/f-advent-calendar-in-english-2016/"

/// Typed view of the overview page, generated by the FSharp.Data HTML type provider
/// from the page found at OverviewUrl.
type Page = FSharp.Data.HtmlProvider<OverviewUrl>
/// One blog post listed in the calendar table.
type Entry =
    { /// Date text taken from the table row (parseRow keeps its first six characters).
      Date : string
      /// Author name as shown in the table.
      Author : string
      /// Link text of the post.
      Title : string
      /// Target of the post's link.
      Url : string }
/// Active pattern that yields the node's inner text with surrounding whitespace removed.
let (|TextValue|) (n : HtmlNode) =
    let rawText = n.InnerText()
    rawText.Trim()
/// Active pattern that yields the posts linked from a table cell as a sequence of
/// (href, link text) pairs, one per `a` descendant.
/// A cell whose first descendant is a `span` yields no posts at all
/// (presumably such cells are placeholders for posts not yet published — confirm against the page).
let (|Posts|) (n : HtmlNode) =
    let startsWithSpan =
        n.Descendants()
        |> Seq.tryHead
        |> Option.exists (fun first -> first.Name() = "span")
    if startsWithSpan then
        Seq.empty
    else
        seq {
            for anchor in n.Descendants("a") ->
                anchor.Attribute("href").Value(), anchor.InnerText()
        }
/// Converts one table row into zero or more entries.
/// Only rows with exactly three `td` descendants (date, author, posts) produce entries;
/// any other row produces an empty sequence.
let parseRow (row : HtmlNode) =
    let cells = row.Descendants("td") |> List.ofSeq
    match cells with
    | [ TextValue date; TextValue name; Posts posts ] ->
        seq {
            for (url, title) in posts ->
                // Only the first six characters of the date cell are kept.
                { Date = date.Substring(0, 6); Author = name; Title = title; Url = url }
        }
    | _ -> Seq.empty
/// Returns the descendants of `node` whose element name is `name`.
/// The `false` flag is handed to HtmlNode.descendants unchanged
/// (presumably it disables searching inside nodes that already matched — confirm in FSharp.Data).
let descendantsNamed name (node : HtmlNode) =
    HtmlNode.descendants false (fun candidate -> candidate.HasName name) node
/// Returns the first descendant of `node` named `name`.
/// Uses Seq.head, so it throws when no such descendant exists.
let firstDescendantNamed name node =
    node
    |> descendantsNamed name
    |> Seq.head
/// Function form of HtmlNode.InnerText(), convenient at the end of a pipeline.
let innerText (node : HtmlNode) : string =
    node.InnerText()
/// Reads the calendar year from the page heading.
/// The heading is the first `h1` inside the first `article` of `body`; its text is
/// expected to end in a four-digit year.
/// Fails with a descriptive message when the heading does not end in four digits.
let getYear body =
    let title =
        body
        |> firstDescendantNamed "article"
        |> firstDescendantNamed "h1"
        |> innerText
    // Trim first: the pattern is anchored at the end of the text, so trailing
    // whitespace in the heading would otherwise prevent a match.
    let yearMatch = System.Text.RegularExpressions.Regex.Match(title.Trim(), @"\d{4}$")
    if yearMatch.Success then
        int yearMatch.Value
    else
        // Without this check, indexing the captures of a failed match throws an
        // out-of-range error that says nothing about the actual problem.
        failwithf "Could not find a four-digit year at the end of the page title '%s'" title
/// Collects the entries of all `tr` rows inside the first `tbody` of the first `table` in `body`.
let getEntries body =
    let table = firstDescendantNamed "table" body
    let tableBody = firstDescendantNamed "tbody" table
    tableBody
    |> descendantsNamed "tr"
    |> Seq.collect parseRow
/// Loads the page at `pageUrl` and returns the pair (calendar year, entries).
let getValues (pageUrl : string) =
    let page = Page.Load(pageUrl)
    let body = page.Html.Body()
    (getYear body, getEntries body)
/// Year and entries of the overview page, with the entries forced into a list.
/// Evaluating this binding loads the page from OverviewUrl.
let year, entries =
    let pageYear, pageEntries = getValues OverviewUrl
    pageYear, List.ofSeq pageEntries
#r "System.Xml.Linq" | |
open System | |
open System.Linq | |
open System.Xml.Linq | |
/// Shorthand for creating an XName from a plain string.
let xn (name : string) : XName =
    XName.Get(name)
/// Escapes `s` for use as XML text by assigning it as the inner text of a
/// throwaway element and reading back that element's inner XML.
let xmlEncode s =
    let document = Xml.XmlDocument()
    let scratch = document.CreateElement "root"
    scratch.InnerText <- s
    scratch.InnerXml
/// Returns the year an entry belongs to: `baseYear + 1` when the first
/// space-separated token of `date` is exactly "Jan", otherwise `baseYear`.
let getEntryYear baseYear (date : string) =
    let month = date.Split(' ').[0]
    if month = "Jan" then baseYear + 1 else baseYear
/// Formats `date` with the standard "r" format string.
/// No time-zone conversion is performed here; the value is formatted as given.
let encodeDate (date : DateTime) : string =
    date.ToString("r")
/// Builds the RSS `item` element for one entry.
/// `baseYear` is the calendar year; January entries are dated in the following year
/// (see getEntryYear). The entry's Url is used for both `link` and `guid`.
/// Throws a FormatException when the entry's date text cannot be parsed.
let entryXml baseYear entry =
    // NOTE(review): XElement escapes string content itself when serialized, so the
    // values pre-encoded by xmlEncode appear escaped twice in the output
    // (e.g. "&" becomes "&amp;amp;"). Kept as is — confirm this is intended for the feed reader.
    let title =
        sprintf "%s | %s | %s" entry.Date (xmlEncode entry.Author) (xmlEncode entry.Title)
    let published =
        let dateText = sprintf "%s %i" entry.Date (getEntryYear baseYear entry.Date)
        // The date text uses English month abbreviations, so parse it with the
        // invariant culture instead of whatever culture the machine happens to use.
        DateTime.Parse(dateText, System.Globalization.CultureInfo.InvariantCulture)
        |> encodeDate
    XElement(xn "item",
        XElement(xn "title", title),
        XElement(xn "link", entry.Url),
        XElement(xn "guid", entry.Url),
        XElement(xn "pubDate", published))
/// Creates the RSS 2.0 skeleton (an `rss` element with a single `channel`) for the
/// given calendar year, without any `item` elements.
/// The channel's title and description are both "F# Advent Calendar <year>",
/// its link is OverviewUrl, and pubDate/lastBuildDate are the current time.
let emptyFeed year =
    // encodeDate formats with "r", which labels the time as GMT without converting it,
    // so start from UTC to make the emitted timestamp correct.
    let timestamp = encodeDate DateTime.UtcNow
    let title = sprintf "F# Advent Calendar %i" year
    XElement(xn "rss",
        XAttribute(xn "version", "2.0"),
        XElement(xn "channel",
            XElement(xn "title", title),
            XElement(xn "link", OverviewUrl),
            XElement(xn "description", title),
            XElement(xn "pubDate", timestamp),
            XElement(xn "lastBuildDate", timestamp),
            XElement(xn "language", "en")))
/// Appends `entries` to the first `channel` element found below `feed`.
/// The feed is modified in place and also returned, so the call can end a pipeline.
/// Throws when `feed` has no `channel` descendant (First() on an empty sequence).
let addEntries (feed : XElement) (entries : XElement seq) =
    let channel = feed.Descendants(xn "channel").First()
    // Materialize the sequence before adding; the array is added element by element.
    let items = Seq.toArray entries
    channel.Add(box items)
    feed
/// Loads the calendar page at `url` and returns the pair (year, complete RSS feed).
/// Entries are added in the reverse of their order on the page.
let createFeed url =
    let year, entries = getValues url
    let items =
        entries
        |> Seq.rev
        |> Seq.map (entryXml year)
    let feed = addEntries (emptyFeed year) items
    year, feed
open System.IO | |
/// Local working copy of the site repository that receives the generated feed.
let repositoryDir : string = @"D:\Development\Projects\Active\TeaDrivenDev.github.io"

/// Feed file path relative to the repository; `%i` is filled in with the calendar year.
let rssFile : string = @"public\fsadvent%i.rss"
/// Writes the string form of `feed` to `fileName`, replacing any existing file.
let writeFeed fileName feed =
    let contents = string feed
    File.WriteAllText(fileName, contents)
// Build the feed from the overview page and write it to the year-specific
// file inside the repository working copy.
do
    let year, feed = createFeed OverviewUrl
    let relativePath = sprintf (Printf.StringFormat<_> rssFile) year
    writeFeed (Path.Combine(repositoryDir, relativePath)) feed
#r @"..\packages\FAKE\tools\FakeLib.dll" | |
open Fake.Git.CommandHelper | |
// Stage the .rss files and commit them, running both git commands in order
// inside the repository working copy.
[ "add *.rss"
  "commit -m \"Updated F# advent calendar RSS feed\"" ]
|> List.iter (gitCommand repositoryDir)
// Alternate createFeed function for first generation when using Feedly
/// Builds a feed from only the first ten entries of the page at `url`, added in reverse order.
/// Unlike createFeed, this returns just the feed element, not a (year, feed) pair.
let createFeed' url =
    let year, entries = getValues url
    let items =
        entries
        |> Seq.truncate 10
        |> Seq.rev
        |> Seq.map (entryXml year)
    addEntries (emptyFeed year) items
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is so neat! 👍