Created
December 27, 2023 09:20
-
-
Save SchlenkR/698e8eff4f448a106fd13f0202049591 to your computer and use it in GitHub Desktop.
The script to the video: https://www.youtube.com/watch?v=hedlfwDRh_c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "nuget: FsHttp"
#r "nuget: FSharp.Data"

open FsHttp
open FsHttp.Operators
open FSharp.Data
/// Request URL for the WordPress.com `articles` endpoint of site 15048173,
/// asking for page 1 with up to 100000 posts of category 121184674
/// (sub-categories included).
///
/// NOTE(review): the literal spans several lines and therefore contains line
/// breaks. This presumably relies on FsHttp stripping line breaks and
/// surrounding whitespace from URLs — confirm against the FsHttp version in use.
/// The repeated '&' separators are kept exactly as they were in the original
/// request; only the garbled `sectionHeader` parameter was repaired.
let url = "https://public-api.wordpress.com/wpcom/v2/sites/15048173/articles
?className=is-style-borders
&showExcerpt=0
&excerptLength=55
&showReadMore=0
&readMoreLabel=Keep%20reading
&showDate=1
&showImage=1
&showCaption=1
&disableImageLazyLoad=0
&fetchPriority
&imageShape=landscape
&minHeight=0
&moreButton=1
&moreButtonText
&showAuthor=0
&showAvatar=1
&showCategory=0
&postLayout=grid
&columns=3
&colGap=3
&postsToShow=100000
&mediaPosition=top
&&categories%5B0%5D=121184674
&includeSubcategories=1
&&&&&typeScale=4
&imageScale=3
&mobileStack=0
&sectionHeader
&specificMode=0
&textColor
&customTextColor
&singleMode=0
&showSubtitle=0
&postType%5B0%5D=post
&textAlign=left
&includedPostStatuses%5B0%5D=publish
&deduplicate=1
&wpcom_site=15048173
&page=1
"
/// Sends a GET request to `url`, reads the response body as JSON and
/// returns an enumerator over the elements of its `items` array.
let posts =
    // `%` is the prefix operator brought in by `open FsHttp.Operators`.
    let response = % get url
    let json = Response.toJson response
    json?items.EnumerateArray()
/// Lazy sequence holding the `html` string property of every element in `posts`.
let postHtmls =
    seq {
        for post in posts do
            yield post?html.GetString()
    }
/// One anonymous record per entry of `postHtmls`, extracted from its HTML fragment:
///   imgSrc - `src` attribute of the first `img` element, or `None` if the
///            fragment contains no `img` element
///   href   - `href` attribute of the post link
///   title  - inner text of the post link
///
/// The post link is the first `a` inside the first `div.entry-wrapper` found
/// under `article.category-f-weekly`. If either element is missing, `.Head`
/// raises and the whole script stops, exactly as before.
let postInfos =
    [
        for i, htmlText in postHtmls |> Seq.indexed do
            // Progress output, one line per processed post.
            printfn $"iteration {i}"

            let html = HtmlDocument.Parse(htmlText)

            // A missing image is an expected case, so it is modelled with
            // `tryHead` instead of catching every exception thrown by `.Head`;
            // unrelated failures are no longer silently turned into `None`.
            let imgSrc =
                html.CssSelect("img")
                |> Seq.tryHead
                |> Option.map (fun img -> img.AttributeValue("src"))

            let theLink =
                html
                    .CssSelect("article.category-f-weekly")
                    .CssSelect("div.entry-wrapper").Head
                    .CssSelect("a").Head

            let href = theLink.AttributeValue("href")
            let title = theLink.InnerText()

            {| imgSrc = imgSrc; href = href; title = title |}
    ]
/// Markdown document listing every scraped post: a fixed header with the
/// post count, then for each post a linked heading followed by either its
/// image tag or a placeholder text. Fragments are separated by a blank line.
let mdContent =
    let headerLines =
        [
            "# Thank You Sergey :)"
            "This is a list of all the posts that were scraped from the [F# Weekly](https://sergeytihon.com/category/f-weekly/) blog."
            "## Some Stats"
            $"- {postInfos.Length} posts scraped"
            "## Posts"
        ]

    let postLines =
        postInfos
        |> List.collect (fun post ->
            let heading = $"## [{post.title}]({post.href})"

            let image =
                match post.imgSrc with
                | None -> "No Image Available"
                // The `crop=1` query flag is rewritten to `crop=0` in the emitted tag.
                | Some src -> $"""<img src="{src}">""".Replace("&crop=1", "&crop=0")

            [ heading; image ])

    (headerLines @ postLines) |> String.concat "\n\n"
// Write the generated Markdown to `posts.md` in the directory of this script.
do
    let outputPath = __SOURCE_DIRECTORY__ + "/posts.md"
    System.IO.File.WriteAllText(outputPath, mdContent)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment