Skip to content

Instantly share code, notes, and snippets.

@gaswelder
Created July 23, 2020 22:20
Show Gist options
  • Save gaswelder/a7afa1da6d6167502cead11ed9e8a051 to your computer and use it in GitHub Desktop.
Save gaswelder/a7afa1da6d6167502cead11ed9e8a051 to your computer and use it in GitHub Desktop.
;; Namespace for the scraper defined below.
;; :gen-class asks the compiler to generate a Java class for this namespace
;; when it is AOT-compiled, so that -main can act as the program entry point.
(ns clojure-noob.core
(:gen-class))
(defn parse
  "Prints `url` to stdout, reads its contents with slurp, and returns the
  result of handing that text to org.jsoup.Jsoup/parse."
  [url]
  ;; Log before fetching so the URL is visible even if the read fails.
  (println url)
  (let [html (slurp url)]
    (org.jsoup.Jsoup/parse html)))
(defn href
  "Returns the value of the \"href\" attribute of `a`, as reported by its
  .attr method."
  [a]
  (-> a
      (.attr "href")))
(defn fetch
  "Takes a link element `a`, extracts its href, and returns the parsed
  document found at that address."
  [a]
  (-> a href parse))
(defn main-page
  "Fetches and parses the site's front page."
  []
  (let [root-url "http://www.dbdebunk.com/"]
    (parse root-url)))
(defn next-page
  "Given any page, returns the page its first `.blog-pager-older-link`
  element points to, or nil when the page has no such element."
  [page]
  ;; Destructuring the seq binds the first match; an empty selection yields
  ;; a nil seq, so the body is skipped and nil is returned.
  (when-let [[older-link] (seq (.select page ".blog-pager-older-link"))]
    (fetch older-link)))
(defn page-posts
  "Given a listing page `doc`, returns a lazy sequence of the documents
  linked from its `.entry-title a` elements. Each post is fetched only when
  the sequence is consumed."
  [doc]
  (let [title-links (.select doc ".entry-title a")]
    (map fetch title-links)))
(defn all-pages
  "Lazy sequence of pages obtained by repeatedly following next-page.

  With no arguments, starts from (main-page). With `start-page`, starts from
  that page; a nil `start-page` yields an empty sequence. The sequence ends
  when next-page returns nil."
  ([] (all-pages (main-page)))
  ([start-page]
   (letfn [(walk [page]
             ;; lazy-seq defers the next fetch until the tail is realized.
             (lazy-seq
               (when page
                 (cons page (walk (next-page page))))))]
     (walk start-page))))
(defn page-content
  "Returns the inner HTML of the first element of `page` matching `#main`."
  [page]
  (-> page
      (.select "#main")
      first
      .html))
(defn all-posts
  "Lazy sequence of every post document, gathered by walking all listing
  pages and concatenating the posts linked from each one."
  []
  (->> (all-pages)
       (map page-posts)
       (apply concat)))
(defn book
  "Returns one string holding the `#main` HTML of every post, concatenated in
  the order all-posts yields them. Returns \"\" when there are no posts."
  []
  ;; apply str appends every piece into a single StringBuilder, whereas
  ;; reduce str would re-copy the accumulated string on each step.
  (apply str (map page-content (all-posts))))
(defn -main
  "Program entry point: builds the book and writes it to dbdebunk.html in the
  current working directory.

  Command-line arguments are accepted and ignored, so launching the program
  with extra arguments does not fail with an arity error."
  [& _args]
  (spit "dbdebunk.html" (book)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment