Created
July 23, 2020 22:20
-
-
Save gaswelder/a7afa1da6d6167502cead11ed9e8a051 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns clojure-noob.core | |
(:gen-class)) | |
(defn parse
  "Prints `url` to stdout, reads the resource it names with `slurp`,
  and returns the text parsed by Jsoup."
  [url]
  (println url)
  (let [html (slurp url)]
    (org.jsoup.Jsoup/parse html)))
(defn href
  "Returns the value of the \"href\" attribute of `a`."
  [a]
  (.attr a "href"))
(defn fetch
  "Follows the link `a`: takes its href and returns the parsed target."
  [a]
  (-> a href parse))
(defn main-page
  "Reads and parses the front page of the site."
  []
  (parse "http://www.dbdebunk.com/"))
(defn next-page
  "Returns the parsed page behind the first `.blog-pager-older-link`
  element of `page`, or nil when `page` contains no such element."
  [page]
  (when-let [older-link (first (.select page ".blog-pager-older-link"))]
    (fetch older-link)))
(defn page-posts
  "Returns a lazy sequence of the parsed posts linked from the
  `.entry-title a` anchors of `doc`."
  [doc]
  (let [title-links (.select doc ".entry-title a")]
    (map fetch title-links)))
(defn all-pages
  "Returns a lazy sequence of pages: `start-page`, then whatever
  `next-page` yields for each page in turn, ending when it yields nil.
  Called with no arguments, starts from `(main-page)`."
  ([]
   (all-pages (main-page)))
  ([start-page]
   (lazy-seq
    (when start-page
      ;; The successor is fetched as soon as this cell is realized.
      (let [following (next-page start-page)]
        (cons start-page (all-pages following)))))))
(defn page-content
  "Returns the inner HTML of the first element of `page` matching
  `#main`, or nil when the page has no such element.

  Returning nil (instead of invoking `.html` on nil and throwing a
  NullPointerException) lets a caller that concatenates results with
  `str` skip a page lacking `#main` rather than abort."
  [page]
  (some-> (.select page "#main")
          first
          .html))
(defn all-posts
  "Returns a lazy sequence of every post linked from every page
  produced by `(all-pages)`."
  []
  (->> (all-pages)
       (mapcat page-posts)))
(defn book
  "Returns one string: the `page-content` of every post from
  `(all-posts)`, concatenated in order."
  []
  ;; `apply str` appends into a single StringBuilder; `reduce str`
  ;; would re-copy the accumulated string for every post.
  (apply str (map page-content (all-posts))))
(defn -main
  "Program entry point: builds the book and writes it to the relative
  path \"dbdebunk.html\".

  Command-line arguments are accepted and ignored, so launching the
  program with extra arguments does not fail with an arity error."
  [& _args]
  (spit "dbdebunk.html" (book)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment