Last active
October 28, 2021 19:41
-
-
Save duanebester/e33cb26e033660a49c751e6a692d37a4 to your computer and use it in GitHub Desktop.
Scraping - find-products
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn get-content
  "Folds the child nodes of `content` into a map: each child's :tag becomes a
  key whose value is the first item of that child's :content. When two
  children share a tag, the later one wins."
  [content]
  (reduce (fn [acc child]
            (assoc acc (:tag child) (first (:content child))))
          {}
          (:content content)))
(defn find-products
  "Crawls the sitemap at `url` and, for each product entry, calls `f` with the
  site and the product.

  The document is read with `parse-xml` and dispatched on its root :tag:
    :urlset       - every child is converted with `get-content` and passed
                    to `f` as (f site entry).
    :sitemapindex - every child's :loc is crawled recursively with the same
                    `site` and `f`; children without a :loc are skipped
                    instead of being crawled with a nil URL.
    anything else - prints none.

  When `parse-xml` returns nil/false an 'unable to process' message is
  printed instead. A completion message is printed at the end of every call,
  including recursive ones. Returns nil."
  [site url f]
  ;; Log before parsing so the offending URL is visible even if parse-xml throws.
  (println (str "Parsing URL: " url " for site " site))
  (if-let [p (parse-xml url)]
    ;; Lazy: only realized by the branches that actually walk the entries.
    (let [entries (map get-content (:content p))]
      (case (:tag p)
        :urlset       (doseq [entry entries]
                        (f site entry))
        ;; keep drops nil :loc values so we never recurse on a missing URL.
        :sitemapindex (doseq [loc (keep :loc entries)]
                        (find-products site loc f))
        (println "none")))
    (println (str "Unable to process url for " site)))
  (println "Done finding products"))
;; Example run: crawl the bellroy sitemap and print every entry that is found.
(let [site        "bellroy"
      sitemap-url "https://bellroy.com/sitemap.xml"]
  (find-products site sitemap-url println))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment