Created
April 9, 2022 22:34
-
-
Save johanmynhardt/84683c04ec84ae5659a6e0642e2cc0aa to your computer and use it in GitHub Desktop.
Simple Zipper-based Syndication XML Extractor to navigate the "What's New?" AWS Feed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns user | |
"Simple Syndication XML Extractor to navigate the available AWS Feed." | |
(:require [clojure.data.xml :as xml] | |
[clojure.java.io :as io] | |
[clojure.pprint :refer [pprint]] | |
[clojure.string :as str] | |
[clojure.zip :as z])) | |
(def aws-feed-url
  "URL of the AWS \"What's New?\" syndication feed read by this namespace."
  "https://aws.amazon.com/about-aws/whats-new/recent/feed/")
(def feed-xml
  "Parsed XML of the document at `aws-feed-url`.

  Evaluated when this namespace is loaded: a reader is opened on the URL and
  handed to `xml/parse`. The reader is not closed here."
  (-> aws-feed-url
      io/reader
      xml/parse))
(defn items-level-loc
  "Return the zipper `loc` where article items start in the XML.

  That is the leftmost child of the leftmost child of `xml-root`
  (presumably root = <rss> and its child = <channel> -- confirm against the
  feed being parsed).

  Returns nil when there is no such level to descend into, e.g. when
  `xml-root` has no children, instead of throwing a NullPointerException.

  Throws AssertionError (via :pre) when `xml-root` is not a parsed
  clojure.data.xml Element."
  [xml-root]
  {:pre [(instance? clojure.data.xml.node.Element xml-root)]}
  ;; `z/down` returns nil on a node without children but throws when it is
  ;; itself handed nil, so `some->` stops the descent at the first missing level.
  (some-> (z/xml-zip xml-root)
          z/down
          z/down))
(defn- item-node->map
  "Build a map of child tag -> value from the children of one <item> node.

  - Children without a `:tag` (e.g. bare text nodes) are skipped.
  - `:category` values are split on \",\" and collected into a set; repeated
    <category> children are merged into that same set, and an empty
    <category/> contributes nothing.
  - Every other tag maps to the first node of its content."
  [{:keys [content]}]
  (reduce (fn [acc {:keys [tag content]}]
            (let [text (first content)]
              (cond
                ;; Not an element: nothing to key the value by.
                (nil? tag)
                acc

                (= :category tag)
                (update acc tag (fnil into #{})
                        ;; Guard against empty or non-text content, which
                        ;; `str/split` cannot handle.
                        (if (string? text)
                          (str/split text #",")
                          []))

                :else
                (assoc acc tag text))))
          {}
          content))

(defn xml->clj
  "Converts parsed XML Element to Clojure coll.

  Walks every sibling starting at `(items-level-loc xml-root)` -- including
  that first location itself -- and returns a vector with one `{:item {...}}`
  map per node tagged `:item`, in document order. Nodes with any other tag
  are ignored. Returns `[]` when there are no items.

  Throws AssertionError (via :pre) when `xml-root` is not a parsed
  clojure.data.xml Element."
  [xml-root]
  {:pre [(instance? clojure.data.xml.node.Element xml-root)]}
  (->> (items-level-loc xml-root)
       ;; `z/right` returns nil past the last sibling, which ends the walk.
       (iterate z/right)
       (take-while some?)
       (map z/node)
       (filter #(= :item (:tag %)))
       (mapv (fn [node] {:item (item-node->map node)}))))
(defn all-categories
  "Collect the `:category` values found under the `:item` key of every entry
  in `items` and return them as a sorted sequence without duplicates."
  [items]
  (let [unique (into #{}
                     (mapcat (fn [entry] (:category (:item entry))))
                     items)]
    (sort unique)))
(defn select-categories
  "Lazily keep the entries of `items` for which `category-p` returns a truthy
  value when applied to the entry's `[:item :category]` value."
  [category-p items]
  (filter (fn [entry]
            (category-p (get-in entry [:item :category])))
          items))
(defn from-item
  "Wrap `pred` (a fn or key) so that it is applied to the value stored under
  `:item` of an entry rather than to the entry itself."
  [pred]
  (fn [entry]
    (pred (:item entry))))
;; REPL scratchpad: the forms inside `comment` are never evaluated on load;
;; evaluate them one at a time from an editor-connected REPL.
(comment
  ;; List all the categories available from the feed.
  (->> feed-xml
       (xml->clj)
       (all-categories)
       (pprint))

  ;; Print out the items containing the specified category.
  ;; Output limited fields.
  (->> feed-xml
       (xml->clj)
       (select-categories #(contains? % "general:products/aws-lambda"))
       #_(map (from-item #(select-keys % [:pubDate :title :category])))
       (map (from-item (juxt :pubDate :category :title)))
       ;; `run!` prints eagerly; a lazy `(map pprint)` only prints if something
       ;; happens to realize the resulting seq.
       (run! pprint))

  #_{})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment