rss中的关键词统计
Last active
August 5, 2019 02:13
-
-
Save BadUncleX/27ab50a79c716679e3b88ff70f5cf332 to your computer and use it in GitHub Desktop.
rss中的关键词统计 来自joy of clojure clj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn feed->zipper
  "Parses the XML source `uri-str` with `xml/parse` and returns an XML
  zipper (`zip/xml-zip`) positioned at the root of the parsed tree."
  [uri-str]
  (let [root (xml/parse uri-str)]
    (zip/xml-zip root)))
(defn normalize
  "Returns a zipper loc whose children are expected to be the feed entries.

  When the node at `feed` is tagged :feed the loc is returned unchanged;
  for any other tag the loc of its leftmost child (`zip/down`) is returned."
  [feed]
  (case (:tag (zip/node feed))
    :feed feed
    (zip/down feed)))
(defn feed-children
  "Returns a lazy seq of the feed's child elements tagged :item or :entry.

  `uri-str` is handed to `feed->zipper`; the resulting loc is passed
  through `normalize` before its children are filtered by tag."
  [uri-str]
  (let [entry-tag? #{:item :entry}
        loc        (normalize (feed->zipper uri-str))]
    (filter #(entry-tag? (:tag %))
            (zip/children loc))))
(comment
  ;; The same pipeline written as nested calls rather than with ->>.
  ;; Do the explicit parentheses read more intuitively?
  (filter (comp #{:item :entry} :tag)
          (zip/children
           (normalize
            (zip/xml-zip
             (xml/parse "http://www.ruby-lang.org/en/feeds/news.rss"))))))
(defn title
  "Returns the first content item of the first child of `entry` tagged
  :title. Returns nil when `entry` is nil, has no :content, has no :title
  child, or when that child has no content."
  [entry]
  (let [title-node? #(= :title (:tag %))
        title-node  (first (filter title-node? (:content entry)))]
    (first (:content title-node))))
(defn count-text-task
  "Counts case-insensitive matches of `txt` in the text that `extractor`
  pulls out of each item of a feed.

  extractor - function applied to every item returned by `feed-children`;
              items for which it returns nil are skipped.
  txt       - search text; it is compiled as a regular expression with the
              `(?i)` flag prepended, so regex metacharacters keep their
              special meaning.
  feed      - passed unchanged to `feed-children`.

  Returns the total number of matches over all items."
  [extractor txt feed]
  (let [items (feed-children feed)
        re    (re-pattern (str "(?i)" txt))]
    (->> items
         ;; `keep` drops nil extractions (e.g. an item with no title);
         ;; `re-seq` throws a NullPointerException when given nil.
         (keep extractor)
         (mapcat #(re-seq re %))
         count)))
(defmacro as-futures
  "Runs a set of action forms once per element of a collection, each run
  in its own `future`, then evaluates task forms with the futures bound.

  Usage:

    (as-futures [item coll]
      action-form ...
      :as results
      =>
      task-form ...)

  `item` is bound to each element of `coll` inside the action forms. The
  symbol following :as is bound to the seq of futures while the task forms
  (everything after =>) are evaluated; the value of the last task form is
  returned."
  [[a args] & body]
  (let [parts          (partition-by #{'=>} body)
        [acts _ [res]] (partition-by #{:as} (first parts))
        [_ _ task]     parts]
    ;; `for` is lazy. Without `doall` a future is only created when the task
    ;; body reaches it, so a task that derefs while walking the seq would
    ;; start and wait for one future at a time instead of running them all
    ;; concurrently. Forcing the seq launches every future up front.
    `(let [~res (doall (for [~a ~args] (future ~@acts)))]
       ~@task)))
(defn occurrences
  "Sums the `count-text-task` results for `tag` over every feed in `feeds`.

  One `count-text-task` call per feed is wrapped in a future via
  `as-futures`; the futures are then dereferenced and their counts added.
  Returns 0 when no feeds are given."
  [extractor tag & feeds]
  (as-futures [feed feeds]
    (count-text-task extractor tag feed)
    :as pending
    =>
    (reduce + 0 (map deref pending))))
(comment
  ;; REPL examples. The feeds are fetched from the given URLs, so the
  ;; recorded results are whatever the feeds contained when the examples
  ;; were run and may differ today.

  (count-text-task title "Erlang" "http://feeds.feedburner.com/ElixirLang")
  ;;=> 0

  (count-text-task title "Elixir" "http://feeds.feedburner.com/ElixirLang")
  ;;=> 14

  (count-text-task title "Yak" "http://blog.fogus.me/feed/")
  ;;=> 1

  (count-text-task title "Ruby" "http://www.ruby-lang.org/en/feeds/news.rss")

  (occurrences title "released"
               "http://blog.fogus.me/feed/"
               "http://feeds.feedburner.com/ElixirLang"
               "http://www.ruby-lang.org/en/feeds/news.rss")
  ;;=> 11
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment