Created
February 26, 2014 17:16
-
-
Save glorphindale/9234032 to your computer and use it in GitHub Desktop.
Пример использования enlive для выдирания информации из страниц
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns enlive-examples.core | |
(:require [clojure.string :as string] | |
[net.cgrand.enlive-html :as html] | |
[cheshire.core :as chesh])) | |
;; Address of the members listing page that gets scraped. Declared dynamic so
;; it can be rebound with `binding`.
(def ^{:dynamic true} *base-url*
  "http://2013.codefest.ru/members/")
(defn fetch-url
  "Builds a java.net.URL from the string `url` and passes it to enlive's
  `html-resource`, returning whatever that call yields."
  [url]
  (-> url
      java.net.URL.
      html/html-resource))
;; Result of fetching *base-url*; evaluated once, when this namespace is loaded.
(def raw-data
  (-> *base-url* fetch-url))
;; Every `li.b-peoples__item` found under a `div` whose `data-role` attribute
;; equals "peoples-names" in the fetched page.
(def selected
  (let [names-container [:div (html/attr= :data-role "peoples-names")]]
    (html/select raw-data [names-container :li.b-peoples__item])))
(defn with-letter?
  "Returns true when the second child of `item` (index 1 of its :content) is
  a node whose :class attribute equals \"b-peoples__letter\".

  Returns false - instead of throwing IndexOutOfBoundsException - when the
  item has fewer than two children or no :content at all."
  [item]
  (let [second-child (nth (:content item) 1 nil)] ; nil when index 1 is absent
    ;; Keyword lookup on nil or on a plain string child yields nil, so
    ;; anything other than a matching node compares as false.
    (= "b-peoples__letter"
       (-> second-child :attrs :class))))
(defn item->name
  "Extracts a person's name from a list item.

  If the item contains an `a` element whose href starts with \"/speaker\",
  the text of the first such link is returned. Otherwise the name is taken
  from the item's own :content and trimmed: child index 2 when `with-letter?`
  holds for the item, child index 0 when it does not."
  [item]
  (let [speaker-links (html/select item [[:a (html/attr-starts :href "/speaker")]])]
    (if-let [link (first speaker-links)]
      (html/text link)
      (let [name-index (if (with-letter? item) 2 0)]
        (string/trim (nth (:content item) name-index))))))
(defn item->person
  "Turns a list item into a `[name company position]` vector.

  The name comes from `item->name`. Company and position come from the first
  match of the `span.b-peoples__company` selector: that text is split on the
  first comma into at most two parts, and each part is trimmed and
  lower-cased. Position is nil when the text has no comma. Company and
  position are both nil when the item has no company span at all (previously
  this case threw a NullPointerException inside `string/split`)."
  [item]
  (let [pname        (item->name item)
        ;; NOTE(review): assumes the selector yields plain strings - confirm
        ;; against enlive's handling of `html/text` inside a selector.
        company-text (first (html/select item [:span.b-peoples__company html/text]))
        ;; some-> short-circuits to nil when no company span was found.
        parts        (some-> company-text (string/split #"," 2))
        ;; Trim first, then lower-case, in one pass; mapping over nil gives an
        ;; empty seq, so both names destructure to nil.
        [pcomp ppos] (map (comp string/lower-case string/trim) parts)]
    [pname pcomp ppos]))
;; Eagerly converts the first 25 selected items; the result is not bound to
;; anything.
(->> selected
     (take 25)
     (map item->person)
     doall)
;; Lazy sequence of [name company position] vectors, one per selected item.
(def grouped-data
  (->> selected
       (map item->person)))
(comment
  ;; REPL snippet: the 15 most frequent values found in the second slot
  ;; (company) of each record, highest count first.
  (take 15
        (reverse
          (sort-by val
                   (frequencies (map second grouped-data))))))
;; Serialises all records to JSON with cheshire and writes the string to
;; codefest-2013-raw.json.
(->> grouped-data
     chesh/generate-string
     (spit "codefest-2013-raw.json"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment