Skip to content

Instantly share code, notes, and snippets.

Last active February 4, 2021 11:00
Show Gist options
  • Save jiro4989/19d6cbae47859ed3964e5e1cb54f7534 to your computer and use it in GitHub Desktop.
Save jiro4989/19d6cbae47859ed3964e5e1cb54f7534 to your computer and use it in GitHub Desktop.
; Dependencies for the scraping script: enlive (HTML parsing/selection) and
; data.csv (CSV output). tools.deps reads library coordinates from the
; top-level :deps key, so the library map is nested under it.
; NOTE(review): assumes this map is the content of deps.edn — confirm.
{:deps {enlive/enlive {:mvn/version "1.1.6"}
        org.clojure/data.csv {:mvn/version "1.0.0"}}}
; How to run
; $ clojure scrape.clj
; Namespace of the scraping script.
; Aliases used below: html (enlive html-resource / select), csv (write-csv),
; io (writer) and str (split).
(ns deresute
  (:require [net.cgrand.enlive-html :as html]
            [clojure.data.csv :as csv]
            [clojure.java.io :as io]
            [clojure.string :as str]))
; Address of the page to scrape.
; NOTE(review): the URL is an empty string in this copy of the file; it has to
; be set to the real page address, otherwise java.net.URL. throws
; MalformedURLException when the script is loaded.
(def deresute-url "")

; Parsed document. html-resource is handed a java.net.URL built from the
; address string (a bare string cannot be called as a function).
(def htmlall (html/html-resource (java.net.URL. deresute-url)))

; Every :thead node that sits under a :table in the document.
(def header (html/select htmlall [:table :thead]))

; Every :tbody node that sits under a :table in the document.
(def body (html/select htmlall [:table :tbody]))
(defn map-content
  "Descends through the first child of nested nodes until a non-map value is
  reached, and returns that value.

  elem - a node map with a :content sequence, or any non-map value.
  Returns elem itself when it is not a map; nil when a node has no children."
  [elem]
  (if (map? elem)
    ; tail position, so recur keeps the descent from consuming stack
    (recur (-> elem :content first))
    elem))
; Column names for the CSV header: "タイプ" placed in front of the text pulled
; out of the header nodes. For every node in `header`, its :content is taken,
; nil contents are dropped, and map-content is applied to the first child.
; NOTE(review): this walks the :thead nodes themselves; confirm that this
; traversal actually reaches the individual header cells.
(def cols
  (let [header-texts (->> header
                          (map :content)
                          (remove nil?)
                          (map first)
                          (map map-content))]
    (conj header-texts "タイプ")))
(defn profile
  "Flattens one row node into a flat sequence of cell values.

  The first value is the first child of the first cell, the second value is
  taken two levels down inside the second cell, and every remaining cell
  contributes the result of map-content on its first child."
  [elem]
  (let [cells        (:content elem)
        leading-cell (first cells)
        second-cell  (second cells)
        leading      (first (:content leading-cell))
        nested       (-> second-cell :content first :content first)
        others       (->> (nthrest cells 2)
                          (map (fn [cell] (first (:content cell))))
                          (map map-content))]
    (flatten (list leading nested others))))
; One flattened profile per table row.
; Takes the children of the fifth :tbody node, keeps only node maps (string
; children such as whitespace are dropped), converts each with `profile`, and
; discards rows whose second value is empty.
; The sequence functions take the collection last, so they are threaded with
; ->> rather than ->.
; NOTE(review): index 4 assumes the wanted table is the fifth one on the page —
; confirm against the live document.
(def rows
  (-> body
      (nth 4)
      :content
      (->> (filter map?)
           (map profile)
           (remove #(empty? (second %))))))
; Header row followed by all data rows.
(def csv-body
  (cons cols rows))

; Write the table to deresute.csv; with-open closes the writer afterwards.
(with-open [out (io/writer "deresute.csv")]
  (csv/write-csv out csv-body))
; Turn every row into a map keyed by the CSV header names.
(def map-profile
  (map (fn [row] (zipmap cols row))
       rows))
(defn hash-map-apply
  "Builds a single-entry hash map from map entry v: the key is kept and the
  value is replaced by (f value)."
  [v f]
  (let [entry-key   (key v)
        entry-value (val v)]
    (hash-map entry-key (f entry-value))))
(defn hash-map-count
  "Builds a single-entry hash map from map entry v: the key is kept and the
  value is replaced by the number of items it contains."
  [v]
  (hash-map (key v) (count (val v))))
(println "属性ごとのアイドルの人数")

(defn count-of-type
  "Groups the profile maps in v by their タイプ value and returns one
  single-entry map per group holding the group size."
  [v]
  (let [by-type (group-by (fn [idol] (get idol "タイプ")) v)]
    (map hash-map-count by-type)))

(println (count-of-type map-profile))
; Example output:
; ({Cu 65} {Co 65} {Pa 60})
(println "CVがついているアイドルとそうでないアイドルの人数")

(defn count-of-having-cv
  "Counts the profile maps in v by whether they have a voice actor.

  A profile whose CV value is anything other than - is labelled CVあり,
  otherwise CVなし; the result is one single-entry map per label holding the
  number of profiles with that label."
  [v]
  (->> v
       (map #(assoc %
                    :has-cv (if (not= "-" (get % "CV"))
                              "CVあり"
                              "CVなし")))
       (group-by :has-cv)
       (map hash-map-count)))

(println (count-of-having-cv map-profile))
; Example output:
; ({CVなし 100} {CVあり 90})
(defn sort-idols-by-age
  "Orders the profile maps by their 年齢 value and reduces each one to a map
  with :name (from 名前) and :age (from 年齢).

  The 年齢 values are compared as they are stored, without parsing."
  [idols]
  (for [idol (sort-by (fn [m] (get m "年齢")) idols)]
    (hash-map :name (get idol "名前")
              :age  (get idol "年齢"))))
(defn idol-age
  "Picks one idol from a group after ordering the group by age.

  idols - a map entry whose value is a collection of profile maps (as produced
          by group-by).
  f     - selector applied to the age-ordered sequence, e.g. first or last.
  Returns whatever f returns, i.e. a {:name ... :age ...} map for first/last."
  [idols f]
  (-> idols
      val
      sort-idols-by-age
      f))
(println "属性ごとの最年少、最年長のアイドル")

(defn min-max-of-type
  "Groups the profile maps in v by their タイプ value and, for each group,
  returns a single-entry map from the type to {:min ... :max ...}, where :min
  and :max are the first and last idols of the age-ordered group."
  [v]
  (let [by-type (group-by (fn [idol] (get idol "タイプ")) v)]
    (map (fn [group]
           (hash-map (key group)
                     {:min (idol-age group first)
                      :max (idol-age group last)}))
         by-type)))

(println (min-max-of-type map-profile))
; Example output:
; ({Cu {:min {:age 0, :name 横山千佳}, :max {:age 27歳, :name 兵藤レナ}}} {Co {:min {:age 10歳, :name 佐城雪美}, :max {:age 31歳, :name 柊志乃}}} {Pa {:min {:age 0, :name 市原仁奈}, :max {:age 28歳, :name 片桐早苗}}})
(defn birthday-month
  "Returns the part of the idol's 誕生 value that precedes the first slash,
  i.e. the month of a month/day string.

  idol - a profile map keyed by column name."
  [idol]
  (-> idol
      (get "誕生")
      (str/split #"/")
      first))
(println "誕生日の月で集計")

(defn count-of-birthday-month
  "Groups the profile maps in v by birthday-month and returns one single-entry
  map per month holding the group size, ordered by each map's first entry."
  [v]
  (->> v
       (group-by birthday-month)
       (map hash-map-count)
       (sort-by first)))

(println (count-of-birthday-month map-profile))
; Example output:
; ({01 13} {02 14} {03 16} {04 17} {05 15} {06 17} {07 17} {08 20} {09 17} {10 15} {11 15} {12 14})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment