Skip to content

Instantly share code, notes, and snippets.

@humbhenri
Last active January 1, 2016 22:49
Show Gist options
  • Save humbhenri/8212582 to your computer and use it in GitHub Desktop.
Save humbhenri/8212582 to your computer and use it in GitHub Desktop.
Download images from a page in parallel
(ns get-images.core
(:require [clojure.tools.cli :refer [parse-opts]]
[net.cgrand.enlive-html :refer [select html-resource]]
[clojure.string :refer [split join]]
[clojure.java.io :refer [input-stream output-stream copy]])
(:import [java.net URL]
[java.io File])
(:gen-class))
(def options
  "Command-line option specs passed to `parse-opts` in `-main`:
  `-w/--url URL` takes the page address, `-h/--help` is a flag."
  [["-w" "--url URL" "site url"]
   ["-h" "--help"]])
(defn fetch-images
  "Downloads every image referenced by an <img> tag on the page at `url`
  into the directory `dir`, fetching the images in parallel with `pmap`.

  url - page address as a string (must be parseable by java.net.URL)
  dir - path of an existing directory that receives the files

  Each `src` attribute is resolved against the page URL, so absolute,
  host-relative (/a.png), page-relative (img/a.png) and protocol-relative
  (//cdn/a.png) references all work, and the page's own scheme and port
  are preserved. <img> tags without `src`, and `src` values that cannot be
  turned into a URL (e.g. data: URIs), are skipped. Returns nil."
  [url dir]
  (let [page-url    (URL. url)
        ;; URL(context, spec) performs standard relative-reference resolution.
        resolve-src (fn [src]
                      (try
                        (URL. page-url src)
                        (catch java.net.MalformedURLException _ nil)))
        ;; Name the file after the last path segment only, so a query
        ;; string never ends up in the file name.
        file-name   (fn [^URL img-url]
                      (not-empty (last (split (.getPath img-url) #"/"))))
        save-image  (fn [^URL img-url]
                      (if-let [target (file-name img-url)]
                        (do
                          (println "Fetching" (str img-url))
                          (with-open [in  (input-stream img-url)
                                      out (output-stream (File. dir target))]
                            (copy in out)))
                        (println "Skipping (no file name)" (str img-url))))
        ;; De-duplicate on the raw src strings: two workers must not write
        ;; the same file at once, and comparing URL objects would trigger
        ;; DNS lookups through URL.equals/hashCode.
        image-urls  (->> (select (html-resource page-url) #{[:img]})
                         (keep #(get-in % [:attrs :src]))
                         (distinct)
                         (keep resolve-src))]
    (dorun (pmap save-image image-urls))))
(defn -main
  "Command-line entry point.

  Parses `args` against `options`. With --help it prints the usage summary
  and exits 0; on option-parsing errors or a missing or malformed --url it
  prints a message and exits 1. Otherwise it creates a directory named
  after the URL's host in the current working directory and downloads the
  page's images into it via `fetch-images`."
  [& args]
  (let [{:keys [options errors summary]} (parse-opts args options)
        usage (str "Usage\n" summary)
        exit  (fn [status msg] (println msg) (System/exit status))
        url   (:url options)]
    ;; Validate before touching `url`: building a URL from nil throws, which
    ;; previously prevented --help and the usage message from being shown.
    (cond
      (:help options) (exit 0 usage)
      errors          (exit 1 (join \newline errors))
      (not url)       (exit 1 usage))
    (let [dir (try
                (.getHost (URL. url))
                (catch java.net.MalformedURLException e
                  (exit 1 (str "Invalid URL: " (.getMessage e)))))]
      (.mkdir (File. dir))
      (try
        (fetch-images url dir)
        (finally
          ;; pmap runs on the agent thread pool; without this the JVM stays
          ;; alive until those idle threads time out.
          (shutdown-agents))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment