Last active
January 1, 2016 22:49
-
-
Save humbhenri/8212582 to your computer and use it in GitHub Desktop.
Download images from a page in parallel
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns get-images.core | |
(:require [clojure.tools.cli :refer [parse-opts]] | |
[net.cgrand.enlive-html :refer [select html-resource]] | |
[clojure.string :refer [split join]] | |
[clojure.java.io :refer [input-stream output-stream copy]]) | |
(:import [java.net URL] | |
[java.io File]) | |
(:gen-class)) | |
;; Command-line option specs handed to clojure.tools.cli/parse-opts in -main.
;; Each entry is [short-flag long-flag & description].
(def options
  [;; -w / --url takes a required argument; parse-opts stores it under :url.
   ["-w" "--url URL" "site url"]
   ;; -h / --help is a boolean flag stored under :help.
   ["-h" "--help"]])
(defn fetch-images
  "Downloads every <img> found on the page at `url` into directory `dir`,
  fetching the images in parallel with pmap.

  url - page address as a string (must be parseable by java.net.URL).
  dir - existing directory that receives the files.

  Each image's `src` attribute is resolved against the page URL, so absolute,
  root-relative, document-relative and protocol-relative sources all work and
  the page's own scheme and port are kept. <img> elements with a missing or
  empty `src` are skipped. The file name is the last `/`-separated segment of
  the raw `src` value. A failure on one image is reported and does not stop
  the remaining downloads. Returns nil."
  [url dir]
  (let [base-url   (URL. ^String url)
        file-name  (fn [src] (last (split src #"/")))
        save-image (fn [src]
                     (try
                       ;; URL(context, spec) applies standard relative-URL
                       ;; resolution against the page address.
                       (let [img-url (URL. base-url ^String src)]
                         (println "Fetching" (str img-url))
                         (with-open [in  (input-stream img-url)
                                     out (output-stream (File. dir (file-name src)))]
                           (copy in out)))
                       (catch Exception e
                         ;; Keep going: one bad image (e.g. an unsupported
                         ;; scheme or a 404) should not abort the batch.
                         (println "Failed to fetch" src "-" (.getMessage e)))))]
    (->> (select (html-resource base-url) #{[:img]})
         (keep #(not-empty (get-in % [:attrs :src])))
         (pmap save-image)
         ;; pmap is lazy; dorun forces every download to run.
         (dorun))))
(defn -main
  "Command-line entry point.

  Parses `args` against `options`. Prints the usage summary and exits with
  status 0 for --help, or with status 1 when --url is missing; prints the
  parse errors and exits with status 1 when parsing failed; prints a message
  and exits with status 1 when the URL cannot be parsed. Otherwise creates a
  directory named after the URL's host in the current working directory and
  downloads the page's images into it via fetch-images."
  [& args]
  (let [{:keys [options errors summary]} (parse-opts args options)
        usage (str "Usage\n" summary)
        exit  (fn [status msg] (println msg) (System/exit status))
        url   (:url options)]
    ;; Validate before touching `url`: building a URL from a missing value
    ;; throws, which previously made the usage branches unreachable.
    (cond
      (:help options) (exit 0 usage)
      (not url)       (exit 1 usage)
      errors          (exit 1 (join \newline errors)))
    (let [dir (try
                (.getHost (URL. ^String url))
                (catch java.net.MalformedURLException e
                  (exit 1 (str "Invalid URL: " (.getMessage e)))))]
      (try
        (.mkdir (File. ^String dir))
        (fetch-images url dir)
        (finally
          ;; pmap runs on the agent thread pool, whose non-daemon threads
          ;; would otherwise keep the JVM alive after the work is finished.
          (shutdown-agents))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment