Skip to content

Instantly share code, notes, and snippets.

@elfenlaid
Last active December 29, 2015 20:59
Show Gist options
  • Save elfenlaid/7727000 to your computer and use it in GitHub Desktop.
Save elfenlaid/7727000 to your computer and use it in GitHub Desktop.
Trying to have some fun with Clojure
(ns hcache.core
(:require [clj-http.lite.client :as client]
[clojure.string :as string]
[clojure.core.async :as async :refer [>! <! >!! <!! go chan]])
(:import (java.io File)))
;; Request headers sent with every HTTP GET issued by `page`.
;; The User-Agent value is a Firefox-on-Windows browser string.
(def h {"User-Agent" "Mozilla/5.0 (Windows NT 6.1;) Gecko/20100101 Firefox/13.0.1"})
(defn page
  "Performs an HTTP GET on `url`, sending the headers in `h`, and returns
  the :body of the response."
  [url]
  (let [response (client/get url {:headers h})]
    (:body response)))
(defn page-imgs
  "Returns a lazy seq of image references found in the `page` string.

  A reference is a run of non-space characters ending in .png, .jpg or
  .jpeg that is enclosed by quotes or parentheses; the enclosing
  delimiters are removed from each result. Duplicates are kept."
  [page]
  (let [img-re   #"['\"(][^\s'\"()]+\.(?:png|jpg|jpeg)['\")]"
        delim-re #"['\"()]"]
    (for [hit (re-seq img-re page)]
      (string/replace hit delim-re ""))))
(defn compose-path
  "Joins path segments, ignoring nil or empty ones.

  - one segment: returns it, or nil when it is nil/empty;
  - two segments: when both are non-empty they are combined through
    java.io.File (parent, child) and rendered as a string; otherwise the
    first non-empty one is returned, or nil when neither is;
  - more segments: folded left to right with the two-segment rule."
  ([x] (not-empty x))
  ([x y]
   (let [head (not-empty x)
         tail (not-empty y)]
     (if (and head tail)
       (.toString (File. x y))
       (or head tail))))
  ([x y & paths]
   (loop [acc       (compose-path x y)
          remaining paths]
     (if (seq remaining)
       (recur (compose-path acc (first remaining)) (rest remaining))
       acc))))
(defn resource-name
  "Returns the final '/'-separated segment of `img`, or nil when splitting
  on '/' yields no segments at all (for example when `img` is just \"/\")."
  [img]
  (-> img
      (string/split #"/")
      ;; split returns a vector, so peek yields its last element (nil if empty)
      peek))
(defn move-url-to-dir
  "Replaces every literal occurrence of `url` in the `page` string with the
  path formed by placing the url's resource name (its last '/'-separated
  segment) under `dir`.

  dir  - target directory; may be empty, in which case the bare resource
         name is used.
  page - the page text to rewrite.
  url  - the resource reference to look for in `page`.

  Returns the rewritten page. When `url` has no usable resource name (nil
  or empty) the page is returned unchanged."
  [dir page url]
  ;; not-empty also rejects an empty name: replacing the empty string (or
  ;; replacing with a nil path) must never be attempted.
  (if-let [n (not-empty (resource-name url))]
    (string/replace page url (compose-path dir n))
    ;; The original evaluated `(page)` here, i.e. it tried to invoke the
    ;; page string as a function and threw ClassCastException.
    page))
(defn move-resources-to-dir
  "Applies `move-url-to-dir` to `page` once per entry of `urls`, in order,
  feeding each result into the next step. Returns the final page text."
  [dir page urls]
  (reduce (fn [rewritten url]
            (move-url-to-dir dir rewritten url))
          page
          urls))
(defn move-resources-to-root
  "Rewrites `urls` in `page` using an empty target directory, so each
  reference is replaced by a path built from its resource name alone."
  [page urls]
  (let [root-dir ""]
    (move-resources-to-dir root-dir page urls)))
(defn add-charset-meta
  "Returns `page` with a utf-8 charset meta tag prepended, unless the page
  already contains a `<meta charset=` tag (matched case-sensitively), in
  which case it is returned untouched."
  [page]
  (if (re-find #"<meta\s+charset=" page)
    page
    (str "<meta charset=utf-8>" page)))
(defn save-http-page
  "Writes `page` to the file at `path` after two rewrites: image references
  found by `page-imgs` are rewritten via `move-resources-to-root`, then
  `add-charset-meta` ensures a charset meta tag is present."
  [page path]
  (->> (page-imgs page)
       (move-resources-to-root page)
       add-charset-meta
       (spit path)))
(defn save-pages
  "Fetches every URL in `pages` concurrently and saves each body with
  `save-http-page` to a file named \"test-<url without scheme>.html\".

  pages - a collection of URL strings.

  Results are saved in the order the downloads complete. A URL whose
  download throws is reported on *err* and skipped; the remaining pages
  are still saved. Returns nil."
  [pages]
  (let [results (chan)]
    (doseq [url pages]
      ;; `page` does blocking network I/O, so it runs on a dedicated thread
      ;; (async/thread) rather than inside a go block.
      (async/thread
        ;; Always deliver exactly one message per URL. Previously an
        ;; exception in `page` meant nothing was ever put on the channel
        ;; and the consumer below blocked forever.
        (>!! results [url (try
                            (page url)
                            (catch Exception e e))])))
    (dotimes [_ (count pages)]
      (let [[url result] (<!! results)]
        (if (instance? Exception result)
          (binding [*out* *err*]
            (println "Failed to fetch" url "-" (.getMessage ^Exception result)))
          (save-http-page result
                          (str "test-" (last (string/split url #"//")) ".html")))))))
;; Top-level form: evaluated whenever this namespace is loaded. It downloads
;; the three pages below and writes one test-*.html file per page using
;; relative file paths.
(save-pages ["http://google.com"
"http://dev.by"
"http://amazon.com"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment