Created
January 7, 2012 01:07
-
-
Save ga2arch/1573338 to your computer and use it in GitHub Desktop.
pfetch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (ns clj-pfetch.core | |
| (:use [clojure.java.shell :only (sh)] | |
| [clojure.java.io :only (as-url input-stream)] | |
| [clojure.string :only (blank?)]) | |
| (:require [clojure.xml :as xml])) | |
;;; Records
;; One polling job, as consumed by `process-url`.
(defrecord Url
  [start    ; seconds to wait before the first fetch
   href     ; address that is fetched on every poll
   output   ; file the fetched content is written to
   freq     ; seconds to wait between polls
   args])   ; extra arguments passed to `exec` after `output`
;;; Agents
;; Flag dereferenced by the polling loop in `process-url`; polling continues
;; for as long as the agent's value is truthy.
(def running
  (agent true))
;;; Methods
(defn- read-file
  "Returns the whole content of `filename` as a string."
  [filename]
  (slurp filename))
(defn- parse-xml
  "Parses the XML text `data` and returns the depth-first sequence of its
  nodes (as produced by `xml-seq`), starting with the root element."
  [data]
  (let [utf8-bytes (.getBytes data "UTF-8")
        stream     (input-stream utf8-bytes)
        root       (xml/parse stream)]
    (xml-seq root)))
(defn- make-url
  "Builds a `Url` record from one parsed XML element `data`.

  `href`, `output` and `freq` come from the element's attributes (`freq` is
  parsed as an integer). The element's first child supplies the command:
  its `:path` attribute, followed by the first content item of each of its
  own children. `start` is a random integer from 0 to 9."
  [data]
  (let [{:keys [href output] raw-freq :freq} (:attrs data)
        freq         (Integer/parseInt raw-freq)
        command-node (first (:content data))
        path         (:path (:attrs command-node))
        args         (map (comp first :content) (:content command-node))]
    (Url. (rand-int 10) href output freq (cons path args))))
(defn- make-urls
  "Takes `x`, a node sequence whose first item is the root element, and
  returns a lazy sequence with one `Url` per child of that root."
  [x]
  (->> x
       first
       :content
       (map make-url)))
(defn exec
  "Runs the command given by the sequence `args` through `sh` and returns
  its result map. Any `Exception` raised while doing so is swallowed and
  nil is returned instead."
  [args]
  (try
    (apply sh args)
    (catch Exception _
      nil)))
(defn- parse-headers
  "Turns `headers`, a collection of map entries from header name to a list
  of values, into a map from keywordized name to the first value."
  [headers]
  (into {}
        (map (fn [entry]
               [(keyword (.getKey entry))
                (.get (.getValue entry) 0)])
             headers)))
(defn cond-get
  "Fetches `url` with a conditional GET.

  The optional first extra argument is a map of validators from an earlier
  call: its `:etag` is sent as `If-None-Match` and its `:last-modified` as
  `If-Modified-Since`, when present.

  Returns a map with `:content` (the response body as a string), `:etag`
  and `:last-modified`. When the server answers 304 Not Modified and omits
  a validator header, the validator passed in is carried over so that the
  next request stays conditional.

  The response code is only queried on HTTP connections, so other URL
  schemes (for example `file:`) can be fetched as well."
  [url & args]
  (let [^java.net.URL uri (as-url url)
        ^java.net.URLConnection con (.openConnection uri)
        plus (first args)]
    (when (:etag plus)
      (.addRequestProperty con "If-None-Match" (:etag plus)))
    (when (:last-modified plus)
      (.addRequestProperty con "If-Modified-Since" (:last-modified plus)))
    (let [resp (.getInputStream con)
          ;; Only HttpURLConnection has a response code.
          status (when (instance? java.net.HttpURLConnection con)
                   (.getResponseCode ^java.net.HttpURLConnection con))
          not-modified? (= status java.net.HttpURLConnection/HTTP_NOT_MODIFIED)
          ;; Ask the connection for the header instead of relying on the
          ;; exact capitalization the server happened to use.
          ;; NOTE(review): the JDK HTTP handler matches header names
          ;; case-insensitively - confirm if a custom handler is installed.
          etag (.getHeaderField con "ETag")
          last-modified (.getHeaderField con "Last-Modified")]
      {:content (slurp resp)
       :etag (if not-modified?
               (or etag (:etag plus))
               etag)
       :last-modified (if not-modified?
                        (or last-modified (:last-modified plus))
                        last-modified)})))
(defn- process-url
  "Polling loop for a single `url` record.

  Waits `:start` seconds, then repeatedly: fetches `:href`; when the
  fetched content is not blank, writes it to the `:output` file and calls
  `exec` with `:output` followed by `:args`; waits `:freq` seconds. Each
  fetch after the first passes the previous result (minus `:content`) to
  `cond-get`. The loop stops, returning nil, once `running` is falsey."
  [url]
  (let [{:keys [start href output freq args]} url
        command (cons output args)]
    (Thread/sleep (* start 1000))
    (loop [result (cond-get href)]
      (let [body (:content result)]
        (when-not (blank? body)
          (spit output body)
          (exec command)))
      (Thread/sleep (* freq 1000))
      (when @running
        (recur (cond-get href (dissoc result :content)))))))
(defn process-urls
  "Starts one polling agent per entry of `urls` and returns the agents as a
  fully realized sequence.

  `map` is lazy, so the result is forced with `doall`; otherwise no agent
  is started when the caller discards the return value. `send-off` is used
  instead of `send` because `process-url` blocks (sleeps and does I/O) for
  as long as polling runs, which would tie up the fixed-size `send` pool."
  [urls]
  (doall (map #(send-off (agent %) process-url) urls)))
;;; Main
(defn -main
  "Entry point: reads the XML file `filename`, builds the `Url` records it
  describes and hands them to `process-urls`."
  [filename]
  (-> filename
      read-file
      parse-xml
      make-urls
      process-urls))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment