Skip to content

Instantly share code, notes, and snippets.

@ga2arch
Created January 7, 2012 01:07
Show Gist options
  • Select an option

  • Save ga2arch/1573338 to your computer and use it in GitHub Desktop.

Select an option

Save ga2arch/1573338 to your computer and use it in GitHub Desktop.
pfetch
(ns clj-pfetch.core
(:use [clojure.java.shell :only (sh)]
[clojure.java.io :only (as-url input-stream)]
[clojure.string :only (blank?)])
(:require [clojure.xml :as xml]))
;;; Records
;; One polled resource.
;;   start  - initial delay in seconds before the first fetch (see process-url);
;;            make-url fills it with (rand-int 10)
;;   href   - address handed to cond-get
;;   output - file the fetched content is written to; also the first element
;;            of the command passed to exec
;;   freq   - delay in seconds between two fetches
;;   args   - extra command elements appended after output when calling exec
(defrecord Url [start href output freq args])
;;; Agents
;; Flag dereferenced by process-url after every polling cycle; polling
;; continues while the agent's value is truthy. Nothing in this file changes
;; it from its initial value of true.
(def running (agent true))
;;; Methods
(defn- read-file
  "Returns the entire content of `filename` as a string."
  [filename]
  (slurp filename))
(defn- parse-xml
  "Parses the XML text `data` and returns the xml-seq of the resulting tree,
  i.e. a depth-first sequence of nodes whose first item is the root element.
  The text is encoded as UTF-8 before being handed to the parser."
  [data]
  (let [utf8-bytes (.getBytes data "UTF-8")
        document (xml/parse (input-stream utf8-bytes))]
    (xml-seq document)))
(defn- make-url
  "Builds a Url record from one parsed XML element `data`.

  The element's :href, :output and :freq attributes are read (:freq is parsed
  as an integer). The element's first child supplies the :path attribute and
  its own children supply the arguments, each taken as the first content item
  of the child. The record's args field is the path followed by those
  arguments; its start field is a random integer in [0, 10)."
  [data]
  (let [{:keys [href output freq]} (:attrs data)
        interval (Integer/parseInt freq)
        command (first (:content data))
        path (-> command :attrs :path)
        arguments (map (comp first :content) (:content command))]
    (Url. (rand-int 10) href output interval (cons path arguments))))
(defn- make-urls
  "Takes the node sequence `x` produced by parse-xml and returns a lazy
  sequence with one Url record per child of the first (root) node."
  [x]
  (->> x
       first
       :content
       (map make-url)))
(defn exec
  "Runs the command described by the sequence `args` (program followed by its
  arguments) through clojure.java.shell/sh and returns sh's result map.
  Best effort: any Exception raised while launching or running the command is
  swallowed and nil is returned instead."
  [args]
  (try
    (apply sh args)
    (catch Exception _
      nil)))
(defn- parse-headers
  "Converts `headers`, a collection of map entries whose values are lists,
  into a map from the keywordized entry key to the first element of the
  entry's value list."
  [headers]
  (into {}
        (for [entry headers]
          [(keyword (.getKey entry))
           (.get (.getValue entry) 0)])))
(defn cond-get
  "Fetches `url`, optionally as a conditional request.

  The first element of `args`, when present, is a map that may carry :etag
  and/or :last-modified values from a previous call; they are sent as the
  If-None-Match and If-Modified-Since request headers.

  Returns a map with the keys :content (response body as a string), :etag and
  :last-modified (validators to pass to the next call, or nil).

  Response headers are looked up case-insensitively. When the server answers
  304 Not Modified without repeating a validator, the validator supplied by
  the caller is kept so that the following request stays conditional.

  Exceptions raised while connecting or reading (e.g. IOException) propagate
  to the caller."
  [url & args]
  (let [^java.net.URLConnection con (.openConnection ^java.net.URL (as-url url))
        plus (first args)]
    (when-let [etag (:etag plus)]
      (.addRequestProperty con "If-None-Match" etag))
    (when-let [modified (:last-modified plus)]
      (.addRequestProperty con "If-Modified-Since" modified))
    (let [content (slurp (.getInputStream con))
          ;; Only HTTP(S) connections have a status code; other schemes
          ;; (file:, jar:, ...) are treated as plain unconditional reads.
          not-modified? (and (instance? java.net.HttpURLConnection con)
                             (= java.net.HttpURLConnection/HTTP_NOT_MODIFIED
                                (.getResponseCode ^java.net.HttpURLConnection con)))
          ;; getHeaderField matches header names regardless of case.
          etag (.getHeaderField con "ETag")
          last-modified (.getHeaderField con "Last-Modified")]
      {:content content
       :etag (or etag (when not-modified? (:etag plus)))
       :last-modified (or last-modified
                          (when not-modified? (:last-modified plus)))})))
(defn- process-url
  "Polling loop for a single `url` record.

  Waits :start seconds, then repeatedly: fetches :href with cond-get; when the
  returned content is not blank, writes it to the :output file and calls exec
  with :output followed by :args; sleeps :freq seconds; and, while the
  `running` agent holds a truthy value, fetches again passing the previous
  result minus its :content. Returns nil once `running` is falsey."
  [url]
  (let [{:keys [start href output freq args]} url]
    (Thread/sleep (* start 1000))
    (loop [result (cond-get href)]
      (let [body (:content result)]
        (when-not (blank? body)
          (spit output body)
          (exec (cons output args))))
      (Thread/sleep (* freq 1000))
      (when @running
        (recur (cond-get href (dissoc result :content)))))))
(defn process-urls
  "Starts one polling loop per element of `urls`, each on its own agent, and
  returns the (fully realized) sequence of those agents.

  The dispatch is forced with doall: a bare lazy `map` would start nothing
  until some caller happened to consume the sequence. send-off is used
  instead of send because process-url blocks (sleep, network and file I/O)
  and runs for as long as polling is enabled, which would otherwise occupy
  the fixed-size pool used by send and starve the remaining urls."
  [urls]
  (doall (map #(send-off (agent %) process-url) urls)))
;;; Main
(defn -main
  "Entry point. Reads the XML configuration file `filename`, builds the Url
  records it describes and starts polling each of them.

  The result of process-urls is forced with doall so that the polling agents
  are dispatched even when that result is a lazy sequence and the caller
  (e.g. the JVM launcher) ignores -main's return value. Returns the sequence
  of agents."
  [filename]
  (let [urls (-> filename
                 read-file
                 parse-xml
                 make-urls)]
    (doall (process-urls urls))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment