Created
December 4, 2011 04:49
-
-
Save mattak/1429207 to your computer and use it in GitHub Desktop.
Bing image search API client. Please insert your own API key before use.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env clojure | |
(import '(org.apache.http HttpHost) | |
'(org.apache.http.client HttpClient) | |
'(org.apache.http.client.methods HttpGet) | |
'(org.apache.http.impl.client DefaultHttpClient) | |
'(org.apache.http.conn.params ConnRoutePNames) | |
'(java.io File | |
BufferedReader InputStreamReader | |
InputStream FileInputStream | |
OutputStreamWriter FileOutputStream)) | |
;(use '[clojure.contrib.duck-streams :only (reader writer read-lines write-lines)]) | |
(use '[clojure.xml :only (parse)]) | |
;; util | |
;;-------------- | |
(defn rm
  "Deletes the file at path `file`. Returns true when the file was
  removed, false otherwise (the result of java.io.File#delete)."
  [file]
  (-> file File. .delete))
(defn filetype
  "Returns the lower-cased, letters-only extension at the very end of
  `file` (without the dot), or nil when the string does not end in one."
  [file]
  (when-let [[_ ext] (re-find #"\.([a-zA-Z]+)$" file)]
    (.toLowerCase ext)))
;; stream | |
;;-------------- | |
(defn tois
  "Opens the file at path `f` for reading and returns a
  java.io.FileInputStream. The caller is responsible for closing it."
  [f]
  (FileInputStream. (File. f)))
(defn toos
  "Opens the file at path `f` for writing and returns a
  java.io.FileOutputStream. The caller is responsible for closing it."
  [f]
  (FileOutputStream. (File. f)))
(defn lazy-input
  "Returns a lazy sequence of characters obtained by calling `.read`
  on `input-stream` one value at a time until it reports end of input
  (-1). Each value read is converted with `char`, so when the argument
  is a byte stream every byte becomes one character."
  [input-stream]
  (lazy-seq
    (let [c (.read input-stream)]
      ;; .read signals end of input with -1, the only negative value it returns.
      (when-not (neg? c)
        (cons (char c) (lazy-input input-stream))))))
(defn lazy-input-line
  "Returns a lazy sequence of the text lines readable from
  `input-stream`, without their line terminators. The stream is wrapped
  in a BufferedReader using the platform default charset; nothing is
  read until the sequence is first realized."
  [input-stream]
  (let [rd (BufferedReader. (InputStreamReader. input-stream))]
    ;; line-seq reads its first line eagerly; the outer lazy-seq defers that read.
    (lazy-seq (line-seq rd))))
(defn is2str
  "Reads everything remaining in the input stream `is` and returns it
  as one string, then closes the stream.

  Line terminators are preserved. Joining the individual lines with no
  separator, as was done before, fused the last word of one line with
  the first word of the next. `slurp` decodes the bytes as UTF-8."
  [#^InputStream is]
  (slurp is))
(defn is2file
  "Copies every byte remaining in the input stream `is` into the file
  at path `outpath`, creating or overwriting it. Returns nil.

  The output file is closed when the copy finishes or fails; the input
  stream is left open for the caller to close."
  [#^InputStream is outpath]
  (with-open [os (FileOutputStream. (File. outpath))]
    (let [buf (byte-array 8192)]
      (loop []
        (let [n (.read is buf)]
          ;; read returns the number of bytes placed in buf, or -1 at end of stream.
          (when-not (neg? n)
            (.write os buf 0 n)
            (recur)))))))
;; http | |
;;-------------- | |
(defn env_proxy_and_port
  "Reads the `http_proxy` environment variable and returns
  [host port] (a string and an Integer), or nil when the variable is
  unset or no host and explicit port can be extracted from it.

  Accepts values with or without a scheme, with a trailing slash, and
  with `user:password@` credentials, e.g. `http://proxy:8080/` or
  `proxy:8080`."
  []
  (when-let [orig (System/getenv "http_proxy")]
    (let [trimmed (.trim orig)
          ;; java.net.URI only exposes host/port when the value has a scheme.
          with-scheme (if (re-find #"^[a-zA-Z][a-zA-Z0-9+.-]*://" trimmed)
                        trimmed
                        (str "http://" trimmed))]
      (try
        (let [uri (java.net.URI. with-scheme)
              host (.getHost uri)
              port (.getPort uri)]
          ;; getPort yields -1 when the value carries no port.
          (when (and host (pos? port))
            [host (Integer/valueOf (int port))]))
        (catch java.net.URISyntaxException _
          nil)))))
(defn httpget
  "Performs an HTTP GET on `url`.

  The optional second argument is a map with keys:
    :host, :port - route the request through this proxy (both required)
    :save        - path of a file to write the response body to

  With :save the body is written to that file via `is2file` and its
  result is returned; otherwise the body is returned as a string via
  `is2str`. When executing the request throws, a message is printed to
  *err* and nil is returned.

  The client's connection manager is shut down before returning so the
  connection is released."
  [url & [{:keys [host port save]}]]
  (let [client (DefaultHttpClient.)
        method (HttpGet. url)]
    (try
      (when (and host port)
        (.setParameter (.getParams client)
                       ConnRoutePNames/DEFAULT_PROXY
                       (HttpHost. host port)))
      (when-let [response (try
                            (. client execute method)
                            (catch Exception e
                              (.println *err* (str "Error: " url ": " e))
                              nil))]
        (let [content (.. response getEntity getContent)]
          (if save
            (is2file content save)
            (is2str content))))
      (finally
        ;; Both readers consume the body eagerly, so it is safe to tear
        ;; the connection down once they have returned.
        (.. client getConnectionManager shutdown)))))
(defn httpget-autoproxy
  "Same as `httpget`, but takes the proxy host and port from
  `env_proxy_and_port` instead of from the caller. Only the :save key
  of the optional options map is honoured."
  [url & [{:keys [save]}]]
  ;; Destructuring a nil result leaves both proxy values nil, which
  ;; httpget treats the same as not being given a proxy at all.
  (let [[proxy-host proxy-port] (env_proxy_and_port)]
    (httpget url {:host proxy-host :port proxy-port :save save})))
;; xml | |
;;-------------- | |
(defn xml-bytag
  "Walks the parsed XML tree `xml` in document order and returns a lazy
  sequence holding the :content of every element whose :tag equals
  `tag`."
  [xml tag]
  (->> (xml-seq xml)
       (filter #(= tag (:tag %)))
       (map :content)))
(defn content1
  "Returns the first child in the :content of the XML element `elm`,
  or nil when there is none."
  [elm]
  (-> elm :content first))
(defn imgcount
  "Parses the text of the first :mms:Total element found in `xml` as
  an integer and returns it."
  [xml]
  (let [total-content (first (xml-bytag xml :mms:Total))]
    (Integer/parseInt (first total-content))))
(defn imglist
  "Returns, for each :mms:ImageResult element in `xml`, the first
  content item of its first :mms:MediaUrl child (nil when the result
  has no such child)."
  [xml]
  (let [media-url? #(= :mms:MediaUrl (:tag %))]
    (map (fn [children]
           (content1 (first (filter media-url? children))))
         (xml-bytag xml :mms:ImageResult))))
(defn getsaveall
  "Pairs each URL in `urllist` with the base name at the same position
  in `namelist` and starts one thread per pair that downloads the URL
  to `<name>.<ext>`. The extension is taken from the URL via `filetype`
  and falls back to jpg. Returns without waiting for the threads."
  [urllist namelist]
  (doseq [[url basename] (map vector urllist namelist)]
    (.start
      (Thread.
        (fn []
          (let [ext (or (filetype url) "jpg")]
            (httpget-autoproxy url {:save (str basename "." ext)})))))))
;; query | |
;;-------------- | |
(defn getnamelist
  "Formats every number in `nlst` with the format string `fmt` and
  returns the resulting lazy sequence of strings.

  Example: (getnamelist \"%03d\" '(1 2 3)) => (\"001\" \"002\" \"003\")"
  [fmt nlst]
  (map #(format fmt %) nlst))
(defn queryurl
  "Builds the Bing XML image-search request URL for application id
  `appkey`, search term `query`, and the result window that starts at
  `offset` and holds `count` images.

  The search term is URL-encoded as UTF-8 so that spaces and non-ASCII
  characters yield a valid URL; pass it unencoded."
  [appkey query offset count]
  (str "http://api.bing.net/xml.aspx?Appid="
       appkey
       "&query=" (java.net.URLEncoder/encode (str query) "UTF-8")
       "&sources=image"
       "&image.offset=" offset
       "&image.count=" count))
(defn divided-query
  "Splits a request for `count` images starting at `offset` into
  request URLs of at most 50 images each (the page size this script
  uses) and returns them in ascending offset order. Returns an empty
  sequence when `count` is zero or negative.

  Each page starts exactly where the previous one ended. Previously the
  offset jumped ahead by the whole remaining count after a full page,
  so e.g. count=100 queried offsets 0 and 100 and skipped 50-99."
  [appkey query offset count]
  (let [page-size 50
        end (+ offset count)]
    (for [ofs (range offset end page-size)]
      ;; The last page may be shorter than page-size.
      (queryurl appkey query ofs (min page-size (- end ofs))))))
(defn query2imglist
  "Runs the image search for `query` over the window described by
  `offset` and `count` and returns one flat lazy sequence of the image
  URLs found.

  Each page from `divided-query` is downloaded to the scratch file
  `.tmp.xml`, parsed, and the file is removed again, also when parsing
  fails. A page whose download produced no file contributes no URLs
  instead of aborting the whole search."
  [appkey query offset count]
  (let [tmp ".tmp.xml"]
    (flatten
      (for [s (divided-query appkey query offset count)]
        (try
          (httpget-autoproxy s {:save tmp})
          (if (.exists (File. tmp))
            ;; Realize the result before the finally clause deletes the file.
            (doall (imglist (parse (File. tmp))))
            ())
          (finally
            (rm tmp)))))))
;; main | |
;;-------------- | |
; usage | |
(defn usage-exit
  "Prints the command-line usage text to standard output and terminates
  the JVM with exit status 0.

  The first example uses `cat%03d` as its format; the earlier `%cat03d`
  is not a valid format string for numbering files."
  []
  (doseq [line ["usage: search-word number? offset? format?"
                " number: query image number, default 100"
                " offset: query image offset, default 0"
                " format: save file name format except extension, default \"%03d\""
                "ex:"
                " bing cat 100 0 \"cat%03d\""
                " bing cat 200 100"]]
    (println line))
  (System/exit 0))
(defn args
  "Returns the command-line argument at index `n`. When fewer than
  n+1 arguments were given, returns the first extra argument passed
  after `n` as the fallback, or nil when none was supplied."
  [n & default]
  (if (< n (count *command-line-args*))
    (nth *command-line-args* n)
    ;; `first` of a missing rest-arg list is nil, covering the no-fallback case.
    (first default)))
; argument check | |
;; Without a search word there is nothing to do: show the usage text and quit.
(when (empty? *command-line-args*)
  (usage-exit))

;; Run settings; everything after :tmpxml comes from the command line.
;; :tmpxml is not referenced anywhere else in the visible code.
(def config
  {:apikey       "INSERT_YOUR_APIKEY"
   :tmpxml       "tmp.xml"
   :search-word  (args 0)
   :query-number (Integer/parseInt (args 1 "100"))
   :query-offset (Integer/parseInt (args 2 "0"))
   :save-format  (args 3 "%03d")})

;; Base file names, one per requested result index.
(def namelist
  (let [{:keys [save-format query-offset query-number]} config]
    (getnamelist save-format
                 (range query-offset (+ query-offset query-number)))))

;; Image URLs returned by the search.
(def urllist
  (let [{:keys [apikey search-word query-offset query-number]} config]
    (query2imglist apikey search-word query-offset query-number)))

;; Download every URL under its matching name.
(getsaveall urllist namelist)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment