Created
October 19, 2012 20:11
-
-
Save damionjunk/3920399 to your computer and use it in GitHub Desktop.
ZMQ Twitter ANEW Scoring
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns zmq-anew3l.core | |
(:require [zmq-anew3l.zmqex.zhelper :as mq] | |
[cheshire.core :as json] | |
[clojure.tools.cli :as cli] | |
[clojure.tools.logging :as log] | |
[sentimental.anew :as anew])) | |
(defn queue-address
  "Builds the ZMQ endpoint string tcp://HOST:PORT from the given host and
  port. Both arguments are stringified with `str`."
  [host port]
  (str "tcp://" host \: port))
(defn gen-all
  "Scores the tweet text found at (:text (:twitter c-jin)) against the
  English, Spanish and Portuguese ANEW lexicons.

  c-jin is parsed JSON as a Clojure map.

  Returns a map containing a subset of the keys :anew-en, :anew-es and
  :anew-pt, keeping only the languages whose score has a non-empty :words
  entry. Returns nil when there is no tweet text, or when no language
  matched any words."
  [c-jin]
  (when-let [text (:text (:twitter c-jin))]
    (let [anew-en (anew/score-phrase text :english)
          anew-es (anew/score-phrase text :spanish)
          anew-pt (anew/score-phrase text :portuguese)
          scores  {:anew-en anew-en
                   :anew-es anew-es
                   :anew-pt anew-pt}
          ;; Keep only the entries whose score actually matched some words.
          matched (filter (fn [[_ score]] (seq (:words score))) scores)]
      ;; `filter` returns a lazy seq, which is truthy even when empty, so it
      ;; cannot be used as a `when-let` guard. `not-empty` makes the "nothing
      ;; matched" case return nil explicitly.
      (not-empty (into {} matched)))))
(defn gen-best-of3
  "Provides the 'Best of the 3' -- the score that matches the most keywords.

  Reads the tweet text from the :text entry under :twitter in c-jin and
  hands it to anew/score-phrase-langs. Returns nil when no text is present."
  [c-jin]
  (if-let [tweet-text (get-in c-jin [:twitter :text])]
    (anew/score-phrase-langs tweet-text)
    nil))
;;
;; Lookup table of the score-generator functions, keyed by the keyword that
;; selects each one. The command line chooses an entry by name, so a new
;; scorer only needs to be added here.
(def anew-fns
  {:bo3 gen-best-of3
   :all gen-all})
(defn score-stream
  "Very basic ZMQ synchronous subscription based ANEW scoring function.

  Connects a SUB socket to tcp://host:port, subscribes to every message,
  and then loops forever: each received string is parsed as JSON (with
  keyword keys), scored with the function registered under fn-keyword in
  anew-fns, and any non-nil score is printed.

  host       - host name or address of the publisher.
  port       - port of the publisher.
  fn-keyword - key into anew-fns selecting the scoring function.

  Throws IllegalArgumentException before connecting when fn-keyword is not
  a key of anew-fns. A message that cannot be parsed or scored is logged and
  skipped rather than terminating the stream. Does not return normally."
  [host port fn-keyword]
  (let [scorer (get anew-fns fn-keyword)]
    ;; Fail fast with a readable message instead of a NullPointerException
    ;; on the first received message.
    (when-not scorer
      (throw (IllegalArgumentException.
              (str "Unknown scoring function " (pr-str fn-keyword)
                   "; expected one of " (pr-str (keys anew-fns))))))
    (let [ctx        (mq/context 1)
          subscriber (mq/socket ctx mq/sub)]
      (mq/connect subscriber (queue-address host port))
      ;; An empty prefix subscribes to all messages.
      (mq/subscribe subscriber "")
      ;; The first message is read and discarded, as the original code did.
      ;; NOTE(review): presumably a sync/handshake frame -- confirm against
      ;; the publisher.
      (mq/recv subscriber)
      (loop []
        (let [item (mq/recv-str subscriber)]
          ;; `recur` cannot appear inside `try`, so the per-message work is
          ;; guarded here and the loop continues below.
          (try
            (when-let [anew-b (scorer (json/parse-string item true))]
              ;; Do something real here, for now, just print some stuff.
              (println anew-b))
            (catch Exception e
              (log/warn e "Skipping message that could not be scored:" item))))
        (recur)))))
;; | |
;; Main | |
(defn -main
  "Command line entry point.

  Options:
    -lexicon  ANEW lexicon CSV (required).
    -host     ZMQ Raw/Twitter host, default \"localhost\".
    -port     ZMQ Raw/Twitter port, default \"30104\".
    -fn       Scoring function, a key of anew-fns (bo3 or all), default bo3.
    -h/--help Print the usage banner.

  Prints the banner and exits with status 0 for --help. Prints the banner
  and exits with status 1 when -lexicon is missing or -fn does not name a
  known scoring function. Otherwise loads the lexicon and runs score-stream,
  which loops forever."
  [& args]
  (let [[opts _ banner] (cli/cli args
                                 ["-lexicon" "ANEW lexicon CSV"]
                                 ["-host" "ZMQ Raw/Twitter Host"
                                  :default "localhost"]
                                 ["-port" "ZMQ Raw/Twitter Port"
                                  :default "30104"]
                                 ["-fn" "The scoring function one of bo3, all"
                                  :default :bo3
                                  :parse-fn keyword]
                                 ["-h" "--help" "Show help." :flag true])]
    (cond
      ;; An explicit request for help is a successful run.
      (:help opts)
      (do (println banner)
          (System/exit 0))

      ;; A missing required option is a usage error, so exit non-zero to let
      ;; calling scripts detect it.
      (nil? (:lexicon opts))
      (do (println banner)
          (System/exit 1))

      ;; Reject an unknown scorer here rather than failing later inside the
      ;; streaming loop.
      (not (contains? anew-fns (:fn opts)))
      (do (println "Unknown scoring function:" (name (:fn opts)))
          (println banner)
          (System/exit 1)))
    ;; Load the ANEW lexicon. Future version can read directly from S3.
    ;; (expect as a parameter a filename or URL)
    (anew/load-lexicon (:lexicon opts))
    ;; This is a "loop forever" process ...
    (score-stream (:host opts) (:port opts) (:fn opts))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment