Created
February 25, 2010 15:01
-
-
Save rcampbell/314600 to your computer and use it in GitHub Desktop.
Rendering Calais tag aggregates
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns calais.web | |
(:use compojure | |
[compojure.http response] | |
[clojure.contrib.def :only [defn-memo]] | |
[clojure.contrib.seq-utils :only [rand-elt]] | |
[clojure.contrib.str-utils :only [re-gsub]]) | |
(:require [calais.rdf :as rdf]) | |
(:import [java.io File])) | |
(declare percent) | |
(def title "Entity Recognition & Decoration Prototype") | |
(defn section-gen | |
([title] | |
(section-gen title #(identity [:li (:name %) | |
[:small {:style "color: blue;"} | |
(percent (:relevance %))]]))) | |
([title li-fn] | |
(fn [list] | |
(when-not (empty? list) | |
(let [rating (if ((first list) :score) :score :relevance)] | |
(html [:h3 title] | |
[:ul (map li-fn (reverse (sort-by rating list)))])))))) | |
(def add-companies (section-gen "Companies" | |
#(identity [:li (str (:name %) " [" (:ticker %) "]") | |
[:small {:style "color: blue;"} | |
(percent (:score %))]]))) | |
(def add-technologies (section-gen "Technologies")) | |
(def add-industry-terms (section-gen "Industry Terms")) | |
(def add-products (section-gen "Products")) | |
(def add-people (section-gen "People")) | |
(defn add-meta-section [file] | |
(let [m (rdf/ask-all file)] | |
(str (add-companies (m :companies)) | |
(add-technologies (m :technologies)) | |
(add-industry-terms (m :industry-terms)) | |
(add-products (m :products)) | |
(add-people (m :people))))) | |
(defn results-page [id] | |
(html | |
(doctype :html4) | |
[:html | |
[:head | |
[:title id]] | |
[:body | |
[:h1 id] | |
[:a {:href (str "/original/" id)} "Download DOC"] | |
(add-meta-section (File. (str "s:/idc-rdf/" id ".rdf"))) | |
[:br] | |
[:a {:href "/"} "Back"]]])) | |
(defn get-random-ids [num] | |
(let [all-ids (map #(apply str (drop-last 4 (.getName %))) | |
(filter #(> (.length %) 3072) (.listFiles (File. "S:/idc-rdf"))))] | |
(loop [some-ids #{}] | |
(if (= (count some-ids) num) some-ids | |
(recur (conj some-ids (rand-elt all-ids))))))) | |
(defn-memo show-tag-section [category] | |
(letfn [(sort-and-filter [category] | |
(rdf/sorted-by-val (rdf/filter-with | |
((deref rdf/freq) category))))] | |
(html | |
[:h3 (capitalize (name category))] | |
[:ul (pmap #(identity [:li | |
[:a {:href (str "/tag/" | |
(re-gsub #"\." "" | |
(url-encode (first %))))} | |
(first %)] | |
[:small " (" (second %) ")"]]) | |
(sort-and-filter category))]))) | |
(defn-memo count-docs [] | |
(count (.listFiles (File. "S:/idc-doc")))) | |
(defn percent [val] | |
(str " - " (int (* 100 (Float/valueOf val))) "%")) | |
(defn index-page [] | |
(html | |
(doctype :html4) | |
[:html | |
[:head | |
[:title title]] | |
[:body | |
[:table {:width "100%"} | |
[:tr | |
[:td {:width "50%" :valign "top"} | |
[:h4 "Enter any valid document number (container id):"] | |
(form-to [:post "/"] | |
(text-field :container-id) | |
(submit-button "Submit"))] | |
[:td {:width "50%" :valign "top"} | |
[:h4 "Some documents to get you started:"] | |
[:ul (map #(identity [:li [:a {:href %} %]]) (get-random-ids 10)) | |
[:li "..." (count-docs) " more..." [:a {:href "/"} "refresh"]]]]]] | |
[:hr] | |
[:table {:width "100%"} | |
[:tr | |
[:td {:valign "top" :width "25%"} (show-tag-section :companies)] | |
[:td {:valign "top" :width "25%"} (show-tag-section :technologies)] | |
[:td {:valign "top" :width "25%"} (show-tag-section :industry-terms)] | |
[:td {:valign "top" :width "25%"} (show-tag-section :products)]]]]])) | |
(defn doc-not-found [] | |
(html | |
(doctype :html4) | |
[:html | |
[:head | |
[:title title]] | |
[:body | |
[:h4 (str "The document you entered has not yet been run " | |
"through Calais. Please try another document.")] | |
[:a {:href "/"} "Try Again"]]])) | |
(defn tag-page [tag] | |
(html | |
(doctype :html4) | |
[:html | |
[:head | |
[:title "Documents tagged as " tag]] | |
[:body | |
[:h1 "Documents tagged as " tag] | |
[:ul (map #(identity [:li [:a {:href (str "/" %)} %]]) | |
((deref rdf/xref) tag))] | |
[:a {:href "/"} "Back"]]])) | |
(defroutes calais-app | |
(GET "/" | |
(index-page)) | |
(GET "/tag/:tag" | |
(tag-page (params :tag))) | |
(POST "/" | |
(try | |
(results-page (.trim (params :container-id))) | |
(catch Exception _ (doc-not-found)))) | |
(GET "/:id" | |
(try | |
(results-page (.trim (params :id))) | |
(catch Exception _ (doc-not-found)))) | |
(GET "/original/:id" | |
(let [filename (str (.trim (params :id)) ".doc")] | |
[{:header {"Content-Type" "application/msword" | |
"Content-Disposition" (str "attachment; filename=" | |
filename) | |
"Content-Type" "application/force-download"}} | |
(File. (str "S:/idc-doc/" filename))])) | |
(ANY "*" | |
(page-not-found))) | |
; (run-server {:port 8081} "/*" (servlet calais-app)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment