Created
July 6, 2012 14:01
-
-
Save jsmorph/3060305 to your computer and use it in GitHub Desktop.
Lucenalog: Datalog interface to Lucene in 10 lines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns lucenalog.core | |
"Lucenalog = Datalog interface to Lucene in 10 lines. | |
Simple but powerful. | |
Use | |
(db/add (index) {:a \"foo\" :b \"bar\"}) | |
to index a map with Lucene. Then you can use the relation | |
'lucenalog-rel' from core.logic. That relation works on maps. Bound map | |
values produce a Lucene query, which returns matching maps. See | |
'lucenalog-test' for an example." | |
(:require [clucy.core :as db] | |
[clojure.core.logic :as logic])) | |
;; (defproject lucenalog "0.0.1-SNAPSHOT" | |
;; :description "Lucenalog = Datalog interface to Lucene in 10 lines" | |
;; :dependencies [[org.clojure/clojure "1.2.1"] | |
;; [clucy "0.3.0"] | |
;; [org.clojure/core.logic "0.6.6"] | |
;; ] | |
;; :main lucenalog.core) | |
;; Turn on compiler warnings for Java interop calls that have to be resolved via reflection.
(set! *warn-on-reflection* true) | |
;; Accessory functions. Not important. | |
(let [settings (ref {:max-query-results 1024
                     :verbose true})]
  (defn config
    "Read or update the Lucenalog settings, which live in a ref closed over
  by this function.

  (config)      returns the whole settings map.
  (config k)    returns the value stored under key k (nil when absent).
  (config k v)  stores v under k inside a transaction and returns the
                updated settings map."
    ([]
       (deref settings))
    ([k]
       (get (deref settings) k))
    ([k v]
       (dosync
        (alter settings assoc k v)))))
;; Zero-argument accessor for the default Lucene index.  The in-memory index
;; is created once and captured in the closure, so every call returns the
;; same index object.
;;
;; The var is marked dynamic because lucenalog-test rebinds it with
;; `binding`; on Clojure 1.3+ binding a non-dynamic var throws
;; IllegalStateException.  The map form ^{:dynamic true} is used instead of
;; the ^:dynamic shorthand so the source also reads on Clojure 1.2.
(defonce ^{:dynamic true} index
  (let [i (db/memory-index)]
    (fn [] i)))
(defn- note
  "Println the args, prefixed with :note, when (config :verbose) is truthy.
  Always returns the last arg (nil when called with no args), so a call can
  be wrapped around a value inline."
  ;; The docstring must come before the parameter vector; placed after it,
  ;; the string is just a discarded body expression and never becomes :doc.
  [& args]
  (when (config :verbose)
    (apply println :note args))
  (last args))
;; Generate the Lucene query string. | |
(defn- lucene-query
  "Search the current index for documents matching the bound parts of Q.

  Q is a map whose values may be logic variables; A is the current set of
  substitutions.  Q is first walked through A.  Every entry whose walked
  value is no longer a logic variable contributes a 'key:value' clause,
  and the clauses are joined with ' AND ', e.g. 'p1:v1 AND p2:v2'.  The
  resulting string is logged via note and handed to db/search, limited to
  (config :max-query-results) results."
  [q a]
  (let [walked  (logic/walk* a q)
        ;; Keep only the entries whose value is bound after walking.
        clauses (for [[k v] walked
                      :when (not (logic/lvar? v))]
                  (str (name k) ":" v))
        query   (apply str (interpose " AND " clauses))]
    (note :lucene-search walked q a :query query)
    (db/search (index) query (config :max-query-results))))
;; The core.logic relation. | |
(defn lucenalog-rel
  "A clojure.core.logic relation backed by Lucene.

  Q is a map whose values may be logic variables.  Returns a goal: a
  function of the substitutions A that runs lucene-query for Q under A,
  unifies Q with each returned map, and turns the resulting substitutions
  into a core.logic stream."
  ;; The docstring must come before the parameter vector; placed after it,
  ;; the string is just a discarded body expression and never becomes :doc.
  [q]
  (fn [a]
    (logic/to-stream
     (map #(logic/unify a % q)
          (lucene-query q a)))))
;; Tests and examples. | |
(defn lucenalog-test
  "Smoke test for Lucenalog.  Indexes four small maps into a throwaway
  in-memory index, then uses lucenalog-rel twice to follow \"a1\" through
  an intermediate value to the final :b values.  Prints the outcome and
  returns true when the answers are exactly #{\"b1\" \"b2\"}."
  []
  ;; Rebind `index` to a private in-memory index for the duration of the
  ;; test.  NOTE(review): `binding` needs `index` to be a dynamic var on
  ;; Clojure 1.3+ -- confirm against the definition of `index`.
  (binding [index (let [fresh (db/memory-index)]
                    (fn [] fresh))]
    (with-open [dir ^org.apache.lucene.store.Directory (index)]
      ;; Seed the index with a tiny two-hop graph.
      (doseq [doc [{:a "a1" :b "z1"}
                   {:a "z1" :b "b1"}
                   {:a "a1" :b "z2"}
                   {:a "z1" :b "b2"}]]
        (db/add dir doc))
      ;; a1 -> z1 via the first relation, then z1 -> b1/b2 via the second.
      (let [expected #{"b1" "b2"}
            answers  (set
                      (logic/run* [q]
                        (logic/fresh [start mid end]
                          (logic/== start "a1")
                          (lucenalog-rel {:a start :b mid})
                          (lucenalog-rel {:a mid :b end})
                          (logic/== q end))))
            ok?      (= expected answers)]
        (println :test ok? :got answers :expected expected)
        ok?))))
(defn -main
  "Command-line entry point.  Ignores any arguments and runs
  lucenalog-test, returning its result."
  [& args]
  (lucenalog-test))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just a small improvement, instead of (subs (str k) 1) you can write (name k) to get the name of the key, e.g. (name :foo) returns the string "foo".