Created
August 21, 2011 14:51
-
-
Save bru/1160694 to your computer and use it in GitHub Desktop.
In-memory Lucene index in CRuby, ported from the JRuby example at http://ikaisays.com/2010/04/25/jruby-in-memory-search-example-with-lucene-3-0-1/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prepend ./lib to the load path so the requires below can resolve files there.
$LOAD_PATH.unshift './lib'
require 'rubygems'
require 'rjb'
require 'rjbextension'

# Ruby-side handles for the Lucene Java classes used by this script.
Document = Rjb::import('org.apache.lucene.document.Document')
StandardAnalyzer = Rjb::import('org.apache.lucene.analysis.standard.StandardAnalyzer')
Field = Rjb::import('org.apache.lucene.document.Field')
IndexWriter = Rjb::import('org.apache.lucene.index.IndexWriter')
IndexWriterConfig = Rjb::import('org.apache.lucene.index.IndexWriterConfig')
ParseException = Rjb::import('org.apache.lucene.queryParser.ParseException')
QueryParser = Rjb::import('org.apache.lucene.queryParser.QueryParser')
RAMDirectory = Rjb::import('org.apache.lucene.store.RAMDirectory')
IndexSearcher = Rjb::import('org.apache.lucene.search.IndexSearcher')

# Nested Java classes are imported by their `Outer$Inner` names.
Store = Rjb::import('org.apache.lucene.document.Field$Store')
Index = Rjb::import('org.apache.lucene.document.Field$Index')
Resolution = Rjb::import('org.apache.lucene.document.DateTools$Resolution')

# NOTE(review): the package-style `org.apache...` lookup and `load_jvm` are not
# defined in this file; presumably they are provided by rjbextension - confirm.
VERSION = org.apache.lucene.util.Version.LUCENE_33

# NOTE(review): the JVM heap options are passed only after the imports above
# have run; confirm this ordering is what rjbextension expects.
load_jvm(['-Xms128m', '-Xmx512m'])
# Builds a Lucene document holding one quote.
#
# Both fields are stored with the document. Only "content" is analyzed for
# searching; "title" is added with Index.NO.
#
# @param title [String] value for the "title" field
# @param content [String] value for the "content" field
# @return [Document] the populated document
def create_document(title, content)
  doc = Document.new
  doc.add Field.new("title", title, Store.YES, Index.NO)
  doc.add Field.new("content", content, Store.YES, Index.ANALYZED)
  doc
end
# Builds an in-memory index containing four sample quotes.
#
# Each quote is added as a document built by create_document. The writer is
# always closed, even if adding or optimizing raises.
#
# @return [RAMDirectory] the directory holding the finished index
def create_index
  quotes = [
    ["Theodore Roosevelt",
     "It behooves every man to remember that the work of the " \
     "critic, is of altogether secondary importance, and that, " \
     "in the end, progress is accomplished by the man who does " \
     "things."],
    ["Friedrich Hayek",
     "The case for individual freedom rests largely on the " \
     "recognition of the inevitable and universal ignorance " \
     "of all of us concerning a great many of the factors on " \
     "which the achievements of our ends and welfare depend."],
    ["Ayn Rand",
     "There is nothing to take a man's freedom away from " \
     "him, save other men. To be free, a man must be free " \
     "of his brothers."],
    ["Mohandas Gandhi",
     "Freedom is not worth having if it does not connote " \
     "freedom to err."]
  ]

  idx = RAMDirectory.new
  config = IndexWriterConfig.new(VERSION, StandardAnalyzer.new(VERSION))
  writer = IndexWriter.new(idx, config)
  begin
    quotes.each do |title, content|
      writer.add_document(create_document(title, content))
    end
    writer.optimize
  ensure
    # Close the writer on every path so a failure above does not leak it.
    writer.close
  end
  idx
end
# Runs a query against the "content" field and prints the matches to stdout.
#
# The query string is parsed with a QueryParser, and at most 10 hits are
# requested from the searcher. For each hit the document id, score, title and
# content are printed.
#
# @param searcher [IndexSearcher] searcher to run the query against
# @param query_string [String] query text handed to the parser
def search(searcher, query_string)
  parser = QueryParser.new(VERSION, "content", StandardAnalyzer.new(VERSION))
  puts "Searching for #{query_string}"
  hits = searcher.search(parser.parse(query_string), 10)
  hit_count = hits.totalHits
  if hit_count.zero?
    puts "No matching documents."
    return
  end

  puts "#{hit_count} total matching documents"
  puts "Hits for #{query_string} were found in quotes by:"
  hits.scoreDocs.each_with_index do |score_doc, i|
    doc_id = score_doc.doc
    puts "doc_id: #{doc_id} \t score: #{score_doc.score}"
    doc = searcher.doc(doc_id)
    # The printed rank is the zero-based position within the returned hits.
    puts "#{i}. #{doc.get("title")}"
    puts "Content: #{doc.get("content")}"
    puts
  end
end
# Builds the sample index, runs a fixed set of demonstration queries against
# it, and closes the searcher afterwards (even if a query raises).
def main
  searcher = IndexSearcher.new(create_index)
  begin
    ["freedom", "free", "progress or achievements", "wibble"].each do |query_string|
      search(searcher, query_string)
    end
  ensure
    # Close the searcher on every path so a failing query does not leak it.
    searcher.close
  end
end
# Run the demo.
main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment