bru · August 21, 2011 14:51
diff --git a/in_memory.rb b/in_memory.rb
 $:.unshift './lib'

 require 'rubygems'
 require 'rjb'
 require 'rjbextension'

 # Bind the Lucene Java classes this script uses to Ruby constants via Rjb.
 # NOTE(review): ParseException is not referenced anywhere else in the visible script.
 Document          = Rjb::import('org.apache.lucene.document.Document')
 StandardAnalyzer  = Rjb::import('org.apache.lucene.analysis.standard.StandardAnalyzer')
 Field             = Rjb::import('org.apache.lucene.document.Field')
 IndexWriter       = Rjb::import('org.apache.lucene.index.IndexWriter')
 IndexWriterConfig = Rjb::import('org.apache.lucene.index.IndexWriterConfig')
 ParseException    = Rjb::import('org.apache.lucene.queryParser.ParseException')
 QueryParser       = Rjb::import('org.apache.lucene.queryParser.QueryParser')
 RAMDirectory      = Rjb::import('org.apache.lucene.store.RAMDirectory')
 IndexSearcher     = Rjb::import('org.apache.lucene.search.IndexSearcher')

 # Nested Java classes are imported by their binary name (Outer$Inner).
 # NOTE(review): Resolution is not referenced anywhere else in the visible script.
 Store       = Rjb::import 'org.apache.lucene.document.Field$Store'
 Index       = Rjb::import 'org.apache.lucene.document.Field$Index'
 Resolution  = Rjb::import 'org.apache.lucene.document.DateTools$Resolution'

 # Lucene version constant passed to the analyzers, the writer config and the query parser below.
 # NOTE(review): `org` is not defined in this file - presumably provided by rjbextension; confirm.
 VERSION = org.apache.lucene.util.Version.LUCENE_33

 # NOTE(review): load_jvm is not defined in this file (presumably rjbextension). It is called
 # after the Rjb::import calls above - confirm the heap options still take effect here.
 load_jvm(['-Xms128m', '-Xmx512m'])

 # Builds a Lucene document for one quote.
 #
 # title   - added as the "title" field with Store.YES / Index.NO
 # content - added as the "content" field with Store.YES / Index.ANALYZED
 #
 # Returns the populated Document.
 def create_document(title, content)
   fields = [
     Field.new("title", title, Store.YES, Index.NO),
     Field.new("content", content, Store.YES, Index.ANALYZED)
   ]

   document = Document.new
   fields.each { |field| document.add(field) }
   document
 end

 # Builds an in-memory Lucene index holding four sample quotes.
 #
 # Each quote becomes one document built by create_document, with the author
 # as the title and the quote text as the content.
 #
 # Returns the RAMDirectory that backs the index, after the writer has been
 # optimized and closed.
 def create_index
   quotes = [
     ["Theodore Roosevelt",
      "It behooves every man to remember that the work of the " +
      "critic, is of altogether secondary importance, and that, " +
      "in the end, progress is accomplished by the man who does " +
      "things."],
     ["Friedrich Hayek",
      "The case for individual freedom rests largely on the " +
      "recognition of the inevitable and universal ignorance " +
      "of all of us concerning a great many of the factors on " +
      "which the achievements of our ends and welfare depend."],
     ["Ayn Rand",
      "There is nothing to take a man's freedom away from " +
      "him, save other men. To be free, a man must be free " +
      "of his brothers."],
     ["Mohandas Gandhi",
      "Freedom is not worth having if it does not connote " +
      "freedom to err."]
   ]

   idx    = RAMDirectory.new
   config = IndexWriterConfig.new(VERSION, StandardAnalyzer.new(VERSION))
   writer = IndexWriter.new(idx, config)

   begin
     quotes.each do |author, text|
       writer.add_document(create_document(author, text))
     end
     writer.optimize
   ensure
     # Always close the writer, even when adding or optimizing raises.
     writer.close
   end

   idx
 end

 # Runs query_string against the "content" field and prints the results.
 #
 # searcher     - object answering search(query, limit) and doc(id)
 # query_string - text handed to QueryParser#parse
 #
 # Prints the total hit count and, for each of the returned hits (at most 10
 # are requested), its document id, score, title and content. Results are
 # numbered starting at 1.
 def search(searcher, query_string)
   parser = QueryParser.new(VERSION, "content", StandardAnalyzer.new(VERSION))

   puts "Searching for #{query_string}"
   hits = searcher.search(parser.parse(query_string), 10)
   hit_count = hits.totalHits

   if hit_count.zero?
     puts "No matching documents."
     return
   end

   puts "%d total matching documents" % hit_count
   puts "Hits for %s were found in quotes by:" % query_string

   hits.scoreDocs.each_with_index do |score_doc, i|
     doc_id = score_doc.doc
     puts "doc_id: %s \t score: %s" % [doc_id, score_doc.score]

     doc = searcher.doc(doc_id)
     # each_with_index is zero-based; the printed ranking starts at 1.
     puts "%d. %s" % [i + 1, doc.get("title")]
     puts "Content: %s" % doc.get("content")
     puts
   end
 end

 # Demo entry point: builds the in-memory index, runs four sample queries
 # against it and closes the searcher afterwards.
 def main
   searcher = IndexSearcher.new(create_index)

   begin
     ["freedom", "free", "progress or achievements", "wibble"].each do |query_string|
       search(searcher, query_string)
     end
   ensure
     # Close the searcher even if one of the queries raises.
     searcher.close
   end
 end

 # Run the demo.
 main
	$:.unshift './lib'

	require 'rubygems'
	require 'rjb'
	require 'rjbextension'

	# Bind the Lucene Java classes this script uses to Ruby constants via Rjb.
	# NOTE(review): ParseException is not referenced anywhere else in the visible script.
	Document = Rjb::import('org.apache.lucene.document.Document')
	StandardAnalyzer = Rjb::import('org.apache.lucene.analysis.standard.StandardAnalyzer')
	Field = Rjb::import('org.apache.lucene.document.Field')
	IndexWriter = Rjb::import('org.apache.lucene.index.IndexWriter')
	IndexWriterConfig = Rjb::import('org.apache.lucene.index.IndexWriterConfig')
	ParseException = Rjb::import('org.apache.lucene.queryParser.ParseException')
	QueryParser = Rjb::import('org.apache.lucene.queryParser.QueryParser')
	RAMDirectory = Rjb::import('org.apache.lucene.store.RAMDirectory')
	IndexSearcher = Rjb::import('org.apache.lucene.search.IndexSearcher')

	# Nested Java classes are imported by their binary name (Outer$Inner).
	# NOTE(review): Resolution is not referenced anywhere else in the visible script.
	Store = Rjb::import 'org.apache.lucene.document.Field$Store'
	Index = Rjb::import 'org.apache.lucene.document.Field$Index'
	Resolution = Rjb::import 'org.apache.lucene.document.DateTools$Resolution'

	# Lucene version constant passed to the analyzers, the writer config and the query parser below.
	# NOTE(review): `org` is not defined in this file - presumably provided by rjbextension; confirm.
	VERSION = org.apache.lucene.util.Version.LUCENE_33

	# NOTE(review): load_jvm is not defined in this file (presumably rjbextension). It is called
	# after the Rjb::import calls above - confirm the heap options still take effect here.
	load_jvm(['-Xms128m', '-Xmx512m'])

	# Builds a Lucene document for one quote.
	#
	# title   - added as the "title" field with Store.YES / Index.NO
	# content - added as the "content" field with Store.YES / Index.ANALYZED
	#
	# Returns the populated Document.
	def create_document(title, content)
	  fields = [
	    Field.new("title", title, Store.YES, Index.NO),
	    Field.new("content", content, Store.YES, Index.ANALYZED)
	  ]

	  document = Document.new
	  fields.each { |field| document.add(field) }
	  document
	end

	# Builds an in-memory Lucene index holding four sample quotes.
	#
	# Each quote becomes one document built by create_document, with the author
	# as the title and the quote text as the content.
	#
	# Returns the RAMDirectory that backs the index, after the writer has been
	# optimized and closed.
	def create_index
	  quotes = [
	    ["Theodore Roosevelt",
	     "It behooves every man to remember that the work of the " +
	     "critic, is of altogether secondary importance, and that, " +
	     "in the end, progress is accomplished by the man who does " +
	     "things."],
	    ["Friedrich Hayek",
	     "The case for individual freedom rests largely on the " +
	     "recognition of the inevitable and universal ignorance " +
	     "of all of us concerning a great many of the factors on " +
	     "which the achievements of our ends and welfare depend."],
	    ["Ayn Rand",
	     "There is nothing to take a man's freedom away from " +
	     "him, save other men. To be free, a man must be free " +
	     "of his brothers."],
	    ["Mohandas Gandhi",
	     "Freedom is not worth having if it does not connote " +
	     "freedom to err."]
	  ]

	  idx    = RAMDirectory.new
	  config = IndexWriterConfig.new(VERSION, StandardAnalyzer.new(VERSION))
	  writer = IndexWriter.new(idx, config)

	  begin
	    quotes.each do |author, text|
	      writer.add_document(create_document(author, text))
	    end
	    writer.optimize
	  ensure
	    # Always close the writer, even when adding or optimizing raises.
	    writer.close
	  end

	  idx
	end

	# Runs query_string against the "content" field and prints the results.
	#
	# searcher     - object answering search(query, limit) and doc(id)
	# query_string - text handed to QueryParser#parse
	#
	# Prints the total hit count and, for each of the returned hits (at most 10
	# are requested), its document id, score, title and content. Results are
	# numbered starting at 1.
	def search(searcher, query_string)
	  parser = QueryParser.new(VERSION, "content", StandardAnalyzer.new(VERSION))

	  puts "Searching for #{query_string}"
	  hits = searcher.search(parser.parse(query_string), 10)
	  hit_count = hits.totalHits

	  if hit_count.zero?
	    puts "No matching documents."
	    return
	  end

	  puts "%d total matching documents" % hit_count
	  puts "Hits for %s were found in quotes by:" % query_string

	  hits.scoreDocs.each_with_index do |score_doc, i|
	    doc_id = score_doc.doc
	    puts "doc_id: %s \t score: %s" % [doc_id, score_doc.score]

	    doc = searcher.doc(doc_id)
	    # each_with_index is zero-based; the printed ranking starts at 1.
	    puts "%d. %s" % [i + 1, doc.get("title")]
	    puts "Content: %s" % doc.get("content")
	    puts
	  end
	end

	# Demo entry point: builds the in-memory index, runs four sample queries
	# against it and closes the searcher afterwards.
	def main
	  searcher = IndexSearcher.new(create_index)

	  begin
	    ["freedom", "free", "progress or achievements", "wibble"].each do |query_string|
	      search(searcher, query_string)
	    end
	  ensure
	    # Close the searcher even if one of the queries raises.
	    searcher.close
	  end
	end

	# Run the demo.
	main