Created
May 13, 2012 21:28
-
-
Save anvie/2690268 to your computer and use it in GitHub Desktop.
My Lucene implementation test in Scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.ansvia.belajar.lucene | |
import org.apache.lucene.analysis.standard.StandardAnalyzer | |
import org.apache.lucene.util.Version | |
import org.apache.lucene.store.SimpleFSDirectory | |
import java.io.{FileReader, File} | |
import org.apache.lucene.document.{Field, Document} | |
import org.apache.lucene.queryParser.QueryParser | |
import org.apache.lucene.index.{IndexReader, IndexWriter} | |
import org.apache.lucene.search.{TopScoreDocCollector, IndexSearcher} | |
/**
 * Small command-line demo that builds a Lucene index from the plain files in
 * `files_to_index` and runs queries against it.
 *
 * Usage: `lucene-test index` or `lucene-test search <query>`.
 */
object LuceneTest {

  /** File-system location of the index, relative to the working directory. */
  val indexDirF: File = new File("lucene_index")

  /** Lucene directory handle over [[indexDirF]]. */
  val indexDir: SimpleFSDirectory = new SimpleFSDirectory(indexDirF)

  /**
   * (Re)creates the index from every regular file directly inside `files_to_index`.
   *
   * Each file becomes one document with a tokenized, unstored `content` field
   * (read from the file) and a stored `path` field. Sub-directories and other
   * non-regular entries are skipped. If the source directory cannot be listed,
   * a message is printed and the existing index is left untouched.
   */
  def doIndexing(): Unit = {
    val fileDir = new File("files_to_index")

    // listFiles() returns null (not an empty array) when the path does not
    // exist or is not a directory, so wrap it before iterating.
    Option(fileDir.listFiles()) match {
      case None =>
        println("Cannot list files in " + fileDir.getPath + "; nothing indexed.")

      case Some(entries) =>
        val luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_36)
        // `true` = create a fresh index, overwriting any existing one.
        val indexWriter =
          new IndexWriter(indexDir, luceneAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED)
        try {
          // Skip non-files instead of using `return`, which inside a lambda
          // would abort the whole method and leave the writer open.
          entries.toList.filter(_.isFile).foreach { f =>
            println("Indexing " + f.getCanonicalPath + "...")
            val textReader = new FileReader(f)
            try {
              val doc = new Document()
              doc.add(new Field("content", textReader))
              doc.add(new Field("path", f.getPath, Field.Store.YES, Field.Index.ANALYZED))
              indexWriter.addDocument(doc)
            } finally {
              // Make sure the file handle is released even if addDocument fails.
              textReader.close()
            }
          }
          indexWriter.forceMerge(1, true)
        } finally {
          // Always release the index write lock.
          indexWriter.close()
        }
        println("Done.")
    }
  }

  /**
   * Parses `query` against the `content` field and prints the stored `path`
   * of up to ten best-scoring documents.
   *
   * @param query query string in Lucene query-parser syntax; a malformed
   *              query makes the parser's exception propagate to the caller
   */
  def doSearch(query: String): Unit = {
    val analyzer = new StandardAnalyzer(Version.LUCENE_36)
    val q = new QueryParser(Version.LUCENE_36, "content", analyzer).parse(query)
    val reader = IndexReader.open(indexDir)
    try {
      val searcher = new IndexSearcher(reader)
      val collector = TopScoreDocCollector.create(10, true)
      searcher.search(q, collector)
      collector.topDocs().scoreDocs.foreach { hit =>
        println("Found in: " + searcher.doc(hit.doc).get("path"))
      }
    } finally {
      // The reader holds open index files; close it once results are printed.
      reader.close()
    }
  }

  /**
   * Entry point. Dispatches on the first argument; prints usage when the
   * command is missing, unknown, or `search` is given without a query.
   */
  def main(args: Array[String]): Unit = {
    println("Lucene Test Indexer")
    args.toList match {
      case "index" :: _ =>
        doIndexing()
      case "search" :: q :: _ =>
        println("Search for '" + q + "'")
        doSearch(q)
      case _ =>
        println("USAGE: lucene-test [index|search] [?q]")
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment