Created
May 18, 2013 20:25
-
-
Save thaniaclair/5605672 to your computer and use it in GitHub Desktop.
Hello Lucene
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.document.StringField; | |
import org.apache.lucene.document.TextField; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.lucene.search.TopScoreDocCollector; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.apache.lucene.util.Version; | |
public class HelloLucene { | |
public static void main(String[] args) throws Exception { | |
// 0. Specify the analyzer for tokenizing text. | |
// The same analyzer should be used for indexing and searching | |
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); | |
// 1. create the index | |
Directory index = new RAMDirectory(); | |
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); | |
IndexWriter w = new IndexWriter(index, config); | |
addDoc(w, "Lucene in Action", "193398817"); | |
addDoc(w, "Lucene for Dummies", "55320055Z"); | |
addDoc(w, "Managing Gigabytes", "55063554A"); | |
addDoc(w, "The Art of Computer Science", "9900333X"); | |
w.close(); | |
// 2. query | |
String querystr = args.length > 0 ? args[0] : "193398817"; | |
// the "title" arg specifies the default field to use | |
// when no field is explicitly specified in the query. | |
Query q = new QueryParser(Version.LUCENE_43, "isbn", analyzer) | |
.parse(querystr); | |
// 3. search | |
int hitsPerPage = 10; | |
IndexReader reader = DirectoryReader.open(index); | |
IndexSearcher searcher = new IndexSearcher(reader); | |
TopScoreDocCollector collector = TopScoreDocCollector.create( | |
hitsPerPage, true); | |
searcher.search(q, collector); | |
ScoreDoc[] hits = collector.topDocs().scoreDocs; | |
// 4. display results | |
System.out.println("Found " + hits.length + " hits."); | |
for (int i = 0; i < hits.length; ++i) { | |
int docId = hits[i].doc; | |
Document d = searcher.doc(docId); | |
System.out.println((i + 1) + ". " + d.get("isbn") + "\t" | |
+ d.get("title")); | |
} | |
// reader can only be closed when there | |
// is no need to access the documents any more. | |
reader.close(); | |
} | |
private static void addDoc(IndexWriter w, String title, String isbn) | |
throws IOException { | |
Document doc = new Document(); | |
doc.add(new TextField("title", title, Field.Store.YES)); | |
// use a string field for isbn because we don't want it tokenized | |
doc.add(new StringField("isbn", isbn, Field.Store.YES)); | |
w.addDocument(doc); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment