Created
June 30, 2012 14:40
-
-
Save mocobeta/3024041 to your computer and use it in GitHub Desktop.
Lucene入門 4章 インデックス検索プログラム - Lucene 3.6 バージョン
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package searcher; | |
import java.io.File; | |
import java.io.IOException; | |
import java.lang.reflect.Constructor; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.lucene.search.TopDocs; | |
import org.apache.lucene.search.TotalHitCountCollector; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.FSDirectory; | |
import org.apache.lucene.util.Version; | |
public abstract class BaseSearcher { | |
protected static final String DEFAULT_ANALYZER = | |
"org.apache.lucene.analysis.ja.JapaneseAnalyzer"; | |
protected static final String GOSEN_ANALYZER = | |
"org.apache.lucene.analysis.gosen.GosenAnalyzer"; | |
protected static final String CJK_ANALYZER = | |
"org.apache.lucene.analysis.cjk.CJKAnalyzer"; | |
protected static final String PROP_ANALYZER = "analyzer"; | |
protected Analyzer analyzer; | |
protected String indexDir; | |
private String analyzerClass; | |
protected BaseSearcher(String indexDir, String analyzerClass) { | |
this.indexDir = indexDir; | |
this.analyzerClass = analyzerClass; | |
} | |
/** Analyzerを取得する */ | |
protected Analyzer getAnalyzer() { | |
try { | |
if (analyzer == null) { | |
// Analyzerクラスを生成 | |
Class clazz = getClass().getClassLoader().loadClass(analyzerClass); | |
Constructor<Analyzer> constructor = clazz.getConstructor(Version.class); | |
Object[] args = new Object[]{ Version.LUCENE_36 }; | |
analyzer = constructor.newInstance(args); | |
System.out.println("* Analyzer : " + analyzerClass); | |
} | |
return analyzer; | |
} catch (Exception e) { | |
throw new SearcherException(e); | |
} | |
} | |
/** インデックス格納先Directoryを取得する */ | |
protected Directory getDirectory() { | |
try { | |
Directory d = FSDirectory.open(new File(indexDir)); | |
return d; | |
} catch (IOException e) { | |
throw new SearcherException(e); | |
} | |
} | |
/** 指定されたQueryでインデックスを検索する */ | |
protected void searchIndex(Query query) { | |
System.out.println("Query = \"" + query + "\""); | |
IndexReader reader = null; | |
IndexSearcher searcher = null; | |
try { | |
// IndexReader作成 | |
reader = IndexReader.open(getDirectory()); | |
// IndexReaderを与えてIndexSearcherを作成 | |
searcher = new IndexSearcher(reader); | |
// Queryで検索されるトータルドキュメント数を取得する | |
TotalHitCountCollector total = new TotalHitCountCollector(); | |
searcher.search(query, total); | |
int totalHits = total.getTotalHits(); | |
// ランキング上位(最大)10件のドキュメント取得する | |
TopDocs docs = searcher.search(query, 10); | |
ScoreDoc[] hits = docs.scoreDocs; | |
System.out.println(Integer.toString(totalHits) + " 件中 " + Integer.toString(hits.length) + " 件取得しました。"); | |
// ヒットしたドキュメントを表示する | |
for (ScoreDoc hit : hits) { | |
// ScoreDocオブジェクトはドキュメントIDしか持っていないので、searcherを経由してドキュメント本体を取得 | |
Document doc = searcher.doc(hit.doc); | |
printDocument(doc); | |
System.out.println(Integer.toString(hit.doc) + ", スコア = " + Float.toString(hit.score)); | |
} | |
} catch (IOException e) { | |
throw new SearcherException(e); | |
} finally { | |
try { | |
if (searcher != null) { | |
searcher.close(); | |
} | |
if (reader != null) { | |
reader.close(); | |
} | |
} catch (IOException e) {} | |
} | |
} | |
protected abstract void printDocument(Document doc); | |
public static class SearcherException extends RuntimeException { | |
public SearcherException(Exception e) { | |
super(e); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package searcher; | |
import indexer.BookIndexer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.queryParser.ParseException; | |
import org.apache.lucene.queryParser.QueryParser; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.util.NumericUtils; | |
import org.apache.lucene.util.Version; | |
public class BookSearcher extends BaseSearcher { | |
public static void main(String[] args) { | |
BookSearcher bs = new BookSearcher("book-index/kuromoji", DEFAULT_ANALYZER); | |
QueryParser qp = new QueryParser(Version.LUCENE_36, BookIndexer.F_TITLE, bs.getAnalyzer()); | |
try { | |
Query query = qp.parse("Java プログラミング"); | |
bs.searchIndex(query); | |
} catch (ParseException e) { | |
throw new SearcherException(e); | |
} | |
} | |
protected BookSearcher(String indexDir, String analyzerClass) { | |
super(indexDir, analyzerClass); | |
} | |
@Override | |
protected void printDocument(Document doc) { | |
System.out.println("============================================================================="); | |
System.out.println(BookIndexer.F_PUBLISHER + " = " + doc.get(BookIndexer.F_PUBLISHER)); | |
System.out.println(BookIndexer.F_CATEGORY + " = " + doc.get(BookIndexer.F_CATEGORY)); | |
System.out.println(BookIndexer.F_TITLE + " = " + doc.get(BookIndexer.F_TITLE)); | |
System.out.println(BookIndexer.F_AUTHOR + " = " + doc.get(BookIndexer.F_AUTHOR)); | |
System.out.println(NumericUtils.prefixCodedToLong(doc.get(BookIndexer.F_PAGES)) + " ページ"); | |
System.out.println(BookIndexer.F_ISBN + " = " + doc.get(BookIndexer.F_ISBN)); | |
System.out.println(BookIndexer.F_DATE + " = " + doc.get(BookIndexer.F_DATE)); | |
System.out.println(NumericUtils.prefixCodedToLong(doc.get(BookIndexer.F_PRICE)) + " 円"); | |
System.out.println(BookIndexer.F_SUMMARY + " = " + doc.get(BookIndexer.F_SUMMARY)); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
http://mocobeta-backup.tumblr.com/post/26212093829/lucene-3-6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package searcher; | |
import indexer.PostIndexer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.queryParser.ParseException; | |
import org.apache.lucene.queryParser.QueryParser; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.util.Version; | |
public class PostSearcher extends BaseSearcher { | |
public static void main(String[] args) { | |
PostSearcher ps = new PostSearcher("post-index/kuromoji", DEFAULT_ANALYZER); | |
QueryParser qp = new QueryParser(Version.LUCENE_36, PostIndexer.F_NAME, ps.getAnalyzer()); | |
try { | |
Query q = qp.parse("日立製作所"); | |
ps.searchIndex(q); | |
} catch (ParseException e) { | |
throw new SearcherException(e); | |
} | |
} | |
protected PostSearcher(String indexDir, String analyzerClass) { | |
super(indexDir, analyzerClass); | |
} | |
@Override | |
protected void printDocument(Document doc) { | |
System.out.println("============================================================================="); | |
System.out.println(PostIndexer.F_CODE + " = " + doc.get(PostIndexer.F_CODE)); | |
System.out.println(PostIndexer.F_KANA + " = " + doc.get(PostIndexer.F_KANA)); | |
System.out.println(PostIndexer.F_NAME + " = " + doc.get(PostIndexer.F_NAME)); | |
System.out.println(PostIndexer.F_ADDR + " = " + doc.get(PostIndexer.F_ADDR)); | |
System.out.println(PostIndexer.F_CONTENT + " = " + doc.get(PostIndexer.F_CONTENT)); | |
System.out.println(PostIndexer.F_INDZIP + " = " + doc.get(PostIndexer.F_INDZIP)); | |
System.out.println(PostIndexer.F_ZIP + " = " + doc.get(PostIndexer.F_ZIP)); | |
System.out.println(PostIndexer.F_POST + " = " + doc.get(PostIndexer.F_POST)); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment