Last active
November 18, 2017 00:39
-
-
Save mocobeta/5213521 to your computer and use it in GitHub Desktop.
Lucene in Action Chapter 5 : Sorting Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package example; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.util.Properties; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.IntField; | |
import org.apache.lucene.document.StoredField; | |
import org.apache.lucene.document.StringField; | |
import org.apache.lucene.document.TextField; | |
import org.apache.lucene.document.Field.Store; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.util.Version; | |
public class BookDataIndexer { | |
private Directory directory; | |
private String baseDir; | |
public BookDataIndexer(Directory directory, String baseDir) { | |
this.directory = directory; | |
this.baseDir = baseDir; | |
} | |
public void indexBookData() throws IOException { | |
File d = new File(baseDir); | |
IndexWriter writer = getWriter(); | |
indexBookData(writer, d); | |
writer.close(); | |
} | |
private void indexBookData(IndexWriter writer, File file) throws IOException { | |
if (file.isDirectory()) { | |
for (File f : file.listFiles()) { | |
indexBookData(writer, f); | |
} | |
} else { | |
String category = file.getParent().substring(baseDir.length()) | |
.replace(File.separatorChar, '/'); | |
Document doc = getDocument(category, file); | |
writer.addDocument(doc); | |
} | |
} | |
private IndexWriter getWriter() throws IOException { | |
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, | |
new StandardAnalyzer(Version.LUCENE_41)); | |
IndexWriter writer = new IndexWriter(directory, config); | |
return writer; | |
} | |
private Document getDocument(String category, File file) throws IOException { | |
Properties props = new Properties(); | |
props.load(new FileInputStream(file)); | |
String isbn = props.getProperty("isbn"); | |
String title = props.getProperty("title"); | |
String author = props.getProperty("author"); | |
String url = props.getProperty("url"); | |
String subject = props.getProperty("subject"); | |
String pubmonth = props.getProperty("pubmonth"); | |
Document doc = new Document(); | |
doc.add(new StringField("isbn", isbn, Store.YES)); | |
doc.add(new StringField("category", category, Store.YES)); | |
doc.add(new StringField("title", title, Store.YES)); | |
doc.add(new StringField("title2", title.toLowerCase(), Store.NO)); | |
for (String val : author.split(",")) { | |
doc.add(new StringField("author", val, Store.YES)); | |
} | |
doc.add(new StoredField("url", url)); | |
doc.add(new TextField("subject", subject, Store.NO)); | |
doc.add(new IntField("pubmonth", Integer.parseInt(pubmonth), Store.YES)); | |
String contents = title + " " + subject + " " + author; | |
doc.add(new TextField("contents", contents, Store.NO)); | |
return doc; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. スコア順でソート の結果 | |
Results for: *:* (contents:java contents:action) sorted by <score> | |
Title pubmonth id score | |
Java Development with Ant 200208 5 0.65223 | |
/technology/computers/programming | |
Lucene in Action 200406 7 0.46424 | |
/technology/computers/programming | |
Tapestry in Action 200403 9 0.46424 | |
/technology/computers/programming | |
JUnit in Action 200310 6 0.42828 | |
/technology/computers/programming | |
A Modern Art of Education 198106 0 0.14221 | |
/education/pedagogy | |
Imperial Secrets of Health... 199401 1 0.14221 | |
/health/alternative/chinese | |
Tao Te Ching 道德經 198810 2 0.14221 | |
/philosophy/eastern | |
Gödel, Escher, Bach: an Et... 197903 3 0.14221 | |
/technology/computers/ai | |
Mindstorms 198001 4 0.14221 | |
/technology/computers/programming/education | |
Extreme Programming Explained 199910 8 0.14221 | |
/technology/computers/programming/methodology | |
The Pragmatic Programmer 199910 10 0.14221 | |
/technology/computers/programming |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2. インデックス順でソート の結果 | |
Results for: *:* (contents:java contents:action) sorted by <doc> | |
Title pubmonth id score | |
A Modern Art of Education 198106 0 0.14221 | |
/education/pedagogy | |
Imperial Secrets of Health... 199401 1 0.14221 | |
/health/alternative/chinese | |
Tao Te Ching 道德經 198810 2 0.14221 | |
/philosophy/eastern | |
Gödel, Escher, Bach: an Et... 197903 3 0.14221 | |
/technology/computers/ai | |
Mindstorms 198001 4 0.14221 | |
/technology/computers/programming/education | |
Java Development with Ant 200208 5 0.65223 | |
/technology/computers/programming | |
JUnit in Action 200310 6 0.42828 | |
/technology/computers/programming | |
Lucene in Action 200406 7 0.46424 | |
/technology/computers/programming | |
Extreme Programming Explained 199910 8 0.14221 | |
/technology/computers/programming/methodology | |
Tapestry in Action 200403 9 0.46424 | |
/technology/computers/programming | |
The Pragmatic Programmer 199910 10 0.14221 | |
/technology/computers/programming |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3. 'category'フィールド(文字列)の昇順でソート の結果 | |
Results for: *:* (contents:java contents:action) sorted by <string: "category"> | |
Title pubmonth id score | |
A Modern Art of Education 198106 0 0.14221 | |
/education/pedagogy | |
Imperial Secrets of Health... 199401 1 0.14221 | |
/health/alternative/chinese | |
Tao Te Ching 道德經 198810 2 0.14221 | |
/philosophy/eastern | |
Gödel, Escher, Bach: an Et... 197903 3 0.14221 | |
/technology/computers/ai | |
Java Development with Ant 200208 5 0.65223 | |
/technology/computers/programming | |
JUnit in Action 200310 6 0.42828 | |
/technology/computers/programming | |
Lucene in Action 200406 7 0.46424 | |
/technology/computers/programming | |
Tapestry in Action 200403 9 0.46424 | |
/technology/computers/programming | |
The Pragmatic Programmer 199910 10 0.14221 | |
/technology/computers/programming | |
Mindstorms 198001 4 0.14221 | |
/technology/computers/programming/education | |
Extreme Programming Explained 199910 8 0.14221 | |
/technology/computers/programming/methodology |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4. 'pubmonth'フィールド(整数)の降順でソート の結果 | |
Results for: *:* (contents:java contents:action) sorted by <int: "pubmonth">! | |
Title pubmonth id score | |
Lucene in Action 200406 7 0.46424 | |
/technology/computers/programming | |
Tapestry in Action 200403 9 0.46424 | |
/technology/computers/programming | |
JUnit in Action 200310 6 0.42828 | |
/technology/computers/programming | |
Java Development with Ant 200208 5 0.65223 | |
/technology/computers/programming | |
Extreme Programming Explained 199910 8 0.14221 | |
/technology/computers/programming/methodology | |
The Pragmatic Programmer 199910 10 0.14221 | |
/technology/computers/programming | |
Imperial Secrets of Health... 199401 1 0.14221 | |
/health/alternative/chinese | |
Tao Te Ching 道德經 198810 2 0.14221 | |
/philosophy/eastern | |
A Modern Art of Education 198106 0 0.14221 | |
/education/pedagogy | |
Mindstorms 198001 4 0.14221 | |
/technology/computers/programming/education | |
Gödel, Escher, Bach: an Et... 197903 3 0.14221 | |
/technology/computers/ai |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5. 第2, 第3, ... ソートを与える例 の結果 | |
Results for: *:* (contents:java contents:action) sorted by <string: "category">,<score>,<int: "pubmonth">! | |
Title pubmonth id score | |
A Modern Art of Education 198106 0 0.14221 | |
/education/pedagogy | |
Imperial Secrets of Health... 199401 1 0.14221 | |
/health/alternative/chinese | |
Tao Te Ching 道德經 198810 2 0.14221 | |
/philosophy/eastern | |
Gödel, Escher, Bach: an Et... 197903 3 0.14221 | |
/technology/computers/ai | |
Java Development with Ant 200208 5 0.65223 | |
/technology/computers/programming | |
Lucene in Action 200406 7 0.46424 | |
/technology/computers/programming | |
Tapestry in Action 200403 9 0.46424 | |
/technology/computers/programming | |
JUnit in Action 200310 6 0.42828 | |
/technology/computers/programming | |
The Pragmatic Programmer 199910 10 0.14221 | |
/technology/computers/programming | |
Mindstorms 198001 4 0.14221 | |
/technology/computers/programming/education | |
Extreme Programming Explained 199910 8 0.14221 | |
/technology/computers/programming/methodology |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
6. 第2, 第3, ... ソートを配列で与える例 の結果 | |
Results for: *:* (contents:java contents:action) sorted by <score>,<string: "category"> | |
Title pubmonth id score | |
Java Development with Ant 200208 5 0.65223 | |
/technology/computers/programming | |
Lucene in Action 200406 7 0.46424 | |
/technology/computers/programming | |
Tapestry in Action 200403 9 0.46424 | |
/technology/computers/programming | |
JUnit in Action 200310 6 0.42828 | |
/technology/computers/programming | |
A Modern Art of Education 198106 0 0.14221 | |
/education/pedagogy | |
Imperial Secrets of Health... 199401 1 0.14221 | |
/health/alternative/chinese | |
Tao Te Ching 道德經 198810 2 0.14221 | |
/philosophy/eastern | |
Gödel, Escher, Bach: an Et... 197903 3 0.14221 | |
/technology/computers/ai | |
The Pragmatic Programmer 199910 10 0.14221 | |
/technology/computers/programming | |
Mindstorms 198001 4 0.14221 | |
/technology/computers/programming/education | |
Extreme Programming Explained 199910 8 0.14221 | |
/technology/computers/programming/methodology |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.BooleanClause.Occur; | |
import org.apache.lucene.search.BooleanQuery; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.MatchAllDocsQuery; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.lucene.search.Sort; | |
import org.apache.lucene.search.SortField; | |
import org.apache.lucene.search.TopDocs; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.apache.lucene.util.Version; | |
import com.ibm.icu.text.DecimalFormat; | |
import example.BookDataIndexer; | |
public class SortTest { | |
private Directory directory; | |
public SortTest() throws IOException { | |
directory = new RAMDirectory(); | |
new BookDataIndexer(directory, "bookdata").indexBookData(); | |
} | |
public void displayResults(Query query, Sort sort) throws IOException { | |
IndexReader r = DirectoryReader.open(directory); | |
IndexSearcher searcher = new IndexSearcher(r); | |
// search()メソッドコール | |
// 第1引数: Queryオブジェクト | |
// 第2引数: Filterオブジェクト (ここではフィルタをかけないので null) | |
// 第3引数: Sortオブジェクト | |
// 第4引数: ヒットした各ドキュメントのスコアを計算、返却する場合は true | |
// 第5引数: ヒットしたドキュメントのうち、最大のスコアを返却する場合は false | |
TopDocs results = searcher.search(query, null, 20, sort, true, false); | |
System.out.println("Results for: " + | |
query.toString() + " sorted by " + sort); | |
System.out.println( | |
StringUtils.rightPad("Title", 30) + | |
StringUtils.rightPad("pubmonth", 10) + | |
StringUtils.center("id", 4) + | |
StringUtils.center("score", 15)); | |
DecimalFormat scoreFormatter = new DecimalFormat("0.#####"); | |
for (ScoreDoc sd : results.scoreDocs) { | |
int docID = sd.doc; | |
float score = sd.score; | |
Document doc = searcher.doc(docID); | |
System.out.println( | |
StringUtils.rightPad( | |
StringUtils.abbreviate(doc.get("title"), 29), 30) + | |
StringUtils.rightPad(doc.get("pubmonth"), 10) + | |
StringUtils.center("" + docID, 4) + | |
StringUtils.leftPad( | |
scoreFormatter.format(score), 12)); | |
System.out.println(" " + doc.get("category")); | |
// System.out.print(searcher.explain(query, docID)); | |
} | |
r.close(); | |
} | |
public static void main(String[] args) throws Exception { | |
Query allBooks = new MatchAllDocsQuery(); | |
QueryParser parser = new QueryParser(Version.LUCENE_41, "contents", new StandardAnalyzer(Version.LUCENE_41)); | |
BooleanQuery query = new BooleanQuery(); | |
query.add(allBooks, Occur.SHOULD); | |
query.add(parser.parse("java OR action"), Occur.SHOULD); | |
SortTest test = new SortTest(); | |
/** 1. スコア順でソート */ | |
test.displayResults(query, Sort.RELEVANCE); | |
/** 2. インデックス順でソート */ | |
test.displayResults(query, Sort.INDEXORDER); | |
/** 3. 'category'フィールド(文字列)の昇順でソート */ | |
test.displayResults(query, new Sort(new SortField("category", SortField.Type.STRING))); | |
/** 4. 'pubmonth'フィールド(整数)の降順でソート*/ | |
test.displayResults(query, new Sort(new SortField("pubmonth", SortField.Type.INT, true))); | |
/** 5. 第2, 第3, ... ソートを与えることも可能。*/ | |
test.displayResults(query, | |
new Sort(new SortField("category", SortField.Type.STRING), | |
SortField.FIELD_SCORE, | |
new SortField("pubmonth", SortField.Type.INT, true))); | |
/** 配列で第2, 第3, ... ソートを配列で与える例 */ | |
test.displayResults(query, | |
new Sort(new SortField[]{ | |
SortField.FIELD_SCORE, | |
new SortField("category", SortField.Type.STRING)})); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment