Last active
December 18, 2015 05:09
-
-
Save mocobeta/5730884 to your computer and use it in GitHub Desktop.
Lucene API カスタム QueryParser サンプル
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* 以下は、Apache Software License v2.0 の元に頒布されているコードに一部改変を加えたものです。
* http://www.apache.org/licenses/LICENSE-2.0.txt | |
*/ | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.queryparser.classic.ParseException; | |
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.PhraseQuery; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.spans.SpanNearQuery; | |
import org.apache.lucene.search.spans.SpanTermQuery; | |
import org.apache.lucene.util.Version; | |
import org.apache.lucene.index.Term; | |
public class CustomQueryParser extends QueryParser { | |
public CustomQueryParser(Version matchVersion, String f, Analyzer a) { | |
super(matchVersion, f, a); | |
} | |
@Override | |
protected Query getFieldQuery(String field, String queryText, int slop) | |
throws ParseException { | |
Query orig = super.getFieldQuery(field, queryText, slop); | |
// PhraseQuery でない場合はそのまま返す | |
if (!(orig instanceof PhraseQuery)) { | |
return orig; | |
} | |
// PhraseQuery の場合は、SpanNearQuery に変換 | |
PhraseQuery pq = (PhraseQuery) orig; | |
Term[] terms = pq.getTerms(); | |
SpanTermQuery[] clauses = new SpanTermQuery[terms.length]; | |
for (int i = 0; i < terms.length; i++) { | |
clauses[i] = new SpanTermQuery(terms[i]); | |
} | |
// 語の出現順を意識するため、第3引数にtrueを指定 | |
SpanNearQuery query = new SpanNearQuery(clauses, slop, true); | |
return query; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* 以下は、Apache Software License v2.0 の元に頒布されているコードに一部改変を加えたものです。
* http://www.apache.org/licenses/LICENSE-2.0.txt | |
*/ | |
import static org.junit.Assert.*; | |
import java.io.IOException; | |
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field.Store; | |
import org.apache.lucene.document.TextField; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.queryparser.classic.ParseException; | |
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.TopDocs; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.apache.lucene.util.Version; | |
import org.junit.Test; | |
public class CustomQueryParserTest { | |
@Test | |
public void testPhraseQuery() throws Exception { | |
Directory dir = new RAMDirectory(); | |
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, | |
new WhitespaceAnalyzer(Version.LUCENE_43)); | |
IndexWriter writer = new IndexWriter(dir, config); | |
Document doc = new Document(); | |
doc.add(new TextField("field", | |
"the quick brown fox jumped over the lazy dog", Store.YES)); | |
writer.addDocument(doc); | |
writer.close(); | |
IndexReader reader = DirectoryReader.open(dir); | |
IndexSearcher searcher = new IndexSearcher(reader); | |
// デフォルト QueryParser | |
QueryParser parser = new QueryParser(Version.LUCENE_43, "field", | |
new WhitespaceAnalyzer(Version.LUCENE_43)); | |
// カスタム QueryParser | |
QueryParser customParser = new CustomQueryParser(Version.LUCENE_43, | |
"field", new WhitespaceAnalyzer(Version.LUCENE_43)); | |
// slop に 1 を指定 | |
// デフォルトQueryParserでも、カスタムQueryParserでも同様にヒットする | |
assertTrue(matched(parser, searcher, "\"quick fox\"", 1)); | |
assertTrue(matched(customParser, searcher, "\"quick fox\"", 1)); | |
// slop に 3 を指定 | |
// デフォルトQueryParserの場合、(十分なslopを与えれば)フレーズ内の語の出現順が逆でもヒットする | |
assertTrue(matched(parser, searcher, "\"fox quick\"", 3)); | |
// カスタムQueryParserの場合、フレーズ内の語の出現順が合わないとヒットしない | |
assertFalse(matched(customParser, searcher, "\"fox quick\"", 3)); | |
} | |
private boolean matched(QueryParser parser, IndexSearcher searcher, | |
String phrase, int slop) throws IOException, ParseException { | |
parser.setPhraseSlop(slop); // slopをセット | |
Query query = parser.parse(phrase); | |
TopDocs docs = searcher.search(query, 10); | |
return docs.totalHits > 0; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment