Skip to content

Instantly share code, notes, and snippets.

@mocobeta
Last active December 18, 2015 05:09
Show Gist options
  • Save mocobeta/5730884 to your computer and use it in GitHub Desktop.
Save mocobeta/5730884 to your computer and use it in GitHub Desktop.
Lucene API カスタム QueryParser サンプル
/**
* The following is a partially modified version of code distributed under the Apache License, Version 2.0.
* http://www.apache.org/licenses/LICENSE-2.0.txt
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.Version;
import org.apache.lucene.index.Term;
/**
 * A {@link QueryParser} that rewrites every {@link PhraseQuery} produced for a
 * quoted phrase into an in-order {@link SpanNearQuery}, so that the terms must
 * appear in the document in the same order as they were written in the phrase.
 *
 * <p>Queries that the parent parser does not build as a {@code PhraseQuery}
 * are returned untouched.
 */
public class CustomQueryParser extends QueryParser {

    /**
     * Creates a parser; all arguments are forwarded unchanged to
     * {@link QueryParser#QueryParser(Version, String, Analyzer)}.
     *
     * @param matchVersion Lucene compatibility version
     * @param f            default field for query terms
     * @param a            analyzer used to tokenize query text
     */
    public CustomQueryParser(Version matchVersion, String f, Analyzer a) {
        super(matchVersion, f, a);
    }

    /**
     * Builds the field query via the parent parser and, when the result is a
     * {@link PhraseQuery}, converts it into an ordered {@link SpanNearQuery}.
     *
     * @param field     field the query text applies to
     * @param queryText raw (un-analyzed) phrase text
     * @param slop      phrase slop requested by the caller
     * @return the parent's query, or an ordered span query replacing a phrase query
     * @throws ParseException if the parent parser fails to build the query
     */
    @Override
    protected Query getFieldQuery(String field, String queryText, int slop)
            throws ParseException {
        Query orig = super.getFieldQuery(field, queryText, slop);

        // Anything that is not a PhraseQuery is returned as is.
        if (!(orig instanceof PhraseQuery)) {
            return orig;
        }

        // A PhraseQuery is converted into a SpanNearQuery.
        PhraseQuery pq = (PhraseQuery) orig;
        Term[] terms = pq.getTerms();
        if (terms.length == 0) {
            // Nothing to convert (and no positions to inspect below).
            return orig;
        }

        SpanTermQuery[] clauses = new SpanTermQuery[terms.length];
        for (int i = 0; i < terms.length; i++) {
            clauses[i] = new SpanTermQuery(terms[i]);
        }

        // The phrase query records a relative position per term. When the
        // analyzer leaves holes between tokens (e.g. removed stop words) the
        // positions are not consecutive; the span query has no notion of such
        // holes, so the slop is widened by the number of holes to keep those
        // phrases matchable. This is slightly more lenient than the phrase
        // query, which requires the exact hole width.
        int[] positions = pq.getPositions();
        int covered = positions[positions.length - 1] - positions[0];
        int holes = Math.max(0, covered - (terms.length - 1));

        // Pass true as the third argument so that term order is enforced.
        return new SpanNearQuery(clauses, slop + holes, true);
    }
}
/**
* The following is a partially modified version of code distributed under the Apache License, Version 2.0.
* http://www.apache.org/licenses/LICENSE-2.0.txt
*/
import static org.junit.Assert.*;
import java.io.IOException;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
/**
 * Compares the stock {@link QueryParser} with {@code CustomQueryParser} on
 * phrase queries against a single-document in-memory index.
 */
public class CustomQueryParserTest {

    /**
     * Checks that both parsers match an in-order phrase, and that only the
     * stock parser matches a phrase whose terms are written in reverse order.
     */
    @Test
    public void testPhraseQuery() throws Exception {
        Directory dir = new RAMDirectory();
        try {
            indexSampleDocument(dir);

            IndexReader reader = DirectoryReader.open(dir);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);

                // Default QueryParser
                QueryParser parser = new QueryParser(Version.LUCENE_43, "field",
                        new WhitespaceAnalyzer(Version.LUCENE_43));
                // Custom QueryParser
                QueryParser customParser = new CustomQueryParser(Version.LUCENE_43,
                        "field", new WhitespaceAnalyzer(Version.LUCENE_43));

                // With slop 1, both the default and the custom parser hit.
                assertTrue(matched(parser, searcher, "\"quick fox\"", 1));
                assertTrue(matched(customParser, searcher, "\"quick fox\"", 1));

                // With slop 3, the default parser hits even when the terms in
                // the phrase are reversed (given a large enough slop).
                assertTrue(matched(parser, searcher, "\"fox quick\"", 3));
                // The custom parser does not hit unless the term order matches.
                assertFalse(matched(customParser, searcher, "\"fox quick\"", 3));
            } finally {
                // Release the reader even when an assertion fails.
                reader.close();
            }
        } finally {
            dir.close();
        }
    }

    /** Writes the single sample sentence used by the test into {@code dir}. */
    private void indexSampleDocument(Directory dir) throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43,
                new WhitespaceAnalyzer(Version.LUCENE_43));
        IndexWriter writer = new IndexWriter(dir, config);
        try {
            Document doc = new Document();
            doc.add(new TextField("field",
                    "the quick brown fox jumped over the lazy dog", Store.YES));
            writer.addDocument(doc);
        } finally {
            // Close the writer even if adding the document throws.
            writer.close();
        }
    }

    /**
     * Parses {@code phrase} with the given parser and phrase slop and reports
     * whether the search returns at least one hit.
     *
     * <p>Note: this sets the phrase slop on the passed-in parser.
     */
    private boolean matched(QueryParser parser, IndexSearcher searcher,
            String phrase, int slop) throws IOException, ParseException {
        parser.setPhraseSlop(slop); // set the slop
        Query query = parser.parse(phrase);
        TopDocs docs = searcher.search(query, 10);
        return docs.totalHits > 0;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment