Last active
January 23, 2020 14:50
-
-
Save mika76/712d93ca5938b6a165cf32ad41b5e81a to your computer and use it in GitHub Desktop.
QueryVisitor for rewriting Lucene.Net queries (for example, turning plain terms into wildcard/prefix queries) - see https://stackoverflow.com/a/5748786/11421 - originally from the Random Ramblings blog (https://web.archive.org/web/20130207075825/http://devhost.se/blog/post/2011/04/21/A-QueryVisitor-for-Lucene.aspx)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Usage example: parse a query against the default field "f", then rewrite
// every plain term query into a prefix query with PrefixRewriter.
//
// The visitor classes use the Lucene.Net 3.0.x API, where StandardAnalyzer and
// QueryParser both require an explicit match version. The version type is fully
// qualified so it cannot be confused with System.Version.
var matchVersion = Lucene.Net.Util.Version.LUCENE_30;
var queryParser = new QueryParser(matchVersion, "f", new StandardAnalyzer(matchVersion));
var query = queryParser.Parse("awesome rewrite^0.5 \"including one phrase\"");
var rewritten = new PrefixRewriter().Visit(query);
Console.WriteLine(query);
Console.WriteLine(rewritten);
// Outputs...
// f:awesome f:rewrite^0.5 f:"including one phrase"
// f:awesome* f:rewrite*^0.5 f:"including one phrase"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Lucene.Net.Index; | |
using Lucene.Net.Search; | |
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Reflection; | |
namespace Lucene.Helpers | |
{ | |
// https://stackoverflow.com/a/5748786/11421 | |
// https://web.archive.org/web/20130207075825/http://devhost.se/blog/post/2011/04/21/A-QueryVisitor-for-Lucene.aspx | |
/// <summary>
/// Walks a Lucene.Net <see cref="Query"/> tree and rebuilds it node by node.
/// Subclasses override the individual <c>Visit*</c> hooks to rewrite fields,
/// term text, occurrence flags, or whole sub-queries.
/// </summary>
/// <remarks>
/// A hook may return <c>null</c> to drop the node it was given. Composite
/// visitors (boolean, phrase, disjunction-max) skip dropped children and
/// return <c>null</c> themselves when nothing is left.
/// </remarks>
public class QueryVisitor
{
    // DisjunctionMaxQuery is read through a private field here, so the lookup
    // is done once and cached. It is null when the field does not exist in the
    // referenced Lucene.Net build; that case is reported at the point of use.
    private static readonly FieldInfo TieBreakerMultiplierField =
        typeof(DisjunctionMaxQuery).GetField("tieBreakerMultiplier", BindingFlags.Instance | BindingFlags.NonPublic);

    /// <summary>
    /// Dispatches <paramref name="query"/> to the visitor method matching its runtime type.
    /// </summary>
    /// <param name="query">The query to rewrite.</param>
    /// <returns>
    /// The rewritten query, <c>null</c> if the visitor dropped it, or
    /// <paramref name="query"/> itself when its type is not handled here.
    /// </returns>
    public virtual Query Visit(Query query)
    {
        var booleanQuery = query as BooleanQuery;
        if (booleanQuery != null)
        {
            return VisitBooleanQuery(booleanQuery);
        }

        var disjunctionMaxQuery = query as DisjunctionMaxQuery;
        if (disjunctionMaxQuery != null)
        {
            return VisitDisjunctionMaxQuery(disjunctionMaxQuery);
        }

        var multiPhraseQuery = query as MultiPhraseQuery;
        if (multiPhraseQuery != null)
        {
            return VisitMultiPhraseQuery(multiPhraseQuery);
        }

        var phraseQuery = query as PhraseQuery;
        if (phraseQuery != null)
        {
            return VisitPhraseQuery(phraseQuery);
        }

        var prefixQuery = query as PrefixQuery;
        if (prefixQuery != null)
        {
            return VisitPrefixQuery(prefixQuery);
        }

        var termQuery = query as TermQuery;
        if (termQuery != null)
        {
            return VisitTermQuery(termQuery);
        }

        var termRangeQuery = query as TermRangeQuery;
        if (termRangeQuery != null)
        {
            return VisitTermRangeQuery(termRangeQuery);
        }

        // Unknown query types (and null) are passed through untouched.
        return query;
    }

    /// <summary>
    /// Visits a <see cref="MultiPhraseQuery"/>. The default implementation returns it unchanged.
    /// </summary>
    protected virtual Query VisitMultiPhraseQuery(MultiPhraseQuery query)
    {
        return query;
    }

    /// <summary>
    /// Rebuilds a <see cref="TermQuery"/> around the term returned by <see cref="VisitTerm"/>.
    /// </summary>
    /// <returns>The new query with the original boost, or <c>null</c> if the term was missing or dropped.</returns>
    protected virtual Query VisitTermQuery(TermQuery query)
    {
        var oldTerm = query.Term;
        if (oldTerm == null)
        {
            return null;
        }

        var newTerm = VisitTerm(oldTerm);
        if (newTerm == null)
        {
            return null;
        }

        return CopyBoost(query, new TermQuery(newTerm));
    }

    /// <summary>
    /// Rebuilds a <see cref="BooleanQuery"/> from its visited clauses.
    /// </summary>
    /// <returns>
    /// A new boolean query carrying the surviving clauses plus the original boost,
    /// coord setting and minimum-should-match value, or <c>null</c> if every clause was dropped.
    /// </returns>
    protected virtual Query VisitBooleanQuery(BooleanQuery query)
    {
        var newClauses = query.GetClauses()
            .Select(clause => VisitBooleanClause(clause))
            .Where(clause => clause != null)
            .ToList();
        if (newClauses.Count == 0)
        {
            return null;
        }

        // Carry over the scoring/matching settings; a plain "new BooleanQuery()"
        // would silently reset both of them.
        var newQuery = new BooleanQuery(query.IsCoordDisabled());
        newQuery.MinimumNumberShouldMatch = query.MinimumNumberShouldMatch;
        foreach (var clause in newClauses)
        {
            newQuery.Add(clause);
        }

        return CopyBoost(query, newQuery);
    }

    /// <summary>
    /// Rebuilds a <see cref="PhraseQuery"/> from its visited terms.
    /// </summary>
    /// <returns>
    /// A new phrase query with the surviving terms at their original positions and the
    /// original slop and boost, or <c>null</c> if every term was dropped.
    /// </returns>
    protected virtual Query VisitPhraseQuery(PhraseQuery query)
    {
        var oldTerms = query.GetTerms();
        var oldPositions = query.GetPositions();

        var newQuery = new PhraseQuery();
        newQuery.Slop = query.Slop;

        var keptTerms = 0;
        for (var i = 0; i < oldTerms.Length; i++)
        {
            var newTerm = VisitTerm(oldTerms[i]);
            if (newTerm == null)
            {
                continue;
            }

            // Re-use the recorded position so gaps between terms are kept
            // instead of being renumbered consecutively.
            newQuery.Add(newTerm, oldPositions[i]);
            keptTerms++;
        }

        if (keptTerms == 0)
        {
            return null;
        }

        return CopyBoost(query, newQuery);
    }

    /// <summary>
    /// Rebuilds a <see cref="PrefixQuery"/> around the prefix term returned by <see cref="VisitTerm"/>.
    /// </summary>
    /// <returns>The new query with the original boost, or <c>null</c> if the prefix was missing or dropped.</returns>
    protected virtual Query VisitPrefixQuery(PrefixQuery query)
    {
        var oldTerm = query.Prefix;
        if (oldTerm == null)
        {
            return null;
        }

        var newTerm = VisitTerm(oldTerm);
        if (newTerm == null)
        {
            return null;
        }

        return CopyBoost(query, new PrefixQuery(newTerm));
    }

    /// <summary>
    /// Rebuilds a <see cref="DisjunctionMaxQuery"/> from its visited sub-queries.
    /// </summary>
    /// <returns>
    /// A new disjunction-max query with the surviving sub-queries, the original tie-breaker
    /// multiplier and the original boost, or <c>null</c> if every sub-query was dropped.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The private <c>tieBreakerMultiplier</c> field could not be found on <see cref="DisjunctionMaxQuery"/>.
    /// </exception>
    protected virtual Query VisitDisjunctionMaxQuery(DisjunctionMaxQuery query)
    {
        var queries = new List<Query>();
        // foreach also disposes the enumerator, which the manual MoveNext loop did not.
        foreach (Query subQuery in query)
        {
            if (subQuery == null)
            {
                continue;
            }

            var visited = Visit(subQuery);
            if (visited != null)
            {
                queries.Add(visited);
            }
        }

        if (queries.Count == 0)
        {
            return null;
        }

        if (TieBreakerMultiplierField == null)
        {
            throw new InvalidOperationException(
                "DisjunctionMaxQuery has no private 'tieBreakerMultiplier' field in the referenced Lucene.Net version.");
        }

        var tieBreakerMultiplier = (float)TieBreakerMultiplierField.GetValue(query);
        return CopyBoost(query, new DisjunctionMaxQuery(queries, tieBreakerMultiplier));
    }

    /// <summary>
    /// Rebuilds a <see cref="TermRangeQuery"/> from its visited field and bounds.
    /// </summary>
    /// <returns>
    /// A new range query with the original boost, or <c>null</c> if <see cref="VisitField"/> dropped the field.
    /// A bound that is missing or dropped becomes open-ended and is marked inclusive.
    /// </returns>
    protected virtual TermRangeQuery VisitTermRangeQuery(TermRangeQuery query)
    {
        var newField = VisitField(query.Field);
        if (newField == null)
        {
            return null;
        }

        var oldLowerTerm = query.LowerTerm;
        var newLowerTerm = (oldLowerTerm != null) ? VisitTermText(oldLowerTerm) : null;
        var newIncludesLower = query.IncludesLower || (newLowerTerm == null);

        var oldUpperTerm = query.UpperTerm;
        var newUpperTerm = (oldUpperTerm != null) ? VisitTermText(oldUpperTerm) : null;
        var newIncludesUpper = query.IncludesUpper || (newUpperTerm == null);

        var newQuery = new TermRangeQuery(newField, newLowerTerm, newUpperTerm, newIncludesLower, newIncludesUpper);
        return CopyBoost(query, newQuery);
    }

    /// <summary>
    /// Rebuilds a <see cref="BooleanClause"/> from its visited query and occurrence flag.
    /// </summary>
    /// <returns>The new clause, or <c>null</c> if the clause had no query or its query was dropped.</returns>
    protected virtual BooleanClause VisitBooleanClause(BooleanClause clause)
    {
        var oldQuery = clause.Query;
        if (oldQuery == null)
        {
            return null;
        }

        var newQuery = Visit(oldQuery);
        if (newQuery == null)
        {
            return null;
        }

        return new BooleanClause(newQuery, VisitOccur(clause.Occur));
    }

    /// <summary>
    /// Visits the occurrence flag of a boolean clause. The default implementation returns it unchanged.
    /// </summary>
    protected virtual Occur VisitOccur(Occur occur)
    {
        return occur;
    }

    /// <summary>
    /// Rebuilds a <see cref="Term"/> from its visited field name and text.
    /// </summary>
    /// <returns>
    /// The new term, or <c>null</c> if <paramref name="term"/> is <c>null</c> or either
    /// <see cref="VisitField"/> or <see cref="VisitTermText"/> returned <c>null</c>.
    /// </returns>
    protected virtual Term VisitTerm(Term term)
    {
        if (term == null)
        {
            return null;
        }

        var newField = VisitField(term.Field);
        if (newField == null)
        {
            return null;
        }

        var newTermText = VisitTermText(term.Text);
        if (newTermText == null)
        {
            return null;
        }

        return new Term(newField, newTermText);
    }

    /// <summary>
    /// Visits a field name. The default implementation returns it unchanged;
    /// returning <c>null</c> drops the term or range query that uses the field.
    /// </summary>
    protected virtual String VisitField(String field)
    {
        return field;
    }

    /// <summary>
    /// Visits the text of a term or range bound. The default implementation returns it unchanged.
    /// </summary>
    protected virtual String VisitTermText(String termText)
    {
        return termText;
    }

    /// <summary>
    /// Copies the boost of <paramref name="source"/> onto <paramref name="target"/>.
    /// </summary>
    /// <typeparam name="T">Concrete query type, preserved in the return value.</typeparam>
    /// <param name="source">Query to read the boost from.</param>
    /// <param name="target">Query to receive the boost; may be <c>null</c>.</param>
    /// <returns><paramref name="target"/>, which is left as <c>null</c> when it was <c>null</c>.</returns>
    protected virtual T CopyBoost<T>(Query source, T target) where T : Query
    {
        if (target != null)
        {
            target.Boost = source.Boost;
        }

        return target;
    }
}
/// <summary>
/// A <see cref="QueryVisitor"/> that replaces every <see cref="TermQuery"/> with a
/// <see cref="PrefixQuery"/> on the same term, keeping the original boost.
/// Other query types are handled by the base visitor.
/// </summary>
public class PrefixRewriter : QueryVisitor
{
    /// <summary>
    /// Turns a term query into a prefix query.
    /// </summary>
    /// <param name="query">The term query to convert.</param>
    /// <returns>
    /// A prefix query with the boost of <paramref name="query"/>, or <c>null</c> when the
    /// query has no term or <see cref="QueryVisitor.VisitTerm"/> returned <c>null</c>.
    /// </returns>
    protected override Query VisitTermQuery(TermQuery query)
    {
        var oldTerm = query.Term;
        if (oldTerm == null)
        {
            return null;
        }

        // Route the term through VisitTerm, like the base visitors do, so that
        // field/text rewrites from further subclasses also apply here.
        var newTerm = VisitTerm(oldTerm);
        if (newTerm == null)
        {
            return null;
        }

        return CopyBoost(query, new PrefixQuery(newTerm));
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment