Last active
April 22, 2020 22:04
-
-
Save philippludwig/14e0d9b527a6522511ae79823adef73a to your computer and use it in GitHub Desktop.
A small example of how to use a CustomScoreQuery to influence Lucene's ranking.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package luceneTest; | |
import java.io.IOException; | |
import java.util.HashSet; | |
import java.util.Set; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.document.FieldType; | |
import org.apache.lucene.document.StoredField; | |
import org.apache.lucene.document.StringField; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.IndexOptions; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.index.IndexableField; | |
import org.apache.lucene.index.LeafReader; | |
import org.apache.lucene.index.LeafReaderContext; | |
import org.apache.lucene.queries.CustomScoreProvider; | |
import org.apache.lucene.queries.CustomScoreQuery; | |
import org.apache.lucene.queryparser.classic.ParseException; | |
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.lucene.search.TopDocs; | |
import org.apache.lucene.store.RAMDirectory; | |
public class WeightTest { | |
/** | |
* Make a Document object with an un-indexed title field and an indexed | |
* content field. | |
*/ | |
private static Document createDocument(String title, String content, Float value) { | |
Document doc = new Document(); | |
doc.add(new StringField("title", title, Field.Store.YES)); | |
doc.add(new StoredField("boost", value)); | |
FieldType type = new FieldType(); | |
type.setTokenized(true); | |
type.setStoreTermVectors(true); | |
type.setStored(false); | |
type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); | |
doc.add(new Field("content", content, type)); | |
return doc; | |
} | |
private static class MyScoreQuery extends CustomScoreQuery { | |
public MyScoreQuery(Query subQuery) { | |
super(subQuery); | |
} | |
private class MyScoreProvider extends CustomScoreProvider { | |
private LeafReader reader; | |
private Set<String> fieldsToLoad; | |
public MyScoreProvider(LeafReaderContext context) { | |
super(context); | |
reader = context.reader(); | |
fieldsToLoad = new HashSet<>(); | |
fieldsToLoad.add("boost"); | |
} | |
@Override | |
public float customScore(int doc_id, float currentScore, float valSrcScore) throws IOException { | |
Document doc = reader.document(doc_id, fieldsToLoad); | |
// Get boost value from index | |
float influence = 1f; | |
IndexableField field = doc.getField("boost"); | |
Number number = field.numericValue(); | |
float boost = number.floatValue() * influence; | |
return currentScore + boost; | |
} | |
} | |
@Override | |
public CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) { | |
return new MyScoreProvider(context); | |
} | |
} | |
/** | |
* Searches for the given string in the "content" field | |
*/ | |
private static void search(IndexSearcher searcher, String queryString) throws ParseException, IOException { | |
// Build a Query object | |
QueryParser parser = new QueryParser("content", new StandardAnalyzer()); | |
Query query = parser.parse(queryString); | |
// FunctionScoreQuery fsq = new FunctionScoreQuery(query, DoubleValuesSource.fromLongField("boost")); | |
MyScoreQuery msq = new MyScoreQuery(query); | |
// Search for the query | |
TopDocs hits = searcher.search(msq, 10); | |
// Examine the Hits object to see if there were any matches | |
int hitCount = hits.totalHits; | |
if (hitCount == 0) { | |
System.out.println("No matches were found for \"" + queryString + "\""); | |
} else { | |
System.out.println("Hits for \"" + queryString + "\":"); | |
// Iterate over the Documents in the Hits object | |
for (int i = 0; i < hitCount; i++) { | |
ScoreDoc doc = hits.scoreDocs[i]; | |
System.out.println(" " + (i + 1) + ". " + searcher.doc(doc.doc).get("title") + " - " + doc.score); | |
} | |
} | |
System.out.println(); | |
} | |
public static void main(String[] args) throws Exception { | |
RAMDirectory idx = new RAMDirectory(); | |
// Create Index | |
IndexWriter writer = new IndexWriter(idx, new IndexWriterConfig(new StandardAnalyzer())); | |
// Add some Document objects containing quotes | |
writer.addDocument(createDocument("Doc 1", "Some content", 20f)); | |
writer.addDocument(createDocument("Doc 2", "More content", 1.f)); | |
writer.addDocument(createDocument("Doc 3", "Even more content ", 10.f)); | |
// Close index | |
writer.close(); | |
// Build an IndexSearcher using the in-memory index | |
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(idx)); | |
// Run some queries | |
search(searcher, "content"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment