-
-
Save arjones/4051359 to your computer and use it in GitHub Desktop.
Lucene Facets Drill Down
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.facet.index.CategoryDocumentBuilder; | |
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams; | |
import org.apache.lucene.facet.index.params.FacetIndexingParams; | |
import org.apache.lucene.facet.search.DrillDown; | |
import org.apache.lucene.facet.search.FacetsCollector; | |
import org.apache.lucene.facet.search.params.CountFacetRequest; | |
import org.apache.lucene.facet.search.params.FacetRequest; | |
import org.apache.lucene.facet.search.params.FacetSearchParams; | |
import org.apache.lucene.facet.search.results.FacetResult; | |
import org.apache.lucene.facet.search.results.FacetResultNode; | |
import org.apache.lucene.facet.taxonomy.CategoryPath; | |
import org.apache.lucene.facet.taxonomy.TaxonomyReader; | |
import org.apache.lucene.facet.taxonomy.TaxonomyWriter; | |
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader; | |
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.MatchAllDocsQuery; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.QueryWrapperFilter; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.apache.lucene.util.Version; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.List; | |
/** | |
* @author Tobi Knaup | |
*/ | |
public class FacetExample { | |
Directory indexDirectory; | |
Directory taxonomyDirectory; | |
public static void main(String[] args) throws IOException { | |
FacetExample example = new FacetExample(); | |
example.run(); | |
} | |
FacetExample() throws IOException { | |
this.indexDirectory = new RAMDirectory(); | |
this.taxonomyDirectory = new RAMDirectory(); | |
} | |
void run() throws IOException { | |
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34, new StandardAnalyzer(Version.LUCENE_34)); | |
IndexWriter indexWriter = new IndexWriter(indexDirectory, config); | |
TaxonomyWriter taxonomyWriter = new LuceneTaxonomyWriter(taxonomyDirectory); | |
CategoryDocumentBuilder categoryDocumentBuilder = new CategoryDocumentBuilder(taxonomyWriter); | |
CategoryPath mission = new CategoryPath("neighborhood", "Mission"); | |
CategoryPath soma = new CategoryPath("neighborhood", "SOMA"); | |
// Add document 1, with neighborhood Mission | |
Document doc1 = new Document(); | |
List<CategoryPath> doc1Categories = new ArrayList<CategoryPath>(); | |
doc1Categories.add(mission); | |
categoryDocumentBuilder.setCategoryPaths(doc1Categories).build(doc1); | |
indexWriter.addDocument(doc1); | |
// Add document 1, with neighborhood SOMA | |
Document doc2 = new Document(); | |
List<CategoryPath> doc2Categories = new ArrayList<CategoryPath>(); | |
doc2Categories.add(soma); | |
categoryDocumentBuilder.setCategoryPaths(doc2Categories).build(doc2); | |
indexWriter.addDocument(doc2); | |
indexWriter.commit(); | |
taxonomyWriter.commit(); | |
IndexReader indexReader = IndexReader.open(indexDirectory, true); | |
IndexSearcher indexSearcher = new IndexSearcher(indexReader); | |
// Search for all documents | |
Query matchAllDocsQuery1 = new MatchAllDocsQuery(); | |
FacetsCollector facetsCollector1 = getFacetsCollector(); | |
indexSearcher.search(matchAllDocsQuery1, new QueryWrapperFilter(matchAllDocsQuery1), facetsCollector1); | |
// Output: | |
// SOMA: 1 | |
// Mission: 1 | |
System.out.println("All documents query"); | |
printFacetResult(facetsCollector1.getFacetResults()); | |
// Drill down query for the Mission | |
Query matchAllDocsQuery2 = new MatchAllDocsQuery(); | |
Query missionQuery = DrillDown.query(matchAllDocsQuery2, mission); | |
FacetsCollector facetsCollector2 = getFacetsCollector(); | |
indexSearcher.search(missionQuery, new QueryWrapperFilter(matchAllDocsQuery2), facetsCollector2); | |
// Output: | |
// Mission: 1 | |
// But I want the same facet counts like before. | |
// Is this possible without running two queries: one with the drill down query to retrieve the documents, | |
// and another one without the drill down query to count the facets? | |
System.out.println("Drill down query"); | |
printFacetResult(facetsCollector2.getFacetResults()); | |
} | |
void printFacetResult(List<FacetResult> facetResults) { | |
for (FacetResult facetResult : facetResults) { | |
FacetResultNode resultNode = facetResult.getFacetResultNode(); | |
if (resultNode.getNumSubResults() > 0) { | |
int numSubResults = resultNode.getNumSubResults(); | |
String facetName = resultNode.getLabel().lastComponent(); | |
for (FacetResultNode node : resultNode.getSubResults()) { | |
String label = node.getLabel().lastComponent(); | |
Integer count = (int) node.getValue(); | |
System.out.println(label + ": " + count); | |
} | |
} | |
} | |
} | |
FacetsCollector getFacetsCollector() throws IOException { | |
IndexReader indexReader = IndexReader.open(indexDirectory, true); | |
TaxonomyReader taxonomyReader = new LuceneTaxonomyReader(taxonomyDirectory); | |
CategoryPath neighborhood = new CategoryPath("neighborhood"); | |
FacetIndexingParams indexingParams = new DefaultFacetIndexingParams(); | |
FacetSearchParams facetSearchParams = new FacetSearchParams(indexingParams); | |
FacetRequest neighborhoodFacetRequest = new CountFacetRequest(neighborhood, 10); | |
facetSearchParams.addFacetRequest(neighborhoodFacetRequest); | |
return new FacetsCollector(facetSearchParams, indexReader, taxonomyReader); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment