Skip to content

Instantly share code, notes, and snippets.

@rjernst
Created June 19, 2015 05:18
Show Gist options
  • Save rjernst/4c0bb396b9ad981387af to your computer and use it in GitHub Desktop.
Save rjernst/4c0bb396b9ad981387af to your computer and use it in GitHub Desktop.
Index: src/java/org/apache/lucene/search/ConjunctionDISI.java
===================================================================
--- src/java/org/apache/lucene/search/ConjunctionDISI.java (revision 1681184)
+++ src/java/org/apache/lucene/search/ConjunctionDISI.java (working copy)
@@ -38,13 +38,7 @@
final List<DocIdSetIterator> allIterators = new ArrayList<>();
final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
for (DocIdSetIterator iter : iterators) {
- TwoPhaseIterator twoPhaseIter = TwoPhaseIterator.asTwoPhaseIterator(iter);
- if (twoPhaseIter != null) {
- allIterators.add(twoPhaseIter.approximation());
- twoPhaseIterators.add(twoPhaseIter);
- } else { // no approximation support, use the iterator as-is
- allIterators.add(iter);
- }
+ collapseSubs(iter, allIterators, twoPhaseIterators);
}
if (twoPhaseIterators.isEmpty()) {
@@ -54,6 +48,25 @@
}
}
+ // Recursively flattens disi into allIterators/twoPhaseIterators: a nested ConjunctionDISI contributes its lead and others instead of itself.
+ private static void collapseSubs(DocIdSetIterator disi, List<DocIdSetIterator> allIterators, List<TwoPhaseIterator> twoPhaseIterators) {
+ if (disi instanceof ConjunctionDISI) {
+ ConjunctionDISI conjunction = (ConjunctionDISI)disi;
+ collapseSubs(conjunction.lead, allIterators, twoPhaseIterators); // NOTE(review): only lead/others are carried over; if a sub conjunction keeps two-phase confirmation state elsewhere it would be dropped here -- confirm
+ for (DocIdSetIterator sub : conjunction.others) {
+ collapseSubs(sub, allIterators, twoPhaseIterators); // each sub may itself be a conjunction, hence the recursion
+ }
+ } else {
+ TwoPhaseIterator twoPhaseIter = TwoPhaseIterator.asTwoPhaseIterator(disi); // null is handled below as "no approximation support"
+ if (twoPhaseIter != null) {
+ allIterators.add(twoPhaseIter.approximation()); // iterate over the approximation rather than disi itself
+ twoPhaseIterators.add(twoPhaseIter); // recorded alongside its approximation
+ } else { // no approximation support, use the iterator as-is
+ allIterators.add(disi);
+ }
+ }
+ }
+
final DocIdSetIterator lead;
final DocIdSetIterator[] others;
Index: src/test/org/apache/lucene/search/TestConjunctionDISI.java
===================================================================
--- src/test/org/apache/lucene/search/TestConjunctionDISI.java (revision 1681184)
+++ src/test/org/apache/lucene/search/TestConjunctionDISI.java (working copy)
@@ -19,6 +19,8 @@
import java.io.IOException;
import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;
@@ -244,4 +246,45 @@
}
}
+ public void testCollapseSubConjunctions() throws IOException { // a conjunction built from nested sub conjunctions must match intersect(sets)
+ final int iters = atLeast(100);
+ for (int iter = 0; iter < iters; ++iter) {
+ final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
+ final int numIterators = TestUtil.nextInt(random(), 5, 10);
+ final FixedBitSet[] sets = new FixedBitSet[numIterators]; // expected inputs, one per leaf iterator
+ final List<DocIdSetIterator> iterators = new LinkedList<>();
+ for (int i = 0; i < numIterators; ++i) {
+ final FixedBitSet set = randomSet(maxDoc);
+ if (random().nextBoolean()) {
+ // simple iterator
+ sets[i] = set;
+ iterators.add(new BitDocIdSet(set).iterator());
+ } else {
+ // scorer with approximation
+ final FixedBitSet confirmed = clearRandomBits(set);
+ sets[i] = confirmed; // the expected result is computed from the confirmed set, not the approximation
+ final TwoPhaseIterator approximation = approximation(new BitDocIdSet(set).iterator(), confirmed);
+ iterators.add(scorer(approximation));
+ }
+ }
+
+ // make some sub sequences into sub conjunctions
+ final int subIters = atLeast(3);
+ for (int subIter = 0; subIter < subIters && iterators.size() > 3; ++subIter) { // stop nesting once 3 or fewer iterators remain
+ final int subSeqStart = TestUtil.nextInt(random(), 0, iterators.size() - 2);
+ final int subSeqEnd = TestUtil.nextInt(random(), subSeqStart + 2, iterators.size());
+ // collapse the half-open range [subSeqStart, subSeqEnd) into one sub conjunction that takes the place of its first element
+ final ConjunctionDISI subConjunction = ConjunctionDISI.intersect(iterators.subList(subSeqStart, subSeqEnd));
+ iterators.set(subSeqStart, subConjunction);
+ int toRemove = subSeqEnd - subSeqStart - 1; // remaining elements of the range, now represented by subConjunction
+ while (toRemove-- > 0) {
+ iterators.remove(subSeqStart + 1); // the list shifts left, so the index stays fixed
+ }
+ }
+
+ final ConjunctionDISI conjunction = ConjunctionDISI.intersect(iterators);
+ assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
+ }
+ }
+
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment