blob: 07227d2c501975f708b6dfedf3c4f1c464ed9211 [file] [log] [blame]
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.CollectionUtil;
/** A conjunction of DocIdSetIterators.
* This iterates over the doc ids that are present in each given DocIdSetIterator.
* <br>Public only for use in {@link org.apache.lucene.search.spans}.
* @lucene.internal
*/
public class ConjunctionDISI extends DocIdSetIterator {
/** Create a conjunction over the provided iterators, taking advantage of
* {@link TwoPhaseIterator}. */
public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
if (iterators.size() < 2) {
throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators");
}
final List<DocIdSetIterator> allIterators = new ArrayList<>();
final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
for (DocIdSetIterator iter : iterators) {
addIterator(iter, allIterators, twoPhaseIterators);
}
if (twoPhaseIterators.isEmpty()) {
return new ConjunctionDISI(allIterators);
} else {
return new TwoPhase(allIterators, twoPhaseIterators);
}
}
/** Adds the iterator, possibly splitting up into two phases or collapsing if it is another conjunction */
private static void addIterator(DocIdSetIterator disi, List<DocIdSetIterator> allIterators, List<TwoPhaseIterator> twoPhaseIterators) {
// Check for exactly this class for collapsing. Subclasses can do their own optimizations.
if (disi.getClass() == ConjunctionScorer.class) {
addIterator(((ConjunctionScorer) disi).disi, allIterators, twoPhaseIterators);
} else if (disi.getClass() == ConjunctionDISI.class || disi.getClass() == TwoPhase.class) {
ConjunctionDISI conjunction = (ConjunctionDISI) disi;
// subconjuctions have already split themselves into two phase iterators and others, so we can take those
// iterators as they are and move them up to this conjunction
allIterators.add(conjunction.lead);
Collections.addAll(allIterators, conjunction.others);
if (conjunction.getClass() == TwoPhase.class) {
TwoPhase twoPhase = (TwoPhase) conjunction;
Collections.addAll(twoPhaseIterators, twoPhase.twoPhaseView.twoPhaseIterators);
}
} else {
TwoPhaseIterator twoPhaseIter = TwoPhaseIterator.asTwoPhaseIterator(disi);
if (twoPhaseIter != null) {
allIterators.add(twoPhaseIter.approximation());
twoPhaseIterators.add(twoPhaseIter);
} else { // no approximation support, use the iterator as-is
allIterators.add(disi);
}
}
}
final DocIdSetIterator lead;
final DocIdSetIterator[] others;
ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
assert iterators.size() >= 2;
// Sort the array the first time to allow the least frequent DocsEnum to
// lead the matching.
CollectionUtil.timSort(iterators, new Comparator<DocIdSetIterator>() {
@Override
public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
return Long.compare(o1.cost(), o2.cost());
}
});
lead = iterators.get(0);
others = iterators.subList(1, iterators.size()).toArray(new DocIdSetIterator[0]);
}
protected boolean matches() throws IOException {
return true;
}
TwoPhaseIterator asTwoPhaseIterator() {
return null;
}
private int doNext(int doc) throws IOException {
for(;;) {
if (doc == NO_MORE_DOCS) {
// we need this check because it is only ok to call #matches when positioned
return NO_MORE_DOCS;
}
advanceHead: for(;;) {
for (DocIdSetIterator other : others) {
// invariant: docsAndFreqs[i].doc <= doc at this point.
// docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead"
// on the previous iteration and the advance on the lead scorer exactly matched.
if (other.docID() < doc) {
final int next = other.advance(doc);
if (next > doc) {
// DocsEnum beyond the current doc - break and advance lead to the new highest doc.
doc = lead.advance(next);
break advanceHead;
}
}
}
if (matches()) {
// success - all DocsEnums are on the same doc
return doc;
} else {
doc = lead.nextDoc();
break advanceHead;
}
}
}
}
@Override
public int advance(int target) throws IOException {
return doNext(lead.advance(target));
}
@Override
public int docID() {
return lead.docID();
}
@Override
public int nextDoc() throws IOException {
return doNext(lead.nextDoc());
}
@Override
public long cost() {
return lead.cost(); // overestimate
}
/**
* {@link TwoPhaseIterator} view of a {@link TwoPhase} conjunction.
*/
private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
private final TwoPhaseIterator[] twoPhaseIterators;
private final float matchCost;
private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
super(new ConjunctionDISI(iterators));
assert twoPhaseIterators.size() > 0;
CollectionUtil.timSort(twoPhaseIterators, new Comparator<TwoPhaseIterator>() {
@Override
public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) {
return Float.compare(o1.matchCost(), o2.matchCost());
}
});
this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);
// Compute the matchCost as the total matchCost of the sub iterators.
// TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight.
float totalMatchCost = 0;
for (TwoPhaseIterator tpi : twoPhaseIterators) {
totalMatchCost += tpi.matchCost();
}
matchCost = totalMatchCost;
}
@Override
public boolean matches() throws IOException {
for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first
if (twoPhaseIterator.matches() == false) {
return false;
}
}
return true;
}
@Override
public float matchCost() {
return matchCost;
}
}
/**
* A conjunction DISI built on top of approximations. This implementation
* verifies that documents actually match by consulting the provided
* {@link TwoPhaseIterator}s.
*
* Another important difference with {@link ConjunctionDISI} is that this
* implementation supports approximations too: the approximation of this
* impl is the conjunction of the approximations of the wrapped iterators.
* This allows eg. {@code +"A B" +C} to be approximated as
* {@code +(+A +B) +C}.
*/
// NOTE: this is essentially the same as TwoPhaseDocIdSetIterator.asDocIdSetIterator
// but is its own impl in order to be able to expose a two-phase view
private static class TwoPhase extends ConjunctionDISI {
final TwoPhaseConjunctionDISI twoPhaseView;
private TwoPhase(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
super(iterators);
twoPhaseView = new TwoPhaseConjunctionDISI(iterators, twoPhaseIterators);
}
@Override
public TwoPhaseConjunctionDISI asTwoPhaseIterator() {
return twoPhaseView;
}
@Override
protected boolean matches() throws IOException {
return twoPhaseView.matches();
}
}
}