blob: fa86505088d807ff05aa725c128d425d67bac866 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
/**
* A {@link MatchesIterator} that combines matches from a set of sub-iterators
*
* <p>Matches are sorted by their start positions, and then by their end positions, so that prefixes
* sort first. Matches may overlap, or be duplicated if they appear in more than one of the
* sub-iterators.
*/
final class DisjunctionMatchesIterator implements MatchesIterator {
/**
* Create a {@link DisjunctionMatchesIterator} over a list of terms
*
* <p>Only terms that have at least one match in the given document will be included
*/
static MatchesIterator fromTerms(
LeafReaderContext context, int doc, Query query, String field, List<Term> terms)
throws IOException {
Objects.requireNonNull(field);
for (Term term : terms) {
if (Objects.equals(field, term.field()) == false) {
throw new IllegalArgumentException(
"Tried to generate iterator from terms in multiple fields: expected ["
+ field
+ "] but got ["
+ term.field()
+ "]");
}
}
return fromTermsEnum(context, doc, query, field, asBytesRefIterator(terms));
}
private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
return new BytesRefIterator() {
int i = 0;
@Override
public BytesRef next() {
if (i >= terms.size()) return null;
return terms.get(i++).bytes();
}
};
}
/**
* Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link
* BytesRefIterator}
*
* <p>Only terms that have at least one match in the given document will be included
*/
static MatchesIterator fromTermsEnum(
LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms)
throws IOException {
Objects.requireNonNull(field);
Terms t = Terms.getTerms(context.reader(), field);
TermsEnum te = t.iterator();
PostingsEnum reuse = null;
for (BytesRef term = terms.next(); term != null; term = terms.next()) {
if (te.seekExact(term)) {
PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
if (pe.advance(doc) == doc) {
return new TermsEnumDisjunctionMatchesIterator(
new TermMatchesIterator(query, pe), terms, te, doc, query);
} else {
reuse = pe;
}
}
}
return null;
}
// MatchesIterator over a set of terms that only loads the first matching term at construction,
// waiting until the iterator is actually used before it loads all other matching terms.
private static class TermsEnumDisjunctionMatchesIterator implements MatchesIterator {
private final MatchesIterator first;
private final BytesRefIterator terms;
private final TermsEnum te;
private final int doc;
private final Query query;
private MatchesIterator it = null;
TermsEnumDisjunctionMatchesIterator(
MatchesIterator first, BytesRefIterator terms, TermsEnum te, int doc, Query query) {
this.first = first;
this.terms = terms;
this.te = te;
this.doc = doc;
this.query = query;
}
private void init() throws IOException {
List<MatchesIterator> mis = new ArrayList<>();
mis.add(first);
PostingsEnum reuse = null;
for (BytesRef term = terms.next(); term != null; term = terms.next()) {
if (te.seekExact(term)) {
PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
if (pe.advance(doc) == doc) {
mis.add(new TermMatchesIterator(query, pe));
reuse = null;
} else {
reuse = pe;
}
}
}
it = fromSubIterators(mis);
}
@Override
public boolean next() throws IOException {
if (it == null) {
init();
}
assert it != null;
return it.next();
}
@Override
public int startPosition() {
return it.startPosition();
}
@Override
public int endPosition() {
return it.endPosition();
}
@Override
public int startOffset() throws IOException {
return it.startOffset();
}
@Override
public int endOffset() throws IOException {
return it.endOffset();
}
@Override
public MatchesIterator getSubMatches() throws IOException {
return it.getSubMatches();
}
@Override
public Query getQuery() {
return it.getQuery();
}
}
static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {
if (mis.size() == 0) return null;
if (mis.size() == 1) return mis.get(0);
return new DisjunctionMatchesIterator(mis);
}
private final PriorityQueue<MatchesIterator> queue;
private boolean started = false;
private DisjunctionMatchesIterator(List<MatchesIterator> matches) throws IOException {
queue =
new PriorityQueue<MatchesIterator>(matches.size()) {
@Override
protected boolean lessThan(MatchesIterator a, MatchesIterator b) {
return a.startPosition() < b.startPosition()
|| (a.startPosition() == b.startPosition() && a.endPosition() < b.endPosition())
|| (a.startPosition() == b.startPosition() && a.endPosition() == b.endPosition());
}
};
for (MatchesIterator mi : matches) {
if (mi.next()) {
queue.add(mi);
}
}
}
@Override
public boolean next() throws IOException {
if (started == false) {
started = true;
return queue.size() > 0;
}
if (queue.top().next() == false) {
queue.pop();
}
if (queue.size() > 0) {
queue.updateTop();
return true;
}
return false;
}
@Override
public int startPosition() {
return queue.top().startPosition();
}
@Override
public int endPosition() {
return queue.top().endPosition();
}
@Override
public int startOffset() throws IOException {
return queue.top().startOffset();
}
@Override
public int endOffset() throws IOException {
return queue.top().endOffset();
}
@Override
public MatchesIterator getSubMatches() throws IOException {
return queue.top().getSubMatches();
}
@Override
public Query getQuery() {
return queue.top().getQuery();
}
}