blob: 2e599fa2b7c1820824528a42cabba6f3ee11b1c4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* Exposes the {@link Terms} API of a field, merged from the {@link Terms} of
* sub-segments.
*
* @lucene.experimental
*/
public final class MultiTerms extends Terms {
  /** The per-sub-reader {@link Terms} being merged; parallel to {@link #subSlices}. */
  private final Terms[] subs;
  /** One slice per entry of {@link #subs}, describing the sub-reader it came from. */
  private final ReaderSlice[] subSlices;
  // Capability flags, aggregated over all subs once in the constructor.
  private final boolean hasFreqs;
  private final boolean hasOffsets;
  private final boolean hasPositions;
  private final boolean hasPayloads;

  /**
   * Sole constructor. Use {@link #getTerms(IndexReader, String)} instead if possible.
   *
   * <p>The arrays are stored as given (no defensive copy).
   *
   * @param subs The {@link Terms} instances of all sub-readers.
   * @param subSlices A parallel array (matching {@code
   *        subs}) describing the sub-reader slices.
   * @lucene.internal
   */
  public MultiTerms(Terms[] subs, ReaderSlice[] subSlices) throws IOException { //TODO make private?
    this.subs = subs;
    this.subSlices = subSlices;

    // Only an empty array is rejected here. A single sub is legitimate: getTerms() builds
    // one when just one of several leaves has the field.
    assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";

    // Freqs/offsets/positions are reported only if EVERY sub has them; payloads are
    // collected with OR and then gated on positions below.
    boolean _hasFreqs = true;
    boolean _hasOffsets = true;
    boolean _hasPositions = true;
    boolean _hasPayloads = false;
    for (int i = 0; i < subs.length; i++) {
      _hasFreqs &= subs[i].hasFreqs();
      _hasOffsets &= subs[i].hasOffsets();
      _hasPositions &= subs[i].hasPositions();
      _hasPayloads |= subs[i].hasPayloads();
    }

    hasFreqs = _hasFreqs;
    hasOffsets = _hasOffsets;
    hasPositions = _hasPositions;
    hasPayloads = hasPositions && _hasPayloads; // if all subs have pos, and at least one has payloads.
  }

  /**
   * Returns the merged {@link Terms} of {@code field} across all leaves of {@code r}.
   *
   * <p>If the reader has exactly one leaf, that leaf's {@link Terms} is returned directly
   * (no {@link MultiTerms} wrapper). Otherwise leaves that have no {@link Terms} for the
   * field are skipped and the remaining ones are merged.
   *
   * <p>This method may return null if the field does not exist or if it has no terms.
   */
  public static Terms getTerms(IndexReader r, String field) throws IOException {
    final List<LeafReaderContext> leaves = r.leaves();
    if (leaves.size() == 1) {
      return leaves.get(0).reader().terms(field);
    }

    final List<Terms> termsPerLeaf = new ArrayList<>(leaves.size());
    final List<ReaderSlice> slicePerLeaf = new ArrayList<>(leaves.size());

    for (int leafIdx = 0; leafIdx < leaves.size(); leafIdx++) {
      LeafReaderContext ctx = leaves.get(leafIdx);
      Terms subTerms = ctx.reader().terms(field);
      if (subTerms != null) {
        termsPerLeaf.add(subTerms);
        // The slice describes this leaf only: it starts at the leaf's docBase and spans the
        // leaf's own maxDoc (not the maxDoc of the whole composite reader).
        slicePerLeaf.add(new ReaderSlice(ctx.docBase, ctx.reader().maxDoc(), leafIdx));
      }
    }

    if (termsPerLeaf.isEmpty()) {
      return null;
    }
    return new MultiTerms(termsPerLeaf.toArray(EMPTY_ARRAY),
                          slicePerLeaf.toArray(ReaderSlice.EMPTY_ARRAY));
  }

  /** Returns {@link PostingsEnum} for the specified
   *  field and term, requesting all features ({@link PostingsEnum#ALL}).
   *  This will return null if the field or term does not exist.
   *  @see #getTermPostingsEnum(IndexReader, String, BytesRef, int) */
  public static PostingsEnum getTermPostingsEnum(IndexReader r, String field, BytesRef term) throws IOException {
    return getTermPostingsEnum(r, field, term, PostingsEnum.ALL);
  }

  /** Returns {@link PostingsEnum} for the specified
   *  field and term, with control over whether freqs, positions, offsets or payloads
   *  are required.  Some codecs may be able to optimize
   *  their implementation when offsets and/or payloads are not
   *  required.  This will return null if the field or term does not
   *  exist.  See {@link TermsEnum#postings(PostingsEnum,int)}. */
  public static PostingsEnum getTermPostingsEnum(IndexReader r, String field, BytesRef term, int flags) throws IOException {
    assert field != null;
    assert term != null;
    final Terms terms = getTerms(r, field);
    if (terms == null) {
      return null; // field has no terms in this reader
    }
    final TermsEnum termsEnum = terms.iterator();
    if (!termsEnum.seekExact(term)) {
      return null; // term not present in the field
    }
    return termsEnum.postings(null, flags);
  }

  /** Expert: returns the Terms being merged. The internal array is returned, not a copy. */
  public Terms[] getSubTerms() {
    return subs;
  }

  /**
   * Expert: returns pointers to the sub-readers corresponding to the Terms being merged.
   * The internal array is returned, not a copy.
   */
  public ReaderSlice[] getSubSlices() {
    return subSlices;
  }

  /**
   * Intersects every sub with the automaton and merges the resulting enums. Subs whose
   * {@code intersect} returns null are skipped; if none remain, {@link TermsEnum#EMPTY}
   * is returned.
   */
  @Override
  public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
    final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>(subs.length);
    for (int i = 0; i < subs.length; i++) {
      final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
      if (termsEnum != null) {
        termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
      }
    }
    return mergeEnums(termsEnums);
  }

  /**
   * Returns the smallest of the subs' minimum terms. Subs that report {@code null} (no
   * minimum term) are ignored; returns {@code null} only if every sub reports {@code null}.
   */
  @Override
  public BytesRef getMin() throws IOException {
    BytesRef minTerm = null;
    for (Terms terms : subs) {
      final BytesRef term = terms.getMin();
      if (term == null) {
        continue; // this sub has no minimum to contribute; comparing it would NPE
      }
      if (minTerm == null || term.compareTo(minTerm) < 0) {
        minTerm = term;
      }
    }
    return minTerm;
  }

  /**
   * Returns the largest of the subs' maximum terms. Subs that report {@code null} (no
   * maximum term) are ignored; returns {@code null} only if every sub reports {@code null}.
   */
  @Override
  public BytesRef getMax() throws IOException {
    BytesRef maxTerm = null;
    for (Terms terms : subs) {
      final BytesRef term = terms.getMax();
      if (term == null) {
        continue; // this sub has no maximum to contribute; comparing it would NPE
      }
      if (maxTerm == null || term.compareTo(maxTerm) > 0) {
        maxTerm = term;
      }
    }
    return maxTerm;
  }

  /**
   * Returns an enum over the merged terms of all subs. Subs whose {@code iterator()}
   * returns null are skipped; if none remain, {@link TermsEnum#EMPTY} is returned.
   */
  @Override
  public TermsEnum iterator() throws IOException {
    final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>(subs.length);
    for (int i = 0; i < subs.length; i++) {
      final TermsEnum termsEnum = subs[i].iterator();
      if (termsEnum != null) {
        termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
      }
    }
    return mergeEnums(termsEnums);
  }

  /**
   * Wraps the given per-sub enums in a {@link MultiTermsEnum} over {@link #subSlices},
   * or returns {@link TermsEnum#EMPTY} when the list is empty.
   */
  private TermsEnum mergeEnums(List<MultiTermsEnum.TermsEnumIndex> termsEnums) throws IOException {
    if (termsEnums.isEmpty()) {
      return TermsEnum.EMPTY;
    }
    return new MultiTermsEnum(subSlices)
        .reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
  }

  /** Always returns -1: the number of unique terms across subs is not computed here. */
  @Override
  public long size() {
    return -1;
  }

  /** Returns the sum of {@code getSumTotalTermFreq()} over all subs. */
  @Override
  public long getSumTotalTermFreq() throws IOException {
    long sum = 0;
    for (Terms terms : subs) {
      final long v = terms.getSumTotalTermFreq();
      assert v != -1;
      sum += v;
    }
    return sum;
  }

  /** Returns the sum of {@code getSumDocFreq()} over all subs. */
  @Override
  public long getSumDocFreq() throws IOException {
    long sum = 0;
    for (Terms terms : subs) {
      final long v = terms.getSumDocFreq();
      assert v != -1;
      sum += v;
    }
    return sum;
  }

  /** Returns the sum of {@code getDocCount()} over all subs. */
  @Override
  public int getDocCount() throws IOException {
    int sum = 0;
    for (Terms terms : subs) {
      final int v = terms.getDocCount();
      assert v != -1;
      sum += v;
    }
    return sum;
  }

  /** Returns true only if every sub has freqs. */
  @Override
  public boolean hasFreqs() {
    return hasFreqs;
  }

  /** Returns true only if every sub has offsets. */
  @Override
  public boolean hasOffsets() {
    return hasOffsets;
  }

  /** Returns true only if every sub has positions. */
  @Override
  public boolean hasPositions() {
    return hasPositions;
  }

  /** Returns true if every sub has positions and at least one sub has payloads. */
  @Override
  public boolean hasPayloads() {
    return hasPayloads;
  }
}