blob: 199b2f758834b4f069e30bb0c7bd3785fba891a6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.uniformsplit.sharedterms;
import java.io.IOException;
import java.util.List;
import java.util.RandomAccess;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
/**
 * A {@link TermsEnum} used while merging segments: for a single field and a single term, it exposes
 * the postings of every contributing segment as one combined {@link PostingsEnum}.
 *
 * <p>Only {@link #term()} and {@link #postings(PostingsEnum, int)} are supported. Every other
 * {@link TermsEnum} operation throws {@link UnsupportedOperationException}.
 *
 * @lucene.experimental
 */
public class STMergingTermsEnum extends TermsEnum {

  /** Name of the field whose postings are combined. */
  protected final String fieldName;

  /** Single reusable postings view, handed out by every call to {@link #postings}. */
  protected final MultiSegmentsPostingsEnum multiPostingsEnum;

  /** Term currently positioned on; set by {@link #reset}. */
  protected BytesRef term;

  /**
   * Constructs a {@link STMergingTermsEnum} for a given field.
   *
   * @param fieldName Name of the field.
   * @param numSegments Number of slots reserved for reusable per-segment {@link PostingsEnum}.
   */
  protected STMergingTermsEnum(String fieldName, int numSegments) {
    this.fieldName = fieldName;
    this.multiPostingsEnum = new MultiSegmentsPostingsEnum(numSegments);
  }

  /**
   * Positions this enum on a new term and hands the matching {@link
   * STUniformSplitTermsWriter.SegmentPostings} to the combined postings enum.
   *
   * @param term The new current term.
   * @param segmentPostings List sorted by segment index.
   */
  protected void reset(
      BytesRef term, List<STUniformSplitTermsWriter.SegmentPostings> segmentPostings) {
    this.term = term;
    multiPostingsEnum.reset(segmentPostings);
  }

  // ---- Supported operations ----

  /** Returns the term provided to the last call to {@link #reset}. */
  @Override
  public BytesRef term() {
    return term;
  }

  /**
   * Returns the shared combined {@link PostingsEnum} after recording the requested flags.
   *
   * <p>The {@code reuse} parameter is ignored: the same instance is always returned.
   */
  @Override
  public PostingsEnum postings(PostingsEnum reuse, int flags) {
    multiPostingsEnum.setPostingFlags(flags);
    return multiPostingsEnum;
  }

  // ---- Unsupported operations ----

  @Override
  public AttributeSource attributes() {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean seekExact(BytesRef text) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public SeekStatus seekCeil(BytesRef text) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void seekExact(long ord) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void seekExact(BytesRef term, TermState state) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public long ord() {
    throw new UnsupportedOperationException();
  }

  @Override
  public int docFreq() {
    throw new UnsupportedOperationException();
  }

  @Override
  public long totalTermFreq() {
    throw new UnsupportedOperationException();
  }

  @Override
  public ImpactsEnum impacts(int flags) {
    throw new UnsupportedOperationException();
  }

  @Override
  public TermState termState() {
    throw new UnsupportedOperationException();
  }

  @Override
  public BytesRef next() {
    throw new UnsupportedOperationException();
  }

  /**
   * Presents the {@link PostingsEnum} of several segments as one {@link PostingsEnum}, for one
   * field and one term.
   *
   * <p>It deliberately does not extend {@link
   * org.apache.lucene.index.FilterLeafReader.FilterPostingsEnum}: the delegate is replaced each
   * time iteration moves on to another segment.
   */
  protected class MultiSegmentsPostingsEnum extends PostingsEnum {

    /** Per-segment {@link PostingsEnum} kept for reuse, indexed by the segment's own index. */
    protected final PostingsEnum[] reusablePostingsEnums;

    /** Segment postings to iterate over; {@code null} until {@link #reset} is called. */
    protected List<STUniformSplitTermsWriter.SegmentPostings> segmentPostingsList;

    /** Position in {@link #segmentPostingsList} of the segment being read; -1 before the first. */
    protected int segmentIndex;

    /** Delegate for the segment being read. */
    protected PostingsEnum postingsEnum;

    /** Whether the delegate has no more docs, so the next segment must be opened. */
    protected boolean postingsEnumExhausted;

    /** Doc map of the segment being read. */
    protected MergeState.DocMap docMap;

    /** Current doc id, after mapping through {@link #docMap}. */
    protected int docId;

    /** Flags passed on when per-segment postings are requested. */
    protected int postingsFlags;

    protected MultiSegmentsPostingsEnum(int numSegments) {
      this.reusablePostingsEnums = new PostingsEnum[numSegments];
    }

    /**
     * Resets/reuse this {@link PostingsEnum} so that iteration starts over on the given segments.
     *
     * @param segmentPostingsList List of segment postings ordered by segment index.
     */
    protected void reset(List<STUniformSplitTermsWriter.SegmentPostings> segmentPostingsList) {
      assert segmentPostingsList instanceof RandomAccess
          : "We optimize by accessing the list elements instead of creating an Iterator";
      this.segmentPostingsList = segmentPostingsList;
      // Back to the "before first segment, before first doc" state.
      docId = -1;
      segmentIndex = -1;
      postingsEnumExhausted = true;
    }

    /** Records the flags used for per-segment postings requested from now on. */
    protected void setPostingFlags(int flags) {
      postingsFlags = flags;
    }

    @Override
    public int docID() {
      return docId;
    }

    @Override
    public int nextDoc() throws IOException {
      assert segmentPostingsList != null : "reset not called";
      for (; ; ) {
        if (postingsEnumExhausted) {
          final int lastSegmentIndex = segmentPostingsList.size() - 1;
          if (segmentIndex == lastSegmentIndex) {
            // Every segment has been consumed.
            docId = NO_MORE_DOCS;
            return docId;
          }
          // Open the postings of the next segment.
          segmentIndex++;
          STUniformSplitTermsWriter.SegmentPostings nextSegment =
              segmentPostingsList.get(segmentIndex);
          postingsEnum = getPostings(nextSegment);
          postingsEnumExhausted = false;
          docMap = nextSegment.docMap;
        }

        final int segmentDocId = postingsEnum.nextDoc();
        if (segmentDocId == NO_MORE_DOCS) {
          // This segment is done; the next loop iteration moves on to the following one.
          postingsEnumExhausted = true;
          continue;
        }

        final int mappedDocId = docMap.get(segmentDocId);
        if (mappedDocId == -1) {
          // Docs that the doc map translates to -1 are skipped.
          continue;
        }
        assert mappedDocId > docId : "next docId=" + mappedDocId + ", current docId=" + docId;
        docId = mappedDocId;
        return mappedDocId;
      }
    }

    /**
     * Gets the {@link PostingsEnum} of the given segment, offering the instance previously obtained
     * for that segment for reuse, and remembers the returned instance for next time.
     */
    protected PostingsEnum getPostings(STUniformSplitTermsWriter.SegmentPostings segmentPostings)
        throws IOException {
      // The field is present in the segment because it is one of the segments provided in the
      // reset() method.
      final int slot = segmentPostings.segmentIndex;
      PostingsEnum segmentEnum =
          segmentPostings.getPostings(fieldName, reusablePostingsEnums[slot], postingsFlags);
      reusablePostingsEnums[slot] = segmentEnum;
      return segmentEnum;
    }

    // ---- Per-document accessors: forwarded to the delegate of the segment being read ----

    @Override
    public int freq() throws IOException {
      return postingsEnum.freq();
    }

    @Override
    public int nextPosition() throws IOException {
      return postingsEnum.nextPosition();
    }

    @Override
    public int startOffset() throws IOException {
      return postingsEnum.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
      return postingsEnum.endOffset();
    }

    @Override
    public BytesRef getPayload() throws IOException {
      return postingsEnum.getPayload();
    }

    @Override
    public int advance(int target) {
      throw new UnsupportedOperationException();
    }

    @Override
    public long cost() {
      return 0; // Cost is not useful here.
    }
  }
}