blob: 0659827c80ff468b07647f06e6dd19c9de85dfde [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MappedMultiFields;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.ReaderSlice;
/**
* Abstract API that consumes terms, doc, freq, prox, offset and payloads postings. Concrete
* implementations of this actually do "something" with the postings (write it into the index in a
* specific format).
*
* @lucene.experimental
*/
public abstract class FieldsConsumer implements Closeable {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected FieldsConsumer() {}
// TODO: can we somehow compute stats for you...?
// TODO: maybe we should factor out "limited" (only
// iterables, no counts/stats) base classes from
// Fields/Terms/Docs/AndPositions?
/**
* Write all fields, terms and postings. This the "pull" API, allowing you to iterate more than
* once over the postings, somewhat analogous to using a DOM API to traverse an XML tree.
*
* <p><b>Notes</b>:
*
* <ul>
* <li>You must compute index statistics, including each Term's docFreq and totalTermFreq, as
* well as the summary sumTotalTermFreq, sumTotalDocFreq and docCount.
* <li>You must skip terms that have no docs and fields that have no terms, even though the
* provided Fields API will expose them; this typically requires lazily writing the field or
* term until you've actually seen the first term or document.
* <li>The provided Fields instance is limited: you cannot call any methods that return
* statistics/counts; you cannot pass a non-null live docs when pulling docs/positions
* enums.
* </ul>
*/
public abstract void write(Fields fields, NormsProducer norms) throws IOException;
/**
* Merges in the fields from the readers in <code>mergeState</code>. The default implementation
* skips and maps around deleted documents, and calls {@link #write(Fields,NormsProducer)}.
* Implementations can override this method for more sophisticated merging (bulk-byte copying,
* etc).
*/
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
final List<Fields> fields = new ArrayList<>();
final List<ReaderSlice> slices = new ArrayList<>();
int docBase = 0;
for (int readerIndex = 0; readerIndex < mergeState.fieldsProducers.length; readerIndex++) {
final FieldsProducer f = mergeState.fieldsProducers[readerIndex];
final int maxDoc = mergeState.maxDocs[readerIndex];
f.checkIntegrity();
slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
fields.add(f);
docBase += maxDoc;
}
Fields mergedFields =
new MappedMultiFields(
mergeState,
new MultiFields(
fields.toArray(Fields.EMPTY_ARRAY), slices.toArray(ReaderSlice.EMPTY_ARRAY)));
write(mergedFields, norms);
}
// NOTE: strange but necessary so javadocs linting is happy:
@Override
public abstract void close() throws IOException;
}