/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import java.io.IOException;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

import static org.apache.lucene.index.IndexWriter.isCongruentSort;

/** Holds common state used during segment merging.
*
* @lucene.experimental */
public class MergeState {
/** Maps document IDs from old segments to document IDs in the new segment */
public final DocMap[] docMaps;
/** {@link SegmentInfo} of the newly merged segment. */
public final SegmentInfo segmentInfo;
/** {@link FieldInfos} of the newly merged segment. */
public FieldInfos mergeFieldInfos;
/** Stored field producers being merged */
public final StoredFieldsReader[] storedFieldsReaders;
/** Term vector producers being merged */
public final TermVectorsReader[] termVectorsReaders;
/** Norms producers being merged */
public final NormsProducer[] normsProducers;
/** DocValues producers being merged */
public final DocValuesProducer[] docValuesProducers;
/** FieldInfos being merged */
public final FieldInfos[] fieldInfos;
/** Live docs for each reader */
public final Bits[] liveDocs;
/** Postings to merge */
public final FieldsProducer[] fieldsProducers;
/** Point readers to merge */
public final PointsReader[] pointsReaders;
/** Max docs per reader */
public final int[] maxDocs;
/** InfoStream for debugging messages. */
public final InfoStream infoStream;
/** Indicates if the merged segment's documents must be re-sorted to satisfy the index sort, i.e. the incoming segments are not already in sorted order **/
public boolean needsIndexSort;
/** Sole constructor. */
MergeState(List<CodecReader> readers, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {
verifyIndexSort(readers, segmentInfo);
this.infoStream = infoStream;
int numReaders = readers.size();
maxDocs = new int[numReaders];
fieldsProducers = new FieldsProducer[numReaders];
normsProducers = new NormsProducer[numReaders];
storedFieldsReaders = new StoredFieldsReader[numReaders];
termVectorsReaders = new TermVectorsReader[numReaders];
docValuesProducers = new DocValuesProducer[numReaders];
pointsReaders = new PointsReader[numReaders];
fieldInfos = new FieldInfos[numReaders];
liveDocs = new Bits[numReaders];
int numDocs = 0;
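// For each incoming reader, grab its producers and, where available, switch to
// getMergeInstance(), which returns a variant optimized for the sequential access
// pattern of merging rather than for random-access search.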
for (int i = 0; i < numReaders; i++) {
final CodecReader reader = readers.get(i);
maxDocs[i] = reader.maxDoc();
liveDocs[i] = reader.getLiveDocs();
fieldInfos[i] = reader.getFieldInfos();
normsProducers[i] = reader.getNormsReader();
if (normsProducers[i] != null) {
normsProducers[i] = normsProducers[i].getMergeInstance();
}
docValuesProducers[i] = reader.getDocValuesReader();
if (docValuesProducers[i] != null) {
docValuesProducers[i] = docValuesProducers[i].getMergeInstance();
}
storedFieldsReaders[i] = reader.getFieldsReader();
if (storedFieldsReaders[i] != null) {
storedFieldsReaders[i] = storedFieldsReaders[i].getMergeInstance();
}
termVectorsReaders[i] = reader.getTermVectorsReader();
if (termVectorsReaders[i] != null) {
termVectorsReaders[i] = termVectorsReaders[i].getMergeInstance();
}
fieldsProducers[i] = reader.getPostingsReader().getMergeInstance();
pointsReaders[i] = reader.getPointsReader();
if (pointsReaders[i] != null) {
pointsReaders[i] = pointsReaders[i].getMergeInstance();
}
numDocs += reader.numDocs();
}
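// Deleted documents are dropped while merging, so the merged segment's maxDoc is
// simply the total number of live documents across the incoming readers.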
segmentInfo.setMaxDoc(numDocs);
this.segmentInfo = segmentInfo;
this.docMaps = buildDocMaps(readers, segmentInfo.getIndexSort());
}
// Remap docIDs around deletions
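// Each DocMap compacts away deleted documents within its reader and then shifts the
// surviving docIDs by that reader's starting offset (docBase) in the merged segment;
// deleted documents map to -1.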
private DocMap[] buildDeletionDocMaps(List<CodecReader> readers) {
int totalDocs = 0;
int numReaders = readers.size();
DocMap[] docMaps = new DocMap[numReaders];
for (int i = 0; i < numReaders; i++) {
LeafReader reader = readers.get(i);
Bits liveDocs = reader.getLiveDocs();
final PackedLongValues delDocMap;
if (liveDocs != null) {
delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
} else {
delDocMap = null;
}
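// docBase is the first merged docID assigned to this reader: the number of live
// documents contributed by all earlier readers.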
final int docBase = totalDocs;
docMaps[i] = new DocMap() {
@Override
public int get(int docID) {
if (liveDocs == null) {
return docBase + docID;
} else if (liveDocs.get(docID)) {
return docBase + (int) delDocMap.get(docID);
} else {
return -1;
}
}
};
totalDocs += reader.numDocs();
}
return docMaps;
}
private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {
if (indexSort == null) {
// no index sort: we only need to map around deletions, and rebase to the merged segment's docID space
return buildDeletionDocMaps(readers);
} else {
// do a merge sort of the incoming leaves:
long t0 = System.nanoTime();
DocMap[] result = MultiSorter.sort(indexSort, readers);
if (result == null) {
// already sorted so we can switch back to map around deletions
return buildDeletionDocMaps(readers);
} else {
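// a non-null result means at least one reader is not in index sort order, so the
// merged segment's documents are reordered according to these DocMaps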
needsIndexSort = true;
}
long t1 = System.nanoTime();
if (infoStream.isEnabled("SM")) {
infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0));
}
return result;
}
}
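// When the merged segment declares an index sort, every incoming segment must already
// declare a congruent sort; otherwise an IllegalArgumentException is thrown because the
// merge could not produce a correctly sorted segment.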
private static void verifyIndexSort(List<CodecReader> readers, SegmentInfo segmentInfo) {
Sort indexSort = segmentInfo.getIndexSort();
if (indexSort == null) {
return;
}
for (CodecReader leaf : readers) {
Sort segmentSort = leaf.getMetaData().getSort();
if (segmentSort == null || isCongruentSort(indexSort, segmentSort) == false) {
throw new IllegalArgumentException("index sort mismatch: merged segment has sort=" + indexSort +
" but to-be-merged segment has sort=" + (segmentSort == null ? "null" : segmentSort));
}
}
}
/** A map of doc IDs. */
public static abstract class DocMap {
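// A minimal usage sketch (not actual Lucene merge code; names below are illustrative):
// for reader i and one of its docIDs d, mergeState.docMaps[i].get(d) is the docID in
// the merged segment, or -1 if the document was deleted and must be skipped.
//
//   for (int i = 0; i < mergeState.maxDocs.length; i++) {
//     for (int d = 0; d < mergeState.maxDocs[i]; d++) {
//       int newDoc = mergeState.docMaps[i].get(d);
//       if (newDoc == -1) continue; // deleted
//       // ... copy data for doc d of reader i into slot newDoc ...
//     }
//   }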
/** Sole constructor */
public DocMap() {
}
/** Return the mapped docID or -1 if the given doc is not mapped. */
public abstract int get(int docID);
}
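/** Builds a monotonic map from original docID to its ordinal among live documents.
 *  For example, with maxDoc = 5 and liveDocs = {T, T, F, T, T} the recorded values are
 *  [0, 1, 2, 2, 3]; entries at deleted positions are never used because callers check
 *  {@code liveDocs} before reading the map. */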
static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
int del = 0;
for (int i = 0; i < maxDoc; ++i) {
docMapBuilder.add(i - del);
if (liveDocs.get(i) == false) {
++del;
}
}
return docMapBuilder.build();
}
}