blob: d258c6b1a5278a1bf3c3b52c5d36f1aa3da62757 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import static org.apache.lucene.index.IndexWriter.isCongruentSort;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
/**
* Holds common state used during segment merging.
*
* @lucene.experimental
*/
public class MergeState {
/** Maps document IDs from old segments to document IDs in the new segment */
public final DocMap[] docMaps;
// Only used by IW when it must remap deletes that arrived against the merging segments while a
// merge was running:
final DocMap[] leafDocMaps;
/** {@link SegmentInfo} of the newly merged segment. */
public final SegmentInfo segmentInfo;
/** {@link FieldInfos} of the newly merged segment. */
public FieldInfos mergeFieldInfos;
/** Stored field producers being merged */
public final StoredFieldsReader[] storedFieldsReaders;
/** Term vector producers being merged */
public final TermVectorsReader[] termVectorsReaders;
/** Norms producers being merged */
public final NormsProducer[] normsProducers;
/** DocValues producers being merged */
public final DocValuesProducer[] docValuesProducers;
/** FieldInfos being merged */
public final FieldInfos[] fieldInfos;
/** Live docs for each reader */
public final Bits[] liveDocs;
/** Postings to merge */
public final FieldsProducer[] fieldsProducers;
/** Point readers to merge */
public final PointsReader[] pointsReaders;
/** Vector readers to merge */
public final VectorReader[] vectorReaders;
/** Max docs per reader */
public final int[] maxDocs;
/** InfoStream for debugging messages. */
public final InfoStream infoStream;
/** Indicates if the index needs to be sorted * */
public boolean needsIndexSort;
/** Sole constructor. */
MergeState(List<CodecReader> originalReaders, SegmentInfo segmentInfo, InfoStream infoStream)
throws IOException {
this.infoStream = infoStream;
final Sort indexSort = segmentInfo.getIndexSort();
int numReaders = originalReaders.size();
leafDocMaps = new DocMap[numReaders];
List<CodecReader> readers = maybeSortReaders(originalReaders, segmentInfo);
maxDocs = new int[numReaders];
fieldsProducers = new FieldsProducer[numReaders];
normsProducers = new NormsProducer[numReaders];
storedFieldsReaders = new StoredFieldsReader[numReaders];
termVectorsReaders = new TermVectorsReader[numReaders];
docValuesProducers = new DocValuesProducer[numReaders];
pointsReaders = new PointsReader[numReaders];
vectorReaders = new VectorReader[numReaders];
fieldInfos = new FieldInfos[numReaders];
liveDocs = new Bits[numReaders];
int numDocs = 0;
for (int i = 0; i < numReaders; i++) {
final CodecReader reader = readers.get(i);
maxDocs[i] = reader.maxDoc();
liveDocs[i] = reader.getLiveDocs();
fieldInfos[i] = reader.getFieldInfos();
normsProducers[i] = reader.getNormsReader();
if (normsProducers[i] != null) {
normsProducers[i] = normsProducers[i].getMergeInstance();
}
docValuesProducers[i] = reader.getDocValuesReader();
if (docValuesProducers[i] != null) {
docValuesProducers[i] = docValuesProducers[i].getMergeInstance();
}
storedFieldsReaders[i] = reader.getFieldsReader();
if (storedFieldsReaders[i] != null) {
storedFieldsReaders[i] = storedFieldsReaders[i].getMergeInstance();
}
termVectorsReaders[i] = reader.getTermVectorsReader();
if (termVectorsReaders[i] != null) {
termVectorsReaders[i] = termVectorsReaders[i].getMergeInstance();
}
fieldsProducers[i] = reader.getPostingsReader().getMergeInstance();
pointsReaders[i] = reader.getPointsReader();
if (pointsReaders[i] != null) {
pointsReaders[i] = pointsReaders[i].getMergeInstance();
}
vectorReaders[i] = reader.getVectorReader();
if (vectorReaders[i] != null) {
vectorReaders[i] = vectorReaders[i].getMergeInstance();
}
numDocs += reader.numDocs();
}
segmentInfo.setMaxDoc(numDocs);
this.segmentInfo = segmentInfo;
this.docMaps = buildDocMaps(readers, indexSort);
}
// Remap docIDs around deletions
private DocMap[] buildDeletionDocMaps(List<CodecReader> readers) {
int totalDocs = 0;
int numReaders = readers.size();
DocMap[] docMaps = new DocMap[numReaders];
for (int i = 0; i < numReaders; i++) {
LeafReader reader = readers.get(i);
Bits liveDocs = reader.getLiveDocs();
final PackedLongValues delDocMap;
if (liveDocs != null) {
delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
} else {
delDocMap = null;
}
final int docBase = totalDocs;
docMaps[i] =
new DocMap() {
@Override
public int get(int docID) {
if (liveDocs == null) {
return docBase + docID;
} else if (liveDocs.get(docID)) {
return docBase + (int) delDocMap.get(docID);
} else {
return -1;
}
}
};
totalDocs += reader.numDocs();
}
return docMaps;
}
private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {
if (indexSort == null) {
// no index sort ... we only must map around deletions, and rebase to the merged segment's
// docID space
return buildDeletionDocMaps(readers);
} else {
// do a merge sort of the incoming leaves:
long t0 = System.nanoTime();
DocMap[] result = MultiSorter.sort(indexSort, readers);
if (result == null) {
// already sorted so we can switch back to map around deletions
return buildDeletionDocMaps(readers);
} else {
needsIndexSort = true;
}
long t1 = System.nanoTime();
if (infoStream.isEnabled("SM")) {
infoStream.message(
"SM",
String.format(
Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1 - t0) / 1000000.0));
}
return result;
}
}
private List<CodecReader> maybeSortReaders(
List<CodecReader> originalReaders, SegmentInfo segmentInfo) throws IOException {
// Default to identity:
for (int i = 0; i < originalReaders.size(); i++) {
leafDocMaps[i] =
new DocMap() {
@Override
public int get(int docID) {
return docID;
}
};
}
Sort indexSort = segmentInfo.getIndexSort();
if (indexSort == null) {
return originalReaders;
}
List<CodecReader> readers = new ArrayList<>(originalReaders.size());
for (CodecReader leaf : originalReaders) {
Sort segmentSort = leaf.getMetaData().getSort();
if (segmentSort == null || isCongruentSort(indexSort, segmentSort) == false) {
throw new IllegalArgumentException(
"index sort mismatch: merged segment has sort="
+ indexSort
+ " but to-be-merged segment has sort="
+ (segmentSort == null ? "null" : segmentSort));
}
readers.add(leaf);
}
return readers;
}
/** A map of doc IDs. */
public abstract static class DocMap {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
// Explicitly declared so that we have non-empty javadoc
protected DocMap() {}
/** Return the mapped docID or -1 if the given doc is not mapped. */
public abstract int get(int docID);
}
static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
final PackedLongValues.Builder docMapBuilder =
PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
int del = 0;
for (int i = 0; i < maxDoc; ++i) {
docMapBuilder.add(i - del);
if (liveDocs.get(i) == false) {
++del;
}
}
return docMapBuilder.build();
}
}