blob: cc60b8ecafa60d957ee689913cf0fee1439db07b [file] [log] [blame]
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
/** Holds common state used during segment merging.
*
* @lucene.experimental */
public class MergeState {
/**
 * Remaps docids around deletes during merge
 */
public static abstract class DocMap {

  DocMap() {}

  /** Returns the mapped docID corresponding to the provided one. */
  public abstract int get(int docID);

  /** Returns the total number of documents, ignoring
   *  deletions. */
  public abstract int maxDoc();

  /** Returns the number of not-deleted documents. */
  public final int numDocs() {
    return maxDoc() - numDeletedDocs();
  }

  /** Returns the number of deleted documents. */
  public abstract int numDeletedDocs();

  /** Returns true if there are any deletions. */
  public boolean hasDeletions() {
    return numDeletedDocs() > 0;
  }

  /** Creates a {@link DocMap} instance appropriate for
   *  this reader. */
  public static DocMap build(AtomicReader reader) {
    final int maxDoc = reader.maxDoc();
    if (reader.hasDeletions()) {
      return build(maxDoc, reader.getLiveDocs());
    }
    // No deletions: every docID maps to itself.
    return new NoDelDocMap(maxDoc);
  }

  static DocMap build(final int maxDoc, final Bits liveDocs) {
    assert liveDocs != null;
    // Record, for each docID, how many docIDs before it were deleted;
    // the mapped docID is then (docID - deletesBefore). Values only ever
    // grow, so a monotonic buffer stores them compactly.
    final MonotonicAppendingLongBuffer remapped = new MonotonicAppendingLongBuffer();
    int deletesSoFar = 0;
    for (int doc = 0; doc < maxDoc; doc++) {
      remapped.add(doc - deletesSoFar);
      if (!liveDocs.get(doc)) {
        deletesSoFar++;
      }
    }
    remapped.freeze();
    assert remapped.size() == maxDoc;
    final int delCount = deletesSoFar;
    return new DocMap() {
      @Override
      public int get(int docID) {
        // Deleted docs have no slot in the merged segment.
        return liveDocs.get(docID) ? (int) remapped.get(docID) : -1;
      }
      @Override
      public int maxDoc() {
        return maxDoc;
      }
      @Override
      public int numDeletedDocs() {
        return delCount;
      }
    };
  }
}
/** Identity {@link DocMap} used when the reader holds no deletions. */
private static final class NoDelDocMap extends DocMap {

  private final int docCount;

  NoDelDocMap(int maxDoc) {
    this.docCount = maxDoc;
  }

  @Override
  public int get(int docID) {
    // Nothing is deleted, so every docID maps to itself.
    return docID;
  }

  @Override
  public int maxDoc() {
    return docCount;
  }

  @Override
  public int numDeletedDocs() {
    return 0;
  }
}
/** {@link SegmentInfo} of the newly merged segment. */
public final SegmentInfo segmentInfo;
/** {@link FieldInfos} of the newly merged segment; not final because it
 *  is filled in by the merger after construction. */
public FieldInfos fieldInfos;
/** Readers being merged. */
public final List<AtomicReader> readers;
/** Maps docIDs around deletions; one {@link DocMap} per reader,
 *  parallel to {@link #readers}. */
public DocMap[] docMaps;
/** New docID base per reader, parallel to {@link #readers}. */
public int[] docBase;
/** Holds the CheckAbort instance, which is invoked
 * periodically to see if the merge has been aborted. */
public final CheckAbort checkAbort;
/** InfoStream for debugging messages. */
public final InfoStream infoStream;
/** Counter used for periodic calls to checkAbort
 * @lucene.internal */
public int checkAbortCount;
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
// but is this really so expensive to compute again in different components, versus once in SM?
/** {@link SegmentReader}s that have identical field
 * name/number mapping, so their stored fields and term
 * vectors may be bulk merged. */
public SegmentReader[] matchingSegmentReaders;
/** How many {@link #matchingSegmentReaders} are set. */
public int matchedCount;
/** Sole constructor.
 *
 * @param readers readers being merged
 * @param segmentInfo descriptor of the newly merged segment
 * @param infoStream sink for debugging messages
 * @param checkAbort invoked periodically to detect an aborted merge
 */
MergeState(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, CheckAbort checkAbort) {
  this.checkAbort = checkAbort;
  this.infoStream = infoStream;
  this.segmentInfo = segmentInfo;
  this.readers = readers;
}
/**
 * Class for recording units of work when merging segments.
 */
public static class CheckAbort {

  // Work accumulated since the last abort check.
  private double accumulated;
  private final MergePolicy.OneMerge merge;
  private final Directory dir;

  /** Creates a {@link CheckAbort} instance. */
  public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
    this.merge = merge;
    this.dir = dir;
  }

  /**
   * Records the fact that roughly units amount of work
   * have been done since this method was last called.
   * When adding time-consuming code into SegmentMerger,
   * you should test different values for units to ensure
   * that the time in between calls to merge.checkAborted
   * is up to ~ 1 second.
   */
  public void work(double units) throws MergePolicy.MergeAbortedException {
    accumulated += units;
    if (accumulated < 10000.0) {
      return;
    }
    // Enough work piled up: poll for abort and start a new interval.
    merge.checkAborted(dir);
    accumulated = 0;
  }

  /** If you use this: IW.close(false) cannot abort your merge!
   * @lucene.internal */
  static final MergeState.CheckAbort NONE = new MergeState.CheckAbort(null, null) {
    @Override
    public void work(double units) {
      // do nothing; null merge/dir are never touched because this
      // override skips the abort check entirely
    }
  };
}
}