| package org.apache.lucene.index; |
| |
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| import java.util.List; |
| |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.InfoStream; |
| import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; |
| |
| /** Holds common state used during segment merging. |
| * |
| * @lucene.experimental */ |
| public class MergeState { |
| |
| /** |
| * Remaps docids around deletes during merge |
| */ |
| public static abstract class DocMap { |
| |
| DocMap() {} |
| |
| /** Returns the mapped docID corresponding to the provided one. */ |
| public abstract int get(int docID); |
| |
| /** Returns the total number of documents, ignoring |
| * deletions. */ |
| public abstract int maxDoc(); |
| |
| /** Returns the number of not-deleted documents. */ |
| public final int numDocs() { |
| return maxDoc() - numDeletedDocs(); |
| } |
| |
| /** Returns the number of deleted documents. */ |
| public abstract int numDeletedDocs(); |
| |
| /** Returns true if there are any deletions. */ |
| public boolean hasDeletions() { |
| return numDeletedDocs() > 0; |
| } |
| |
| /** Creates a {@link DocMap} instance appropriate for |
| * this reader. */ |
| public static DocMap build(AtomicReader reader) { |
| final int maxDoc = reader.maxDoc(); |
| if (!reader.hasDeletions()) { |
| return new NoDelDocMap(maxDoc); |
| } |
| final Bits liveDocs = reader.getLiveDocs(); |
| return build(maxDoc, liveDocs); |
| } |
| |
| static DocMap build(final int maxDoc, final Bits liveDocs) { |
| assert liveDocs != null; |
| final MonotonicAppendingLongBuffer docMap = new MonotonicAppendingLongBuffer(); |
| int del = 0; |
| for (int i = 0; i < maxDoc; ++i) { |
| docMap.add(i - del); |
| if (!liveDocs.get(i)) { |
| ++del; |
| } |
| } |
| docMap.freeze(); |
| final int numDeletedDocs = del; |
| assert docMap.size() == maxDoc; |
| return new DocMap() { |
| |
| @Override |
| public int get(int docID) { |
| if (!liveDocs.get(docID)) { |
| return -1; |
| } |
| return (int) docMap.get(docID); |
| } |
| |
| @Override |
| public int maxDoc() { |
| return maxDoc; |
| } |
| |
| @Override |
| public int numDeletedDocs() { |
| return numDeletedDocs; |
| } |
| |
| }; |
| } |
| |
| } |
| |
| private static final class NoDelDocMap extends DocMap { |
| |
| private final int maxDoc; |
| |
| NoDelDocMap(int maxDoc) { |
| this.maxDoc = maxDoc; |
| } |
| |
| @Override |
| public int get(int docID) { |
| return docID; |
| } |
| |
| @Override |
| public int maxDoc() { |
| return maxDoc; |
| } |
| |
| @Override |
| public int numDeletedDocs() { |
| return 0; |
| } |
| } |
| |
| /** {@link SegmentInfo} of the newly merged segment. */ |
| public final SegmentInfo segmentInfo; |
| |
| /** {@link FieldInfos} of the newly merged segment. */ |
| public FieldInfos fieldInfos; |
| |
| /** Readers being merged. */ |
| public final List<AtomicReader> readers; |
| |
| /** Maps docIDs around deletions. */ |
| public DocMap[] docMaps; |
| |
| /** New docID base per reader. */ |
| public int[] docBase; |
| |
| /** Holds the CheckAbort instance, which is invoked |
| * periodically to see if the merge has been aborted. */ |
| public final CheckAbort checkAbort; |
| |
| /** InfoStream for debugging messages. */ |
| public final InfoStream infoStream; |
| |
| /** Counter used for periodic calls to checkAbort |
| * @lucene.internal */ |
| public int checkAbortCount; |
| |
| // TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging) |
| // but is this really so expensive to compute again in different components, versus once in SM? |
| |
| /** {@link SegmentReader}s that have identical field |
| * name/number mapping, so their stored fields and term |
| * vectors may be bulk merged. */ |
| public SegmentReader[] matchingSegmentReaders; |
| |
| /** How many {@link #matchingSegmentReaders} are set. */ |
| public int matchedCount; |
| |
| /** Sole constructor. */ |
| MergeState(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, CheckAbort checkAbort) { |
| this.readers = readers; |
| this.segmentInfo = segmentInfo; |
| this.infoStream = infoStream; |
| this.checkAbort = checkAbort; |
| } |
| |
| /** |
| * Class for recording units of work when merging segments. |
| */ |
| public static class CheckAbort { |
| private double workCount; |
| private final MergePolicy.OneMerge merge; |
| private final Directory dir; |
| |
| /** Creates a #CheckAbort instance. */ |
| public CheckAbort(MergePolicy.OneMerge merge, Directory dir) { |
| this.merge = merge; |
| this.dir = dir; |
| } |
| |
| /** |
| * Records the fact that roughly units amount of work |
| * have been done since this method was last called. |
| * When adding time-consuming code into SegmentMerger, |
| * you should test different values for units to ensure |
| * that the time in between calls to merge.checkAborted |
| * is up to ~ 1 second. |
| */ |
| public void work(double units) throws MergePolicy.MergeAbortedException { |
| workCount += units; |
| if (workCount >= 10000.0) { |
| merge.checkAborted(dir); |
| workCount = 0; |
| } |
| } |
| |
| /** If you use this: IW.close(false) cannot abort your merge! |
| * @lucene.internal */ |
| static final MergeState.CheckAbort NONE = new MergeState.CheckAbort(null, null) { |
| @Override |
| public void work(double units) { |
| // do nothing |
| } |
| }; |
| } |
| } |