| package org.apache.lucene.index.sorter; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.Collections; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.lucene.analysis.Analyzer; // javadocs |
| import org.apache.lucene.index.AtomicReader; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.MergePolicy; |
| import org.apache.lucene.index.MergeState; |
| import org.apache.lucene.index.MultiReader; |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.index.SegmentCommitInfo; |
| import org.apache.lucene.index.SegmentInfos; |
| import org.apache.lucene.index.SegmentReader; |
| import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; |
| |
| /** A {@link MergePolicy} that reorders documents according to a {@link Sort} |
| * before merging them. As a consequence, all segments resulting from a merge |
| * will be sorted while segments resulting from a flush will be in the order |
| * in which documents have been added. |
| * <p><b>NOTE</b>: Never use this policy if you rely on |
| * {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments} |
| * to have sequentially-assigned doc IDs, this policy will scatter doc IDs. |
| * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s |
| * so that the order of segments is predictable. For example, using |
| * {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make |
| * the order of documents in a segment depend on the number of times the segment |
| * has been merged. |
| * @lucene.experimental */ |
| public final class SortingMergePolicy extends MergePolicy { |
| |
| /** |
| * Put in the {@link SegmentInfo#getDiagnostics() diagnostics} to denote that |
| * this segment is sorted. |
| */ |
| public static final String SORTER_ID_PROP = "sorter"; |
| |
| class SortingOneMerge extends OneMerge { |
| |
| List<AtomicReader> unsortedReaders; |
| Sorter.DocMap docMap; |
| AtomicReader sortedView; |
| |
| SortingOneMerge(List<SegmentCommitInfo> segments) { |
| super(segments); |
| } |
| |
| @Override |
| public List<AtomicReader> getMergeReaders() throws IOException { |
| if (unsortedReaders == null) { |
| unsortedReaders = super.getMergeReaders(); |
| final AtomicReader atomicView; |
| if (unsortedReaders.size() == 1) { |
| atomicView = unsortedReaders.get(0); |
| } else { |
| final IndexReader multiReader = new MultiReader(unsortedReaders.toArray(new AtomicReader[unsortedReaders.size()])); |
| atomicView = SlowCompositeReaderWrapper.wrap(multiReader); |
| } |
| docMap = sorter.sort(atomicView); |
| sortedView = SortingAtomicReader.wrap(atomicView, docMap); |
| } |
| // a null doc map means that the readers are already sorted |
| return docMap == null ? unsortedReaders : Collections.singletonList(sortedView); |
| } |
| |
| @Override |
| public void setInfo(SegmentCommitInfo info) { |
| Map<String,String> diagnostics = info.info.getDiagnostics(); |
| diagnostics.put(SORTER_ID_PROP, sorter.getID()); |
| super.setInfo(info); |
| } |
| |
| private MonotonicAppendingLongBuffer getDeletes(List<AtomicReader> readers) { |
| MonotonicAppendingLongBuffer deletes = new MonotonicAppendingLongBuffer(); |
| int deleteCount = 0; |
| for (AtomicReader reader : readers) { |
| final int maxDoc = reader.maxDoc(); |
| final Bits liveDocs = reader.getLiveDocs(); |
| for (int i = 0; i < maxDoc; ++i) { |
| if (liveDocs != null && !liveDocs.get(i)) { |
| ++deleteCount; |
| } else { |
| deletes.add(deleteCount); |
| } |
| } |
| } |
| deletes.freeze(); |
| return deletes; |
| } |
| |
| @Override |
| public MergePolicy.DocMap getDocMap(final MergeState mergeState) { |
| if (unsortedReaders == null) { |
| throw new IllegalStateException(); |
| } |
| if (docMap == null) { |
| return super.getDocMap(mergeState); |
| } |
| assert mergeState.docMaps.length == 1; // we returned a singleton reader |
| final MonotonicAppendingLongBuffer deletes = getDeletes(unsortedReaders); |
| return new MergePolicy.DocMap() { |
| @Override |
| public int map(int old) { |
| final int oldWithDeletes = old + (int) deletes.get(old); |
| final int newWithDeletes = docMap.oldToNew(oldWithDeletes); |
| return mergeState.docMaps[0].get(newWithDeletes); |
| } |
| }; |
| } |
| |
| } |
| |
| class SortingMergeSpecification extends MergeSpecification { |
| |
| @Override |
| public void add(OneMerge merge) { |
| super.add(new SortingOneMerge(merge.segments)); |
| } |
| |
| @Override |
| public String segString(Directory dir) { |
| return "SortingMergeSpec(" + super.segString(dir) + ", sorter=" + sorter + ")"; |
| } |
| |
| } |
| |
| /** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */ |
| public static boolean isSorted(AtomicReader reader, Sort sort) { |
| if (reader instanceof SegmentReader) { |
| final SegmentReader segReader = (SegmentReader) reader; |
| final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics(); |
| if (diagnostics != null && sort.toString().equals(diagnostics.get(SORTER_ID_PROP))) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private MergeSpecification sortedMergeSpecification(MergeSpecification specification) { |
| if (specification == null) { |
| return null; |
| } |
| MergeSpecification sortingSpec = new SortingMergeSpecification(); |
| for (OneMerge merge : specification.merges) { |
| sortingSpec.add(merge); |
| } |
| return sortingSpec; |
| } |
| |
| final MergePolicy in; |
| final Sorter sorter; |
| final Sort sort; |
| |
| /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */ |
| public SortingMergePolicy(MergePolicy in, Sort sort) { |
| this.in = in; |
| this.sorter = new Sorter(sort); |
| this.sort = sort; |
| } |
| |
| @Override |
| public MergeSpecification findMerges(MergeTrigger mergeTrigger, |
| SegmentInfos segmentInfos) throws IOException { |
| return sortedMergeSpecification(in.findMerges(mergeTrigger, segmentInfos)); |
| } |
| |
| @Override |
| public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, |
| int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge) |
| throws IOException { |
| return sortedMergeSpecification(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge)); |
| } |
| |
| @Override |
| public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos) |
| throws IOException { |
| return sortedMergeSpecification(in.findForcedDeletesMerges(segmentInfos)); |
| } |
| |
| @Override |
| public MergePolicy clone() { |
| return new SortingMergePolicy(in.clone(), sort); |
| } |
| |
| @Override |
| public void close() { |
| in.close(); |
| } |
| |
| @Override |
| public boolean useCompoundFile(SegmentInfos segments, |
| SegmentCommitInfo newSegment) throws IOException { |
| return in.useCompoundFile(segments, newSegment); |
| } |
| |
| @Override |
| public void setIndexWriter(IndexWriter writer) { |
| in.setIndexWriter(writer); |
| } |
| |
| @Override |
| public String toString() { |
| return "SortingMergePolicy(" + in + ", sorter=" + sorter + ")"; |
| } |
| |
| } |