| diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefSliceComparator.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefSliceComparator.java |
| new file mode 100644 |
| index 0000000..41351c1 |
| --- /dev/null |
| +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefSliceComparator.java |
| @@ -0,0 +1,38 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.util; |
| + |
| +import java.util.Comparator; |
| + |
| +/** A {@link Comparator} that compares {@link BytesRef}s on a fixed slice: {@code length} bytes starting {@code offset} bytes past each ref's own offset. |
| + * @lucene.internal */ |
| +public final class BytesRefSliceComparator implements Comparator<BytesRef> { |
| + |
| + final int offset, length; |
| + |
| + /** Sole constructor; {@code offset} is added to each {@link BytesRef#offset} and {@code length} is the byte count passed to {@code StringHelper.compare}. */ |
| + public BytesRefSliceComparator(int offset, int length) { |
| + this.offset = offset; |
| + this.length = length; |
| + } |
| + |
| + @Override |
| + public int compare(BytesRef a, BytesRef b) { |
| + return StringHelper.compare(length, a.bytes, a.offset + offset, b.bytes, b.offset + offset); |
| + } |
| + |
| +} |
| diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java b/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java |
| index 346b908..b2bbf12 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java |
| +++ b/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java |
| @@ -105,6 +105,37 @@ final class FixedLengthBytesRefArray implements SortableBytesRefArray { |
| orderedEntries[i] = i; |
| } |
| |
| + if (comp instanceof BytesRefSliceComparator) { |
| + int offset = ((BytesRefSliceComparator) comp).offset; |
| + int length = ((BytesRefSliceComparator) comp).length; |
| + |
| + new StringMSBRadixSorter() { |
| + |
| + final BytesRef scratch; |
| + |
| + { |
| + scratch = new BytesRef(); |
| + scratch.length = length; |
| + } |
| + |
| + @Override |
| + protected void swap(int i, int j) { |
| + int o = orderedEntries[i]; |
| + orderedEntries[i] = orderedEntries[j]; |
| + orderedEntries[j] = o; |
| + } |
| + |
| + @Override |
| + protected BytesRef get(int i) { |
| + int index = orderedEntries[i]; |
| + scratch.bytes = blocks[index / valuesPerBlock]; |
| + scratch.offset = (index % valuesPerBlock) * valueLength + offset; |
| + return scratch; |
| + } |
| + }.sort(0, size()); |
| + return orderedEntries; |
| + } |
| + |
| final BytesRef pivot = new BytesRef(); |
| final BytesRef scratch1 = new BytesRef(); |
| final BytesRef scratch2 = new BytesRef(); |
| @@ -120,7 +151,7 @@ final class FixedLengthBytesRefArray implements SortableBytesRefArray { |
| orderedEntries[i] = orderedEntries[j]; |
| orderedEntries[j] = o; |
| } |
| - |
| + |
| @Override |
| protected int compare(int i, int j) { |
| int index1 = orderedEntries[i]; |
| diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java |
| index 09eef26..8daba0a 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java |
| +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java |
| @@ -25,7 +25,6 @@ import java.util.List; |
| |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.index.MergeState; |
| -import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| @@ -33,6 +32,7 @@ import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.TrackingDirectoryWrapper; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefSliceComparator; |
| import org.apache.lucene.util.FixedBitSet; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.IntroSorter; |
| @@ -718,28 +718,34 @@ public class BKDWriter implements Closeable { |
| |
| final int offset = bytesPerDim * dim; |
| |
| - Comparator<BytesRef> cmp = new Comparator<BytesRef>() { |
| - |
| - final ByteArrayDataInput reader = new ByteArrayDataInput(); |
| + Comparator<BytesRef> cmp; |
| + if (dim == numDims - 1) { |
| + // for the last dimension the value bytes are immediately followed by the doc id |
| + // bytes, so a single BytesRefSliceComparator can span both; using that comparator |
| + // triggers an optimization when sorting: radix sort rather than a comparison-based sort |
| + cmp = new BytesRefSliceComparator(packedBytesLength - bytesPerDim, bytesPerDim + Integer.BYTES); |
| + } else { |
| + cmp = new Comparator<BytesRef>() { |
| |
| - @Override |
| - public int compare(BytesRef a, BytesRef b) { |
| - // First compare by the requested dimension we are sorting by: |
| - int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + offset, b.bytes, b.offset + offset); |
| + @Override |
| + public int compare(BytesRef a, BytesRef b) { |
| + // First compare by the requested dimension we are sorting by: |
| + int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + offset, b.bytes, b.offset + offset); |
| |
| - if (cmp != 0) { |
| - return cmp; |
| - } |
| + if (cmp != 0) { |
| + return cmp; |
| + } |
| |
| - // Tie-break by docID ... no need to tie break on ord, for the case where the same doc has |
| - // the same value in a given dimension indexed more than once: it can't matter at search |
| - // time since we don't write ords into the index: |
| + // Tie-break by docID ... no need to tie break on ord, for the case where the same doc has |
| + // the same value in a given dimension indexed more than once: it can't matter at search |
| + // time since we don't write ords into the index: |
| |
| - return StringHelper.compare(Integer.BYTES, |
| - a.bytes, a.offset + packedBytesLength, |
| - b.bytes, b.offset + packedBytesLength); |
| - } |
| - }; |
| + return StringHelper.compare(Integer.BYTES, |
| + a.bytes, a.offset + packedBytesLength, |
| + b.bytes, b.offset + packedBytesLength); |
| + } |
| + }; |
| + } |
| |
| OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) { |
| |