blob: be8b294dde68c5461e5f677a2ae9f2b5ef607367 [file] [log] [blame]
diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefSliceComparator.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefSliceComparator.java
new file mode 100644
index 0000000..41351c1
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefSliceComparator.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util;
+
+import java.util.Comparator;
+
+/** A {@link Comparator} that compares {@link BytesRef}s on a fixed slice: the {@code length} bytes found {@code offset} bytes past each ref's own offset.
+ * @lucene.internal */
+public final class BytesRefSliceComparator implements Comparator<BytesRef> {
+
+ final int offset, length;
+
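+ /** Sole constructor: compares {@code length} bytes starting {@code offset} bytes past the offset of each {@link BytesRef}. */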
+ public BytesRefSliceComparator(int offset, int length) {
+ this.offset = offset;
+ this.length = length;
+ }
+
+ @Override
+ public int compare(BytesRef a, BytesRef b) {
+ return StringHelper.compare(length, a.bytes, a.offset + offset, b.bytes, b.offset + offset);
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java b/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java
index 346b908..b2bbf12 100644
--- a/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java
+++ b/lucene/core/src/java/org/apache/lucene/util/FixedLengthBytesRefArray.java
@@ -105,6 +105,37 @@ final class FixedLengthBytesRefArray implements SortableBytesRefArray {
orderedEntries[i] = i;
}
+ if (comp instanceof BytesRefSliceComparator) {
+ int offset = ((BytesRefSliceComparator) comp).offset;
+ int length = ((BytesRefSliceComparator) comp).length;
+
+ new StringMSBRadixSorter() {
+
+ final BytesRef scratch;
+
+ {
+ scratch = new BytesRef();
+ scratch.length = length;
+ }
+
+ @Override
+ protected void swap(int i, int j) {
+ int o = orderedEntries[i];
+ orderedEntries[i] = orderedEntries[j];
+ orderedEntries[j] = o;
+ }
+
+ @Override
+ protected BytesRef get(int i) {
+ int index = orderedEntries[i];
+ scratch.bytes = blocks[index / valuesPerBlock];
+ scratch.offset = (index % valuesPerBlock) * valueLength + offset;
+ return scratch;
+ }
+ }.sort(0, size());
+ return orderedEntries;
+ }
+
final BytesRef pivot = new BytesRef();
final BytesRef scratch1 = new BytesRef();
final BytesRef scratch2 = new BytesRef();
@@ -120,7 +151,7 @@ final class FixedLengthBytesRefArray implements SortableBytesRefArray {
orderedEntries[i] = orderedEntries[j];
orderedEntries[j] = o;
}
-
+
@Override
protected int compare(int i, int j) {
int index1 = orderedEntries[i];
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 09eef26..8daba0a 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -25,7 +25,6 @@ import java.util.List;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.MergeState;
-import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -33,6 +32,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefSliceComparator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntroSorter;
@@ -718,28 +718,34 @@ public class BKDWriter implements Closeable {
final int offset = bytesPerDim * dim;
- Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
-
- final ByteArrayDataInput reader = new ByteArrayDataInput();
+ Comparator<BytesRef> cmp;
+ if (dim == numDims - 1) {
+ // for the last dimension, the bytes for the value and the doc id are contiguous,
+ // so we can use a BytesRefSliceComparator, which triggers an optimization when
+ // sorting: a radix sort is used rather than a comparison-based sort
+ cmp = new BytesRefSliceComparator(packedBytesLength - bytesPerDim, bytesPerDim + Integer.BYTES);
+ } else {
+ cmp = new Comparator<BytesRef>() {
- @Override
- public int compare(BytesRef a, BytesRef b) {
- // First compare by the requested dimension we are sorting by:
- int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + offset, b.bytes, b.offset + offset);
+ @Override
+ public int compare(BytesRef a, BytesRef b) {
+ // First compare by the requested dimension we are sorting by:
+ int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + offset, b.bytes, b.offset + offset);
- if (cmp != 0) {
- return cmp;
- }
+ if (cmp != 0) {
+ return cmp;
+ }
- // Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
- // the same value in a given dimension indexed more than once: it can't matter at search
- // time since we don't write ords into the index:
+ // Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
+ // the same value in a given dimension indexed more than once: it can't matter at search
+ // time since we don't write ords into the index:
- return StringHelper.compare(Integer.BYTES,
- a.bytes, a.offset + packedBytesLength,
- b.bytes, b.offset + packedBytesLength);
- }
- };
+ return StringHelper.compare(Integer.BYTES,
+ a.bytes, a.offset + packedBytesLength,
+ b.bytes, b.offset + packedBytesLength);
+ }
+ };
+ }
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) {