blob: e6bfd2aed16ce6251818acdbb3d31d17aaebd745 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import org.apache.lucene.codecs.MutablePointValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FutureArrays;
import org.apache.lucene.util.IntroSelector;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.RadixSelector;
import org.apache.lucene.util.Selector;
import org.apache.lucene.util.StableMSBRadixSorter;
import org.apache.lucene.util.packed.PackedInts;
/** Utility APIs for sorting and partitioning buffered points.
*
* @lucene.internal */
public final class MutablePointsReaderUtils {
MutablePointsReaderUtils() {}
/** Sort the given {@link MutablePointValues} based on its packed value then doc ID. */
public static void sort(BKDConfig config, int maxDoc,
MutablePointValues reader, int from, int to) {
boolean sortedByDocID = true;
int prevDoc = 0;
for (int i = from; i < to; ++i) {
int doc = reader.getDocID(i);
if (doc < prevDoc) {
sortedByDocID = false;
break;
}
prevDoc = doc;
}
// No need to tie break on doc IDs if already sorted by doc ID, since we use a stable sort.
// This should be a common situation as IndexWriter accumulates data in doc ID order when
// index sorting is not enabled.
final int bitsPerDocId = sortedByDocID ? 0 : PackedInts.bitsRequired(maxDoc - 1);
new StableMSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void save(int i, int j) {
reader.save(i, j);
}
@Override
protected void restore(int i, int j) {
reader.restore(i, j);
}
@Override
protected int byteAt(int i, int k) {
if (k < config.packedBytesLength) {
return Byte.toUnsignedInt(reader.getByteAt(i, k));
} else {
final int shift = bitsPerDocId - ((k - config.packedBytesLength + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
}.sort(from, to);
}
/** Sort points on the given dimension. */
public static void sortByDim(BKDConfig config, int sortedDim, int[] commonPrefixLengths,
MutablePointValues reader, int from, int to,
BytesRef scratch1, BytesRef scratch2) {
final int start = sortedDim * config.bytesPerDim + commonPrefixLengths[sortedDim];
final int dimEnd = sortedDim * config.bytesPerDim + config.bytesPerDim;
// No need for a fancy radix sort here, this is called on the leaves only so
// there are not many values to sort
new IntroSorter() {
final BytesRef pivot = scratch1;
int pivotDoc = -1;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
reader.getValue(j, scratch2);
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + start, pivot.offset + dimEnd, scratch2.bytes,
scratch2.offset + start, scratch2.offset + dimEnd);
if (cmp == 0) {
cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + config.packedIndexBytesLength, pivot.offset + config.packedBytesLength,
scratch2.bytes, scratch2.offset + config.packedIndexBytesLength, scratch2.offset + config.packedBytesLength);
if (cmp == 0) {
cmp = pivotDoc - reader.getDocID(j);
}
}
return cmp;
}
}.sort(from, to);
}
/** Partition points around {@code mid}. All values on the left must be less
* than or equal to it and all values on the right must be greater than or
* equal to it. */
public static void partition(BKDConfig config, int maxDoc, int splitDim, int commonPrefixLen,
MutablePointValues reader, int from, int to, int mid,
BytesRef scratch1, BytesRef scratch2) {
final int dimOffset = splitDim * config.bytesPerDim + commonPrefixLen;
final int dimCmpBytes = config.bytesPerDim - commonPrefixLen;
final int dataCmpBytes = (config.numDims - config.numIndexDims) * config.bytesPerDim + dimCmpBytes;
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new RadixSelector(dataCmpBytes + (bitsPerDocId + 7) / 8) {
@Override
protected Selector getFallbackSelector(int k) {
final int dataStart = (k < dimCmpBytes) ? config.packedIndexBytesLength : config.packedIndexBytesLength + k - dimCmpBytes;
final int dataEnd = config.numDims * config.bytesPerDim;
return new IntroSelector() {
final BytesRef pivot = scratch1;
int pivotDoc;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
if (k < dimCmpBytes) {
reader.getValue(j, scratch2);
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dimOffset + k, pivot.offset + dimOffset + dimCmpBytes,
scratch2.bytes, scratch2.offset + dimOffset + k, scratch2.offset + dimOffset + dimCmpBytes);
if (cmp != 0) {
return cmp;
}
}
if (k < dataCmpBytes) {
reader.getValue(j, scratch2);
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
if (cmp != 0) {
return cmp;
}
}
return pivotDoc - reader.getDocID(j);
}
};
}
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected int byteAt(int i, int k) {
if (k < dimCmpBytes) {
return Byte.toUnsignedInt(reader.getByteAt(i, dimOffset + k));
} else if (k < dataCmpBytes) {
return Byte.toUnsignedInt(reader.getByteAt(i, config.packedIndexBytesLength + k - dimCmpBytes));
} else {
final int shift = bitsPerDocId - ((k - dataCmpBytes + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
}.select(from, to, mid);
}
}