| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.util; |
| |
| import java.util.Arrays; |
| |
| /** Radix selector. |
| * <p>This implementation works similarly to a MSB radix sort except that it |
| * only recurses into the sub partition that contains the desired value. |
| * @lucene.internal */ |
| public abstract class RadixSelector extends Selector { |
| |
| // after that many levels of recursion we fall back to introselect anyway |
| // this is used as a protection against the fact that radix sort performs |
| // worse when there are long common prefixes (probably because of cache |
| // locality) |
| private static final int LEVEL_THRESHOLD = 8; |
| // size of histograms: 256 + 1 to indicate that the string is finished |
| private static final int HISTOGRAM_SIZE = 257; |
| // buckets below this size will be sorted with introselect |
| private static final int LENGTH_THRESHOLD = 100; |
| |
| // we store one histogram per recursion level |
| private final int[] histogram = new int[HISTOGRAM_SIZE]; |
| |
| private final int maxLength; |
| |
| /** |
| * Sole constructor. |
| * @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown. |
| */ |
| protected RadixSelector(int maxLength) { |
| this.maxLength = maxLength; |
| } |
| |
| /** Return the k-th byte of the entry at index {@code i}, or {@code -1} if |
| * its length is less than or equal to {@code k}. This may only be called |
| * with a value of {@code i} between {@code 0} included and |
| * {@code maxLength} excluded. */ |
| protected abstract int byteAt(int i, int k); |
| |
| /** Get a fall-back selector which may assume that the first {@code d} bytes |
| * of all compared strings are equal. This fallback selector is used when |
| * the range becomes narrow or when the maximum level of recursion has |
| * been exceeded. */ |
| protected Selector getFallbackSelector(int d) { |
| return new IntroSelector() { |
| @Override |
| protected void swap(int i, int j) { |
| RadixSelector.this.swap(i, j); |
| } |
| |
| @Override |
| protected int compare(int i, int j) { |
| for (int o = d; o < maxLength; ++o) { |
| final int b1 = byteAt(i, o); |
| final int b2 = byteAt(j, o); |
| if (b1 != b2) { |
| return b1 - b2; |
| } else if (b1 == -1) { |
| break; |
| } |
| } |
| return 0; |
| } |
| |
| @Override |
| protected void setPivot(int i) { |
| pivot.setLength(0); |
| for (int o = d; o < maxLength; ++o) { |
| final int b = byteAt(i, o); |
| if (b == -1) { |
| break; |
| } |
| pivot.append((byte) b); |
| } |
| } |
| |
| @Override |
| protected int comparePivot(int j) { |
| for (int o = 0; o < pivot.length(); ++o) { |
| final int b1 = pivot.byteAt(o) & 0xff; |
| final int b2 = byteAt(j, d + o); |
| if (b1 != b2) { |
| return b1 - b2; |
| } |
| } |
| if (d + pivot.length() == maxLength) { |
| return 0; |
| } |
| return -1 - byteAt(j, d + pivot.length()); |
| } |
| |
| private final BytesRefBuilder pivot = new BytesRefBuilder(); |
| }; |
| } |
| |
| @Override |
| public void select(int from, int to, int k) { |
| checkArgs(from, to, k); |
| select(from, to, k, 0); |
| } |
| |
| private void select(int from, int to, int k, int d) { |
| if (to - from <= LENGTH_THRESHOLD || d >= LEVEL_THRESHOLD) { |
| getFallbackSelector(d).select(from, to, k); |
| } else { |
| radixSelect(from, to, k, d); |
| } |
| } |
| |
| private void radixSelect(int from, int to, int k, int d) { |
| final int[] histogram = this.histogram; |
| Arrays.fill(histogram, 0); |
| |
| buildHistogram(from, to, d, histogram); |
| |
| int bucketFrom = from; |
| for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) { |
| final int bucketTo = bucketFrom + histogram[bucket]; |
| |
| if (bucketTo > k) { |
| partition(from, to, bucket, bucketFrom, bucketTo, d); |
| |
| if (bucket != 0 && d + 1 < maxLength) { |
| // all elements in bucket 0 are equal so we only need to recurse if bucket != 0 |
| select(bucketFrom, bucketTo, k, d + 1); |
| } |
| return; |
| } |
| bucketFrom = bucketTo; |
| } |
| throw new AssertionError("Unreachable code"); |
| } |
| |
| /** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */ |
| private int getBucket(int i, int k) { |
| return byteAt(i, k) + 1; |
| } |
| |
| /** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */ |
| private int[] buildHistogram(int from, int to, int k, int[] histogram) { |
| for (int i = from; i < to; ++i) { |
| histogram[getBucket(i, k)]++; |
| } |
| return histogram; |
| } |
| |
| /** Reorder elements so that all of them that fall into {@code bucket} are |
| * between offsets {@code bucketFrom} and {@code bucketTo}. */ |
| private void partition(int from, int to, int bucket, int bucketFrom, int bucketTo, int d) { |
| int left = from; |
| int right = to - 1; |
| |
| int slot = bucketFrom; |
| |
| for (;;) { |
| int leftBucket = getBucket(left, d); |
| int rightBucket = getBucket(right, d); |
| |
| while (leftBucket <= bucket && left < bucketFrom) { |
| if (leftBucket == bucket) { |
| swap(left, slot++); |
| } else { |
| ++left; |
| } |
| leftBucket = getBucket(left, d); |
| } |
| |
| while (rightBucket >= bucket && right >= bucketTo) { |
| if (rightBucket == bucket) { |
| swap(right, slot++); |
| } else { |
| --right; |
| } |
| rightBucket = getBucket(right, d); |
| } |
| |
| if (left < bucketFrom && right >= bucketTo) { |
| swap(left++, right--); |
| } else { |
| assert left == bucketFrom; |
| assert right == bucketTo - 1; |
| break; |
| } |
| } |
| } |
| } |