blob: 5ae4febb8ce86e07db28ebfeecc7acefd5adf242 [file] [log] [blame]
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
/** Selects a value from the document's set to use as the representative value */
public class SortedSetSelector {
/**
* Type of selection to perform.
* <p>
* Limitations:
* <ul>
* <li>Fields containing {@link Integer#MAX_VALUE} or more unique values
* are unsupported; {@link SortedSetSelector#wrap} rejects them with an
* {@link UnsupportedOperationException}.
* <li>Selectors other than {@link Type#MIN} require
* optional codec support: the doc values handed to
* {@link SortedSetSelector#wrap} must be a {@link RandomAccessOrds},
* otherwise an {@link UnsupportedOperationException} is thrown.
* However several codecs provided by Lucene,
* including the current default codec, support this.
* </ul>
*/
public enum Type {
/**
* Selects the minimum value in the set.
* <p>
* This is the only selector that works without random-access ordinals.
*/
MIN,
/**
* Selects the maximum value in the set.
*/
MAX,
/**
* Selects the middle value in the set.
* <p>
* If the set has an even number of values, the lower of the middle two is chosen
* (the ordinal at index {@code (count-1) >>> 1} is used).
*/
MIDDLE_MIN,
/**
* Selects the middle value in the set.
* <p>
* If the set has an even number of values, the higher of the middle two is chosen
* (the ordinal at index {@code count >>> 1} is used).
*/
MIDDLE_MAX
}
/**
 * Exposes a multi-valued {@link SortedSetDocValues} as a single-valued
 * {@link SortedDocValues} view, choosing one ordinal per document with the given selector.
 * <p>
 * When {@link DocValues#unwrapSingleton} yields a non-null instance (the field was indexed
 * as multi-valued but is single-valued in practice), that instance is returned as-is and
 * the selector plays no role.
 *
 * @param sortedSet the multi-valued doc values to wrap
 * @param selector which value of each document's set represents the document
 * @return a single-valued view over {@code sortedSet}
 * @throws UnsupportedOperationException if the field reports {@link Integer#MAX_VALUE} or
 *         more unique values, or if a selector other than {@link Type#MIN} is requested
 *         and {@code sortedSet} is not a {@link RandomAccessOrds}
 */
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) {
  // The wrapper classes narrow long ordinals to int, so oversized fields are refused up front.
  if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
    throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported");
  }

  // Every selector picks the same value when there is at most one per document,
  // so the underlying single-valued instance can be used directly.
  final SortedDocValues singleValued = DocValues.unwrapSingleton(sortedSet);
  if (singleValued != null) {
    return singleValued;
  }

  // MIN only needs sequential iteration, so it works with any implementation.
  if (selector == Type.MIN) {
    return new MinValue(sortedSet);
  }

  // All remaining selectors address ordinals by position.
  if (!(sortedSet instanceof RandomAccessOrds)) {
    throw new UnsupportedOperationException("codec does not support random access ordinals, cannot use selector: " + selector);
  }
  final RandomAccessOrds positionalOrds = (RandomAccessOrds) sortedSet;

  switch (selector) {
    case MAX:
      return new MaxValue(positionalOrds);
    case MIDDLE_MIN:
      return new MiddleMinValue(positionalOrds);
    case MIDDLE_MAX:
      return new MiddleMaxValue(positionalOrds);
    default:
      // MIN was handled above; any other constant is unexpected here.
      throw new AssertionError();
  }
}
/**
 * Single-valued view over a {@link SortedSetDocValues} that reports, for each document,
 * the first ordinal the wrapped instance returns (min).
 * <p>
 * All long results of the wrapped instance are narrowed to int;
 * {@code SortedSetSelector.wrap} refuses fields with {@link Integer#MAX_VALUE} or more
 * unique values before creating this view.
 */
static class MinValue extends SortedDocValues {
  final SortedSetDocValues in;

  /** Creates a view that delegates to {@code delegate}. */
  MinValue(SortedSetDocValues delegate) {
    this.in = delegate;
  }

  /** Positions the wrapped instance on {@code docID} and returns its first ordinal as an int. */
  @Override
  public int getOrd(int docID) {
    in.setDocument(docID);
    // NOTE(review): presumably a negative end-of-ords sentinel comes back when the
    // document has no values -- confirm against SortedSetDocValues.
    final long firstOrd = in.nextOrd();
    return (int) firstOrd;
  }

  /** Delegates the ordinal-to-term lookup to the wrapped instance. */
  @Override
  public void lookupOrd(int ord, BytesRef result) {
    in.lookupOrd(ord, result);
  }

  /** Returns the wrapped instance's value count, narrowed to int. */
  @Override
  public int getValueCount() {
    final long uniqueValues = in.getValueCount();
    return (int) uniqueValues;
  }

  /** Delegates the term lookup to the wrapped instance, narrowing the result to int. */
  @Override
  public int lookupTerm(BytesRef key) {
    final long found = in.lookupTerm(key);
    return (int) found;
  }
}
/**
 * Single-valued view over a {@link RandomAccessOrds} that reports, for each document,
 * the ordinal at the last position of the document's set (max).
 * <p>
 * All long results of the wrapped instance are narrowed to int;
 * {@code SortedSetSelector.wrap} refuses fields with {@link Integer#MAX_VALUE} or more
 * unique values before creating this view.
 */
static class MaxValue extends SortedDocValues {
  final RandomAccessOrds in;

  /** Creates a view that delegates to {@code delegate}. */
  MaxValue(RandomAccessOrds delegate) {
    this.in = delegate;
  }

  /**
   * Returns the ordinal at the last position for {@code docID},
   * or -1 when the document has no values.
   */
  @Override
  public int getOrd(int docID) {
    in.setDocument(docID);
    final int valuesInDoc = in.cardinality();
    if (valuesInDoc == 0) {
      return -1;
    }
    final int lastIndex = valuesInDoc - 1;
    return (int) in.ordAt(lastIndex);
  }

  /** Delegates the ordinal-to-term lookup to the wrapped instance. */
  @Override
  public void lookupOrd(int ord, BytesRef result) {
    in.lookupOrd(ord, result);
  }

  /** Returns the wrapped instance's value count, narrowed to int. */
  @Override
  public int getValueCount() {
    final long uniqueValues = in.getValueCount();
    return (int) uniqueValues;
  }

  /** Delegates the term lookup to the wrapped instance, narrowing the result to int. */
  @Override
  public int lookupTerm(BytesRef key) {
    final long found = in.lookupTerm(key);
    return (int) found;
  }
}
/**
 * Single-valued view over a {@link RandomAccessOrds} that reports, for each document,
 * the ordinal at the middle position of the document's set; with an even number of
 * values the lower of the two middle positions is used.
 * <p>
 * All long results of the wrapped instance are narrowed to int;
 * {@code SortedSetSelector.wrap} refuses fields with {@link Integer#MAX_VALUE} or more
 * unique values before creating this view.
 */
static class MiddleMinValue extends SortedDocValues {
  final RandomAccessOrds in;

  /** Creates a view that delegates to {@code delegate}. */
  MiddleMinValue(RandomAccessOrds delegate) {
    this.in = delegate;
  }

  /**
   * Returns the ordinal at position {@code (count-1) >>> 1} for {@code docID},
   * or -1 when the document has no values.
   */
  @Override
  public int getOrd(int docID) {
    in.setDocument(docID);
    final int valuesInDoc = in.cardinality();
    // Only evaluated for a non-empty set, so (valuesInDoc - 1) is never negative here.
    return valuesInDoc == 0 ? -1 : (int) in.ordAt((valuesInDoc - 1) >>> 1);
  }

  /** Delegates the ordinal-to-term lookup to the wrapped instance. */
  @Override
  public void lookupOrd(int ord, BytesRef result) {
    in.lookupOrd(ord, result);
  }

  /** Returns the wrapped instance's value count, narrowed to int. */
  @Override
  public int getValueCount() {
    final long uniqueValues = in.getValueCount();
    return (int) uniqueValues;
  }

  /** Delegates the term lookup to the wrapped instance, narrowing the result to int. */
  @Override
  public int lookupTerm(BytesRef key) {
    final long found = in.lookupTerm(key);
    return (int) found;
  }
}
/**
 * Single-valued view over a {@link RandomAccessOrds} that reports, for each document,
 * the ordinal at the middle position of the document's set; with an even number of
 * values the higher of the two middle positions is used.
 * <p>
 * All long results of the wrapped instance are narrowed to int;
 * {@code SortedSetSelector.wrap} refuses fields with {@link Integer#MAX_VALUE} or more
 * unique values before creating this view.
 */
static class MiddleMaxValue extends SortedDocValues {
  final RandomAccessOrds in;

  /** Creates a view that delegates to {@code delegate}. */
  MiddleMaxValue(RandomAccessOrds delegate) {
    this.in = delegate;
  }

  /**
   * Returns the ordinal at position {@code count >>> 1} for {@code docID},
   * or -1 when the document has no values.
   */
  @Override
  public int getOrd(int docID) {
    in.setDocument(docID);
    final int valuesInDoc = in.cardinality();
    if (valuesInDoc == 0) {
      return -1;
    }
    final int upperMiddle = valuesInDoc >>> 1;
    return (int) in.ordAt(upperMiddle);
  }

  /** Delegates the ordinal-to-term lookup to the wrapped instance. */
  @Override
  public void lookupOrd(int ord, BytesRef result) {
    in.lookupOrd(ord, result);
  }

  /** Returns the wrapped instance's value count, narrowed to int. */
  @Override
  public int getValueCount() {
    final long uniqueValues = in.getValueCount();
    return (int) uniqueValues;
  }

  /** Delegates the term lookup to the wrapped instance, narrowing the result to int. */
  @Override
  public int lookupTerm(BytesRef key) {
    final long found = in.lookupTerm(key);
    return (int) found;
  }
}
}