| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search; |
| |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| |
| import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; |
| |
| /** Selects a value from the document's set to use as the representative value */ |
| public class SortedSetSelector { |
| |
/**
 * The strategy used to pick one representative value out of a document's set of values.
 * <p>
 * Limitations:
 * <ul>
 *   <li>A field holding {@link Integer#MAX_VALUE} or more unique values
 *       cannot be used.
 *   <li>Every selector except {@link Type#MIN} needs optional codec support.
 *       Several codecs shipped with Lucene, including the current default
 *       codec, provide it.
 * </ul>
 */
public enum Type {
  /** Picks the smallest value in the set. */
  MIN,

  /** Picks the largest value in the set. */
  MAX,

  /**
   * Picks the value in the middle of the set.
   * <p>
   * When the set holds an even number of values, the lower of the two middle values wins.
   */
  MIDDLE_MIN,

  /**
   * Picks the value in the middle of the set.
   * <p>
   * When the set holds an even number of values, the higher of the two middle values wins.
   */
  MIDDLE_MAX
}
| |
| /** Wraps a multi-valued SortedSetDocValues as a single-valued view, using the specified selector */ |
| public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) { |
| if (sortedSet.getValueCount() >= Integer.MAX_VALUE) { |
| throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported"); |
| } |
| |
| SortedDocValues singleton = DocValues.unwrapSingleton(sortedSet); |
| if (singleton != null) { |
| // it's actually single-valued in practice, but indexed as multi-valued, |
| // so just sort on the underlying single-valued dv directly. |
| // regardless of selector type, this optimization is safe! |
| return singleton; |
| } else { |
| switch(selector) { |
| case MIN: return new MinValue(sortedSet); |
| case MAX: return new MaxValue(sortedSet); |
| case MIDDLE_MIN: return new MiddleMinValue(sortedSet); |
| case MIDDLE_MAX: return new MiddleMaxValue(sortedSet); |
| default: |
| throw new AssertionError(); |
| } |
| } |
| } |
| |
| /** Wraps a SortedSetDocValues and returns the first ordinal (min) */ |
| static class MinValue extends SortedDocValues { |
| final SortedSetDocValues in; |
| private int ord; |
| |
| MinValue(SortedSetDocValues in) { |
| this.in = in; |
| } |
| |
| @Override |
| public int docID() { |
| return in.docID(); |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| in.nextDoc(); |
| setOrd(); |
| return docID(); |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| in.advance(target); |
| setOrd(); |
| return docID(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| if (in.advanceExact(target)) { |
| setOrd(); |
| return true; |
| } |
| return false; |
| } |
| |
| @Override |
| public long cost() { |
| return in.cost(); |
| } |
| |
| @Override |
| public int ordValue() { |
| return ord; |
| } |
| |
| @Override |
| public BytesRef lookupOrd(int ord) throws IOException { |
| return in.lookupOrd(ord); |
| } |
| |
| @Override |
| public int getValueCount() { |
| return (int) in.getValueCount(); |
| } |
| |
| @Override |
| public int lookupTerm(BytesRef key) throws IOException { |
| return (int) in.lookupTerm(key); |
| } |
| |
| private void setOrd() throws IOException { |
| if (docID() != NO_MORE_DOCS) { |
| ord = (int) in.nextOrd(); |
| } else { |
| ord = (int) NO_MORE_ORDS; |
| } |
| } |
| } |
| |
| /** Wraps a SortedSetDocValues and returns the last ordinal (max) */ |
| static class MaxValue extends SortedDocValues { |
| final SortedSetDocValues in; |
| private int ord; |
| |
| MaxValue(SortedSetDocValues in) { |
| this.in = in; |
| } |
| |
| @Override |
| public int docID() { |
| return in.docID(); |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| in.nextDoc(); |
| setOrd(); |
| return docID(); |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| in.advance(target); |
| setOrd(); |
| return docID(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| if (in.advanceExact(target)) { |
| setOrd(); |
| return true; |
| } |
| return false; |
| } |
| |
| @Override |
| public long cost() { |
| return in.cost(); |
| } |
| |
| @Override |
| public int ordValue() { |
| return ord; |
| } |
| |
| @Override |
| public BytesRef lookupOrd(int ord) throws IOException { |
| return in.lookupOrd(ord); |
| } |
| |
| @Override |
| public int getValueCount() { |
| return (int) in.getValueCount(); |
| } |
| |
| @Override |
| public int lookupTerm(BytesRef key) throws IOException { |
| return (int) in.lookupTerm(key); |
| } |
| |
| private void setOrd() throws IOException { |
| if (docID() != NO_MORE_DOCS) { |
| while(true) { |
| long nextOrd = in.nextOrd(); |
| if (nextOrd == NO_MORE_ORDS) { |
| break; |
| } |
| ord = (int) nextOrd; |
| } |
| } else { |
| ord = (int) NO_MORE_ORDS; |
| } |
| } |
| } |
| |
| /** Wraps a SortedSetDocValues and returns the middle ordinal (or min of the two) */ |
| static class MiddleMinValue extends SortedDocValues { |
| final SortedSetDocValues in; |
| private int ord; |
| private int[] ords = new int[8]; |
| |
| MiddleMinValue(SortedSetDocValues in) { |
| this.in = in; |
| } |
| |
| @Override |
| public int docID() { |
| return in.docID(); |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| in.nextDoc(); |
| setOrd(); |
| return docID(); |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| in.advance(target); |
| setOrd(); |
| return docID(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| if (in.advanceExact(target)) { |
| setOrd(); |
| return true; |
| } |
| return false; |
| } |
| |
| @Override |
| public long cost() { |
| return in.cost(); |
| } |
| |
| @Override |
| public int ordValue() { |
| return ord; |
| } |
| |
| @Override |
| public BytesRef lookupOrd(int ord) throws IOException { |
| return in.lookupOrd(ord); |
| } |
| |
| @Override |
| public int getValueCount() { |
| return (int) in.getValueCount(); |
| } |
| |
| @Override |
| public int lookupTerm(BytesRef key) throws IOException { |
| return (int) in.lookupTerm(key); |
| } |
| |
| private void setOrd() throws IOException { |
| if (docID() != NO_MORE_DOCS) { |
| int upto = 0; |
| while (true) { |
| long nextOrd = in.nextOrd(); |
| if (nextOrd == NO_MORE_ORDS) { |
| break; |
| } |
| if (upto == ords.length) { |
| ords = ArrayUtil.grow(ords); |
| } |
| ords[upto++] = (int) nextOrd; |
| } |
| |
| if (upto == 0) { |
| // iterator should not have returned this docID if it has no ords: |
| assert false; |
| ord = (int) NO_MORE_ORDS; |
| } else { |
| ord = ords[(upto-1) >>> 1]; |
| } |
| } else { |
| ord = (int) NO_MORE_ORDS; |
| } |
| } |
| } |
| |
| /** Wraps a SortedSetDocValues and returns the middle ordinal (or max of the two) */ |
| static class MiddleMaxValue extends SortedDocValues { |
| final SortedSetDocValues in; |
| private int ord; |
| private int[] ords = new int[8]; |
| |
| MiddleMaxValue(SortedSetDocValues in) { |
| this.in = in; |
| } |
| |
| @Override |
| public int docID() { |
| return in.docID(); |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| in.nextDoc(); |
| setOrd(); |
| return docID(); |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| in.advance(target); |
| setOrd(); |
| return docID(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| if (in.advanceExact(target)) { |
| setOrd(); |
| return true; |
| } |
| return false; |
| } |
| |
| @Override |
| public long cost() { |
| return in.cost(); |
| } |
| |
| @Override |
| public int ordValue() { |
| return ord; |
| } |
| |
| @Override |
| public BytesRef lookupOrd(int ord) throws IOException { |
| return in.lookupOrd(ord); |
| } |
| |
| @Override |
| public int getValueCount() { |
| return (int) in.getValueCount(); |
| } |
| |
| @Override |
| public int lookupTerm(BytesRef key) throws IOException { |
| return (int) in.lookupTerm(key); |
| } |
| |
| private void setOrd() throws IOException { |
| if (docID() != NO_MORE_DOCS) { |
| int upto = 0; |
| while (true) { |
| long nextOrd = in.nextOrd(); |
| if (nextOrd == NO_MORE_ORDS) { |
| break; |
| } |
| if (upto == ords.length) { |
| ords = ArrayUtil.grow(ords); |
| } |
| ords[upto++] = (int) nextOrd; |
| } |
| |
| if (upto == 0) { |
| // iterator should not have returned this docID if it has no ords: |
| assert false; |
| ord = (int) NO_MORE_ORDS; |
| } else { |
| ord = ords[upto >>> 1]; |
| } |
| } else { |
| ord = (int) NO_MORE_ORDS; |
| } |
| } |
| } |
| } |