| package org.apache.lucene.search; |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.document.DoubleField; // for javadocs |
| import org.apache.lucene.document.FloatField; // for javadocs |
| import org.apache.lucene.document.IntField; // for javadocs |
| import org.apache.lucene.document.LongField; // for javadocs |
| import org.apache.lucene.index.AtomicReader; // for javadocs |
| import org.apache.lucene.index.AtomicReaderContext; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.NumericUtils; |
| |
| /** |
| * A range filter built on top of numeric doc values field |
| * (from {@link AtomicReader#getNumericDocValues(String)}). |
| * |
| * <p>{@code DocValuesRangeFilter} builds a single cache for the field the first time it is used. |
| * Each subsequent {@code DocValuesRangeFilter} on the same field then reuses this cache, |
| * even if the range itself changes. |
| * |
| * <p>This means that {@code DocValuesRangeFilter} is much faster (sometimes more than 100x as fast) |
| * than building a {@link TermRangeFilter}, if using a {@link #newStringRange}. |
| * However, if the range never changes it is slower (around 2x as slow) than building |
| * a CachingWrapperFilter on top of a single {@link TermRangeFilter}. |
| * |
| * For numeric data types, this filter may be significantly faster than {@link NumericRangeFilter}. |
| * Furthermore, it does not need the numeric values encoded |
| * by {@link IntField}, {@link FloatField}, {@link |
| * LongField} or {@link DoubleField}. But |
| * it has the limitation that it only works with exactly one value per document (see below). |
| * |
| * <p>As with all {@link AtomicReader#getNumericDocValues} based functionality, |
| * {@code DocValuesRangeFilter} is only valid for |
| * fields which have exactly one term for each document (except for {@link #newStringRange} |
| * where 0 terms are also allowed). Due to historical reasons, for numeric ranges |
| * a value of 0 is assumed for all documents that do not have a numeric value. |
| * |
| * <p>Thus it works on dates, prices and other single value fields but will not work on |
| * regular text fields. It is preferable to use a <code>NOT_ANALYZED</code> field to ensure that |
| * there is only a single term. |
| * |
| * <p>This class does not have a public constructor; use one of the static factory methods available, |
| * which create a correct instance for the different data types. |
| */ |
| // TODO: use docsWithField to handle empty properly |
| public abstract class DocValuesRangeFilter<T> extends Filter { |
| final String field; |
| final T lowerVal; |
| final T upperVal; |
| final boolean includeLower; |
| final boolean includeUpper; |
| |
| private DocValuesRangeFilter(String field, T lowerVal, T upperVal, boolean includeLower, boolean includeUpper) { |
| this.field = field; |
| this.lowerVal = lowerVal; |
| this.upperVal = upperVal; |
| this.includeLower = includeLower; |
| this.includeUpper = includeUpper; |
| } |
| |
| /** This method is implemented for each data type */ |
| @Override |
| public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException; |
| |
| /** |
| * Creates a string range filter using {@link AtomicReader#getSortedDocValues(String)}. This works with all |
| * fields containing zero or one term in the field. The range can be half-open by setting one |
| * of the values to <code>null</code>. |
| */ |
| public static DocValuesRangeFilter<String> newStringRange(String field, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { |
| return new DocValuesRangeFilter<String>(field, lowerVal, upperVal, includeLower, includeUpper) { |
| @Override |
| public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { |
| final SortedDocValues fcsi = DocValues.getSorted(context.reader(), field); |
| final int lowerPoint = lowerVal == null ? -1 : fcsi.lookupTerm(new BytesRef(lowerVal)); |
| final int upperPoint = upperVal == null ? -1 : fcsi.lookupTerm(new BytesRef(upperVal)); |
| |
| final int inclusiveLowerPoint, inclusiveUpperPoint; |
| |
| // Hints: |
| // * binarySearchLookup returns -1, if value was null. |
| // * the value is <0 if no exact hit was found, the returned value |
| // is (-(insertion point) - 1) |
| if (lowerPoint == -1 && lowerVal == null) { |
| inclusiveLowerPoint = 0; |
| } else if (includeLower && lowerPoint >= 0) { |
| inclusiveLowerPoint = lowerPoint; |
| } else if (lowerPoint >= 0) { |
| inclusiveLowerPoint = lowerPoint + 1; |
| } else { |
| inclusiveLowerPoint = Math.max(0, -lowerPoint - 1); |
| } |
| |
| if (upperPoint == -1 && upperVal == null) { |
| inclusiveUpperPoint = Integer.MAX_VALUE; |
| } else if (includeUpper && upperPoint >= 0) { |
| inclusiveUpperPoint = upperPoint; |
| } else if (upperPoint >= 0) { |
| inclusiveUpperPoint = upperPoint - 1; |
| } else { |
| inclusiveUpperPoint = -upperPoint - 2; |
| } |
| |
| if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint) { |
| return null; |
| } |
| |
| assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0; |
| |
| return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) { |
| @Override |
| protected final boolean matchDoc(int doc) { |
| final int docOrd = fcsi.getOrd(doc); |
| return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint; |
| } |
| }; |
| } |
| }; |
| } |
| |
| /** |
| * Creates a BytesRef range filter using {@link AtomicReader#getSortedDocValues(String)}. This works with all |
| * fields containing zero or one term in the field. The range can be half-open by setting one |
| * of the values to <code>null</code>. |
| */ |
| // TODO: bogus that newStringRange doesnt share this code... generics hell |
| public static DocValuesRangeFilter<BytesRef> newBytesRefRange(String field, BytesRef lowerVal, BytesRef upperVal, boolean includeLower, boolean includeUpper) { |
| return new DocValuesRangeFilter<BytesRef>(field, lowerVal, upperVal, includeLower, includeUpper) { |
| @Override |
| public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { |
| final SortedDocValues fcsi = DocValues.getSorted(context.reader(), field); |
| final int lowerPoint = lowerVal == null ? -1 : fcsi.lookupTerm(lowerVal); |
| final int upperPoint = upperVal == null ? -1 : fcsi.lookupTerm(upperVal); |
| |
| final int inclusiveLowerPoint, inclusiveUpperPoint; |
| |
| // Hints: |
| // * binarySearchLookup returns -1, if value was null. |
| // * the value is <0 if no exact hit was found, the returned value |
| // is (-(insertion point) - 1) |
| if (lowerPoint == -1 && lowerVal == null) { |
| inclusiveLowerPoint = 0; |
| } else if (includeLower && lowerPoint >= 0) { |
| inclusiveLowerPoint = lowerPoint; |
| } else if (lowerPoint >= 0) { |
| inclusiveLowerPoint = lowerPoint + 1; |
| } else { |
| inclusiveLowerPoint = Math.max(0, -lowerPoint - 1); |
| } |
| |
| if (upperPoint == -1 && upperVal == null) { |
| inclusiveUpperPoint = Integer.MAX_VALUE; |
| } else if (includeUpper && upperPoint >= 0) { |
| inclusiveUpperPoint = upperPoint; |
| } else if (upperPoint >= 0) { |
| inclusiveUpperPoint = upperPoint - 1; |
| } else { |
| inclusiveUpperPoint = -upperPoint - 2; |
| } |
| |
| if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint) { |
| return null; |
| } |
| |
| assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0; |
| |
| return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) { |
| @Override |
| protected final boolean matchDoc(int doc) { |
| final int docOrd = fcsi.getOrd(doc); |
| return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint; |
| } |
| }; |
| } |
| }; |
| } |
| |
| /** |
| * Creates a numeric range filter using {@link AtomicReader#getSortedDocValues(String)}. This works with all |
| * int fields containing exactly one numeric term in the field. The range can be half-open by setting one |
| * of the values to <code>null</code>. |
| */ |
| public static DocValuesRangeFilter<Integer> newIntRange(String field, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) { |
| return new DocValuesRangeFilter<Integer>(field, lowerVal, upperVal, includeLower, includeUpper) { |
| @Override |
| public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { |
| final int inclusiveLowerPoint, inclusiveUpperPoint; |
| if (lowerVal != null) { |
| int i = lowerVal.intValue(); |
| if (!includeLower && i == Integer.MAX_VALUE) |
| return null; |
| inclusiveLowerPoint = includeLower ? i : (i + 1); |
| } else { |
| inclusiveLowerPoint = Integer.MIN_VALUE; |
| } |
| if (upperVal != null) { |
| int i = upperVal.intValue(); |
| if (!includeUpper && i == Integer.MIN_VALUE) |
| return null; |
| inclusiveUpperPoint = includeUpper ? i : (i - 1); |
| } else { |
| inclusiveUpperPoint = Integer.MAX_VALUE; |
| } |
| |
| if (inclusiveLowerPoint > inclusiveUpperPoint) |
| return null; |
| |
| final NumericDocValues values = DocValues.getNumeric(context.reader(), field); |
| return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) { |
| @Override |
| protected boolean matchDoc(int doc) { |
| final int value = (int) values.get(doc); |
| return value >= inclusiveLowerPoint && value <= inclusiveUpperPoint; |
| } |
| }; |
| } |
| }; |
| } |
| |
| /** |
| * Creates a numeric range filter using {@link AtomicReader#getNumericDocValues(String)}. This works with all |
| * long fields containing exactly one numeric term in the field. The range can be half-open by setting one |
| * of the values to <code>null</code>. |
| */ |
| public static DocValuesRangeFilter<Long> newLongRange(String field, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) { |
| return new DocValuesRangeFilter<Long>(field, lowerVal, upperVal, includeLower, includeUpper) { |
| @Override |
| public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { |
| final long inclusiveLowerPoint, inclusiveUpperPoint; |
| if (lowerVal != null) { |
| long i = lowerVal.longValue(); |
| if (!includeLower && i == Long.MAX_VALUE) |
| return null; |
| inclusiveLowerPoint = includeLower ? i : (i + 1L); |
| } else { |
| inclusiveLowerPoint = Long.MIN_VALUE; |
| } |
| if (upperVal != null) { |
| long i = upperVal.longValue(); |
| if (!includeUpper && i == Long.MIN_VALUE) |
| return null; |
| inclusiveUpperPoint = includeUpper ? i : (i - 1L); |
| } else { |
| inclusiveUpperPoint = Long.MAX_VALUE; |
| } |
| |
| if (inclusiveLowerPoint > inclusiveUpperPoint) |
| return null; |
| |
| final NumericDocValues values = DocValues.getNumeric(context.reader(), field); |
| return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) { |
| @Override |
| protected boolean matchDoc(int doc) { |
| final long value = values.get(doc); |
| return value >= inclusiveLowerPoint && value <= inclusiveUpperPoint; |
| } |
| }; |
| } |
| }; |
| } |
| |
| /** |
| * Creates a numeric range filter using {@link AtomicReader#getNumericDocValues(String)}. This works with all |
| * float fields containing exactly one numeric term in the field. The range can be half-open by setting one |
| * of the values to <code>null</code>. |
| */ |
| public static DocValuesRangeFilter<Float> newFloatRange(String field, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) { |
| return new DocValuesRangeFilter<Float>(field, lowerVal, upperVal, includeLower, includeUpper) { |
| @Override |
| public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { |
| // we transform the floating point numbers to sortable integers |
| // using NumericUtils to easier find the next bigger/lower value |
| final float inclusiveLowerPoint, inclusiveUpperPoint; |
| if (lowerVal != null) { |
| float f = lowerVal.floatValue(); |
| if (!includeUpper && f > 0.0f && Float.isInfinite(f)) |
| return null; |
| int i = NumericUtils.floatToSortableInt(f); |
| inclusiveLowerPoint = NumericUtils.sortableIntToFloat( includeLower ? i : (i + 1) ); |
| } else { |
| inclusiveLowerPoint = Float.NEGATIVE_INFINITY; |
| } |
| if (upperVal != null) { |
| float f = upperVal.floatValue(); |
| if (!includeUpper && f < 0.0f && Float.isInfinite(f)) |
| return null; |
| int i = NumericUtils.floatToSortableInt(f); |
| inclusiveUpperPoint = NumericUtils.sortableIntToFloat( includeUpper ? i : (i - 1) ); |
| } else { |
| inclusiveUpperPoint = Float.POSITIVE_INFINITY; |
| } |
| |
| if (inclusiveLowerPoint > inclusiveUpperPoint) |
| return null; |
| |
| final NumericDocValues values = DocValues.getNumeric(context.reader(), field); |
| return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) { |
| @Override |
| protected boolean matchDoc(int doc) { |
| final float value = Float.intBitsToFloat((int)values.get(doc)); |
| return value >= inclusiveLowerPoint && value <= inclusiveUpperPoint; |
| } |
| }; |
| } |
| }; |
| } |
| |
| /** |
| * Creates a numeric range filter using {@link AtomicReader#getNumericDocValues(String)}. This works with all |
| * double fields containing exactly one numeric term in the field. The range can be half-open by setting one |
| * of the values to <code>null</code>. |
| */ |
| public static DocValuesRangeFilter<Double> newDoubleRange(String field, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) { |
| return new DocValuesRangeFilter<Double>(field, lowerVal, upperVal, includeLower, includeUpper) { |
| @Override |
| public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { |
| // we transform the floating point numbers to sortable integers |
| // using NumericUtils to easier find the next bigger/lower value |
| final double inclusiveLowerPoint, inclusiveUpperPoint; |
| if (lowerVal != null) { |
| double f = lowerVal.doubleValue(); |
| if (!includeUpper && f > 0.0 && Double.isInfinite(f)) |
| return null; |
| long i = NumericUtils.doubleToSortableLong(f); |
| inclusiveLowerPoint = NumericUtils.sortableLongToDouble( includeLower ? i : (i + 1L) ); |
| } else { |
| inclusiveLowerPoint = Double.NEGATIVE_INFINITY; |
| } |
| if (upperVal != null) { |
| double f = upperVal.doubleValue(); |
| if (!includeUpper && f < 0.0 && Double.isInfinite(f)) |
| return null; |
| long i = NumericUtils.doubleToSortableLong(f); |
| inclusiveUpperPoint = NumericUtils.sortableLongToDouble( includeUpper ? i : (i - 1L) ); |
| } else { |
| inclusiveUpperPoint = Double.POSITIVE_INFINITY; |
| } |
| |
| if (inclusiveLowerPoint > inclusiveUpperPoint) |
| return null; |
| |
| final NumericDocValues values = DocValues.getNumeric(context.reader(), field); |
| // ignore deleted docs if range doesn't contain 0 |
| return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) { |
| @Override |
| protected boolean matchDoc(int doc) { |
| final double value = Double.longBitsToDouble(values.get(doc)); |
| return value >= inclusiveLowerPoint && value <= inclusiveUpperPoint; |
| } |
| }; |
| } |
| }; |
| } |
| |
| @Override |
| public final String toString() { |
| final StringBuilder sb = new StringBuilder(field).append(":"); |
| return sb.append(includeLower ? '[' : '{') |
| .append((lowerVal == null) ? "*" : lowerVal.toString()) |
| .append(" TO ") |
| .append((upperVal == null) ? "*" : upperVal.toString()) |
| .append(includeUpper ? ']' : '}') |
| .toString(); |
| } |
| |
| @Override |
| @SuppressWarnings({"rawtypes"}) |
| public final boolean equals(Object o) { |
| if (this == o) return true; |
| if (!(o instanceof DocValuesRangeFilter)) return false; |
| DocValuesRangeFilter other = (DocValuesRangeFilter) o; |
| |
| if (!this.field.equals(other.field) |
| || this.includeLower != other.includeLower |
| || this.includeUpper != other.includeUpper |
| ) { return false; } |
| if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false; |
| if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false; |
| return true; |
| } |
| |
| @Override |
| public final int hashCode() { |
| int h = field.hashCode(); |
| h ^= (lowerVal != null) ? lowerVal.hashCode() : 550356204; |
| h = (h << 1) | (h >>> 31); // rotate to distinguish lower from upper |
| h ^= (upperVal != null) ? upperVal.hashCode() : -1674416163; |
| h ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 1721088258 : 1948649653); |
| return h; |
| } |
| |
| /** Returns the field name for this filter */ |
| public String getField() { return field; } |
| |
| /** Returns <code>true</code> if the lower endpoint is inclusive */ |
| public boolean includesLower() { return includeLower; } |
| |
| /** Returns <code>true</code> if the upper endpoint is inclusive */ |
| public boolean includesUpper() { return includeUpper; } |
| |
| /** Returns the lower value of this range filter */ |
| public T getLowerVal() { return lowerVal; } |
| |
| /** Returns the upper value of this range filter */ |
| public T getUpperVal() { return upperVal; } |
| } |