blob: b96e99dd40905f7bd72e4a8c6f151a1d91311524 [file] [log] [blame]
package org.apache.lucene.search.trie;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.ToStringUtils;
/**
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering.
* This filter depends on a specific structure of terms in the index that can only be created
* by {@link TrieUtils} methods.
* For more information, how the algorithm works, see the package description {@link org.apache.lucene.search.trie}.
*/
public final class TrieRangeFilter extends Filter {
/**
* Universal constructor (expert use only): Uses already trie-converted min/max values.
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
* With <code>minInclusive</code> and <code>maxInclusive</code> can be choosen, if the corresponding
* bound should be included or excluded from the range.
*/
public TrieRangeFilter(final String[] fields, final int precisionStep,
Long min, Long max, final boolean minInclusive, final boolean maxInclusive
) {
if (min==null && max==null) throw new IllegalArgumentException("The min and max values cannot be both null.");
this.fields=fields;
this.precisionStep=precisionStep;
this.min=min;
this.max=max;
this.minInclusive=minInclusive;
this.maxInclusive=maxInclusive;
this.minUnconverted=min;
this.maxUnconverted=max;
}
/**
* Generates a trie filter using the supplied field with range bounds in integer form (long).
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
* With <code>minInclusive</code> and <code>maxInclusive</code> can be choosen, if the corresponding
* bound should be included or excluded from the range.
*/
public TrieRangeFilter(final String field, final String lowerPrecisionField, final int precisionStep,
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
) {
this(
new String[]{field, lowerPrecisionField==null ? (field+TrieUtils.LOWER_PRECISION_FIELD_NAME_SUFFIX) : lowerPrecisionField},
precisionStep,min,max,minInclusive,maxInclusive
);
}
/**
* Generates a trie filter using the supplied field with range bounds in integer form (long).
* You can set <code>min</code> or <code>max</code> (but not both) to <code>null</code> to leave one bound open.
* With <code>minInclusive</code> and <code>maxInclusive</code> can be choosen, if the corresponding
* bound should be included or excluded from the range.
*/
public TrieRangeFilter(final String field, final int precisionStep,
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
) {
this(
new String[]{field, field+TrieUtils.LOWER_PRECISION_FIELD_NAME_SUFFIX},
precisionStep,min,max,minInclusive,maxInclusive
);
}
//@Override
public String toString() {
return toString(null);
}
public String toString(final String field) {
/*final StringBuffer sb=new StringBuffer();
if (!this.field.equals(field)) sb.append(this.field).append(':');
return sb.append(minInclusive ? '[' : '{')
.append((minUnconverted==null) ? "*" : minUnconverted.toString())
.append(" TO ")
.append((maxUnconverted==null) ? "*" : maxUnconverted.toString())
.append(maxInclusive ? ']' : '}').toString();
*/ return "dummy";
}
/**
* Two instances are equal if they have the same trie-encoded range bounds, same field, and same variant.
* If one of the instances uses an exclusive lower bound, it is equal to a range with inclusive bound,
* when the inclusive lower bound is equal to the incremented exclusive lower bound of the other one.
* The same applys for the upper bound in other direction.
*/
//@Override
/*public final boolean equals(final Object o) {
if (o instanceof TrieRangeFilter) {
TrieRangeFilter q=(TrieRangeFilter)o;
// trieVariants are singleton per type, so no equals needed.
return (field==q.field && min.equals(q.min) && max.equals(q.max) && trieVariant==q.trieVariant);
} else return false;
}*/
//@Override
/*public final int hashCode() {
// the hash code uses from the variant only the number of bits, as this is unique for the variant
return field.hashCode()+(min.hashCode()^0x14fa55fb)+(max.hashCode()^0x733fa5fe)+(trieVariant.TRIE_BITS^0x64365465);
}*/
/** Marks documents in a specific range. Code borrowed from original RangeFilter and simplified (and returns number of terms) */
private int setBits(
final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits,
String field, String lowerTerm, String upperTerm
) throws IOException {
System.out.println(Long.toHexString(TrieUtils.prefixCodedToLong(lowerTerm)^0x8000000000000000L)+".."+Long.toHexString(TrieUtils.prefixCodedToLong(upperTerm)^0x8000000000000000L));
int count=0;
final int len=lowerTerm.length();
final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
try {
do {
final Term term = enumerator.term();
if (term!=null && term.field()==field) {
// break out when upperTerm reached or length of term is different
final String t=term.text();
if (len!=t.length() || t.compareTo(upperTerm)>0) break;
// we have a good term, find the docs
count++;
termDocs.seek(enumerator);
while (termDocs.next()) bits.set(termDocs.doc());
} else break;
} while (enumerator.next());
} finally {
enumerator.close();
}
return count;
}
/** Splits range recursively (and returns number of terms) */
private int splitRange(
final IndexReader reader, final TermDocs termDocs, final OpenBitSet bits,
final long minBound, final boolean minBoundOpen, final long maxBound, final boolean maxBoundOpen,
final int shift
) throws IOException {
int count=0;
final String field = fields[Math.min(fields.length-1, shift/precisionStep)].intern();
// calculate new bounds for inner precision
final long diff = 1L << (shift+precisionStep),
mask = diff-1L,
nextMinBound = (minBoundOpen ? Long.MIN_VALUE : (minBound + diff)) & ~mask,
nextMaxBound = (maxBoundOpen ? Long.MAX_VALUE : (maxBound - diff)) & ~mask;
if (shift+precisionStep>63 || nextMinBound>=nextMaxBound) {
// we are in the lowest precision or the next precision is not available
count+=setBits(
reader, termDocs, bits, field,
TrieUtils.longToPrefixCoded(minBound, shift),
TrieUtils.longToPrefixCoded(maxBound, shift)
);
} else {
if (!minBoundOpen) {
count+=setBits(reader, termDocs, bits, field,
TrieUtils.longToPrefixCoded(minBound, shift),
TrieUtils.longToPrefixCoded((nextMinBound - diff) | mask, shift)
);
}
if (!maxBoundOpen) {
count+=setBits(reader, termDocs, bits, field,
TrieUtils.longToPrefixCoded((nextMaxBound + diff) & ~mask, shift),
TrieUtils.longToPrefixCoded(maxBound, shift)
);
}
count+=splitRange(
reader,termDocs,bits,
nextMinBound,minBoundOpen,
nextMaxBound,maxBoundOpen,
shift+precisionStep
);
}
return count;
}
/**
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
*/
//@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
// calculate the upper and lower bounds respecting the inclusive and null values.
long minBound=(this.min==null) ? Long.MIN_VALUE : (
minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
);
long maxBound=(this.max==null) ? Long.MAX_VALUE : (
maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
);
if (minBound > maxBound) {
// shortcut, no docs will match this
lastNumberOfTerms=0;
return DocIdSet.EMPTY_DOCIDSET;
} else {
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
final TermDocs termDocs = reader.termDocs();
try {
lastNumberOfTerms=splitRange(
reader,termDocs,bits,
minBound, minBound==Long.MIN_VALUE, maxBound, maxBound==Long.MAX_VALUE,
0 /* start with no shift */
);
} finally {
termDocs.close();
}
return bits;
}
}
/**
* EXPERT: Return the number of terms visited during the last execution of {@link #getDocIdSet}.
* This may be used for performance comparisons of different trie variants and their effectiveness.
* This method is not thread safe, be sure to only call it when no query is running!
* @throws IllegalStateException if {@link #getDocIdSet} was not yet executed.
*/
public int getLastNumberOfTerms() {
if (lastNumberOfTerms < 0) throw new IllegalStateException();
return lastNumberOfTerms;
}
/** Returns this range filter as a query.
* Using this method, it is possible to create a Query using <code>new TrieRangeFilter(....).asQuery()</code>.
* This is a synonym for wrapping with a {@link ConstantScoreQuery},
* but this query returns a better <code>toString()</code> variant.
*/
public Query asQuery() {
return new ConstantScoreQuery(this) {
/** this instance return a nicer String variant than the original {@link ConstantScoreQuery} */
//@Override
public String toString(final String field) {
// return a more convenient representation of this query than ConstantScoreQuery does:
return ((TrieRangeFilter) filter).toString(field)+ToStringUtils.boost(getBoost());
}
};
}
// members
private final String[] fields;
private final int precisionStep;
private final Long min,max;
private final boolean minInclusive,maxInclusive;
private Object minUnconverted,maxUnconverted;
private int lastNumberOfTerms=-1;
}