blob: 6660b3a2cfcbb5ec0b3dd1517a29c5b2ba36e6fd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.FutureArrays;
/**
* Abstract class for range queries against single or multidimensional points such as
* {@link IntPoint}.
* <p>
* This is for subclasses and works on the underlying binary encoding: to
* create range queries for lucene's standard {@code Point} types, refer to factory
* methods on those classes, e.g. {@link IntPoint#newRangeQuery IntPoint.newRangeQuery()} for
* fields indexed with {@link IntPoint}.
* <p>
* For a single-dimensional field this query is a simple range query; in a multi-dimensional field it's a box shape.
* @see PointValues
* @lucene.experimental
*/
public abstract class PointRangeQuery extends Query {
final String field;
final int numDims;
final int bytesPerDim;
final byte[] lowerPoint;
final byte[] upperPoint;
/**
* Expert: create a multidimensional range query for point values.
*
* @param field field name. must not be {@code null}.
* @param lowerPoint lower portion of the range (inclusive).
* @param upperPoint upper portion of the range (inclusive).
* @param numDims number of dimensions.
* @throws IllegalArgumentException if {@code field} is null, or if {@code lowerValue.length != upperValue.length}
*/
protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) {
checkArgs(field, lowerPoint, upperPoint);
this.field = field;
if (numDims <= 0) {
throw new IllegalArgumentException("numDims must be positive, got " + numDims);
}
if (lowerPoint.length == 0) {
throw new IllegalArgumentException("lowerPoint has length of zero");
}
if (lowerPoint.length % numDims != 0) {
throw new IllegalArgumentException("lowerPoint is not a fixed multiple of numDims");
}
if (lowerPoint.length != upperPoint.length) {
throw new IllegalArgumentException("lowerPoint has length=" + lowerPoint.length + " but upperPoint has different length=" + upperPoint.length);
}
this.numDims = numDims;
this.bytesPerDim = lowerPoint.length / numDims;
this.lowerPoint = lowerPoint;
this.upperPoint = upperPoint;
}
/**
* Check preconditions for all factory methods
* @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are null.
*/
public static void checkArgs(String field, Object lowerPoint, Object upperPoint) {
if (field == null) {
throw new IllegalArgumentException("field must not be null");
}
if (lowerPoint == null) {
throw new IllegalArgumentException("lowerPoint must not be null");
}
if (upperPoint == null) {
throw new IllegalArgumentException("upperPoint must not be null");
}
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
visitor.visitLeaf(this);
}
}
@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
// This is an inverted structure and should be used in the first pass:
return new ConstantScoreWeight(this, boost) {
private boolean matches(byte[] packedValue) {
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (FutureArrays.compareUnsigned(packedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0) {
// Doc's value is too low, in this dimension
return false;
}
if (FutureArrays.compareUnsigned(packedValue, offset, offset + bytesPerDim, upperPoint, offset, offset + bytesPerDim) > 0) {
// Doc's value is too high, in this dimension
return false;
}
}
return true;
}
private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {
boolean crosses = false;
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (FutureArrays.compareUnsigned(minPackedValue, offset, offset + bytesPerDim, upperPoint, offset, offset + bytesPerDim) > 0 ||
FutureArrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
crosses |= FutureArrays.compareUnsigned(minPackedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0 ||
FutureArrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim, upperPoint, offset, offset + bytesPerDim) > 0;
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
return new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
if (matches(packedValue)) {
visit(docID);
}
}
@Override
public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
if (matches(packedValue)) {
int docID;
while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
visit(docID);
}
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return relate(minPackedValue, maxPackedValue);
}
};
}
/**
* Create a visitor that clears documents that do NOT match the range.
*/
private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, int[] cost) {
return new IntersectVisitor() {
@Override
public void visit(int docID) {
result.clear(docID);
cost[0]--;
}
@Override
public void visit(int docID, byte[] packedValue) {
if (matches(packedValue) == false) {
visit(docID);
}
}
@Override
public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
if (matches(packedValue) == false) {
int docID;
while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
visit(docID);
}
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
Relation relation = relate(minPackedValue, maxPackedValue);
switch (relation) {
case CELL_INSIDE_QUERY:
// all points match, skip this subtree
return Relation.CELL_OUTSIDE_QUERY;
case CELL_OUTSIDE_QUERY:
// none of the points match, clear all documents
return Relation.CELL_INSIDE_QUERY;
default:
return relation;
}
}
};
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
// No docs in this segment/field indexed any points
return null;
}
if (values.getNumIndexDimensions() != numDims) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numIndexDimensions=" + values.getNumIndexDimensions() + " but this query has numDims=" + numDims);
}
if (bytesPerDim != values.getBytesPerDimension()) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
}
boolean allDocsMatch;
if (values.getDocCount() == reader.maxDoc()) {
final byte[] fieldPackedLower = values.getMinPackedValue();
final byte[] fieldPackedUpper = values.getMaxPackedValue();
allDocsMatch = true;
for (int i = 0; i < numDims; ++i) {
int offset = i * bytesPerDim;
if (FutureArrays.compareUnsigned(lowerPoint, offset, offset + bytesPerDim, fieldPackedLower, offset, offset + bytesPerDim) > 0
|| FutureArrays.compareUnsigned(upperPoint, offset, offset + bytesPerDim, fieldPackedUpper, offset, offset + bytesPerDim) < 0) {
allDocsMatch = false;
break;
}
}
} else {
allDocsMatch = false;
}
final Weight weight = this;
if (allDocsMatch) {
// all docs have a value and all points are within bounds, so everything matches
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) {
return new ConstantScoreScorer(weight, score(), scoreMode, DocIdSetIterator.all(reader.maxDoc()));
}
@Override
public long cost() {
return reader.maxDoc();
}
};
} else {
return new ScorerSupplier() {
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;
@Override
public Scorer get(long leadCost) throws IOException {
if (values.getDocCount() == reader.maxDoc()
&& values.getDocCount() == values.size()
&& cost() > reader.maxDoc() / 2) {
// If all docs have exactly one value and the cost is greater
// than half the leaf size then maybe we can make things faster
// by computing the set of documents that do NOT match the range
final FixedBitSet result = new FixedBitSet(reader.maxDoc());
result.set(0, reader.maxDoc());
int[] cost = new int[] { reader.maxDoc() };
values.intersect(getInverseIntersectVisitor(result, cost));
final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
}
values.intersect(visitor);
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
}
@Override
public long cost() {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
cost = values.estimateDocCount(visitor);
assert cost >= 0;
}
return cost;
}
};
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
}
};
}
public String getField() {
return field;
}
public int getNumDims() {
return numDims;
}
public int getBytesPerDim() {
return bytesPerDim;
}
public byte[] getLowerPoint() {
return lowerPoint.clone();
}
public byte[] getUpperPoint() {
return upperPoint.clone();
}
@Override
public final int hashCode() {
int hash = classHash();
hash = 31 * hash + field.hashCode();
hash = 31 * hash + Arrays.hashCode(lowerPoint);
hash = 31 * hash + Arrays.hashCode(upperPoint);
hash = 31 * hash + numDims;
hash = 31 * hash + Objects.hashCode(bytesPerDim);
return hash;
}
@Override
public final boolean equals(Object o) {
return sameClassAs(o) &&
equalsTo(getClass().cast(o));
}
private boolean equalsTo(PointRangeQuery other) {
return Objects.equals(field, other.field) &&
numDims == other.numDims &&
bytesPerDim == other.bytesPerDim &&
Arrays.equals(lowerPoint, other.lowerPoint) &&
Arrays.equals(upperPoint, other.upperPoint);
}
@Override
public final String toString(String field) {
final StringBuilder sb = new StringBuilder();
if (this.field.equals(field) == false) {
sb.append(this.field);
sb.append(':');
}
// print ourselves as "range per dimension"
for (int i = 0; i < numDims; i++) {
if (i > 0) {
sb.append(',');
}
int startOffset = bytesPerDim * i;
sb.append('[');
sb.append(toString(i, ArrayUtil.copyOfSubArray(lowerPoint, startOffset, startOffset + bytesPerDim)));
sb.append(" TO ");
sb.append(toString(i, ArrayUtil.copyOfSubArray(upperPoint, startOffset, startOffset + bytesPerDim)));
sb.append(']');
}
return sb.toString();
}
/**
* Returns a string of a single value in a human-readable format for debugging.
* This is used by {@link #toString()}.
*
* @param dimension dimension of the particular value
* @param value single value, never null
* @return human readable value for debugging
*/
protected abstract String toString(int dimension, byte[] value);
}