blob: 693a00f86df944b60985acde71a6f90384149a71 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FutureArrays;
/**
* An indexed binary field for fast range filters. If you also
* need to store the value, you should add a separate {@link StoredField} instance.
* <p>
* Finding all documents within an N-dimensional shape or range at search time is
* efficient. Multiple values for the same field in one document
* are allowed.
* <p>
* This field defines static factory methods for creating common queries:
* <ul>
* <li>{@link #newExactQuery(String, byte[])} for matching an exact 1D point.
* <li>{@link #newSetQuery(String, byte[][]) newSetQuery(String, byte[]...)} for matching a set of 1D values.
* <li>{@link #newRangeQuery(String, byte[], byte[])} for matching a 1D range.
* <li>{@link #newRangeQuery(String, byte[][], byte[][])} for matching points/ranges in n-dimensional space.
* </ul>
* @see PointValues
*/
public final class BinaryPoint extends Field {

  /**
   * Validates an N-dimensional point and builds the field type describing it.
   *
   * @param point one {@code byte[]} per dimension
   * @return a frozen type with {@code point.length} dimensions and the shared per-dimension byte length
   * @throws IllegalArgumentException if {@code point} is null or empty, if any dimension is null or
   *         zero-length, or if the dimensions do not all have the same length
   */
  private static FieldType getType(byte[][] point) {
    if (point == null) {
      throw new IllegalArgumentException("point must not be null");
    }
    if (point.length == 0) {
      throw new IllegalArgumentException("point must not be 0 dimensions");
    }
    // -1 means "no dimension seen yet"; the first dimension fixes the expected length.
    int bytesPerDim = -1;
    for (int i = 0; i < point.length; i++) {
      byte[] oneDim = point[i];
      if (oneDim == null) {
        throw new IllegalArgumentException("point must not have null values");
      }
      if (oneDim.length == 0) {
        throw new IllegalArgumentException("point must not have 0-length values");
      }
      if (bytesPerDim == -1) {
        bytesPerDim = oneDim.length;
      } else if (bytesPerDim != oneDim.length) {
        throw new IllegalArgumentException("all dimensions must have same bytes length; got " + bytesPerDim + " and " + oneDim.length);
      }
    }
    return getType(point.length, bytesPerDim);
  }

  /**
   * Builds a frozen field type for the given point layout.
   *
   * @param numDims number of dimensions
   * @param bytesPerDim number of bytes in each dimension
   * @return the frozen type
   */
  private static FieldType getType(int numDims, int bytesPerDim) {
    FieldType type = new FieldType();
    type.setDimensions(numDims, bytesPerDim);
    type.freeze();
    return type;
  }

  /**
   * Packs an N-dimensional point into a single {@link BytesRef}, dimension after dimension.
   * <p>
   * A single-dimension point is not copied: its array is handed directly to the returned
   * {@link BytesRef}. Multi-dimensional points are concatenated into a newly allocated array.
   *
   * @param point one {@code byte[]} per dimension
   * @return the packed value
   * @throws IllegalArgumentException if {@code point} is null or empty, if any dimension is null,
   *         or, for multi-dimensional points, if the first dimension is zero-length or the
   *         dimensions do not all have the same length
   */
  private static BytesRef pack(byte[]... point) {
    if (point == null) {
      throw new IllegalArgumentException("point must not be null");
    }
    if (point.length == 0) {
      throw new IllegalArgumentException("point must not be 0 dimensions");
    }
    if (point.length == 1) {
      // Single dimension needs no packing, but a null value must still be rejected here:
      // otherwise null is passed straight to the BytesRef constructor, and callers never
      // see the IllegalArgumentException that the multi-dimensional path raises.
      if (point[0] == null) {
        throw new IllegalArgumentException("point must not have null values");
      }
      return new BytesRef(point[0]);
    }
    // -1 means "no dimension seen yet"; the first dimension fixes the expected length.
    int bytesPerDim = -1;
    for (byte[] dim : point) {
      if (dim == null) {
        throw new IllegalArgumentException("point must not have null values");
      }
      if (bytesPerDim == -1) {
        if (dim.length == 0) {
          throw new IllegalArgumentException("point must not have 0-length values");
        }
        bytesPerDim = dim.length;
      } else if (dim.length != bytesPerDim) {
        throw new IllegalArgumentException("all dimensions must have same bytes length; got " + bytesPerDim + " and " + dim.length);
      }
    }
    byte[] packed = new byte[bytesPerDim * point.length];
    for (int i = 0; i < point.length; i++) {
      System.arraycopy(point[i], 0, packed, i * bytesPerDim, bytesPerDim);
    }
    return new BytesRef(packed);
  }

  /** General purpose API: creates a new BinaryPoint, indexing the
   *  provided N-dimensional binary point.
   *
   *  @param name field name
   *  @param point byte[][] value
   *  @throws IllegalArgumentException if the field name or value is null.
   */
  public BinaryPoint(String name, byte[]... point) {
    super(name, pack(point), getType(point));
  }

  /**
   * Expert API: creates a new BinaryPoint from an already packed value and an explicit type.
   *
   * @param name field name
   * @param packedPoint the packed point value
   * @param type field type supplying the dimension count and the bytes per dimension
   * @throws IllegalArgumentException if {@code packedPoint.length} is not equal to
   *         {@code type.pointDimensionCount() * type.pointNumBytes()}
   */
  public BinaryPoint(String name, byte[] packedPoint, IndexableFieldType type) {
    super(name, packedPoint, type);
    if (packedPoint.length != type.pointDimensionCount() * type.pointNumBytes()) {
      throw new IllegalArgumentException("packedPoint is length=" + packedPoint.length + " but type.pointDimensionCount()=" + type.pointDimensionCount() + " and type.pointNumBytes()=" + type.pointNumBytes());
    }
  }

  // static methods for generating queries

  /**
   * Create a query for matching an exact binary value.
   * <p>
   * This is for simple one-dimension points, for multidimensional points use
   * {@link #newRangeQuery(String, byte[][], byte[][])} instead.
   *
   * @param field field name. must not be {@code null}.
   * @param value binary value
   * @throws IllegalArgumentException if {@code field} is null or {@code value} is null
   * @return a query matching documents with this exact value
   */
  public static Query newExactQuery(String field, byte[] value) {
    // An exact match is the degenerate range [value, value].
    return newRangeQuery(field, value, value);
  }

  /**
   * Create a range query for binary values.
   * <p>
   * This is for simple one-dimension ranges, for multidimensional ranges use
   * {@link #newRangeQuery(String, byte[][], byte[][])} instead.
   *
   * @param field field name. must not be {@code null}.
   * @param lowerValue lower portion of the range (inclusive). must not be {@code null}
   * @param upperValue upper portion of the range (inclusive). must not be {@code null}
   * @throws IllegalArgumentException if {@code field} is null, if {@code lowerValue} is null,
   *                                  or if {@code upperValue} is null
   * @return a query matching documents within this range.
   */
  public static Query newRangeQuery(String field, byte[] lowerValue, byte[] upperValue) {
    PointRangeQuery.checkArgs(field, lowerValue, upperValue);
    return newRangeQuery(field, new byte[][] {lowerValue}, new byte[][] {upperValue});
  }

  /**
   * Create a range query for n-dimensional binary values.
   *
   * @param field field name. must not be {@code null}.
   * @param lowerValue lower portion of the range (inclusive). must not be null.
   * @param upperValue upper portion of the range (inclusive). must not be null.
   * @throws IllegalArgumentException if {@code field} is null, if {@code lowerValue} is null, if {@code upperValue} is null,
   *                                  or if {@code lowerValue.length != upperValue.length}
   * @return a query matching documents within this range.
   */
  public static Query newRangeQuery(String field, byte[][] lowerValue, byte[][] upperValue) {
    return new PointRangeQuery(field, pack(lowerValue).bytes, pack(upperValue).bytes, lowerValue.length) {
      /** Renders one dimension as {@code binary(..)} with space-separated, unpadded hex bytes. */
      @Override
      protected String toString(int dimension, byte[] value) {
        assert value != null;
        StringBuilder sb = new StringBuilder();
        sb.append("binary(");
        for (int i = 0; i < value.length; i++) {
          if (i > 0) {
            sb.append(' ');
          }
          // Mask to 0..255 so negative bytes do not print as sign-extended ints.
          sb.append(Integer.toHexString(value[i] & 0xFF));
        }
        sb.append(')');
        return sb.toString();
      }
    };
  }

  /**
   * Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
   * <p>
   * The incoming {@code values} array is not reordered; a clone of it is sorted instead.
   * If no values are given, a {@link MatchNoDocsQuery} is returned.
   *
   * @param field field name. must not be {@code null}.
   * @param values all values to match
   * @throws IllegalArgumentException if the values do not all have the same length
   * @return a query matching documents containing any of the given values
   */
  public static Query newSetQuery(String field, byte[]... values) {

    // Make sure all byte[] have the same length
    int bytesPerDim = -1;
    for (byte[] value : values) {
      if (bytesPerDim == -1) {
        bytesPerDim = value.length;
      } else if (value.length != bytesPerDim) {
        throw new IllegalArgumentException("all byte[] must be the same length, but saw " + bytesPerDim + " and " + value.length);
      }
    }

    if (bytesPerDim == -1) {
      // There are no points, and we cannot guess the bytesPerDim here, so we return an equivalent query:
      return new MatchNoDocsQuery("empty BinaryPoint.newSetQuery");
    }

    // Don't unexpectedly change the user's incoming values array:
    byte[][] sortedValues = values.clone();
    Comparator<byte[]> unsignedOrder =
        (a, b) -> FutureArrays.compareUnsigned(a, 0, a.length, b, 0, b.length);
    Arrays.sort(sortedValues, unsignedOrder);

    // A single BytesRef is reused for every value handed out by the stream below;
    // only its backing array is swapped on each call to next().
    final BytesRef encoded = new BytesRef(new byte[bytesPerDim]);

    return new PointInSetQuery(field, 1, bytesPerDim,
                               new PointInSetQuery.Stream() {

                                 // Index of the next sorted value to return.
                                 int upto;

                                 @Override
                                 public BytesRef next() {
                                   if (upto == sortedValues.length) {
                                     // Exhausted: null signals the end of the stream.
                                     return null;
                                   } else {
                                     encoded.bytes = sortedValues[upto];
                                     upto++;
                                     return encoded;
                                   }
                                 }
                               }) {
      @Override
      protected String toString(byte[] value) {
        return new BytesRef(value).toString();
      }
    };
  }
}