blob: b0864f67cd3588f03dfe06a42bc69a964f737ed4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.Arrays;
import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SentinelIntSet;
/**
* Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains
* CONTAINS} the query shape. For use on {@link RecursivePrefixTreeStrategy}.
*
* @lucene.experimental
*/
public class ContainsPrefixTreeQuery extends AbstractPrefixTreeQuery {
/**
* If the spatial data for a document is comprised of multiple overlapping or adjacent parts,
* it might fail to match a query shape when doing the CONTAINS predicate when the sum of
* those shapes contain the query shape but none do individually. Set this to false to
* increase performance if you don't care about that circumstance (such as if your indexed
* data doesn't even have such conditions). See LUCENE-5062.
*/
protected final boolean multiOverlappingIndexedShapes;
public ContainsPrefixTreeQuery(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel, boolean multiOverlappingIndexedShapes) {
super(queryShape, fieldName, grid, detailLevel);
this.multiOverlappingIndexedShapes = multiOverlappingIndexedShapes;
}
@Override
public boolean equals(Object o) {
if (!super.equals(o))
return false;
return multiOverlappingIndexedShapes == ((ContainsPrefixTreeQuery)o).multiOverlappingIndexedShapes;
}
@Override
public int hashCode() {
return super.hashCode() + (multiOverlappingIndexedShapes ? 1 : 0);
}
@Override
public String toString(String field) {
return getClass().getSimpleName() + "(" +
"fieldName=" + fieldName + "," +
"queryShape=" + queryShape + "," +
"detailLevel=" + detailLevel + "," +
"multiOverlappingIndexedShapes=" + multiOverlappingIndexedShapes +
")";
}
@Override
protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
return new ContainsVisitor(context).visit(grid.getWorldCell(), null);
}
private class ContainsVisitor extends BaseTermsEnumTraverser {
public ContainsVisitor(LeafReaderContext context) throws IOException {
super(context);
if (termsEnum != null) {
nextTerm();//advance to first
}
}
BytesRef seekTerm = new BytesRef();//temp; see seek()
BytesRef thisTerm;//current term in termsEnum
Cell indexedCell;//the cell wrapper around thisTerm
/** This is the primary algorithm; recursive. Returns null if finds none. */
private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {
if (thisTerm == null)//signals all done
return null;
// Get the AND of all child results (into combinedSubResults)
SmallDocSet combinedSubResults = null;
// Optimization: use null subCellsFilter when we know cell is within the query shape.
Shape subCellsFilter = queryShape;
if (cell.getLevel() != 0 && ((cell.getShapeRel() == null || cell.getShapeRel() == SpatialRelation.WITHIN))) {
subCellsFilter = null;
assert cell.getShape().relate(queryShape) == SpatialRelation.WITHIN;
}
CellIterator subCells = cell.getNextLevelCells(subCellsFilter);
while (subCells.hasNext()) {
Cell subCell = subCells.next();
if (!seek(subCell)) {
combinedSubResults = null;
} else if (subCell.getLevel() == detailLevel) {
combinedSubResults = getDocs(subCell, acceptContains);
} else if (!multiOverlappingIndexedShapes &&
subCell.getShapeRel() == SpatialRelation.WITHIN) {
combinedSubResults = getLeafDocs(subCell, acceptContains);
} else {
//OR the leaf docs with all child results
SmallDocSet leafDocs = getLeafDocs(subCell, acceptContains);
SmallDocSet subDocs = visit(subCell, acceptContains); //recursion
combinedSubResults = union(leafDocs, subDocs);
}
if (combinedSubResults == null)
break;
acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
}
return combinedSubResults;
}
private boolean seek(Cell cell) throws IOException {
if (thisTerm == null)
return false;
final int compare = indexedCell.compareToNoLeaf(cell);
if (compare > 0) {
return false;//leap-frog effect
} else if (compare == 0) {
return true; // already there!
} else {//compare > 0
//seek!
seekTerm = cell.getTokenBytesNoLeaf(seekTerm);
final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(seekTerm);
if (seekStatus == TermsEnum.SeekStatus.END) {
thisTerm = null;//all done
return false;
}
thisTerm = termsEnum.term();
indexedCell = grid.readCell(thisTerm, indexedCell);
if (seekStatus == TermsEnum.SeekStatus.FOUND) {
return true;
}
return indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0;
}
}
/** Get prefix & leaf docs at this cell. */
private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
assert indexedCell.compareToNoLeaf(cell) == 0;
//called when we've reached detailLevel.
if (indexedCell.isLeaf()) {//only a leaf
SmallDocSet result = collectDocs(acceptContains);
nextTerm();
return result;
} else {
SmallDocSet docsAtPrefix = collectDocs(acceptContains);
if (!nextTerm()) {
return docsAtPrefix;
}
//collect leaf too
if (indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0) {
SmallDocSet docsAtLeaf = collectDocs(acceptContains);
nextTerm();
return union(docsAtPrefix, docsAtLeaf);
} else {
return docsAtPrefix;
}
}
}
/** Gets docs on the leaf of the given cell, _if_ there is a leaf cell, otherwise null. */
private SmallDocSet getLeafDocs(Cell cell, Bits acceptContains) throws IOException {
assert indexedCell.compareToNoLeaf(cell) == 0;
//Advance past prefix if we're at a prefix; return null if no leaf
if (!indexedCell.isLeaf()) {
if (!nextTerm() || !indexedCell.isLeaf() || indexedCell.getLevel() != cell.getLevel()) {
return null;
}
}
SmallDocSet result = collectDocs(acceptContains);
nextTerm();
return result;
}
private boolean nextTerm() throws IOException {
if ((thisTerm = termsEnum.next()) == null)
return false;
indexedCell = grid.readCell(thisTerm, indexedCell);
return true;
}
private SmallDocSet union(SmallDocSet aSet, SmallDocSet bSet) {
if (bSet != null) {
if (aSet == null)
return bSet;
return aSet.union(bSet);//union is 'or'
}
return aSet;
}
private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
SmallDocSet set = null;
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
int docid;
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (acceptContains != null && acceptContains.get(docid) == false) {
continue;
}
if (set == null) {
int size = termsEnum.docFreq();
if (size <= 0)
size = 16;
set = new SmallDocSet(size);
}
set.set(docid);
}
return set;
}
}//class ContainsVisitor
/** A hash based mutable set of docIds. If this were Solr code then we might
* use a combination of HashDocSet and SortedIntDocSet instead. */
// TODO use DocIdSetBuilder?
private static class SmallDocSet extends DocIdSet implements Bits {
private final SentinelIntSet intSet;
private int maxInt = 0;
public SmallDocSet(int size) {
intSet = new SentinelIntSet(size, -1);
}
@Override
public boolean get(int index) {
return intSet.exists(index);
}
public void set(int index) {
intSet.put(index);
if (index > maxInt)
maxInt = index;
}
/** Largest docid. */
@Override
public int length() {
return maxInt;
}
/** Number of docids. */
public int size() {
return intSet.size();
}
/** NOTE: modifies and returns either "this" or "other" */
public SmallDocSet union(SmallDocSet other) {
SmallDocSet bigger;
SmallDocSet smaller;
if (other.intSet.size() > this.intSet.size()) {
bigger = other;
smaller = this;
} else {
bigger = this;
smaller = other;
}
//modify bigger
for (int v : smaller.intSet.keys) {
if (v == smaller.intSet.emptyVal)
continue;
bigger.set(v);
}
return bigger;
}
@Override
public Bits bits() throws IOException {
//if the # of docids is super small, return null since iteration is going
// to be faster
return size() > 4 ? this : null;
}
@Override
public DocIdSetIterator iterator() throws IOException {
if (size() == 0)
return null;
//copy the unsorted values to a new array then sort them
int d = 0;
final int[] docs = new int[intSet.size()];
for (int v : intSet.keys) {
if (v == intSet.emptyVal)
continue;
docs[d++] = v;
}
assert d == intSet.size();
final int size = d;
//sort them
Arrays.sort(docs, 0, size);
return new DocIdSetIterator() {
int idx = -1;
@Override
public int docID() {
if (idx < 0) {
return -1;
} else if (idx < size) {
return docs[idx];
} else {
return NO_MORE_DOCS;
}
}
@Override
public int nextDoc() throws IOException {
if (++idx < size)
return docs[idx];
return NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
//for this small set this is likely faster vs. a binary search
// into the sorted array
return slowAdvance(target);
}
@Override
public long cost() {
return size;
}
};
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ Integer.BYTES)
+ intSet.ramBytesUsed();
}
}//class SmallDocSet
}