blob: 74f0a688904ac20fb1c4c9e6038158ebd564f755 [file] [log] [blame]
package org.apache.lucene.spatial.prefix;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
/**
* Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains
* CONTAINS} the query shape. For use on {@link RecursivePrefixTreeStrategy}.
*
* @lucene.experimental
*/
public class ContainsPrefixTreeFilter extends AbstractPrefixTreeFilter {
public ContainsPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel) {
super(queryShape, fieldName, grid, detailLevel);
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
return new ContainsVisitor(context, acceptDocs).visit(grid.getWorldCell(), acceptDocs);
}
private class ContainsVisitor extends BaseTermsEnumTraverser {
public ContainsVisitor(AtomicReaderContext context, Bits acceptDocs) throws IOException {
super(context, acceptDocs);
}
BytesRef termBytes = new BytesRef();
Cell nextCell;//see getLeafDocs
/** This is the primary algorithm; recursive. Returns null if finds none. */
private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {
if (termsEnum == null)//signals all done
return null;
//Leaf docs match all query shape
SmallDocSet leafDocs = getLeafDocs(cell, acceptContains);
// Get the AND of all child results
SmallDocSet combinedSubResults = null;
Collection<Cell> subCells = cell.getSubCells(queryShape);
for (Cell subCell : subCells) {
if (!seekExact(subCell))
combinedSubResults = null;
else if (subCell.getLevel() == detailLevel)
combinedSubResults = getDocs(subCell, acceptContains);
else if (subCell.getShapeRel() == SpatialRelation.WITHIN)
combinedSubResults = getLeafDocs(subCell, acceptContains);
else
combinedSubResults = visit(subCell, acceptContains); //recursion
if (combinedSubResults == null)
break;
acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
}
// Result: OR the leaf docs with AND of all child results
if (combinedSubResults != null) {
if (leafDocs == null)
return combinedSubResults;
return leafDocs.union(combinedSubResults);
}
return leafDocs;
}
private boolean seekExact(Cell cell) throws IOException {
assert new BytesRef(cell.getTokenBytes()).compareTo(termBytes) > 0;
termBytes.bytes = cell.getTokenBytes();
termBytes.length = termBytes.bytes.length;
return termsEnum.seekExact(termBytes, cell.getLevel() <= 2);
}
private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
assert new BytesRef(cell.getTokenBytes()).equals(termBytes);
return collectDocs(acceptContains);
}
private SmallDocSet getLeafDocs(Cell leafCell, Bits acceptContains) throws IOException {
assert new BytesRef(leafCell.getTokenBytes()).equals(termBytes);
BytesRef nextTerm = termsEnum.next();
if (nextTerm == null) {
termsEnum = null;//signals all done
return null;
}
nextCell = grid.getCell(nextTerm.bytes, nextTerm.offset, nextTerm.length, nextCell);
if (nextCell.getLevel() == leafCell.getLevel() && nextCell.isLeaf()) {
return collectDocs(acceptContains);
} else {
return null;
}
}
private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
SmallDocSet set = null;
docsEnum = termsEnum.docs(acceptContains, docsEnum, DocsEnum.FLAG_NONE);
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (set == null) {
int size = termsEnum.docFreq();
if (size <= 0)
size = 16;
set = new SmallDocSet(size);
}
set.set(docid);
}
return set;
}
}//class ContainsVisitor
/** A hash based mutable set of docIds. If this were Solr code then we might
* use a combination of HashDocSet and SortedIntDocSet instead. */
private static class SmallDocSet extends DocIdSet implements Bits {
private final SentinelIntSet intSet;
private int maxInt = 0;
public SmallDocSet(int size) {
intSet = new SentinelIntSet(size, -1);
}
@Override
public boolean get(int index) {
return intSet.exists(index);
}
public void set(int index) {
intSet.put(index);
if (index > maxInt)
maxInt = index;
}
/** Largest docid. */
@Override
public int length() {
return maxInt;
}
/** Number of docids. */
public int size() {
return intSet.size();
}
/** NOTE: modifies and returns either "this" or "other" */
public SmallDocSet union(SmallDocSet other) {
SmallDocSet bigger;
SmallDocSet smaller;
if (other.intSet.size() > this.intSet.size()) {
bigger = other;
smaller = this;
} else {
bigger = this;
smaller = other;
}
//modify bigger
for (int v : smaller.intSet.keys) {
if (v == smaller.intSet.emptyVal)
continue;
bigger.set(v);
}
return bigger;
}
@Override
public Bits bits() throws IOException {
//if the # of docids is super small, return null since iteration is going
// to be faster
return size() > 4 ? this : null;
}
@Override
public DocIdSetIterator iterator() throws IOException {
if (size() == 0)
return null;
//copy the unsorted values to a new array then sort them
int d = 0;
final int[] docs = new int[intSet.size()];
for (int v : intSet.keys) {
if (v == intSet.emptyVal)
continue;
docs[d++] = v;
}
assert d == intSet.size();
final int size = d;
//sort them
Arrays.sort(docs, 0, size);
return new DocIdSetIterator() {
int idx = -1;
@Override
public int docID() {
if (idx >= 0 && idx < size)
return docs[idx];
else
return -1;
}
@Override
public int nextDoc() throws IOException {
if (++idx < size)
return docs[idx];
return NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
//for this small set this is likely faster vs. a binary search
// into the sorted array
return slowAdvance(target);
}
@Override
public long cost() {
return size;
}
};
}
}//class SmallDocSet
}