lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java - lucene-solr - Git at Google

 package org.apache.lucene.spatial.prefix;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import com.spatial4j.core.shape.Shape;
 import com.spatial4j.core.shape.SpatialRelation;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.spatial.prefix.tree.Cell;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SentinelIntSet;

 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;

 /**
  * Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains
  * CONTAINS} the query shape. For use on {@link RecursivePrefixTreeStrategy}.
  *
  * @lucene.experimental
  */
 public class ContainsPrefixTreeFilter extends AbstractPrefixTreeFilter {

   public ContainsPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel) {
     super(queryShape, fieldName, grid, detailLevel);
   }

   @Override
   public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
     return new ContainsVisitor(context, acceptDocs).visit(grid.getWorldCell(), acceptDocs);
   }

   private class ContainsVisitor extends BaseTermsEnumTraverser {

     public ContainsVisitor(AtomicReaderContext context, Bits acceptDocs) throws IOException {
       super(context, acceptDocs);
     }

     BytesRef termBytes = new BytesRef();
     Cell nextCell;//see getLeafDocs

     /** This is the primary algorithm; recursive.  Returns null if finds none. */
     private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {

       if (termsEnum == null)//signals all done
         return null;

       //Leaf docs match all query shape
       SmallDocSet leafDocs = getLeafDocs(cell, acceptContains);

       // Get the AND of all child results
       SmallDocSet combinedSubResults = null;
       Collection<Cell> subCells = cell.getSubCells(queryShape);
       for (Cell subCell : subCells) {
         if (!seekExact(subCell))
           combinedSubResults = null;
         else if (subCell.getLevel() == detailLevel)
           combinedSubResults = getDocs(subCell, acceptContains);
         else if (subCell.getShapeRel() == SpatialRelation.WITHIN)
           combinedSubResults = getLeafDocs(subCell, acceptContains);
         else
           combinedSubResults = visit(subCell, acceptContains); //recursion

         if (combinedSubResults == null)
           break;
         acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
       }

       // Result: OR the leaf docs with AND of all child results
       if (combinedSubResults != null) {
         if (leafDocs == null)
           return combinedSubResults;
         return leafDocs.union(combinedSubResults);
       }
       return leafDocs;
     }

     private boolean seekExact(Cell cell) throws IOException {
       assert new BytesRef(cell.getTokenBytes()).compareTo(termBytes) > 0;

       termBytes.bytes = cell.getTokenBytes();
       termBytes.length = termBytes.bytes.length;
       return termsEnum.seekExact(termBytes, cell.getLevel() <= 2);
     }

     private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
       assert new BytesRef(cell.getTokenBytes()).equals(termBytes);

       return collectDocs(acceptContains);
     }

     private SmallDocSet getLeafDocs(Cell leafCell, Bits acceptContains) throws IOException {
       assert new BytesRef(leafCell.getTokenBytes()).equals(termBytes);

       BytesRef nextTerm = termsEnum.next();
       if (nextTerm == null) {
         termsEnum = null;//signals all done
         return null;
       }
       nextCell = grid.getCell(nextTerm.bytes, nextTerm.offset, nextTerm.length, nextCell);
       if (nextCell.getLevel() == leafCell.getLevel() && nextCell.isLeaf()) {
         return collectDocs(acceptContains);
       } else {
         return null;
       }
     }

     private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
       SmallDocSet set = null;

       docsEnum = termsEnum.docs(acceptContains, docsEnum, DocsEnum.FLAG_NONE);
       int docid;
       while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
         if (set == null) {
           int size = termsEnum.docFreq();
           if (size <= 0)
             size = 16;
           set = new SmallDocSet(size);
         }
         set.set(docid);
       }
       return set;
     }

   }//class ContainsVisitor

   /** A hash based mutable set of docIds. If this were Solr code then we might
    * use a combination of HashDocSet and SortedIntDocSet instead. */
   private static class SmallDocSet extends DocIdSet implements Bits {

     private final SentinelIntSet intSet;
     private int maxInt = 0;

     public SmallDocSet(int size) {
       intSet = new SentinelIntSet(size, -1);
     }

     @Override
     public boolean get(int index) {
       return intSet.exists(index);
     }

     public void set(int index) {
       intSet.put(index);
       if (index > maxInt)
         maxInt = index;
     }

     /** Largest docid. */
     @Override
     public int length() {
       return maxInt;
     }

     /** Number of docids. */
     public int size() {
       return intSet.size();
     }

     /** NOTE: modifies and returns either "this" or "other" */
     public SmallDocSet union(SmallDocSet other) {
       SmallDocSet bigger;
       SmallDocSet smaller;
       if (other.intSet.size() > this.intSet.size()) {
         bigger = other;
         smaller = this;
       } else {
         bigger = this;
         smaller = other;
       }
       //modify bigger
       for (int v : smaller.intSet.keys) {
         if (v == smaller.intSet.emptyVal)
           continue;
         bigger.set(v);
       }
       return bigger;
     }

     @Override
     public Bits bits() throws IOException {
       //if the # of docids is super small, return null since iteration is going
       // to be faster
       return size() > 4 ? this : null;
     }

     @Override
     public DocIdSetIterator iterator() throws IOException {
       if (size() == 0)
         return null;
       //copy the unsorted values to a new array then sort them
       int d = 0;
       final int[] docs = new int[intSet.size()];
       for (int v : intSet.keys) {
         if (v == intSet.emptyVal)
           continue;
         docs[d++] = v;
       }
       assert d == intSet.size();
       final int size = d;

       //sort them
       Arrays.sort(docs, 0, size);

       return new DocIdSetIterator() {
         int idx = -1;
         @Override
         public int docID() {
           if (idx >= 0 && idx < size)
             return docs[idx];
           else
             return -1;
         }

         @Override
         public int nextDoc() throws IOException {
           if (++idx < size)
             return docs[idx];
           return NO_MORE_DOCS;
         }

         @Override
         public int advance(int target) throws IOException {
           //for this small set this is likely faster vs. a binary search
           // into the sorted array
           return slowAdvance(target);
         }

         @Override
         public long cost() {
           return size;
         }
       };
     }

   }//class SmallDocSet

 }
	package org.apache.lucene.spatial.prefix;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import com.spatial4j.core.shape.Shape;
	import com.spatial4j.core.shape.SpatialRelation;
	import org.apache.lucene.index.AtomicReaderContext;
	import org.apache.lucene.index.DocsEnum;
	import org.apache.lucene.search.DocIdSet;
	import org.apache.lucene.search.DocIdSetIterator;
	import org.apache.lucene.spatial.prefix.tree.Cell;
	import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.SentinelIntSet;

	import java.io.IOException;
	import java.util.Arrays;
	import java.util.Collection;

	/**
	* Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains
	* CONTAINS} the query shape. For use on {@link RecursivePrefixTreeStrategy}.
	*
	* @lucene.experimental
	*/
	public class ContainsPrefixTreeFilter extends AbstractPrefixTreeFilter {

	public ContainsPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel) {
	super(queryShape, fieldName, grid, detailLevel);
	}

	@Override
	public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
	return new ContainsVisitor(context, acceptDocs).visit(grid.getWorldCell(), acceptDocs);
	}

	private class ContainsVisitor extends BaseTermsEnumTraverser {

	public ContainsVisitor(AtomicReaderContext context, Bits acceptDocs) throws IOException {
	super(context, acceptDocs);
	}

	BytesRef termBytes = new BytesRef();
	Cell nextCell;//see getLeafDocs

	/** This is the primary algorithm; recursive. Returns null if finds none. */
	private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {

	if (termsEnum == null)//signals all done
	return null;

	//Leaf docs match all query shape
	SmallDocSet leafDocs = getLeafDocs(cell, acceptContains);

	// Get the AND of all child results
	SmallDocSet combinedSubResults = null;
	Collection<Cell> subCells = cell.getSubCells(queryShape);
	for (Cell subCell : subCells) {
	if (!seekExact(subCell))
	combinedSubResults = null;
	else if (subCell.getLevel() == detailLevel)
	combinedSubResults = getDocs(subCell, acceptContains);
	else if (subCell.getShapeRel() == SpatialRelation.WITHIN)
	combinedSubResults = getLeafDocs(subCell, acceptContains);
	else
	combinedSubResults = visit(subCell, acceptContains); //recursion

	if (combinedSubResults == null)
	break;
	acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
	}

	// Result: OR the leaf docs with AND of all child results
	if (combinedSubResults != null) {
	if (leafDocs == null)
	return combinedSubResults;
	return leafDocs.union(combinedSubResults);
	}
	return leafDocs;
	}

	private boolean seekExact(Cell cell) throws IOException {
	assert new BytesRef(cell.getTokenBytes()).compareTo(termBytes) > 0;

	termBytes.bytes = cell.getTokenBytes();
	termBytes.length = termBytes.bytes.length;
	return termsEnum.seekExact(termBytes, cell.getLevel() <= 2);
	}

	private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
	assert new BytesRef(cell.getTokenBytes()).equals(termBytes);

	return collectDocs(acceptContains);
	}

	private SmallDocSet getLeafDocs(Cell leafCell, Bits acceptContains) throws IOException {
	assert new BytesRef(leafCell.getTokenBytes()).equals(termBytes);

	BytesRef nextTerm = termsEnum.next();
	if (nextTerm == null) {
	termsEnum = null;//signals all done
	return null;
	}
	nextCell = grid.getCell(nextTerm.bytes, nextTerm.offset, nextTerm.length, nextCell);
	if (nextCell.getLevel() == leafCell.getLevel() && nextCell.isLeaf()) {
	return collectDocs(acceptContains);
	} else {
	return null;
	}
	}

	private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
	SmallDocSet set = null;

	docsEnum = termsEnum.docs(acceptContains, docsEnum, DocsEnum.FLAG_NONE);
	int docid;
	while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
	if (set == null) {
	int size = termsEnum.docFreq();
	if (size <= 0)
	size = 16;
	set = new SmallDocSet(size);
	}
	set.set(docid);
	}
	return set;
	}

	}//class ContainsVisitor

	/** A hash based mutable set of docIds. If this were Solr code then we might
	* use a combination of HashDocSet and SortedIntDocSet instead. */
	private static class SmallDocSet extends DocIdSet implements Bits {

	private final SentinelIntSet intSet;
	private int maxInt = 0;

	public SmallDocSet(int size) {
	intSet = new SentinelIntSet(size, -1);
	}

	@Override
	public boolean get(int index) {
	return intSet.exists(index);
	}

	public void set(int index) {
	intSet.put(index);
	if (index > maxInt)
	maxInt = index;
	}

	/** Largest docid. */
	@Override
	public int length() {
	return maxInt;
	}

	/** Number of docids. */
	public int size() {
	return intSet.size();
	}

	/** NOTE: modifies and returns either "this" or "other" */
	public SmallDocSet union(SmallDocSet other) {
	SmallDocSet bigger;
	SmallDocSet smaller;
	if (other.intSet.size() > this.intSet.size()) {
	bigger = other;
	smaller = this;
	} else {
	bigger = this;
	smaller = other;
	}
	//modify bigger
	for (int v : smaller.intSet.keys) {
	if (v == smaller.intSet.emptyVal)
	continue;
	bigger.set(v);
	}
	return bigger;
	}

	@Override
	public Bits bits() throws IOException {
	//if the # of docids is super small, return null since iteration is going
	// to be faster
	return size() > 4 ? this : null;
	}

	@Override
	public DocIdSetIterator iterator() throws IOException {
	if (size() == 0)
	return null;
	//copy the unsorted values to a new array then sort them
	int d = 0;
	final int[] docs = new int[intSet.size()];
	for (int v : intSet.keys) {
	if (v == intSet.emptyVal)
	continue;
	docs[d++] = v;
	}
	assert d == intSet.size();
	final int size = d;

	//sort them
	Arrays.sort(docs, 0, size);

	return new DocIdSetIterator() {
	int idx = -1;
	@Override
	public int docID() {
	if (idx >= 0 && idx < size)
	return docs[idx];
	else
	return -1;
	}

	@Override
	public int nextDoc() throws IOException {
	if (++idx < size)
	return docs[idx];
	return NO_MORE_DOCS;
	}

	@Override
	public int advance(int target) throws IOException {
	//for this small set this is likely faster vs. a binary search
	// into the sorted array
	return slowAdvance(target);
	}

	@Override
	public long cost() {
	return size;
	}
	};
	}

	}//class SmallDocSet

	}