blob: 11b55bf0050cbdd4e1bd9b6a87f72c48e30d175e [file] [log] [blame]
using Lucene.Net.Diagnostics;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Spatial.Prefix.Tree;
using Lucene.Net.Util;
using Spatial4n.Core.Shapes;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
namespace Lucene.Net.Spatial.Prefix
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Finds docs where its indexed shape <see cref="Queries.SpatialOperation.Contains"/>
/// the query shape. For use on <see cref="RecursivePrefixTreeStrategy"/>.
///
/// @lucene.experimental
/// </summary>
public class ContainsPrefixTreeFilter : AbstractPrefixTreeFilter
{
// Future optimizations:
// Instead of seekExact, use seekCeil with some leap-frogging, like Intersects does.
/// <summary>
/// If the spatial data for a document is comprised of multiple overlapping or adjacent parts,
/// it might fail to match a query shape when doing the CONTAINS predicate when the sum of
/// those shapes contain the query shape but none do individually. Set this to false to
/// increase performance if you don't care about that circumstance (such as if your indexed
/// data doesn't even have such conditions). See LUCENE-5062.
/// </summary>
protected readonly bool m_multiOverlappingIndexedShapes;
public ContainsPrefixTreeFilter(IShape queryShape, string fieldName, SpatialPrefixTree grid, int detailLevel, bool multiOverlappingIndexedShapes)
: base(queryShape, fieldName, grid, detailLevel)
{
this.m_multiOverlappingIndexedShapes = multiOverlappingIndexedShapes;
}
public override bool Equals(object o)
{
if (!base.Equals(o))
return false;
return m_multiOverlappingIndexedShapes == ((ContainsPrefixTreeFilter)o).m_multiOverlappingIndexedShapes;
}
public override int GetHashCode()
{
return base.GetHashCode() + (m_multiOverlappingIndexedShapes ? 1 : 0);
}
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
{
return new ContainsVisitor(this, context, acceptDocs).Visit(m_grid.WorldCell, acceptDocs);
}
private class ContainsVisitor : BaseTermsEnumTraverser
{
public ContainsVisitor(ContainsPrefixTreeFilter outerInstance, AtomicReaderContext context, IBits acceptDocs)
: base(outerInstance, context, acceptDocs)
{
}
internal BytesRef termBytes = new BytesRef();
internal Cell nextCell;//see getLeafDocs
/// <remarks>This is the primary algorithm; recursive. Returns null if finds none.</remarks>
/// <exception cref="IOException"></exception>
internal SmallDocSet Visit(Cell cell, IBits acceptContains)
{
if (m_termsEnum == null)
{
//signals all done
return null;
}
ContainsPrefixTreeFilter outerInstance = (ContainsPrefixTreeFilter)base.m_outerInstance;
//Leaf docs match all query shape
SmallDocSet leafDocs = GetLeafDocs(cell, acceptContains);
// Get the AND of all child results (into combinedSubResults)
SmallDocSet combinedSubResults = null;
// Optimization: use null subCellsFilter when we know cell is within the query shape.
IShape subCellsFilter = outerInstance.m_queryShape;
if (cell.Level != 0 && ((cell.ShapeRel == SpatialRelation.NOT_SET || cell.ShapeRel == SpatialRelation.WITHIN)))
{
subCellsFilter = null;
if (Debugging.AssertsEnabled) Debugging.Assert(cell.Shape.Relate(outerInstance.m_queryShape) == SpatialRelation.WITHIN);
}
ICollection<Cell> subCells = cell.GetSubCells(subCellsFilter);
foreach (Cell subCell in subCells)
{
if (!SeekExact(subCell))
{
combinedSubResults = null;
}
else if (subCell.Level == outerInstance.m_detailLevel)
{
combinedSubResults = GetDocs(subCell, acceptContains);
}
else if (!outerInstance.m_multiOverlappingIndexedShapes &&
subCell.ShapeRel == SpatialRelation.WITHIN)
{
combinedSubResults = GetLeafDocs(subCell, acceptContains); //recursion
}
else
{
combinedSubResults = Visit(subCell, acceptContains);
}
if (combinedSubResults == null)
{
break;
}
acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
}
// Result: OR the leaf docs with AND of all child results
if (combinedSubResults != null)
{
if (leafDocs == null)
{
return combinedSubResults;
}
return leafDocs.Union(combinedSubResults);//union is 'or'
}
return leafDocs;
}
private bool SeekExact(Cell cell)
{
if (Debugging.AssertsEnabled) Debugging.Assert(new BytesRef(cell.GetTokenBytes()).CompareTo(termBytes) > 0);
this.termBytes.Bytes = cell.GetTokenBytes();
this.termBytes.Length = this.termBytes.Bytes.Length;
if (m_termsEnum == null)
return false;
return this.m_termsEnum.SeekExact(termBytes);
}
private SmallDocSet GetDocs(Cell cell, IBits acceptContains)
{
if (Debugging.AssertsEnabled) Debugging.Assert(new BytesRef(cell.GetTokenBytes()).Equals(termBytes));
return this.CollectDocs(acceptContains);
}
private Cell lastLeaf = null;//just for assertion
private SmallDocSet GetLeafDocs(Cell leafCell, IBits acceptContains)
{
if (Debugging.AssertsEnabled)
{
Debugging.Assert(new BytesRef(leafCell.GetTokenBytes()).Equals(termBytes));
Debugging.Assert(!leafCell.Equals(lastLeaf));//don't call for same leaf again
}
lastLeaf = leafCell;
if (m_termsEnum == null)
return null;
if (!m_termsEnum.MoveNext())
{
m_termsEnum = null;//signals all done
return null;
}
BytesRef nextTerm = m_termsEnum.Term;
nextCell = m_outerInstance.m_grid.GetCell(nextTerm.Bytes, nextTerm.Offset, nextTerm.Length, this.nextCell);
if (nextCell.Level == leafCell.Level && nextCell.IsLeaf)
{
return CollectDocs(acceptContains);
}
else
{
return null;
}
}
private SmallDocSet CollectDocs(IBits acceptContains)
{
SmallDocSet set = null;
m_docsEnum = m_termsEnum.Docs(acceptContains, m_docsEnum, DocsFlags.NONE);
int docid;
while ((docid = m_docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
{
if (set == null)
{
int size = this.m_termsEnum.DocFreq;
if (size <= 0)
{
size = 16;
}
set = new SmallDocSet(size);
}
set.Set(docid);
}
return set;
}
}//class ContainsVisitor
/// <summary>A hash based mutable set of docIds.</summary>
/// <remarks>
/// A hash based mutable set of docIds. If this were Solr code then we might
/// use a combination of HashDocSet and SortedIntDocSet instead.
/// </remarks>
private class SmallDocSet : DocIdSet, IBits
{
private readonly SentinelInt32Set intSet;
private int maxInt = 0;
public SmallDocSet(int size)
{
intSet = new SentinelInt32Set(size, -1);
}
public virtual bool Get(int index)
{
return intSet.Exists(index);
}
public virtual void Set(int index)
{
intSet.Put(index);
if (index > maxInt)
{
maxInt = index;
}
}
/// <summary>Largest docid.</summary>
public virtual int Length => maxInt;
/// <summary>
/// Number of docids.
/// NOTE: This was size() in Lucene.
/// </summary>
public virtual int Count => intSet.Count;
/// <summary>NOTE: modifies and returns either "this" or "other"</summary>
public virtual SmallDocSet Union(SmallDocSet other)
{
SmallDocSet bigger;
SmallDocSet smaller;
if (other.intSet.Count > this.intSet.Count)
{
bigger = other;
smaller = this;
}
else
{
bigger = this;
smaller = other;
}
//modify bigger
foreach (int v in smaller.intSet.Keys)
{
if (v == smaller.intSet.EmptyVal)
{
continue;
}
bigger.Set(v);
}
return bigger;
}
public override IBits Bits =>
//if the # of docids is super small, return null since iteration is going
// to be faster
Count > 4 ? this : null;
public override DocIdSetIterator GetIterator()
{
if (Count == 0)
{
return null;
}
//copy the unsorted values to a new array then sort them
int d = 0;
int[] docs = new int[intSet.Count];
foreach (int v in intSet.Keys)
{
if (v == intSet.EmptyVal)
{
continue;
}
docs[d++] = v;
}
if (Debugging.AssertsEnabled) Debugging.Assert(d == intSet.Count);
int size = d;
//sort them
Array.Sort(docs, 0, size);
return new DocIdSetIteratorAnonymousClass(size, docs);
}
#region Nested Type: DocIdSetIteratorAnonymousClass
private sealed class DocIdSetIteratorAnonymousClass : DocIdSetIterator
{
private readonly int size;
private readonly int[] docs;
public DocIdSetIteratorAnonymousClass(int size, int[] docs)
{
this.size = size;
this.docs = docs;
}
internal int idx = -1;
public override int DocID
{
get
{
if (idx >= 0 && idx < size)
{
return docs[idx];
}
else
{
return -1;
}
}
}
public override int NextDoc()
{
if (++idx < size)
{
return docs[idx];
}
return NO_MORE_DOCS;
}
public override int Advance(int target)
{
//for this small set this is likely faster vs. a binary search
// into the sorted array
return SlowAdvance(target);
}
public override long GetCost()
{
return size;
}
}
#endregion
}
}
}