| using Lucene.Net.Diagnostics; |
| using Lucene.Net.Index; |
| using Lucene.Net.Search; |
| using Lucene.Net.Spatial.Prefix.Tree; |
| using Lucene.Net.Util; |
| using Spatial4n.Core.Shapes; |
| using System; |
| using System.Collections.Generic; |
| using System.Diagnostics; |
| using System.IO; |
| |
| namespace Lucene.Net.Spatial.Prefix |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// Finds docs where its indexed shape <see cref="Queries.SpatialOperation.Contains"/> |
| /// the query shape. For use on <see cref="RecursivePrefixTreeStrategy"/>. |
| /// |
| /// @lucene.experimental |
| /// </summary> |
| public class ContainsPrefixTreeFilter : AbstractPrefixTreeFilter |
| { |
| // Future optimizations: |
| // Instead of seekExact, use seekCeil with some leap-frogging, like Intersects does. |
| |
| /// <summary> |
| /// If the spatial data for a document is comprised of multiple overlapping or adjacent parts, |
| /// it might fail to match a query shape when doing the CONTAINS predicate when the sum of |
| /// those shapes contain the query shape but none do individually. Set this to false to |
| /// increase performance if you don't care about that circumstance (such as if your indexed |
| /// data doesn't even have such conditions). See LUCENE-5062. |
| /// </summary> |
| protected readonly bool m_multiOverlappingIndexedShapes; |
| |
| public ContainsPrefixTreeFilter(IShape queryShape, string fieldName, SpatialPrefixTree grid, int detailLevel, bool multiOverlappingIndexedShapes) |
| : base(queryShape, fieldName, grid, detailLevel) |
| { |
| this.m_multiOverlappingIndexedShapes = multiOverlappingIndexedShapes; |
| } |
| |
| public override bool Equals(object o) |
| { |
| if (!base.Equals(o)) |
| return false; |
| return m_multiOverlappingIndexedShapes == ((ContainsPrefixTreeFilter)o).m_multiOverlappingIndexedShapes; |
| } |
| |
| public override int GetHashCode() |
| { |
| return base.GetHashCode() + (m_multiOverlappingIndexedShapes ? 1 : 0); |
| } |
| |
| public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) |
| { |
| return new ContainsVisitor(this, context, acceptDocs).Visit(m_grid.WorldCell, acceptDocs); |
| } |
| |
| private class ContainsVisitor : BaseTermsEnumTraverser |
| { |
| public ContainsVisitor(ContainsPrefixTreeFilter outerInstance, AtomicReaderContext context, IBits acceptDocs) |
| : base(outerInstance, context, acceptDocs) |
| { |
| } |
| |
| internal BytesRef termBytes = new BytesRef(); |
| internal Cell nextCell;//see getLeafDocs |
| |
| /// <remarks>This is the primary algorithm; recursive. Returns null if finds none.</remarks> |
| /// <exception cref="IOException"></exception> |
| internal SmallDocSet Visit(Cell cell, IBits acceptContains) |
| { |
| if (m_termsEnum == null) |
| { |
| //signals all done |
| return null; |
| } |
| |
| ContainsPrefixTreeFilter outerInstance = (ContainsPrefixTreeFilter)base.m_outerInstance; |
| |
| //Leaf docs match all query shape |
| SmallDocSet leafDocs = GetLeafDocs(cell, acceptContains); |
| // Get the AND of all child results (into combinedSubResults) |
| SmallDocSet combinedSubResults = null; |
| // Optimization: use null subCellsFilter when we know cell is within the query shape. |
| IShape subCellsFilter = outerInstance.m_queryShape; |
| if (cell.Level != 0 && ((cell.ShapeRel == SpatialRelation.NOT_SET || cell.ShapeRel == SpatialRelation.WITHIN))) |
| { |
| subCellsFilter = null; |
| if (Debugging.AssertsEnabled) Debugging.Assert(cell.Shape.Relate(outerInstance.m_queryShape) == SpatialRelation.WITHIN); |
| } |
| ICollection<Cell> subCells = cell.GetSubCells(subCellsFilter); |
| foreach (Cell subCell in subCells) |
| { |
| if (!SeekExact(subCell)) |
| { |
| combinedSubResults = null; |
| } |
| else if (subCell.Level == outerInstance.m_detailLevel) |
| { |
| combinedSubResults = GetDocs(subCell, acceptContains); |
| } |
| else if (!outerInstance.m_multiOverlappingIndexedShapes && |
| subCell.ShapeRel == SpatialRelation.WITHIN) |
| { |
| combinedSubResults = GetLeafDocs(subCell, acceptContains); //recursion |
| } |
| else |
| { |
| combinedSubResults = Visit(subCell, acceptContains); |
| } |
| |
| if (combinedSubResults == null) |
| { |
| break; |
| } |
| |
| acceptContains = combinedSubResults;//has the 'AND' effect on next iteration |
| } |
| |
| // Result: OR the leaf docs with AND of all child results |
| if (combinedSubResults != null) |
| { |
| if (leafDocs == null) |
| { |
| return combinedSubResults; |
| } |
| return leafDocs.Union(combinedSubResults);//union is 'or' |
| } |
| return leafDocs; |
| } |
| |
| private bool SeekExact(Cell cell) |
| { |
| if (Debugging.AssertsEnabled) Debugging.Assert(new BytesRef(cell.GetTokenBytes()).CompareTo(termBytes) > 0); |
| this.termBytes.Bytes = cell.GetTokenBytes(); |
| this.termBytes.Length = this.termBytes.Bytes.Length; |
| if (m_termsEnum == null) |
| return false; |
| return this.m_termsEnum.SeekExact(termBytes); |
| } |
| |
| private SmallDocSet GetDocs(Cell cell, IBits acceptContains) |
| { |
| if (Debugging.AssertsEnabled) Debugging.Assert(new BytesRef(cell.GetTokenBytes()).Equals(termBytes)); |
| return this.CollectDocs(acceptContains); |
| } |
| |
| private Cell lastLeaf = null;//just for assertion |
| |
| private SmallDocSet GetLeafDocs(Cell leafCell, IBits acceptContains) |
| { |
| if (Debugging.AssertsEnabled) |
| { |
| Debugging.Assert(new BytesRef(leafCell.GetTokenBytes()).Equals(termBytes)); |
| Debugging.Assert(!leafCell.Equals(lastLeaf));//don't call for same leaf again |
| } |
| lastLeaf = leafCell; |
| |
| if (m_termsEnum == null) |
| return null; |
| if (!m_termsEnum.MoveNext()) |
| { |
| m_termsEnum = null;//signals all done |
| return null; |
| } |
| BytesRef nextTerm = m_termsEnum.Term; |
| nextCell = m_outerInstance.m_grid.GetCell(nextTerm.Bytes, nextTerm.Offset, nextTerm.Length, this.nextCell); |
| if (nextCell.Level == leafCell.Level && nextCell.IsLeaf) |
| { |
| return CollectDocs(acceptContains); |
| } |
| else |
| { |
| return null; |
| } |
| } |
| |
| private SmallDocSet CollectDocs(IBits acceptContains) |
| { |
| SmallDocSet set = null; |
| |
| m_docsEnum = m_termsEnum.Docs(acceptContains, m_docsEnum, DocsFlags.NONE); |
| int docid; |
| while ((docid = m_docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) |
| { |
| if (set == null) |
| { |
| int size = this.m_termsEnum.DocFreq; |
| if (size <= 0) |
| { |
| size = 16; |
| } |
| set = new SmallDocSet(size); |
| } |
| set.Set(docid); |
| } |
| return set; |
| } |
| }//class ContainsVisitor |
| |
| /// <summary>A hash based mutable set of docIds.</summary> |
| /// <remarks> |
| /// A hash based mutable set of docIds. If this were Solr code then we might |
| /// use a combination of HashDocSet and SortedIntDocSet instead. |
| /// </remarks> |
| private class SmallDocSet : DocIdSet, IBits |
| { |
| private readonly SentinelInt32Set intSet; |
| private int maxInt = 0; |
| |
| public SmallDocSet(int size) |
| { |
| intSet = new SentinelInt32Set(size, -1); |
| } |
| |
| public virtual bool Get(int index) |
| { |
| return intSet.Exists(index); |
| } |
| |
| public virtual void Set(int index) |
| { |
| intSet.Put(index); |
| if (index > maxInt) |
| { |
| maxInt = index; |
| } |
| } |
| |
| /// <summary>Largest docid.</summary> |
| public virtual int Length => maxInt; |
| |
| /// <summary> |
| /// Number of docids. |
| /// NOTE: This was size() in Lucene. |
| /// </summary> |
| public virtual int Count => intSet.Count; |
| |
| /// <summary>NOTE: modifies and returns either "this" or "other"</summary> |
| public virtual SmallDocSet Union(SmallDocSet other) |
| { |
| SmallDocSet bigger; |
| SmallDocSet smaller; |
| if (other.intSet.Count > this.intSet.Count) |
| { |
| bigger = other; |
| smaller = this; |
| } |
| else |
| { |
| bigger = this; |
| smaller = other; |
| } |
| //modify bigger |
| foreach (int v in smaller.intSet.Keys) |
| { |
| if (v == smaller.intSet.EmptyVal) |
| { |
| continue; |
| } |
| bigger.Set(v); |
| } |
| return bigger; |
| } |
| |
| public override IBits Bits => |
| //if the # of docids is super small, return null since iteration is going |
| // to be faster |
| Count > 4 ? this : null; |
| |
| public override DocIdSetIterator GetIterator() |
| { |
| if (Count == 0) |
| { |
| return null; |
| } |
| //copy the unsorted values to a new array then sort them |
| int d = 0; |
| int[] docs = new int[intSet.Count]; |
| foreach (int v in intSet.Keys) |
| { |
| if (v == intSet.EmptyVal) |
| { |
| continue; |
| } |
| docs[d++] = v; |
| } |
| if (Debugging.AssertsEnabled) Debugging.Assert(d == intSet.Count); |
| int size = d; |
| //sort them |
| Array.Sort(docs, 0, size); |
| return new DocIdSetIteratorAnonymousClass(size, docs); |
| } |
| |
| #region Nested Type: DocIdSetIteratorAnonymousClass |
| |
| private sealed class DocIdSetIteratorAnonymousClass : DocIdSetIterator |
| { |
| private readonly int size; |
| private readonly int[] docs; |
| |
| public DocIdSetIteratorAnonymousClass(int size, int[] docs) |
| { |
| this.size = size; |
| this.docs = docs; |
| } |
| |
| internal int idx = -1; |
| |
| public override int DocID |
| { |
| get |
| { |
| if (idx >= 0 && idx < size) |
| { |
| return docs[idx]; |
| } |
| else |
| { |
| return -1; |
| } |
| } |
| } |
| |
| public override int NextDoc() |
| { |
| if (++idx < size) |
| { |
| return docs[idx]; |
| } |
| return NO_MORE_DOCS; |
| } |
| |
| public override int Advance(int target) |
| { |
| //for this small set this is likely faster vs. a binary search |
| // into the sorted array |
| return SlowAdvance(target); |
| } |
| |
| public override long GetCost() |
| { |
| return size; |
| } |
| } |
| |
| #endregion |
| } |
| } |
| } |