blob: 5bee73ac9332b7c5874c7080e2d0b02c65d9a00e [file] [log] [blame]
using System.Collections.Concurrent;
using System.Collections.Generic;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Spatial.Prefix.Tree;
using Lucene.Net.Queries.Function;
using Lucene.Net.Spatial.Queries;
using Lucene.Net.Spatial.Util;
using Spatial4n.Core.Shapes;
namespace Lucene.Net.Spatial.Prefix
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// An abstract SpatialStrategy based on <see cref="Lucene.Net.Spatial.Prefix.Tree.SpatialPrefixTree"/>. The two
/// subclasses are <see cref="RecursivePrefixTreeStrategy">RecursivePrefixTreeStrategy</see> and
/// <see cref="TermQueryPrefixTreeStrategy">TermQueryPrefixTreeStrategy</see>. This strategy is most effective as a fast
/// approximate spatial search filter.
///
/// <h4>Characteristics:</h4>
/// <list type="bullet">
/// <item><description>Can index any shape; however only
/// <see cref="RecursivePrefixTreeStrategy">RecursivePrefixTreeStrategy</see>
/// can effectively search non-point shapes.</description></item>
/// <item><description>Can index a variable number of shapes per field value. This strategy
/// can do it via multiple calls to <see cref="CreateIndexableFields(IShape)"/>
/// for a document or by giving it some sort of Shape aggregate (e.g. NTS
/// WKT MultiPoint). The shape's boundary is approximated to a grid precision.
/// </description></item>
/// <item><description>Can query with any shape. The shape's boundary is approximated to a grid
/// precision.</description></item>
/// <item><description>Only <see cref="SpatialOperation.Intersects"/>
/// is supported. If only points are indexed then this is effectively equivalent
/// to IsWithin.</description></item>
/// <item><description>The strategy supports <see cref="MakeDistanceValueSource(IPoint, double)"/>
/// even for multi-valued data, so long as the indexed data is all points; the
/// behavior is undefined otherwise. However, <c>it will likely be removed in
/// the future</c> in lieu of using another strategy with a more scalable
/// implementation. Use of this call is the only
/// circumstance in which a cache is used. The cache is simple but as such
/// it doesn't scale to large numbers of points nor is it real-time-search
/// friendly.</description></item>
/// </list>
///
/// <h4>Implementation:</h4>
/// The <see cref="Lucene.Net.Spatial.Prefix.Tree.SpatialPrefixTree"/>
/// does most of the work, for example returning
/// a list of terms representing grids of various sizes for a supplied shape.
/// An important
/// configuration item is <see cref="DistErrPct"/> which balances
/// shape precision against scalability. See those docs.
///
/// @lucene.internal
/// </summary>
public abstract class PrefixTreeStrategy : SpatialStrategy
{
protected readonly SpatialPrefixTree m_grid;
private readonly ConcurrentDictionary<string, PointPrefixTreeFieldCacheProvider> provider =
new ConcurrentDictionary<string, PointPrefixTreeFieldCacheProvider>();
protected readonly bool m_simplifyIndexedCells;
protected int m_defaultFieldValuesArrayLen = 2;
protected double m_distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ]
protected PrefixTreeStrategy(SpatialPrefixTree grid, string fieldName, bool simplifyIndexedCells) // LUCENENET: CA1012: Abstract types should not have constructors (marked protected)
: base(grid.SpatialContext, fieldName)
{
this.m_grid = grid;
this.m_simplifyIndexedCells = simplifyIndexedCells;
}
/// <summary>
/// A memory hint used by <see cref="SpatialStrategy.MakeDistanceValueSource(IPoint)"/>
/// for how big the initial size of each Document's array should be. The
/// default is 2. Set this to slightly more than the default expected number
/// of points per document.
/// </summary>
public virtual int DefaultFieldValuesArrayLen
{
get => m_defaultFieldValuesArrayLen; // LUCENENET NOTE: Added getter per MSDN guidelines
set => m_defaultFieldValuesArrayLen = value;
}
/// <summary>
/// The default measure of shape precision affecting shapes at index and query
/// times.
/// </summary>
/// <remarks>
/// The default measure of shape precision affecting shapes at index and query
/// times. Points don't use this as they are always indexed at the configured
/// maximum precision (<see cref="Lucene.Net.Spatial.Prefix.Tree.SpatialPrefixTree.MaxLevels"/>);
/// this applies to all other shapes. Specific shapes at index and query time
/// can use something different than this default value. If you don't set a
/// default then the default is <see cref="SpatialArgs.DEFAULT_DISTERRPCT"/> --
/// 2.5%.
/// </remarks>
/// <seealso cref="Lucene.Net.Spatial.Queries.SpatialArgs.DistErrPct"/>
public virtual double DistErrPct
{
get => m_distErrPct;
set => m_distErrPct = value;
}
public override Field[] CreateIndexableFields(IShape shape)
{
double distErr = SpatialArgs.CalcDistanceFromErrPct(shape, m_distErrPct, m_ctx);
return CreateIndexableFields(shape, distErr);
}
public virtual Field[] CreateIndexableFields(IShape shape, double distErr)
{
int detailLevel = m_grid.GetLevelForDistance(distErr);
IList<Cell> cells = m_grid.GetCells(shape, detailLevel, true, m_simplifyIndexedCells);//intermediates cells
//TODO is CellTokenStream supposed to be re-used somehow? see Uwe's comments:
// http://code.google.com/p/lucene-spatial-playground/issues/detail?id=4
Field field = new Field(FieldName, new CellTokenStream(cells.GetEnumerator()), FIELD_TYPE);
return new Field[] { field };
}
/// <summary>
/// Indexed, tokenized, not stored.
/// </summary>
public static readonly FieldType FIELD_TYPE = LoadFieldType();
private static FieldType LoadFieldType() // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
{
var fieldType = new FieldType
{
IsIndexed = true,
IsTokenized = true,
OmitNorms = true,
IndexOptions = IndexOptions.DOCS_ONLY
};
fieldType.Freeze();
return fieldType;
}
/// <summary>Outputs the tokenString of a cell, and if its a leaf, outputs it again with the leaf byte.</summary>
internal sealed class CellTokenStream : TokenStream
{
private readonly ICharTermAttribute termAtt;
private IEnumerator<Cell> iter = null;
public CellTokenStream(IEnumerator<Cell> tokens)
{
this.iter = tokens;
termAtt = AddAttribute<ICharTermAttribute>();
}
internal string nextTokenStringNeedingLeaf = null;
public override bool IncrementToken()
{
ClearAttributes();
if (nextTokenStringNeedingLeaf != null)
{
termAtt.Append(nextTokenStringNeedingLeaf);
termAtt.Append((char)Cell.LEAF_BYTE);
nextTokenStringNeedingLeaf = null;
return true;
}
if (iter.MoveNext())
{
Cell cell = iter.Current;
string token = cell.TokenString;
termAtt.Append(token);
if (cell.IsLeaf)
{
nextTokenStringNeedingLeaf = token;
}
return true;
}
return false;
}
/// <summary>
/// Releases resources used by the <see cref="CellTokenStream"/> and
/// if overridden in a derived class, optionally releases unmanaged resources.
/// </summary>
/// <param name="disposing"><c>true</c> to release both managed and unmanaged resources;
/// <c>false</c> to release only unmanaged resources.</param>
// LUCENENET specific
protected override void Dispose(bool disposing)
{
try
{
if (disposing)
{
iter?.Dispose(); // LUCENENET specific - dispose iter and set to null
iter = null;
}
}
finally
{
base.Dispose(disposing);
}
}
}
public override ValueSource MakeDistanceValueSource(IPoint queryPoint, double multiplier)
{
var p = provider.GetOrAdd(FieldName, f => new PointPrefixTreeFieldCacheProvider(m_grid, FieldName, m_defaultFieldValuesArrayLen));
return new ShapeFieldCacheDistanceValueSource(m_ctx, p, queryPoint, multiplier);
}
public virtual SpatialPrefixTree Grid => m_grid;
}
}