blob: 67a3671d6638e46ba25893e2f021b061fdee87a4 [file] [log] [blame]
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Diagnostics;
using Lucene.Net.Util;
using System;
using System.Diagnostics;
using System.Reflection;
namespace Lucene.Net.Analysis
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// <b>Expert:</b> this class provides a <see cref="TokenStream"/>
/// for indexing numeric values that can be used by <see cref="Search.NumericRangeQuery"/>
/// or <see cref="Search.NumericRangeFilter"/>.
///
/// <para/>Note that for simple usage, <see cref="Documents.Int32Field"/>, <see cref="Documents.Int64Field"/>,
/// <see cref="Documents.SingleField"/> or <see cref="Documents.DoubleField"/> is
/// recommended. These fields disable norms and
/// term freqs, as they are not usually needed during
/// searching. If you need to change these settings, you
/// should use this class.
///
/// <para/>Here's an example usage, for an <see cref="int"/> field:
///
/// <code>
/// IndexableFieldType fieldType = new IndexableFieldType(TextField.TYPE_NOT_STORED)
/// {
/// OmitNorms = true,
/// IndexOptions = IndexOptions.DOCS_ONLY
/// };
/// Field field = new Field(name, new NumericTokenStream(precisionStep).SetInt32Value(value), fieldType);
/// document.Add(field);
/// </code>
///
/// <para/>For optimal performance, re-use the <see cref="TokenStream"/> and <see cref="Documents.Field"/> instance
/// for more than one document:
///
/// <code>
/// NumericTokenStream stream = new NumericTokenStream(precisionStep);
/// IndexableFieldType fieldType = new IndexableFieldType(TextField.TYPE_NOT_STORED)
/// {
/// OmitNorms = true,
/// IndexOptions = IndexOptions.DOCS_ONLY
/// };
/// Field field = new Field(name, stream, fieldType);
/// Document document = new Document();
/// document.Add(field);
///
/// for(all documents)
/// {
/// stream.SetInt32Value(value);
/// writer.AddDocument(document);
/// }
/// </code>
///
/// <para>This stream is not intended to be used in analyzers;
/// it is meant for iterating over the different precisions while
/// indexing a specific numeric value.</para>
///
/// <para><b>NOTE</b>: as token streams are only consumed once
/// the document is added to the index, if you index more
/// than one numeric field, use a separate <see cref="NumericTokenStream"/>
/// instance for each.</para>
///
/// <para>See <see cref="Search.NumericRangeQuery"/> for more details on the
/// <c>precisionStep</c> parameter as well as how numeric fields work under the hood.
/// </para>
///
/// @since 2.9
/// </summary>
public sealed class NumericTokenStream : TokenStream
{
/// <summary>
/// Registers the three attributes this stream writes to and caches the
/// returned instances in the corresponding private fields. Invoked during construction.
/// </summary>
private void InitializeInstanceFields()
{
    // Registration order is kept as-is: numeric term, then type, then position increment.
    this.numericAtt = this.AddAttribute<INumericTermAttribute>();
    this.typeAtt = this.AddAttribute<ITypeAttribute>();
    this.posIncrAtt = this.AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// The full precision token gets this token type assigned
/// (the token produced when the current shift is 0). </summary>
public const string TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
/// <summary>
/// The lower precision tokens get this token type assigned
/// (every token produced with a non-zero shift). </summary>
public const string TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
/// <summary>
/// <b>Expert:</b> Use this attribute to get the details of the currently generated token.
/// @lucene.experimental
/// @since 4.0
/// </summary>
public interface INumericTermAttribute : IAttribute
{
/// <summary>
/// Gets or sets the current shift value, undefined before first token.
/// The setter is used by the enclosing stream to rewind the shift. </summary>
int Shift { get; set; }
/// <summary>
/// Returns current token's raw value as <see cref="long"/> with all <see cref="Shift"/> applied, undefined before first token </summary>
long RawValue { get; }
/// <summary>
/// Returns value size in bits (32 for <see cref="float"/>, <see cref="int"/>; 64 for <see cref="double"/>, <see cref="long"/>) </summary>
int ValueSize { get; }
/// <summary>
/// <em>Don't call this method!</em>
/// @lucene.internal
/// </summary>
/// <param name="value"> the numeric value to enumerate tokens for </param>
/// <param name="valSize"> the value size in bits; see <see cref="ValueSize"/> </param>
/// <param name="precisionStep"> the amount by which <see cref="IncShift()"/> advances the shift </param>
/// <param name="shift"> the shift value to start from </param>
void Init(long value, int valSize, int precisionStep, int shift);
/// <summary>
/// <em>Don't call this method!</em>
/// @lucene.internal
/// </summary>
/// <returns> In <see cref="NumericTermAttribute"/>, the shift value after the precision step has been added to it. </returns>
int IncShift();
}
// Wrapper factory: forwards every request to the wrapped factory, except that
// any attribute type assignable to ICharTermAttribute is rejected.
private sealed class NumericAttributeFactory : AttributeSource.AttributeFactory
{
    private readonly AttributeSource.AttributeFactory innerFactory;

    internal NumericAttributeFactory(AttributeSource.AttributeFactory @delegate)
    {
        innerFactory = @delegate;
    }

    public override Util.Attribute CreateAttributeInstance<T>()
    {
        bool isCharTermAttribute = typeof(ICharTermAttribute).IsAssignableFrom(typeof(T));
        if (!isCharTermAttribute)
        {
            return innerFactory.CreateAttributeInstance<T>();
        }

        throw new ArgumentException("NumericTokenStream does not support CharTermAttribute.");
    }
}
/// <summary>
/// Default implementation of <see cref="INumericTermAttribute"/>; it also exposes the
/// prefix-coded term bytes through <see cref="ITermToBytesRefAttribute"/>.
/// @lucene.internal
/// @since 4.0
/// </summary>
public sealed class NumericTermAttribute : Util.Attribute, INumericTermAttribute, ITermToBytesRefAttribute
{
    private long _value = 0L;
    private int _precisionStep = 0;
    private readonly BytesRef _bytes = new BytesRef();

    /// <summary>
    /// Creates the attribute in its uninitialized state (value size 0).
    /// </summary>
    /// <seealso cref="Init(long, int, int, int)"/>
    public NumericTermAttribute()
    {
        ValueSize = 0;
    }

    /// <summary>
    /// The reusable buffer that <see cref="FillBytesRef()"/> writes into.
    /// </summary>
    public BytesRef BytesRef
    {
        get { return _bytes; }
    }

    /// <summary>
    /// Encodes the current value at the current <see cref="Shift"/> into <see cref="BytesRef"/>,
    /// using the 64-bit encoder when <see cref="ValueSize"/> is 64 and the 32-bit encoder otherwise.
    /// </summary>
    public void FillBytesRef()
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(ValueSize == 64 || ValueSize == 32);
        }

        if (ValueSize != 64)
        {
            NumericUtils.Int32ToPrefixCoded((int)_value, Shift, _bytes);
            return;
        }

        NumericUtils.Int64ToPrefixCoded(_value, Shift, _bytes);
    }

    /// <summary>
    /// Gets or sets the current shift value.
    /// </summary>
    public int Shift { get; set; }

    /// <summary>
    /// Adds the precision step to <see cref="Shift"/> and returns the updated shift.
    /// </summary>
    public int IncShift()
    {
        Shift += _precisionStep;
        return Shift;
    }

    /// <summary>
    /// The stored value with the lowest <see cref="Shift"/> bits masked out.
    /// </summary>
    public long RawValue
    {
        get
        {
            long lowBitsMask = (1L << Shift) - 1L;
            return _value & ~lowBitsMask;
        }
    }

    /// <summary>
    /// The value size in bits as passed to <see cref="Init(long, int, int, int)"/>; 0 until then.
    /// </summary>
    public int ValueSize { get; private set; }

    /// <summary>
    /// Stores the value, its size in bits, the precision step and the starting shift.
    /// </summary>
    public void Init(long value, int valueSize, int precisionStep, int shift)
    {
        _value = value;
        ValueSize = valueSize;
        _precisionStep = precisionStep;
        Shift = shift;
    }

    /// <summary>
    /// Intentionally does nothing.
    /// </summary>
    public override void Clear()
    {
        // Nothing to reset here: the state of this attribute is managed
        // entirely by the enclosing NumericTokenStream.
    }

    /// <summary>
    /// Reports a copy of the encoded bytes, the shift, the raw value and the value size to the reflector.
    /// </summary>
    public override void ReflectWith(IAttributeReflector reflector)
    {
        // Refresh the buffer first so the reflected bytes match the current value and shift.
        FillBytesRef();
        var bytesSnapshot = BytesRef.DeepCopyOf(_bytes);
        reflector.Reflect(typeof(ITermToBytesRefAttribute), "bytes", bytesSnapshot);
        reflector.Reflect(typeof(INumericTermAttribute), "shift", Shift);
        reflector.Reflect(typeof(INumericTermAttribute), "rawValue", RawValue);
        reflector.Reflect(typeof(INumericTermAttribute), "valueSize", ValueSize);
    }

    /// <summary>
    /// Copies value, value size, precision step and shift into <paramref name="target"/>,
    /// which must be a <see cref="NumericTermAttribute"/>.
    /// </summary>
    public override void CopyTo(Util.IAttribute target)
    {
        ((NumericTermAttribute)target).Init(_value, ValueSize, _precisionStep, Shift);
    }
}
/// <summary>
/// Creates a token stream for numeric values using the default <c>precisionStep</c>
/// <see cref="NumericUtils.PRECISION_STEP_DEFAULT"/> (4). The stream is not yet initialized,
/// before using set a value using the various Set<em>???</em>Value() methods.
/// </summary>
public NumericTokenStream()
    : this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, NumericUtils.PRECISION_STEP_DEFAULT)
{
    // Nothing to do here: the chained constructor already registers the
    // attributes and initializes the instance fields.
}
/// <summary>
/// Creates a token stream for numeric values with the specified
/// <paramref name="precisionStep"/>. The stream is not yet initialized,
/// before using set a value using the various Set<em>???</em>Value() methods.
/// </summary>
/// <param name="precisionStep"> the precision step; must be at least 1. </param>
public NumericTokenStream(int precisionStep)
    : this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep)
{
    // Nothing to do here: the chained constructor already registers the
    // attributes, validates precisionStep and initializes the instance fields.
}
/// <summary>
/// Expert: Creates a token stream for numeric values with the specified
/// <paramref name="precisionStep"/> using the given
/// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/>.
/// The stream is not yet initialized,
/// before using set a value using the various Set<em>???</em>Value() methods.
/// </summary>
/// <param name="factory"> the attribute factory to delegate to; it is wrapped so that
/// requests for <see cref="ICharTermAttribute"/> are rejected. </param>
/// <param name="precisionStep"> the precision step; must be at least 1. </param>
/// <exception cref="ArgumentOutOfRangeException"> <paramref name="precisionStep"/> is less than 1. </exception>
public NumericTokenStream(AttributeSource.AttributeFactory factory, int precisionStep)
    : base(new NumericAttributeFactory(factory))
{
    InitializeInstanceFields();
    if (precisionStep < 1)
    {
        // ArgumentOutOfRangeException derives from ArgumentException, so callers
        // catching the broader type keep working while the offending parameter is named.
        throw new ArgumentOutOfRangeException(nameof(precisionStep), "precisionStep must be >=1");
    }
    this.precisionStep = precisionStep;
    // Start one step below zero so that the first IncShift() call lands on shift 0.
    numericAtt.Shift = -precisionStep;
}
/// <summary>
/// Prepares this stream to enumerate the tokens of a 64-bit <see cref="long"/> value.
/// <para/>
/// NOTE: This was setLongValue() in Lucene
/// </summary>
/// <param name="value"> the value whose tokens this <see cref="TokenStream"/> should enumerate. </param>
/// <returns> this instance, which allows chaining such as:
/// <code>new Field(name, new NumericTokenStream(precisionStep).SetInt64Value(value))</code> </returns>
public NumericTokenStream SetInt64Value(long value)
{
    const int bits = 64;
    valSize = bits;
    numericAtt.Init(value, bits, precisionStep, -precisionStep);
    return this;
}
/// <summary>
/// Prepares this stream to enumerate the tokens of a 32-bit <see cref="int"/> value.
/// <para/>
/// NOTE: This was setIntValue() in Lucene
/// </summary>
/// <param name="value"> the value whose tokens this <see cref="TokenStream"/> should enumerate. </param>
/// <returns> this instance, which allows chaining such as:
/// <code>new Field(name, new NumericTokenStream(precisionStep).SetInt32Value(value))</code> </returns>
public NumericTokenStream SetInt32Value(int value)
{
    const int bits = 32;
    valSize = bits;
    numericAtt.Init(value, bits, precisionStep, -precisionStep);
    return this;
}
/// <summary>
/// Prepares this stream to enumerate the tokens of a <see cref="double"/> value, which is
/// first converted with <see cref="NumericUtils.DoubleToSortableInt64(double)"/>. </summary>
/// <param name="value"> the value whose tokens this <see cref="TokenStream"/> should enumerate. </param>
/// <returns> this instance, which allows chaining such as:
/// <code>new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value))</code> </returns>
public NumericTokenStream SetDoubleValue(double value)
{
    const int bits = 64;
    long sortableBits = NumericUtils.DoubleToSortableInt64(value);
    valSize = bits;
    numericAtt.Init(sortableBits, bits, precisionStep, -precisionStep);
    return this;
}
/// <summary>
/// Prepares this stream to enumerate the tokens of a <see cref="float"/> value, which is
/// first converted with <see cref="NumericUtils.SingleToSortableInt32(float)"/>.
/// <para/>
/// NOTE: This was setFloatValue() in Lucene
/// </summary>
/// <param name="value"> the value whose tokens this <see cref="TokenStream"/> should enumerate. </param>
/// <returns> this instance, which allows chaining such as:
/// <code>new Field(name, new NumericTokenStream(precisionStep).SetSingleValue(value))</code> </returns>
public NumericTokenStream SetSingleValue(float value)
{
    const int bits = 32;
    int sortableBits = NumericUtils.SingleToSortableInt32(value);
    valSize = bits;
    numericAtt.Init(sortableBits, bits, precisionStep, -precisionStep);
    return this;
}
/// <summary>
/// Rewinds the stream by setting the shift back to <c>-precisionStep</c>, so that the
/// next shift increment in <see cref="IncrementToken()"/> starts again from the full-precision token.
/// </summary>
/// <exception cref="InvalidOperationException"> no value has been supplied yet through
/// one of the Set<em>???</em>Value() methods. </exception>
public override void Reset()
{
    if (valSize == 0)
    {
        // Calling Reset() before a value is set is a wrong-state error, so a
        // specific exception type is thrown instead of the base System.Exception.
        throw new InvalidOperationException("call Set???Value() before usage");
    }
    numericAtt.Shift = -precisionStep;
}
/// <summary>
/// Advances to the next precision level: increments the shift, then sets the token type
/// and position increment accordingly. The token at shift 0 gets
/// <see cref="TOKEN_TYPE_FULL_PREC"/> and a position increment of 1; all others get
/// <see cref="TOKEN_TYPE_LOWER_PREC"/> and a position increment of 0.
/// </summary>
/// <returns> <c>true</c> while the new shift is smaller than the value size in bits;
/// <c>false</c> once the stream is exhausted. </returns>
/// <exception cref="InvalidOperationException"> no value has been supplied yet through
/// one of the Set<em>???</em>Value() methods. </exception>
public override bool IncrementToken()
{
    if (valSize == 0)
    {
        // Wrong-state error: throw a specific exception type rather than System.Exception.
        throw new InvalidOperationException("call Set???Value() before usage");
    }
    // this will only clear all other attributes in this TokenStream
    ClearAttributes();
    int shift = numericAtt.IncShift();
    bool isFullPrecision = shift == 0;
    typeAtt.Type = isFullPrecision ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC;
    posIncrAtt.PositionIncrement = isFullPrecision ? 1 : 0;
    return shift < valSize;
}
/// <summary>
/// Gets the precision step this stream was constructed with.
/// </summary>
public int PrecisionStep
{
    get { return precisionStep; }
}
// members
// The three attribute references below are assigned in InitializeInstanceFields().
// Numeric term attribute: holds the value, value size, precision step and current shift.
private INumericTermAttribute numericAtt;
// Token type attribute: set to TOKEN_TYPE_FULL_PREC or TOKEN_TYPE_LOWER_PREC per token.
private ITypeAttribute typeAtt;
// Position increment attribute: set to 1 for the shift-0 token and 0 for the others.
private IPositionIncrementAttribute posIncrAtt;
// Value size in bits; the Set???Value() methods assign 32 or 64.
private int valSize = 0; // valSize==0 means not initialized
// Precision step, assigned once in the constructor after validation.
private readonly int precisionStep;
}
}