| using Lucene.Net.Index; |
| using Lucene.Net.Search.Similarities; |
| using System; |
| |
| namespace Lucene.Net.Misc |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// <para> |
| /// A similarity with a lengthNorm that provides for a "plateau" of |
| /// equally good lengths, and tf helper functions. |
| /// </para> |
| /// <para> |
| /// For lengthNorm, A min/max can be specified to define the |
| /// plateau of lengths that should all have a norm of 1.0. |
| /// Below the min, and above the max the lengthNorm drops off in a |
| /// sqrt function. |
| /// </para> |
| /// <para> |
| /// For tf, baselineTf and hyperbolicTf functions are provided, which |
| /// subclasses can choose between. |
| /// </para> |
| /// </summary> |
| /// <a href="doc-files/ss.gnuplot">A Gnuplot file used to generate some of the visualizations refrenced from each function.</a> |
| public class SweetSpotSimilarity : DefaultSimilarity |
| { |
| private int ln_min = 1; |
| private int ln_max = 1; |
| private float ln_steep = 0.5f; |
| |
| private float tf_base = 0.0f; |
| private float tf_min = 0.0f; |
| |
| private float tf_hyper_min = 0.0f; |
| private float tf_hyper_max = 2.0f; |
| private double tf_hyper_base = 1.3d; |
| private float tf_hyper_xoffset = 10.0f; |
| |
| public SweetSpotSimilarity() |
| : base() |
| { |
| } |
| |
| /// <summary> |
| /// Sets the baseline and minimum function variables for baselineTf |
| /// </summary> |
| /// <seealso cref="BaselineTf(float)"/> |
| public virtual void SetBaselineTfFactors(float @base, float min) |
| { |
| tf_min = min; |
| tf_base = @base; |
| } |
| |
| /// <summary> |
| /// Sets the function variables for the hyperbolicTf functions |
| /// </summary> |
| /// <param name="min"> the minimum tf value to ever be returned (default: 0.0) </param> |
| /// <param name="max"> the maximum tf value to ever be returned (default: 2.0) </param> |
| /// <param name="base"> the base value to be used in the exponential for the hyperbolic function (default: 1.3) </param> |
| /// <param name="xoffset"> the midpoint of the hyperbolic function (default: 10.0) </param> |
| /// <seealso cref="HyperbolicTf(float)"/> |
| public virtual void SetHyperbolicTfFactors(float min, float max, double @base, float xoffset) |
| { |
| tf_hyper_min = min; |
| tf_hyper_max = max; |
| tf_hyper_base = @base; |
| tf_hyper_xoffset = xoffset; |
| } |
| |
| /// <summary> |
| /// Sets the default function variables used by lengthNorm when no field |
| /// specific variables have been set. |
| /// </summary> |
| /// <seealso cref="ComputeLengthNorm(int)"/> |
| public virtual void SetLengthNormFactors(int min, int max, float steepness, bool discountOverlaps) |
| { |
| this.ln_min = min; |
| this.ln_max = max; |
| this.ln_steep = steepness; |
| this.DiscountOverlaps = discountOverlaps; |
| } |
| |
| /// <summary> |
| /// Implemented as |
| /// <c> |
| /// state.Boost * ComputeLengthNorm(numTokens) |
| /// </c> |
| /// where numTokens does not count overlap tokens if |
| /// discountOverlaps is true by default or true for this |
| /// specific field. |
| /// </summary> |
| public override float LengthNorm(FieldInvertState state) |
| { |
| int numTokens; |
| |
| if (DiscountOverlaps) |
| { |
| numTokens = state.Length - state.NumOverlap; |
| } |
| else |
| { |
| numTokens = state.Length; |
| } |
| |
| return state.Boost * ComputeLengthNorm(numTokens); |
| } |
| |
| /// <summary> |
| /// Implemented as: |
| /// <code> |
| /// 1/sqrt( steepness * (Math.Abs(x-min) + Math.Abs(x-max) - (max-min)) + 1 ) |
| /// </code>. |
| /// |
| /// <para> |
| /// This degrades to <code>1/Math.Sqrt(x)</code> when min and max are both 1 and |
| /// steepness is 0.5 |
| /// </para> |
| /// |
| /// <para> |
| /// :TODO: potential optimization is to just flat out return 1.0f if numTerms |
| /// is between min and max. |
| /// </para> |
| /// </summary> |
| /// <seealso cref="SetLengthNormFactors(int, int, float, bool)"/> |
| /// <a href="doc-files/ss.computeLengthNorm.svg">An SVG visualization of this function</a> |
| public virtual float ComputeLengthNorm(int numTerms) |
| { |
| int l = ln_min; |
| int h = ln_max; |
| float s = ln_steep; |
| |
| return (float)(1.0f / Math.Sqrt((s * (float)(Math.Abs(numTerms - l) + Math.Abs(numTerms - h) - (h - l))) + 1.0f)); |
| } |
| |
| /// <summary> |
| /// Delegates to baselineTf |
| /// </summary> |
| /// <seealso cref="BaselineTf(float)"/> |
| public override float Tf(float freq) |
| { |
| return BaselineTf(freq); |
| } |
| |
| /// <summary> |
| /// Implemented as: |
| /// <code> |
| /// (x <= min) ? base : Math.Sqrt(x+(base**2)-min) |
| /// </code> |
| /// ...but with a special case check for 0. |
| /// <para> |
| /// This degrates to <code>Math.Sqrt(x)</code> when min and base are both 0 |
| /// </para> |
| /// </summary> |
| /// <seealso cref="SetBaselineTfFactors(float, float)"/> |
| /// <a href="doc-files/ss.baselineTf.svg">An SVG visualization of this function</a> |
| public virtual float BaselineTf(float freq) |
| { |
| if (0.0f == freq) |
| { |
| return 0.0f; |
| } |
| |
| return (freq <= tf_min) ? tf_base : (float)Math.Sqrt(freq + (tf_base * tf_base) - tf_min); |
| } |
| |
| /// <summary> |
| /// Uses a hyperbolic tangent function that allows for a hard max... |
| /// |
| /// <code> |
| /// tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1) |
| /// </code> |
| /// |
| /// <para> |
| /// This code is provided as a convenience for subclasses that want |
| /// to use a hyperbolic tf function. |
| /// </para> |
| /// </summary> |
| /// <seealso cref="SetHyperbolicTfFactors(float, float, double, float)"/> |
| /// <a href="doc-files/ss.hyperbolicTf.svg">An SVG visualization of this function</a> |
| public virtual float HyperbolicTf(float freq) |
| { |
| if (0.0f == freq) |
| { |
| return 0.0f; |
| } |
| |
| float min = tf_hyper_min; |
| float max = tf_hyper_max; |
| double @base = tf_hyper_base; |
| float xoffset = tf_hyper_xoffset; |
| double x = (double)(freq - xoffset); |
| |
| float result = min + (float)((max - min) / 2.0f * (((Math.Pow(@base, x) - Math.Pow(@base, -x)) / (Math.Pow(@base, x) + Math.Pow(@base, -x))) + 1.0d)); |
| |
| return float.IsNaN(result) ? max : result; |
| } |
| } |
| } |