blob: 9287a750ef44607465c6d9f9eb5572788ec51b5c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.misc;
import org.apache.lucene.search.similarities.ClassicSimilarity;
/**
* <p>
* A similarity with a lengthNorm that provides for a "plateau" of
* equally good lengths, and tf helper functions.
* </p>
* <p>
* For lengthNorm, A min/max can be specified to define the
* plateau of lengths that should all have a norm of 1.0.
* Below the min, and above the max the lengthNorm drops off in a
* sqrt function.
* </p>
* <p>
* For tf, baselineTf and hyperbolicTf functions are provided, which
* subclasses can choose between.
* </p>
*
* @see <a href="doc-files/ss.gnuplot">A Gnuplot file used to generate some of the visualizations referenced from each function.</a>
*/
public class SweetSpotSimilarity extends ClassicSimilarity {

  // lengthNorm parameters: plateau bounds and drop-off steepness
  private int ln_min = 1;
  private int ln_max = 1;
  private float ln_steep = 0.5f;

  // baselineTf parameters
  private float tf_base = 0.0f;
  private float tf_min = 0.0f;

  // hyperbolicTf parameters
  private float tf_hyper_min = 0.0f;
  private float tf_hyper_max = 2.0f;
  private double tf_hyper_base = 1.3d;
  private float tf_hyper_xoffset = 10.0f;

  /** Creates a similarity using the default factors for every function. */
  public SweetSpotSimilarity() {
    super();
  }

  /**
   * Sets the baseline and minimum function variables for baselineTf
   *
   * @param base the value returned by baselineTf for non-zero frequencies at or below min (default: 0.0)
   * @param min the frequency at or below which baselineTf returns base (default: 0.0)
   * @see #baselineTf
   */
  public void setBaselineTfFactors(float base, float min) {
    tf_min = min;
    tf_base = base;
  }

  /**
   * Sets the function variables for the hyperbolicTf functions
   *
   * @param min the minimum tf value to ever be returned (default: 0.0)
   * @param max the maximum tf value to ever be returned (default: 2.0)
   * @param base the base value to be used in the exponential for the hyperbolic function (default: 1.3)
   * @param xoffset the midpoint of the hyperbolic function (default: 10.0)
   * @see #hyperbolicTf
   */
  public void setHyperbolicTfFactors(float min, float max,
                                     double base, float xoffset) {
    tf_hyper_min = min;
    tf_hyper_max = max;
    tf_hyper_base = base;
    tf_hyper_xoffset = xoffset;
  }

  /**
   * Sets the function variables used by lengthNorm.
   *
   * @param min the lower bound of the plateau of lengths (default: 1)
   * @param max the upper bound of the plateau of lengths (default: 1)
   * @param steepness how quickly the norm drops off outside of the plateau (default: 0.5)
   * @param discountOverlaps assigned to the <code>discountOverlaps</code> field, which is not
   *        declared in this class (presumably inherited from the superclass)
   * @see #lengthNorm
   */
  public void setLengthNormFactors(int min, int max, float steepness, boolean discountOverlaps) {
    this.ln_min = min;
    this.ln_max = max;
    this.ln_steep = steepness;
    this.discountOverlaps = discountOverlaps;
  }

  /**
   * Implemented as:
   * <code>
   * 1/sqrt( steepness * (abs(x-min) + abs(x-max) - (max-min)) + 1 )
   * </code>.
   *
   * <p>
   * This degrades to <code>1/sqrt(x)</code> when min and max are both 1 and
   * steepness is 0.5
   * </p>
   *
   * <p>
   * :TODO: potential optimization is to just flat out return 1.0f if numTerms
   * is between min and max.
   * </p>
   *
   * @param numTerms the length (x in the formula above) to compute the norm for
   * @return the norm for the given length
   * @see #setLengthNormFactors
   * @see <a href="doc-files/ss.computeLengthNorm.svg">An SVG visualization of this function</a>
   */
  @Override
  public float lengthNorm(int numTerms) {
    final int l = ln_min;
    final int h = ln_max;
    final float s = ln_steep;

    // distance outside of the [min,max] plateau, counted from both ends;
    // evaluates to 0 for any length inside the plateau
    final float distance =
        (float) (Math.abs(numTerms - l) + Math.abs(numTerms - h) - (h - l));

    return (float) (1.0f / Math.sqrt((s * distance) + 1.0f));
  }

  /**
   * Delegates to baselineTf
   *
   * @param freq the term frequency
   * @return the result of {@link #baselineTf(float)} for the given frequency
   * @see #baselineTf
   */
  @Override
  public float tf(float freq) {
    return baselineTf(freq);
  }

  /**
   * Implemented as:
   * <code>
   * (x &lt;= min) &#63; base : sqrt(x+(base**2)-min)
   * </code>
   * ...but with a special case check for 0.
   * <p>
   * This degrades to <code>sqrt(x)</code> when min and base are both 0
   * </p>
   *
   * @param freq the term frequency (x in the formula above)
   * @return 0 when freq is 0, otherwise the value of the formula above
   * @see #setBaselineTfFactors
   * @see <a href="doc-files/ss.baselineTf.svg">An SVG visualization of this function</a>
   */
  public float baselineTf(float freq) {
    if (0.0f == freq) {
      return 0.0f;
    }

    return (freq <= tf_min)
        ? tf_base
        : (float) Math.sqrt(freq + (tf_base * tf_base) - tf_min);
  }

  /**
   * Uses a hyperbolic tangent function that allows for a hard max...
   *
   * <code>
   * tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1)
   * </code>
   *
   * <p>
   * This code is provided as a convenience for subclasses that want
   * to use a hyperbolic tf function.
   * </p>
   *
   * <p>
   * When one of the exponentials overflows to infinity the quotient above
   * is not a number; in that case the limit of the function is returned
   * instead: min when <code>base**-(x-xoffset)</code> is the term that
   * overflowed, max otherwise.
   * </p>
   *
   * @param freq the term frequency (x in the formula above)
   * @return 0 when freq is 0, otherwise the value of the formula above
   * @see #setHyperbolicTfFactors
   * @see <a href="doc-files/ss.hyperbolicTf.svg">An SVG visualization of this function</a>
   */
  public float hyperbolicTf(float freq) {
    if (0.0f == freq) {
      return 0.0f;
    }

    final float min = tf_hyper_min;
    final float max = tf_hyper_max;
    final double base = tf_hyper_base;
    final float xoffset = tf_hyper_xoffset;
    final double x = (double) (freq - xoffset);

    final double rising = Math.pow(base, x);
    final double falling = Math.pow(base, -x);

    final float result = min +
        (float) (
            (max - min) / 2.0f
            *
            (((rising - falling) / (rising + falling)) + 1.0d)
        );

    if (!Float.isNaN(result)) {
      return result;
    }

    // NaN: typically inf/inf after one exponential overflowed. If the
    // base**-x term is the infinite one, the quotient's limit is -1 and the
    // function sits at its floor; every other NaN case keeps returning max.
    if (Double.isInfinite(falling) && !Double.isInfinite(rising)) {
      return min;
    }
    return max;
  }

  /**
   * Returns the class name followed by the current value of every function variable.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("SweetSpotSimilarity")
      .append('(').append("ln_min=").append(ln_min).append(", ")
      .append("ln_max=").append(ln_max).append(", ")
      .append("ln_steep=").append(ln_steep).append(", ")
      .append("tf_base=").append(tf_base).append(", ")
      .append("tf_min=").append(tf_min).append(", ")
      .append("tf_hyper_min=").append(tf_hyper_min).append(", ")
      .append("tf_hyper_max=").append(tf_hyper_max).append(", ")
      .append("tf_hyper_base=").append(tf_hyper_base).append(", ")
      .append("tf_hyper_xoffset=").append(tf_hyper_xoffset)
      .append(")");
    return sb.toString();
  }
}