/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.lucene.misc; |
| |
| import org.apache.lucene.search.similarities.ClassicSimilarity; |
| |
| /** |
| * <p> |
| * A similarity with a lengthNorm that provides for a "plateau" of |
| * equally good lengths, and tf helper functions. |
| * </p> |
| * <p> |
| * For lengthNorm, A min/max can be specified to define the |
| * plateau of lengths that should all have a norm of 1.0. |
| * Below the min, and above the max the lengthNorm drops off in a |
| * sqrt function. |
| * </p> |
| * <p> |
| * For tf, baselineTf and hyperbolicTf functions are provided, which |
| * subclasses can choose between. |
| * </p> |
| * |
| * @see <a href="doc-files/ss.gnuplot">A Gnuplot file used to generate some of the visualizations referenced from each function.</a> |
| */ |
| public class SweetSpotSimilarity extends ClassicSimilarity { |
| |
| private int ln_min = 1; |
| private int ln_max = 1; |
| private float ln_steep = 0.5f; |
| |
| private float tf_base = 0.0f; |
| private float tf_min = 0.0f; |
| |
| private float tf_hyper_min = 0.0f; |
| private float tf_hyper_max = 2.0f; |
| private double tf_hyper_base = 1.3d; |
| private float tf_hyper_xoffset = 10.0f; |
| |
| public SweetSpotSimilarity() { |
| super(); |
| } |
| |
| /** |
| * Sets the baseline and minimum function variables for baselineTf |
| * |
| * @see #baselineTf |
| */ |
| public void setBaselineTfFactors(float base, float min) { |
| tf_min = min; |
| tf_base = base; |
| } |
| |
| /** |
| * Sets the function variables for the hyperbolicTf functions |
| * |
| * @param min the minimum tf value to ever be returned (default: 0.0) |
| * @param max the maximum tf value to ever be returned (default: 2.0) |
| * @param base the base value to be used in the exponential for the hyperbolic function (default: 1.3) |
| * @param xoffset the midpoint of the hyperbolic function (default: 10.0) |
| * @see #hyperbolicTf |
| */ |
| public void setHyperbolicTfFactors(float min, float max, |
| double base, float xoffset) { |
| tf_hyper_min = min; |
| tf_hyper_max = max; |
| tf_hyper_base = base; |
| tf_hyper_xoffset = xoffset; |
| } |
| |
| /** |
| * Sets the default function variables used by lengthNorm when no field |
| * specific variables have been set. |
| * |
| * @see #lengthNorm |
| */ |
| public void setLengthNormFactors(int min, int max, float steepness, boolean discountOverlaps) { |
| this.ln_min = min; |
| this.ln_max = max; |
| this.ln_steep = steepness; |
| this.discountOverlaps = discountOverlaps; |
| } |
| |
| /** |
| * Implemented as: |
| * <code> |
| * 1/sqrt( steepness * (abs(x-min) + abs(x-max) - (max-min)) + 1 ) |
| * </code>. |
| * |
| * <p> |
| * This degrades to <code>1/sqrt(x)</code> when min and max are both 1 and |
| * steepness is 0.5 |
| * </p> |
| * |
| * <p> |
| * :TODO: potential optimization is to just flat out return 1.0f if numTerms |
| * is between min and max. |
| * </p> |
| * |
| * @see #setLengthNormFactors |
| * @see <a href="doc-files/ss.computeLengthNorm.svg">An SVG visualization of this function</a> |
| */ |
| @Override |
| public float lengthNorm(int numTerms) { |
| final int l = ln_min; |
| final int h = ln_max; |
| final float s = ln_steep; |
| |
| return (float) |
| (1.0f / |
| Math.sqrt |
| ( |
| ( |
| s * |
| (float)(Math.abs(numTerms - l) + Math.abs(numTerms - h) - (h-l)) |
| ) |
| + 1.0f |
| ) |
| ); |
| } |
| |
| /** |
| * Delegates to baselineTf |
| * |
| * @see #baselineTf |
| */ |
| @Override |
| public float tf(float freq) { |
| return baselineTf(freq); |
| } |
| |
| /** |
| * Implemented as: |
| * <code> |
| * (x <= min) ? base : sqrt(x+(base**2)-min) |
| * </code> |
| * ...but with a special case check for 0. |
| * <p> |
| * This degrates to <code>sqrt(x)</code> when min and base are both 0 |
| * </p> |
| * |
| * @see #setBaselineTfFactors |
| * @see <a href="doc-files/ss.baselineTf.svg">An SVG visualization of this function</a> |
| */ |
| public float baselineTf(float freq) { |
| |
| if (0.0f == freq) return 0.0f; |
| |
| return (freq <= tf_min) |
| ? tf_base |
| : (float)Math.sqrt(freq + (tf_base * tf_base) - tf_min); |
| } |
| |
| /** |
| * Uses a hyperbolic tangent function that allows for a hard max... |
| * |
| * <code> |
| * tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1) |
| * </code> |
| * |
| * <p> |
| * This code is provided as a convenience for subclasses that want |
| * to use a hyperbolic tf function. |
| * </p> |
| * |
| * @see #setHyperbolicTfFactors |
| * @see <a href="doc-files/ss.hyperbolicTf.svg">An SVG visualization of this function</a> |
| */ |
| public float hyperbolicTf(float freq) { |
| if (0.0f == freq) return 0.0f; |
| |
| final float min = tf_hyper_min; |
| final float max = tf_hyper_max; |
| final double base = tf_hyper_base; |
| final float xoffset = tf_hyper_xoffset; |
| final double x = (double)(freq - xoffset); |
| |
| final float result = min + |
| (float)( |
| (max-min) / 2.0f |
| * |
| ( |
| ( ( Math.pow(base,x) - Math.pow(base,-x) ) |
| / ( Math.pow(base,x) + Math.pow(base,-x) ) |
| ) |
| + 1.0d |
| ) |
| ); |
| |
| return Float.isNaN(result) ? max : result; |
| |
| } |
| |
| public String toString() { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("SweetSpotSimilarity") |
| .append('(').append("ln_min=").append(ln_min).append(", ") |
| .append("ln_max=").append(ln_max).append(", ") |
| .append("ln_steep=").append(ln_steep).append(", ") |
| .append("tf_base=").append(tf_base).append(", ") |
| .append("tf_min=").append(tf_min).append(", ") |
| .append("tf_hyper_min=").append(tf_hyper_min).append(", ") |
| .append("tf_hyper_max=").append(tf_hyper_max).append(", ") |
| .append("tf_hyper_base=").append(tf_hyper_base).append(", ") |
| .append("tf_hyper_xoffset=").append(tf_hyper_xoffset) |
| .append(")"); |
| return sb.toString(); |
| } |
| } |