﻿using System;

namespace Lucene.Net.Search.Spell
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// N-Gram version of edit distance based on paper by Grzegorz Kondrak, 
    /// "N-gram similarity and distance". Proceedings of the Twelfth International 
    /// Conference on String Processing and Information Retrieval (SPIRE 2005), pp. 115-126, 
    /// Buenos Aires, Argentina, November 2005. 
    /// <a href="http://www.cs.ualberta.ca/~kondrak/papers/spire05.pdf">http://www.cs.ualberta.ca/~kondrak/papers/spire05.pdf</a>
    /// 
    /// This implementation uses the position-based optimization to compute partial
    /// matches of n-gram sub-strings and adds a null-character prefix of size n-1 
    /// so that the first character is contained in the same number of n-grams as 
    /// a middle character.  Null-character prefix matches are discounted so that 
    /// strings with no matching characters will return a distance of 0.
    /// </summary>
    public class NGramDistance : IStringDistance
    {

        private int n;

        /// <summary>
        /// Creates an N-Gram distance measure using n-grams of the specified size. </summary>
        /// <param name="size"> The size of the n-gram to be used to compute the string distance. </param>
        public NGramDistance(int size)
        {
            this.n = size;
        }

        /// <summary>
        /// Creates an N-Gram distance measure using n-grams of size 2.
        /// </summary>
        public NGramDistance()
            : this(2)
        {
        }

        public virtual float GetDistance(string source, string target)
        {
            int sl = source.Length;
            int tl = target.Length;

            if (sl == 0 || tl == 0)
            {
                if (sl == tl)
                {
                    return 1;
                }
                else
                {
                    return 0;
                }
            }

            int cost = 0;
            if (sl < n || tl < n)
            {
                for (int i2 = 0, ni = Math.Min(sl, tl); i2 < ni; i2++)
                {
                    if (source[i2] == target[i2])
                    {
                        cost++;
                    }
                }
                return (float)cost / Math.Max(sl, tl);
            }

            char[] sa = new char[sl + n - 1];
            float[] p; //'previous' cost array, horizontally
            float[] d; // cost array, horizontally
            float[] _d; //placeholder to assist in swapping p and d

            //construct sa with prefix
            for (int i2 = 0; i2 < sa.Length; i2++)
            {
                if (i2 < n - 1)
                {
                    sa[i2] = (char)0; //add prefix
                }
                else
                {
                    sa[i2] = source[i2 - n + 1];
                }
            }
            p = new float[sl + 1];
            d = new float[sl + 1];

            // indexes into strings s and t
            int i; // iterates through source
            int j; // iterates through target

            char[] t_j = new char[n]; // jth n-gram of t

            for (i = 0; i <= sl; i++)
            {
                p[i] = i;
            }

            for (j = 1; j <= tl; j++)
            {
                //construct t_j n-gram 
                if (j < n)
                {
                    for (int ti = 0; ti < n - j; ti++)
                    {
                        t_j[ti] = (char)0; //add prefix
                    }
                    for (int ti = n - j; ti < n; ti++)
                    {
                        t_j[ti] = target[ti - (n - j)];
                    }
                }
                else
                {
                    t_j = target.Substring(j - n, j - (j - n)).ToCharArray();
                }
                d[0] = j;
                for (i = 1; i <= sl; i++)
                {
                    cost = 0;
                    int tn = n;
                    //compare sa to t_j
                    for (int ni = 0; ni < n; ni++)
                    {
                        if (sa[i - 1 + ni] != t_j[ni])
                        {
                            cost++;
                        }
                        else if (sa[i - 1 + ni] == 0) //discount matches on prefix
                        {
                            tn--;
                        }
                    }
                    float ec = (float)cost / tn;
                    // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
                    d[i] = Math.Min(Math.Min(d[i - 1] + 1, p[i] + 1), p[i - 1] + ec);
                }
                // copy current distance counts to 'previous row' distance counts
                _d = p;
                p = d;
                d = _d;
            }

            // our last action in the above loop was to switch d and p, so p now
            // actually has the most recent cost counts
            return 1.0f - (p[sl] / Math.Max(tl, sl));
        }

        public override int GetHashCode()
        {
            return 1427 * n * this.GetType().GetHashCode();
        }

        public override bool Equals(object obj)
        {
            if (this == obj)
            {
                return true;
            }
            if (null == obj || this.GetType() != obj.GetType())
            {
                return false;
            }

            var o = (NGramDistance)obj;
            return o.n == this.n;
        }

        public override string ToString()
        {
            return "ngram(" + n + ")";
        }
    }
}