﻿using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Util;
using System;
using System.IO;
using System.Runtime.CompilerServices;

namespace Lucene.Net.Sandbox.Queries
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Potentially slow fuzzy <see cref="TermsEnum"/> for enumerating all terms that are similar
    /// to the specified filter term.
    /// <para/>
    /// If the minSimilarity or maxEdits is greater than the Automaton's
    /// allowable range, this backs off to the classic (brute force)
    /// fuzzy terms enum method by calling <see cref="FuzzyTermsEnum.GetAutomatonEnum(int, BytesRef)"/>.
    /// <para/>
    /// Term enumerations are always ordered by
    /// <see cref="FuzzyTermsEnum.Comparer"/>. Each term in the enumeration is
    /// greater than all that precede it.
    /// </summary>
    [Obsolete("Use FuzzyTermsEnum instead.")]
    public class SlowFuzzyTermsEnum : FuzzyTermsEnum
    {
        public SlowFuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
            float minSimilarity, int prefixLength)
            : base(terms, atts, term, minSimilarity, prefixLength, false)
        {
        }

        protected override void MaxEditDistanceChanged(BytesRef lastTerm, int maxEdits, bool init)
        {
            TermsEnum newEnum = GetAutomatonEnum(maxEdits, lastTerm);
            if (newEnum != null)
            {
                SetEnum(newEnum);
            }
            else if (init)
            {
                SetEnum(new LinearFuzzyTermsEnum(this));
            }
        }

        /// <summary>
        /// Implement fuzzy enumeration with linear brute force.
        /// </summary>
        private class LinearFuzzyTermsEnum : FilteredTermsEnum
        {
            private readonly SlowFuzzyTermsEnum outerInstance;

            /// <summary>
            /// Allows us save time required to create a new array
            /// every time similarity is called.
            /// </summary>
            private int[] d;
            private int[] p;

            /// <summary>this is the text, minus the prefix</summary>
            private readonly int[] text;

            private readonly IBoostAttribute boostAtt;

            /// <summary>
            /// Constructor for enumeration of all terms from specified <c>reader</c> which share a prefix of
            /// length <c>prefixLength</c> with <c>term</c> and which have a fuzzy similarity &gt;
            /// <c>minSimilarity</c>.
            /// <para/>
            /// After calling the constructor the enumeration is already pointing to the first 
            /// valid term if such a term exists.
            /// </summary>
            /// <exception cref="IOException">If there is a low-level I/O error.</exception>
            public LinearFuzzyTermsEnum(SlowFuzzyTermsEnum outerInstance)
                : base(outerInstance.m_terms.GetEnumerator())
            {
                this.outerInstance = outerInstance;
                this.boostAtt = Attributes.AddAttribute<IBoostAttribute>();

                this.text = new int[outerInstance.m_termLength - outerInstance.m_realPrefixLength];
                System.Array.Copy(outerInstance.m_termText, outerInstance.m_realPrefixLength, text, 0, text.Length);
                string prefix = UnicodeUtil.NewString(outerInstance.m_termText, 0, outerInstance.m_realPrefixLength);
                prefixBytesRef = new BytesRef(prefix);
                this.d = new int[this.text.Length + 1];
                this.p = new int[this.text.Length + 1];


                SetInitialSeekTerm(prefixBytesRef);
            }

            private readonly BytesRef prefixBytesRef;
            /// <summary>used for unicode conversion from BytesRef byte[] to int[]</summary>
            private readonly Int32sRef utf32 = new Int32sRef(20);

            /// <summary>
            /// <para>
            /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to
            /// calculate the distance between the given term and the comparing term.
            /// </para>
            /// <para>
            /// If the minSimilarity is >= 1.0, this uses the maxEdits as the comparison.
            /// Otherwise, this method uses the following logic to calculate similarity.
            /// <code>
            ///     similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
            /// </code>
            /// where distance is the Levenshtein distance for the two words.
            /// </para>
            /// </summary>
#if NETFRAMEWORK
            [MethodImpl(MethodImplOptions.NoOptimization)] // LUCENENET specific: comparing float equality fails in x86 on .NET Framework with optimizations enabled. Fixes TestTokenLengthOpt.
#endif
            protected override sealed AcceptStatus Accept(BytesRef term)
            {
                if (StringHelper.StartsWith(term, prefixBytesRef))
                {
                    UnicodeUtil.UTF8toUTF32(term, utf32);
                    int distance = CalcDistance(utf32.Int32s, outerInstance.m_realPrefixLength, utf32.Length - outerInstance.m_realPrefixLength);

                    //Integer.MIN_VALUE is the sentinel that Levenshtein stopped early
                    if (distance == int.MinValue)
                    {
                        return AcceptStatus.NO;
                    }
                    //no need to calc similarity, if raw is true and distance > maxEdits
                    if (outerInstance.m_raw == true && distance > outerInstance.m_maxEdits)
                    {
                        return AcceptStatus.NO;
                    }
                    float similarity = CalcSimilarity(distance, (utf32.Length - outerInstance.m_realPrefixLength), text.Length);

                    //if raw is true, then distance must also be <= maxEdits by now
                    //given the previous if statement
                    if (outerInstance.m_raw == true ||
                          (outerInstance.m_raw == false && similarity > outerInstance.MinSimilarity))
                    {
                        boostAtt.Boost = (similarity - outerInstance.MinSimilarity) * outerInstance.m_scaleFactor;
                        return AcceptStatus.YES;
                    }
                    else
                    {
                        return AcceptStatus.NO;
                    }
                }
                else
                {
                    return AcceptStatus.END;
                }
            }

            /******************************
             * Compute Levenshtein distance
             ******************************/

            /// <summary>
            /// <para>
            /// <see cref="CalcDistance(int[], int, int)"/> returns the Levenshtein distance between the query term
            /// and the target term.
            /// </para>
            /// <para>
            /// Embedded within this algorithm is a fail-fast Levenshtein distance
            /// algorithm.  The fail-fast algorithm differs from the standard Levenshtein
            /// distance algorithm in that it is aborted if it is discovered that the
            /// minimum distance between the words is greater than some threshold.
            /// </para>
            /// <para>
            /// Levenshtein distance (also known as edit distance) is a measure of similarity
            /// between two strings where the distance is measured as the number of character
            /// deletions, insertions or substitutions required to transform one string to
            /// the other string.
            /// </para>
            /// </summary>
            /// <param name="target">the target word or phrase</param>
            /// <param name="offset">the offset at which to start the comparison</param>
            /// <param name="length">the length of what's left of the string to compare</param>
            /// <returns>
            /// the number of edits or <see cref="int.MaxValue"/> if the edit distance is
            /// greater than maxDistance.
            /// </returns>
            private int CalcDistance(int[] target, int offset, int length)
            {
                int m = length;
                int n = text.Length;
                if (n == 0)
                {
                    //we don't have anything to compare.  That means if we just add
                    //the letters for m we get the new word
                    return m;
                }
                if (m == 0)
                {
                    return n;
                }

                int maxDistance = CalculateMaxDistance(m);

                if (maxDistance < Math.Abs(m - n))
                {
                    //just adding the characters of m to n or vice-versa results in
                    //too many edits
                    //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
                    //given this optimal circumstance, the edit distance cannot be less than 5.
                    //which is 8-3 or more precisely Math.abs(3-8).
                    //if our maximum edit distance is 4, then we can discard this word
                    //without looking at it.
                    return int.MinValue;
                }

                // init matrix d
                for (int i = 0; i <= n; ++i)
                {
                    p[i] = i;
                }

                // start computing edit distance
                for (int j = 1; j <= m; ++j)
                { // iterates through target
                    int bestPossibleEditDistance = m;
                    int t_j = target[offset + j - 1]; // jth character of t
                    d[0] = j;

                    for (int i = 1; i <= n; ++i)
                    { // iterates through text
                      // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1)
                        if (t_j != text[i - 1])
                        {
                            d[i] = Math.Min(Math.Min(d[i - 1], p[i]), p[i - 1]) + 1;
                        }
                        else
                        {
                            d[i] = Math.Min(Math.Min(d[i - 1] + 1, p[i] + 1), p[i - 1]);
                        }
                        bestPossibleEditDistance = Math.Min(bestPossibleEditDistance, d[i]);
                    }

                    //After calculating row i, the best possible edit distance
                    //can be found by found by finding the smallest value in a given column.
                    //If the bestPossibleEditDistance is greater than the max distance, abort.

                    if (j > maxDistance && bestPossibleEditDistance > maxDistance)
                    {  //equal is okay, but not greater
                       //the closest the target can be to the text is just too far away.
                       //this target is leaving the party early.
                        return int.MinValue;
                    }

                    // copy current distance counts to 'previous row' distance counts: swap p and d
                    int[] _d = p;
                    p = d;
                    d = _d;
                }

                // our last action in the above loop was to switch d and p, so p now
                // actually has the most recent cost counts

                return p[n];
            }

            private float CalcSimilarity(int edits, int m, int n)
            {
                // this will return less than 0.0 when the edit distance is
                // greater than the number of characters in the shorter word.
                // but this was the formula that was previously used in FuzzyTermEnum,
                // so it has not been changed (even though minimumSimilarity must be
                // greater than 0.0)

                return 1.0f - ((float)edits / (float)(outerInstance.m_realPrefixLength + Math.Min(n, m)));
            }

            /// <summary>
            /// The max Distance is the maximum Levenshtein distance for the text
            /// compared to some other value that results in score that is
            /// better than the minimum similarity.
            /// </summary>
            /// <param name="m">the length of the "other value"</param>
            /// <returns>the maximum levenshtein distance that we care about</returns>
            private int CalculateMaxDistance(int m)
            {
                return outerInstance.m_raw ? outerInstance.m_maxEdits : Math.Min(outerInstance.m_maxEdits,
                    (int)((1 - outerInstance.MinSimilarity) * (Math.Min(text.Length, m) + outerInstance.m_realPrefixLength)));
            }
        }
    }
}
