﻿// Lucene version compatibility level 7.1.0
using ICU4N.Text;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Support;

namespace Lucene.Net.Analysis.Icu
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// A <see cref="TokenFilter"/> that transforms text with ICU.
    /// </summary>
    /// <remarks>
    /// ICU provides text-transformation functionality via its Transliteration API.
    /// Although script conversion is its most common use, a Transliterator can
    /// actually perform a more general class of tasks. In fact, Transliterator
    /// defines a very general API which specifies only that a segment of the input
    /// text is replaced by new text. The particulars of this conversion are
    /// determined entirely by subclasses of Transliterator.
    /// <para/>
    /// Some useful transformations for search are built-in:
    /// <list type="bullet">
    ///     <item><description>Conversion from Traditional to Simplified Chinese characters</description></item>
    ///     <item><description>Conversion from Hiragana to Katakana</description></item>
    ///     <item><description>Conversion from Fullwidth to Halfwidth forms.</description></item>
    ///     <item><description>Script conversions, for example Serbian Cyrillic to Latin</description></item>
    /// </list>
    /// <para/>
    /// Example usage: 
    /// <code>
    ///     stream = new ICUTransformFilter(stream, Transliterator.GetInstance("Traditional-Simplified"));
    /// </code>
    /// <para/>
    /// For more details, see the <a href="http://userguide.icu-project.org/transforms/general">ICU User Guide</a>.
    /// </remarks>
    [ExceptionToClassNameConvention]
    public sealed class ICUTransformFilter : TokenFilter
    {
        // Transliterator to transform the text
        private readonly Transliterator transform;

        // Reusable position object
        private readonly TransliterationPosition position = new TransliterationPosition();

        // term attribute, will be updated with transformed text.
        private readonly ICharTermAttribute termAtt;

        // Wraps a termAttribute around the replaceable interface.
        private readonly ReplaceableTermAttribute replaceableAttribute = new ReplaceableTermAttribute();

        /// <summary>
        /// Create a new <see cref="ICUTransformFilter"/> that transforms text on the given stream.
        /// </summary>
        /// <remarks>
        /// If <paramref name="transform"/> has no filter set and is a rule-based transliterator,
        /// this constructor assigns the transliterator's source set to its <c>Filter</c> property.
        /// Note that this modifies the instance passed in by the caller.
        /// </remarks>
        /// <param name="input"><see cref="TokenStream"/> to filter.</param>
        /// <param name="transform">Transliterator to transform the text.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="transform"/> is <c>null</c>.</exception>
        public ICUTransformFilter(TokenStream input, Transliterator transform)
            : base(input)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // on the transform.Filter access below.
            if (transform == null)
            {
                throw new System.ArgumentNullException(nameof(transform));
            }

            this.transform = transform;
            this.termAtt = AddAttribute<ICharTermAttribute>();

            /* 
             * This is cheating, but speeds things up a lot.
             * If we wanted to use pkg-private APIs we could probably do better.
             */
#pragma warning disable 612, 618
            if (transform.Filter == null && transform is RuleBasedTransliterator)
#pragma warning restore 612, 618
            {
                // Only install the source set as a filter when it actually contains something.
                UnicodeSet sourceSet = transform.GetSourceSet();
                if (sourceSet != null && sourceSet.Any())
                {
                    transform.Filter = sourceSet;
                }
            }
        }

        /// <summary>
        /// Advances the wrapped stream to its next token and transliterates that
        /// token's term text in place.
        /// </summary>
        /// <returns><c>true</c> if the input produced a token; <c>false</c> once the input is exhausted.</returns>
        public override bool IncrementToken()
        {
            if (!m_input.IncrementToken())
            {
                return false;
            }

            /*
             * Wrap around replaceable. clear the positions, and transliterate.
             */
            replaceableAttribute.SetText(termAtt);

            // Both the range to transliterate and its context span the whole term.
            int length = termAtt.Length;
            position.Start = 0;
            position.Limit = length;
            position.ContextStart = 0;
            position.ContextLimit = length;

            transform.FilteredTransliterate(replaceableAttribute, position, false);
            return true;
        }

        /// <summary>
        /// Wrap a <see cref="ICharTermAttribute"/> with the <see cref="IReplaceable"/> API.
        /// </summary>
        private sealed class ReplaceableTermAttribute : IReplaceable
        {
            // Cached view of the wrapped attribute's buffer and logical length;
            // refreshed by SetText and kept in sync by the Replace overloads.
            private char[] buffer;
            private int length;
            private ICharTermAttribute token;

            /// <summary>
            /// Points this wrapper at <paramref name="token"/>, caching its buffer and length.
            /// </summary>
            /// <param name="token">The term attribute to expose through <see cref="IReplaceable"/>.</param>
            public void SetText(ICharTermAttribute token)
            {
                this.token = token;
                this.buffer = token.Buffer;
                this.length = token.Length;
            }

            /// <summary>Returns the code point at <paramref name="pos"/>, as computed by <c>UTF16.CharAt</c> over the valid part of the buffer.</summary>
            public int Char32At(int pos) => UTF16.CharAt(buffer, 0, length, pos);

            /// <summary>Returns the UTF-16 code unit stored at <paramref name="pos"/>.</summary>
            public char this[int pos] => buffer[pos];

            /// <summary>
            /// Inserts a copy of <paramref name="length"/> chars, taken from
            /// <paramref name="startIndex"/>, at <paramref name="destinationIndex"/>.
            /// </summary>
            public void Copy(int startIndex, int length, int destinationIndex) // LUCENENET: Changed 2nd parameter from limit to length
            {
                // Snapshot the source range first: the insertion below may shift or reallocate the buffer.
                char[] text = new char[length]; // LUCENENET: Corrected length
                CopyTo(startIndex, text, 0, length); // LUCENENET: Corrected length
                // A replaced-range length of 0 makes this a pure insertion at destinationIndex.
                Replace(destinationIndex, 0, text, 0, length); // LUCENENET: Corrected length & charsLen
            }

            /// <summary>
            /// Copies <paramref name="count"/> chars starting at <paramref name="sourceIndex"/>
            /// into <paramref name="destination"/> at <paramref name="destinationIndex"/>.
            /// </summary>
            public void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
            {
                System.Array.Copy(buffer, sourceIndex, destination, destinationIndex, count);
            }

            /// <summary>Always <c>false</c>.</summary>
            public bool HasMetaData => false;

            /// <summary>Current logical length of the wrapped text, in UTF-16 code units.</summary>
            public int Length => length;

            /// <summary>
            /// Replaces <paramref name="length"/> chars starting at <paramref name="start"/>
            /// with <paramref name="text"/>, then updates the wrapped attribute's length.
            /// </summary>
            public void Replace(int start, int length, string text) // LUCENENET: Changed 2nd parameter from limit to length
            {
                int charsLen = text.Length;
                // shift text if necessary for the replacement
                int newLength = ShiftForReplace(start, length + start, charsLen); // LUCENENET: Changed 2nd parameter to calculate limit
                // insert the replacement text
                text.CopyTo(0, buffer, start, charsLen);
                token.Length = (this.length = newLength);
            }

            /// <summary>
            /// Replaces <paramref name="length"/> chars starting at <paramref name="start"/>
            /// with <paramref name="charsLen"/> chars of <paramref name="text"/> beginning at
            /// <paramref name="charsStart"/>, then updates the wrapped attribute's length.
            /// </summary>
            public void Replace(int start, int length, char[] text, int charsStart,
                int charsLen)
            {
                // shift text if necessary for the replacement
                int newLength = ShiftForReplace(start, length + start, charsLen); // LUCENENET: Changed 2nd parameter to calculate limit
                // insert the replacement text
                System.Array.Copy(text, charsStart, buffer, start, charsLen);
                token.Length = (this.length = newLength);
            }

            /// <summary>shift text (if necessary) for a replacement operation</summary>
            /// <param name="start">Index of the first char being replaced.</param>
            /// <param name="limit">Index one past the last char being replaced.</param>
            /// <param name="charsLen">Number of chars that will be written at <paramref name="start"/>.</param>
            /// <returns>The length the text will have once the replacement has been written.</returns>
            private int ShiftForReplace(int start, int limit, int charsLen)
            {
                int replacementLength = limit - start;
                int newLength = length - replacementLength + charsLen;
                // resize if necessary
                if (newLength > length)
                {
                    buffer = token.ResizeBuffer(newLength);
                }
                // if the substring being replaced is longer or shorter than the
                // replacement, need to shift things around
                // (source and destination overlap within the same array here)
                if (replacementLength != charsLen && limit < length)
                {
                    System.Array.Copy(buffer, limit, buffer, start + charsLen, length - limit);
                }
                return newLength;
            }
        }
    }
}
