﻿// Lucene version compatibility level 8.2.0
using ICU4N.Support.Text;
using ICU4N.Text;
using Lucene.Net.Analysis.OpenNlp.Tools;
using Lucene.Net.Analysis.Util;
using opennlp.tools.util;
using System;
using System.Diagnostics;
using System.Text;

namespace Lucene.Net.Analysis.OpenNlp
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// A <see cref="BreakIterator"/> that splits sentences using an OpenNLP sentence chunking model.
    /// </summary>
    /// <remarks>
    /// <see cref="SetText(CharacterIterator)"/> runs the sentence detector once over the whole text and caches
    /// the start offset of every detected sentence. All navigation methods work on that cached array and
    /// report positions through the index of the underlying <see cref="CharacterIterator"/>.
    /// <para/>
    /// <see cref="SetText(CharacterIterator)"/> must be called before any navigation method is used.
    /// </remarks>
    public sealed class OpenNLPSentenceBreakIterator : BreakIterator
    {
        /// <summary>The text being iterated; its index doubles as the current boundary position.</summary>
        private CharacterIterator text;

        /// <summary>Index into <see cref="sentenceStarts"/> of the sentence the iterator is positioned on.</summary>
        private int currentSentence;

        /// <summary>Start offset of each detected sentence, expressed in the coordinates of <see cref="text"/>.</summary>
        private int[] sentenceStarts;

        /// <summary>The sentence detector used to split the text in <see cref="SetText(CharacterIterator)"/>.</summary>
        private readonly NLPSentenceDetectorOp sentenceOp;

        /// <summary>
        /// Creates a break iterator that delegates sentence detection to <paramref name="sentenceOp"/>.
        /// </summary>
        /// <param name="sentenceOp">The sentence detector operation used to split the text.</param>
        public OpenNLPSentenceBreakIterator(NLPSentenceDetectorOp sentenceOp)
        {
            this.sentenceOp = sentenceOp;
        }

        /// <summary>Gets the current boundary position, i.e. the index of the underlying text iterator.</summary>
        public override int Current => text.Index;

        /// <summary>
        /// Moves to the first sentence and positions the text at its begin index.
        /// </summary>
        /// <returns>The begin index of the text.</returns>
        public override int First()
        {
            currentSentence = 0;
            text.SetIndex(text.BeginIndex);
            return Current;
        }

        /// <summary>
        /// Moves to the last sentence and positions the text at its end index. When no sentences were
        /// detected the text is positioned at its begin index instead.
        /// </summary>
        /// <returns>The resulting position.</returns>
        public override int Last()
        {
            if (sentenceStarts.Length > 0)
            {
                currentSentence = sentenceStarts.Length - 1;
                text.SetIndex(text.EndIndex);
            }
            else
            { // there are no sentences; both the first and last positions are the begin index
                currentSentence = 0;
                text.SetIndex(text.BeginIndex);
            }
            return Current;
        }

        /// <summary>
        /// Advances to the start of the next sentence, or to the end of the text when the iterator is on
        /// the last sentence.
        /// </summary>
        /// <returns>
        /// The new position, or <see cref="BreakIterator.Done"/> if the text is already at its end index
        /// or no sentences were detected.
        /// </returns>
        public override int Next()
        {
            if (text.Index == text.EndIndex || 0 == sentenceStarts.Length)
            {
                return Done;
            }
            else if (currentSentence < sentenceStarts.Length - 1)
            {
                text.SetIndex(sentenceStarts[++currentSentence]);
                return Current;
            }
            else
            {
                return Last();
            }
        }

        /// <summary>
        /// Moves to the start of the sentence that follows the sentence containing <paramref name="pos"/>.
        /// </summary>
        /// <param name="pos">An offset between the begin index and end index of the text (inclusive).</param>
        /// <returns>
        /// The start of the following sentence, or <see cref="BreakIterator.Done"/> if no sentences were
        /// detected or <paramref name="pos"/> lies in the last sentence (the text is then positioned at
        /// its end index).
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="pos"/> is outside the text bounds.</exception>
        public override int Following(int pos)
        {
            if (pos < text.BeginIndex || pos > text.EndIndex)
            {
                throw new ArgumentException("offset out of bounds");
            }

            if (0 == sentenceStarts.Length)
            {
                text.SetIndex(text.BeginIndex);
                return Done;
            }

            int lastSentence = sentenceStarts.Length - 1;

            // A single sentence is always the last one, even when pos precedes its start
            // (the first sentence may start after the begin index); without this check the
            // search below would be handed an empty range.
            if (0 == lastSentence || pos >= sentenceStarts[lastSentence])
            {
                // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
                // https://bugs.openjdk.java.net/browse/JDK-8015110
                text.SetIndex(text.EndIndex);
                currentSentence = lastSentence;
                return Done;
            }

            // there are at least two sentences, and pos lies before the start of the last one
            MoveToSentenceAt(pos, 0, lastSentence - 1);
            text.SetIndex(sentenceStarts[++currentSentence]);
            return Current;
        }

        /// <summary>
        /// Binary search over sentences: sets <see cref="currentSentence"/> to the greatest index in
        /// [<paramref name="minSentence"/>, <paramref name="maxSentence"/>] whose start offset is not
        /// greater than <paramref name="pos"/>, or to <paramref name="minSentence"/> if there is none.
        /// </summary>
        /// <param name="pos">The text offset to locate.</param>
        /// <param name="minSentence">Lowest candidate sentence index (inclusive).</param>
        /// <param name="maxSentence">Highest candidate sentence index (inclusive).</param>
        private void MoveToSentenceAt(int pos, int minSentence, int maxSentence)
        {
            Debug.Assert(minSentence <= maxSentence);

            int low = minSentence;
            int high = maxSentence;
            while (low < high)
            {
                // Round up so that mid > low; guarantees progress when "low = mid" is taken
                // and keeps every array access within [minSentence, maxSentence].
                int mid = low + (high - low + 1) / 2;
                if (pos >= sentenceStarts[mid])
                {
                    low = mid;
                }
                else
                {
                    high = mid - 1;
                }
            }
            currentSentence = low;

            Debug.Assert(currentSentence == minSentence || pos >= sentenceStarts[currentSentence]);
            Debug.Assert(currentSentence == maxSentence || pos < sentenceStarts[currentSentence + 1]);
        }

        /// <summary>
        /// Moves back to the start of the current sentence when the text is at its end index, otherwise
        /// to the start of the previous sentence.
        /// </summary>
        /// <returns>
        /// The new position, or <see cref="BreakIterator.Done"/> if the text is already at its begin index,
        /// no sentences were detected, or there is no previous sentence.
        /// </returns>
        public override int Previous()
        {
            if (text.Index == text.BeginIndex)
            {
                return Done;
            }

            if (0 == sentenceStarts.Length)
            {
                text.SetIndex(text.BeginIndex);
                return Done;
            }

            if (text.Index == text.EndIndex)
            {
                text.SetIndex(sentenceStarts[currentSentence]);
            }
            else if (currentSentence > 0)
            {
                text.SetIndex(sentenceStarts[--currentSentence]);
            }
            else
            {
                // Positioned on the first sentence start, which lies after the begin index:
                // there is no earlier sentence, so behave like Preceding() does for sentence 0
                // instead of indexing sentenceStarts[-1].
                text.SetIndex(text.BeginIndex);
                return Done;
            }
            return Current;
        }

        /// <summary>
        /// Moves to the start of the sentence that precedes the sentence containing <paramref name="pos"/>.
        /// </summary>
        /// <param name="pos">An offset between the begin index and end index of the text (inclusive).</param>
        /// <returns>
        /// The start of the preceding sentence, or <see cref="BreakIterator.Done"/> if no sentences were
        /// detected or <paramref name="pos"/> lies in (or before) the first sentence (the text is then
        /// positioned at its begin index).
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="pos"/> is outside the text bounds.</exception>
        public override int Preceding(int pos)
        {
            if (pos < text.BeginIndex || pos > text.EndIndex)
            {
                throw new ArgumentException("offset out of bounds");
            }

            if (0 == sentenceStarts.Length || pos < sentenceStarts[0])
            {
                // For the second condition: this conflicts with the javadocs, but matches actual behavior
                // (Oracle has a bug in something) https://bugs.openjdk.java.net/browse/JDK-8015110
                text.SetIndex(text.BeginIndex);
                currentSentence = 0;
                return Done;
            }

            MoveToSentenceAt(pos, 0, sentenceStarts.Length - 1);
            if (0 == currentSentence)
            {
                text.SetIndex(text.BeginIndex);
                return Done;
            }

            text.SetIndex(sentenceStarts[--currentSentence]);
            return Current;
        }

        /// <summary>
        /// Moves <paramref name="n"/> sentences forward (positive) or backward (negative) from the
        /// current sentence. A value of zero leaves the iterator where it is.
        /// </summary>
        /// <param name="n">The number of sentences to move.</param>
        /// <returns>
        /// The new position, or <see cref="BreakIterator.Done"/> if the move would pass the first or last
        /// sentence; in that case the text is positioned at its begin index or end index respectively.
        /// </returns>
        public override int Next(int n)
        {
            if (0 == sentenceStarts.Length)
            {
                // No sentences: there is nothing to index into, so any move runs off the text.
                if (0 == n)
                {
                    return Current;
                }
                currentSentence = 0;
                text.SetIndex(n < 0 ? text.BeginIndex : text.EndIndex);
                return Done;
            }

            // Computed as long so that a very large n cannot overflow into a bogus array index.
            long target = (long)currentSentence + n;
            if (n < 0)
            {
                if (text.Index == text.EndIndex)
                {
                    // At the end of the text the first step back lands on the start of the current sentence.
                    ++target;
                }
                if (target < 0)
                {
                    currentSentence = 0;
                    text.SetIndex(text.BeginIndex);
                    return Done;
                }
                currentSentence = (int)target;
                text.SetIndex(sentenceStarts[currentSentence]);
            }
            else if (n > 0)
            {
                if (target >= sentenceStarts.Length)
                {
                    currentSentence = sentenceStarts.Length - 1;
                    text.SetIndex(text.EndIndex);
                    return Done;
                }
                currentSentence = (int)target;
                text.SetIndex(sentenceStarts[currentSentence]);
            }
            return Current;
        }

        /// <summary>Gets the text being iterated, as last passed to <see cref="SetText(CharacterIterator)"/>.</summary>
        public override CharacterIterator Text => text;

        /// <summary>
        /// Sets the text to iterate, positions it at its begin index and recomputes the sentence starts
        /// by running the sentence detector over the full text.
        /// </summary>
        /// <param name="newText">The text to split into sentences.</param>
        public override void SetText(CharacterIterator newText)
        {
            text = newText;
            text.SetIndex(text.BeginIndex);
            currentSentence = 0;
            Span[] spans = sentenceOp.SplitSentences(CharacterIteratorToString());
            sentenceStarts = new int[spans.Length];
            for (int i = 0; i < spans.Length; ++i)
            {
                // Adjust start positions to match those of the passed-in CharacterIterator
                sentenceStarts[i] = spans[i].getStart() + text.BeginIndex;
            }
        }

        /// <summary>
        /// Extracts the full content of <see cref="text"/> as a string, leaving the iterator positioned
        /// at its begin index when it had to be walked.
        /// </summary>
        /// <returns>The characters covered by the text iterator.</returns>
        private string CharacterIteratorToString()
        {
            // Fast path: a CharArrayIterator exposes its backing array, so no iteration is needed.
            CharArrayIterator charArrayIterator = text as CharArrayIterator;
            if (charArrayIterator != null)
            {
                return new string(charArrayIterator.Text, charArrayIterator.Start, charArrayIterator.Length);
            }

            // TODO: is there a better way to extract full text from arbitrary CharacterIterators?
            StringBuilder builder = new StringBuilder(text.EndIndex - text.BeginIndex);
            for (char ch = text.First(); ch != CharacterIterator.Done; ch = text.Next())
            {
                builder.Append(ch);
            }
            text.SetIndex(text.BeginIndex);
            return builder.ToString();
        }
    }
}
