/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nlpcraft.server.nlp.wordnet

import io.opencensus.trace.Span
import net.sf.extjwnl.data.POS._
import net.sf.extjwnl.data.{IndexWord, POS, PointerType}
import net.sf.extjwnl.dictionary.{Dictionary, MorphologicalProcessor}
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.NCService

import scala.collection.JavaConverters._

/**
  * WordNet manager.
  *
  * Thin wrapper around the extJWNL dictionary that provides derivation lookups
  * (noun ↔ adjective), base form lookup and synonym lookup. The dictionary and
  * its morphological processor are initialized in `start(...)`; the lookup
  * methods dereference them directly and so must only be called after the
  * service has been started.
  */
object NCWordNetManager extends NCService {
    @volatile private var dic: Dictionary = _
    @volatile private var morph: MorphologicalProcessor = _

    /**
      * Maps a Penn Treebank POS tag to WordNet POS using the tag's first character.
      *
      * @param pennPos Penn Treebank POS tag.
      * @return WordNet POS, or `None` if the tag is `null`, empty or doesn't start
      *      with one of 'N', 'V', 'J' or 'R'.
      */
    private def pennPos2WordNet(pennPos: String): Option[POS] =
        // 'headOption' (rather than 'head') keeps an empty tag from throwing.
        Option(pennPos).flatMap(_.headOption).flatMap {
            case 'N' ⇒ Some(NOUN)
            case 'V' ⇒ Some(VERB)
            case 'J' ⇒ Some(ADJECTIVE)
            case 'R' ⇒ Some(ADVERB)

            case _ ⇒ None
        }

    // Process WordNet formatted multi-word entries (they are split with '_').
    private def normalize(str: String): String = str.replace('_', ' ')

    /**
      * Collects distinct lemmas of the given target POS that are reachable from
      * the given word via `DERIVATION` pointers of its senses.
      *
      * @param str Word to convert.
      * @param initPos WordNet POS of the given word.
      * @param targetPos WordNet POS of the required relatives.
      * @return Distinct normalized lemmas, empty if the word is not found in the dictionary.
      */
    private def convert(str: String, initPos: POS, targetPos: POS): Seq[String] =
        Option(dic.getIndexWord(initPos, str)) match {
            case Some(word) ⇒
                word.getSenses.asScala.flatMap(synset ⇒
                    synset.getPointers(PointerType.DERIVATION).asScala.flatMap(ptr ⇒ {
                        val trg = ptr.getTargetSynset

                        if (trg.getPOS == targetPos)
                            trg.getWords.asScala.map(w ⇒ normalize(w.getLemma))
                        else
                            Seq.empty
                    })
                ).distinct

            case None ⇒ Seq.empty[String]
        }

    /**
      * Starts this service: obtains the default resource dictionary instance
      * and its morphological processor.
      *
      * @param parent Optional parent span.
      * @throws NCE
      * @return
      */
    @throws[NCE]
    override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
        ackStarting()

        dic = Dictionary.getDefaultResourceInstance
        morph = dic.getMorphologicalProcessor

        ackStarted()
    }

    /**
      * Stops this service. Only the stop acknowledgements are issued; the dictionary
      * references are left as they are.
      *
      * @param parent Optional parent span.
      */
    override def stop(parent: Span): Unit = startScopedSpan("stop", parent) { _ ⇒
        ackStopping()
        ackStopped()
    }

    /**
      * Gets a sequence of possible nouns relatives for the given adjective.
      *
      * @param adj An adjective to match.
      * @return A number of possible noun relatives.
      */
    def getNNsForJJ(adj: String): Seq[String] = convert(adj, ADJECTIVE, NOUN)

    /**
      * Gets a sequence of possible adjective relatives for the given noun.
      *
      * @param noun A noun to match.
      * @return A number of possible adjective relatives.
      */
    def getJJsForNN(noun: String): Seq[String] = convert(noun, NOUN, ADJECTIVE)

    /**
      * Gets base form using a more precise method.
      *
      * It drops base forms like 'Alice'→'louse', 'God'→'od' and 'better'→'well'
      * which are produced by WordNet if the exact base form is not found: the WordNet
      * base form is accepted only if it is among the synonyms of the given lemma.
      *
      * @param lemma Lemma to get a WordNet base form.
      * @param pennPos Lemma's Penn Treebank POS tag.
      * @param syns Optional set of synonyms to validate the base form against. If `null`,
      *      synonyms are looked up via `getSynonyms(lemma, pennPos)`.
      * @return WordNet base form if it passes the validation, the input lemma otherwise
      *      (including unsupported, empty or `null` POS tags).
      */
    def getBaseForm(lemma: String, pennPos: String, syns: Set[String] = null): String =
        pennPos2WordNet(pennPos) match {
            case Some(wnPos) ⇒
                Option(morph.lookupBaseForm(wnPos, lemma)) match {
                    case Some(wnWord) ⇒
                        val wnLemma = wnWord.getLemma
                        val synonyms: Set[String] =
                            if (syns != null) syns else getSynonyms(lemma, pennPos).flatten.toSet

                        if (synonyms.contains(wnLemma))
                            wnLemma
                        else
                            lemma

                    case None ⇒ lemma
                }

            // For unsupported POS tags - return the input lemma.
            case None ⇒ lemma
        }

    /**
      * Gets synonyms for given lemma and its POS tag.
      *
      * @param lemma Lemma to find synonyms for.
      * @param pennPos Lemma's Penn Treebank POS tag.
      * @return Lower-cased, normalized synonyms grouped by synset. The given lemma itself
      *      (compared as is, before normalization) and empty groups are excluded. Empty
      *      if the POS tag is unsupported or the lemma is not found.
      */
    def getSynonyms(lemma: String, pennPos: String): Seq[Seq[String]] = {
        val res: Seq[Seq[String]] = pennPos2WordNet(pennPos) match {
            case Some(wnPos) ⇒
                // Both the index word and its offsets array may be 'null'.
                val offsets =
                    Option(dic.lookupIndexWord(wnPos, lemma)).flatMap(w ⇒ Option(w.getSynsetOffsets))

                offsets match {
                    case Some(synsOffs) ⇒
                        synsOffs.toSeq.
                            map(dic.getSynsetAt(wnPos, _)).
                            filter(_.getPOS == wnPos).
                            map(
                                _.getWords.asScala.
                                    map(_.getLemma.toLowerCase).
                                    filter(_ != lemma).
                                    map(normalize).toSeq
                            )

                    case None ⇒ Seq.empty
                }

            // Invalid POS.
            case None ⇒ Seq.empty
        }

        res.filter(_.nonEmpty)
    }
}
