| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.nlpcraft.common.nlp |
| |
| import java.util |
| import java.util.Collections |
| |
| import org.apache.nlpcraft.common.NCE |
| import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank |
| |
| import scala.collection.JavaConverters._ |
| import scala.collection.mutable.ArrayBuffer |
| import scala.collection.{Map, Seq, Set, mutable} |
| import scala.language.implicitConversions |
| |
object NCNlpSentence {
    // Lets a sentence be used directly wherever its underlying token buffer is expected.
    implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] = x.tokens

    /**
     * Checks that the tokens referenced by a relation note all carry notes of the expected type.
     * If the referenced tokens resolve to several different (non-NLP) note types, the combination
     * itself stays valid but the relation note is removed from the sentence.
     *
     * @param ns Sentence.
     * @param idxs Indexes of the tokens referenced by the relation note.
     * @param notesType Expected note type of the referenced tokens.
     * @param note Relation note holding the references.
     * @return `true` if this sentence variant is still valid, `false` if it should be excluded.
     */
    private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, note: NCNlpSentenceNote): Boolean = {
        val types =
            idxs.flatMap(idx ⇒ {
                val types = ns(idx).filter(!_.isNlp).map(_.noteType)

                types.size match {
                    case 0 ⇒ None
                    case 1 ⇒ Some(types.head)
                    // At this stage each token is expected to carry at most one non-NLP note.
                    case _ ⇒ throw new AssertionError(s"Unexpected tokens: ${ns(idx)}")
                }
            }).distinct

        /**
         * Example:
         * 1. Sentence 'maximum x' (single element related function)
         * - maximum is aggregate function linked to date element.
         * - x defined as 2 elements: date and num.
         * So, the variant 'maximum x (as num)' should be excluded.
         *
         * 2. Sentence 'compare x and y' (multiple elements related function)
         * - compare is relation function linked to date element.
         * - x and y defined as 2 elements: date and num.
         * So, variants 'x (as num) and x (as date)' and 'x (as date) and x (as num)'
         * shouldn't be excluded, but invalid relation should be deleted for these combinations.
         */
        types.size match {
            case 0 ⇒ throw new AssertionError(s"Unexpected empty types [notesType=$notesType]")
            case 1 ⇒ types.head == notesType
            case _ ⇒
                // Mixed types: equal elements are processed together with the function element.
                // The variant stays, but the now-invalid relation note is dropped.
                // (The original code re-checked `types.size == 1` here, which is unreachable
                // inside this branch and has been removed.)
                ns.removeNote(note)

                true
        }
    }

    /**
     * Fixes notes with references to other notes indexes.
     * Note that 'idxsField' is 'indexes' and 'noteField' is 'note' for all kind of references.
     *
     * @param noteType Note type.
     * @param ns Sentence.
     * @param history Indexes transformation history (old index → new index pairs).
     * @return Valid flag.
     */
    private def fixIndexesReferences(noteType: String, ns: NCNlpSentence, history: Seq[(Int, Int)]): Boolean = {
        ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
            tok.getNoteOpt(noteType, "indexes") match {
                case Some(n) ⇒
                    val idxs: Seq[Int] = n.data[java.util.List[Int]]("indexes").asScala
                    var fixed = idxs

                    // Remap every referenced index through the recorded transformation history.
                    history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(i ⇒ if (i == idxOld) idxNew else i) }

                    fixed = fixed.distinct

                    if (idxs != fixed)
                        ns.fixNote(n, "indexes" → fixed.asJava.asInstanceOf[java.io.Serializable])
                case None ⇒ // No-op.
            }
        )

        // After remapping, verify that every reference points at a note of the declared type.
        ns.flatMap(_.getNotes(noteType)).forall(
            n ⇒ checkRelation(ns, n.data[java.util.List[Int]]("indexes").asScala, n.data[String]("note"), n)
        )
    }

    /**
     * Copies token as-is into the rebuilt sentence, recording the index move in the history.
     *
     * @param ns Sentence (being rebuilt, acts as the copy target).
     * @param history Indexes transformation history (old index → new index).
     * @param toksCopy Copied tokens (snapshot of the sentence before rebuild).
     * @param i Index of the token to copy.
     */
    private def simpleCopy(
        ns: NCNlpSentence,
        history: mutable.ArrayBuffer[(Int, Int)],
        toksCopy: NCNlpSentence, i: Int
    ): Seq[NCNlpSentenceToken] = {
        val tokCopy = toksCopy(i)

        history += tokCopy.index → ns.size

        ns += tokCopy.clone(ns.size)
    }

    /**
     * Glues adjacent (non-bracketed) stop words into single compound stop-word tokens.
     *
     * @param ns Sentence (rebuilt in place).
     * @param userNoteTypes Notes types whose indexes must be fixed after the rebuild.
     * @param history Indexes transformation history.
     */
    private def unionStops(
        ns: NCNlpSentence,
        userNoteTypes: Seq[String],
        history: mutable.ArrayBuffer[(Int, Int)]
    ): Unit = {
        // Java collection used because using scala collections (mutable.Buffer.empty[mutable.Buffer[Token]]) is reason
        // of compilation errors which seems as scala compiler internal error.
        val bufs = new util.ArrayList[mutable.Buffer[NCNlpSentenceToken]]()

        def last[T](l: util.List[T]): T = l.get(l.size() - 1)

        // Group consecutive stop-word tokens into runs.
        ns.filter(t ⇒ t.isStopWord && !t.isBracketed).foreach(t ⇒
            if (!bufs.isEmpty && last(bufs).last.index + 1 == t.index)
                last(bufs) += t
            else
                bufs.add(mutable.Buffer.empty[NCNlpSentenceToken] :+ t)
        )

        // Only runs of two or more stop words need gluing.
        val idxsSeq = bufs.asScala.filter(_.lengthCompare(1) > 0).map(_.map(_.index))

        if (idxsSeq.nonEmpty) {
            val nsCopyToks = ns.clone()
            ns.clear()

            val buf = mutable.Buffer.empty[Int]

            for (i ← nsCopyToks.indices)
                idxsSeq.find(_.contains(i)) match {
                    case Some(idxs) ⇒
                        // Emit the compound token once, on the first index of the run.
                        if (!buf.contains(idxs.head)) {
                            buf += idxs.head

                            ns += mkCompound(ns, nsCopyToks, idxs, stop = true, ns.size, None, history)
                        }
                    case None ⇒ simpleCopy(ns, history, nsCopyToks, i)
                }

            fixIndexes(ns, userNoteTypes)
        }
    }

    /**
     * Fixes indexes for all notes after recreating tokens.
     *
     * @param ns Sentence.
     * @param userNoteTypes Notes types.
     */
    private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]): Unit = {
        // Replaces other notes indexes.
        for (t ← userNoteTypes :+ "nlpcraft:nlp"; note ← ns.getNotes(t)) {
            val toks = ns.filter(_.contains(note)).sortBy(_.index)

            val newNote = note.clone(toks.map(_.index), toks.flatMap(_.wordIndexes).sorted)

            toks.foreach(t ⇒ {
                t.remove(note)
                t.add(newNote)
            })
        }

        // Special case - field index of core NLP note.
        ns.zipWithIndex.foreach { case (tok, idx) ⇒ ns.fixNote(tok.getNlpNote, "index" → idx) }
    }

    /**
     * Zips multi-token notes of the same type into single compound tokens.
     * Skipped when any covered token is ambiguous (holds more than one user note).
     *
     * @param ns Sentence (rebuilt in place).
     * @param nType Notes type to zip.
     * @param userNotesTypes Notes types whose indexes must be fixed after the rebuild.
     * @param history Indexes transformation history.
     */
    private def zipNotes(
        ns: NCNlpSentence,
        nType: String,
        userNotesTypes: Seq[String],
        history: mutable.ArrayBuffer[(Int, Int)]
    ): Unit = {
        // Only notes spanning more than one token need zipping.
        val nts = ns.getNotes(nType).filter(n ⇒ n.tokenFrom != n.tokenTo).sortBy(_.tokenFrom)

        val overlapped =
            nts.flatMap(n ⇒ n.tokenFrom to n.tokenTo).map(ns(_)).exists(
                t ⇒ userNotesTypes.map(pt ⇒ t.getNotes(pt).size).sum > 1
            )

        if (nts.nonEmpty && !overlapped) {
            val nsCopyToks = ns.clone()
            ns.clear()

            val buf = mutable.ArrayBuffer.empty[Int]

            for (i ← nsCopyToks.indices)
                nts.find(_.tokenIndexes.contains(i)) match {
                    case Some(n) ⇒
                        // Emit the compound token once, on the note's first token.
                        if (!buf.contains(n.tokenFrom)) {
                            buf += n.tokenFrom

                            ns += mkCompound(ns, nsCopyToks, n.tokenIndexes, stop = false, ns.size, Some(n), history)
                        }
                    case None ⇒ simpleCopy(ns, history, nsCopyToks, i)
                }

            fixIndexes(ns, userNotesTypes)
        }
    }

    /**
     * Makes compound note - a single synthetic token built from several source tokens.
     *
     * @param ns Sentence.
     * @param nsCopyToks Tokens (snapshot of the sentence before rebuild).
     * @param indexes Indexes of source tokens to merge.
     * @param stop Stop-word flag for the resulting token.
     * @param idx Index of the resulting token.
     * @param commonNote Common note to re-attach with fixed indexes, if any.
     * @param history Indexes transformation history.
     */
    private def mkCompound(
        ns: NCNlpSentence,
        nsCopyToks: Seq[NCNlpSentenceToken],
        indexes: Seq[Int],
        stop: Boolean,
        idx: Int,
        commonNote: Option[NCNlpSentenceNote],
        history: mutable.ArrayBuffer[(Int, Int)]
    ): NCNlpSentenceToken = {
        val t = NCNlpSentenceToken(idx)

        // Note, it adds stop-words too.
        val content = nsCopyToks.zipWithIndex.filter(p ⇒ indexes.contains(p._2)).map(_._1)

        content.foreach(t ⇒ history += t.index → idx)

        // Concatenates a per-token value, inserting a space only where the source
        // tokens were not adjacent in the original text.
        def mkValue(get: NCNlpSentenceToken ⇒ String): String = {
            val buf = mutable.Buffer.empty[String]

            val n = content.size - 1

            content.zipWithIndex.foreach(p ⇒ {
                val t = p._1
                val idx = p._2

                buf += get(t)

                if (idx < n && t.endCharIndex != content(idx + 1).startCharIndex)
                    buf += " "
            })

            buf.mkString
        }

        val origText = mkValue((t: NCNlpSentenceToken) ⇒ t.origText)

        val idxs = Seq(idx)
        val wordIdxs = content.flatMap(_.wordIndexes).sorted

        val direct =
            commonNote match {
                case Some(n) if n.isUser ⇒ n.isDirect
                case _ ⇒ content.forall(_.isDirect)
            }

        val params = Seq(
            "index" → idx,
            "pos" → NCPennTreebank.SYNTH_POS,
            "posDesc" → NCPennTreebank.SYNTH_POS_DESC,
            "lemma" → mkValue((t: NCNlpSentenceToken) ⇒ t.lemma),
            "origText" → origText,
            "normText" → mkValue((t: NCNlpSentenceToken) ⇒ t.normText),
            "stem" → mkValue((t: NCNlpSentenceToken) ⇒ t.stem),
            "start" → content.head.startCharIndex,
            "end" → content.last.endCharIndex,
            "charLength" → origText.length,
            "quoted" → false,
            "stopWord" → stop,
            "bracketed" → false,
            "direct" → direct,
            // NOTE(review): the three flags below are derived from 'nsCopyToks' (the whole
            // pre-rebuild sentence) rather than from 'content' (the merged tokens) - confirm
            // this is intentional before changing.
            "dict" → (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict") else false),
            "english" → nsCopyToks.forall(_.getNlpNote.data[Boolean]("english")),
            "swear" → nsCopyToks.exists(_.getNlpNote.data[Boolean]("swear"))
        )

        val nlpNote = NCNlpSentenceNote(idxs, wordIdxs, "nlpcraft:nlp", params: _*)

        t.add(nlpNote)

        // Adds processed note with fixed indexes.
        commonNote match {
            case Some(n) ⇒
                ns.removeNote(n)
                t.add(n.clone(idxs, wordIdxs))
            case None ⇒ // No-op.
        }

        t
    }


    /**
     * Fixes notes with references list to other notes indexes.
     *
     * @param noteType Note type.
     * @param idxsField Indexes field.
     * @param noteField Note field.
     * @param ns Sentence.
     * @param history Indexes transformation history.
     * @return Valid flag.
     */
    private def fixIndexesReferencesList(
        noteType: String,
        idxsField: String,
        noteField: String,
        ns: NCNlpSentence,
        history: Seq[(Int, Int)]
    ): Boolean = {
        var ok = true

        for (tok ← ns.filter(_.isTypeOf(noteType)) if ok)
            tok.getNoteOpt(noteType, idxsField) match {
                case Some(n) ⇒
                    val idxs: Seq[Seq[Int]] = n.data[java.util.List[java.util.List[Int]]](idxsField).asScala.map(_.asScala)
                    var fixed = idxs

                    // Remap every referenced index through the recorded transformation history.
                    history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i).distinct) }

                    if (fixed.forall(_.size == 1))
                        // Fix double dimension array to one dimension,
                        // so it should be called always in spite of 'fixIndexesReferences' method.
                        ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable])
                    else
                        ok = false
                case None ⇒ // No-op.
            }

        ok &&
        ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
            rel.dataOpt[java.util.List[Int]](idxsField) match {
                case Some(idxsList) ⇒
                    val notesTypes = rel.data[util.List[String]](noteField)

                    require(idxsList.size() == notesTypes.size())

                    idxsList.asScala.zip(notesTypes.asScala).forall {
                        case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel)
                    }
                case None ⇒ true
            }
        )
    }

    /**
     * Collapses the sentence: normalizes stop-word flags, zips multi-token notes and
     * adjacent stop words into compound tokens and fixes all index references.
     *
     * @param ns Sentence (mutated in place).
     * @param notNlpTypes Non-NLP note types present in the sentence.
     * @return `true` if the collapsed sentence is a valid variant, `false` if it should be dropped.
     */
    private def collapseSentence(ns: NCNlpSentence, notNlpTypes: Seq[String]): Boolean = {
        // Tokens covered by non-NLP notes lose their stop-word status.
        ns.
            filter(!_.isNlp).
            filter(_.isStopWord).
            flatten.
            filter(_.isNlp).
            foreach(n ⇒ ns.fixNote(n, "stopWord" → false))

        val nsNotes: Map[String, Seq[Int]] = ns.tokens.flatten.map(p ⇒ p.noteType → p.tokenIndexes).toMap

        // Restore stop-word status where the original stop reason is still present unchanged.
        for (
            t ← ns.tokens;
            stopReason ← t.stopsReasons
            if nsNotes.getOrElse(stopReason.noteType, Seq.empty) == stopReason.tokenIndexes
        )
            ns.fixNote(t.getNlpNote, "stopWord" → true)

        val history = mutable.ArrayBuffer.empty[(Int, Int)]

        notNlpTypes.foreach(typ ⇒ zipNotes(ns, typ, notNlpTypes, history))

        unionStops(ns, notNlpTypes, history)

        val res =
            Seq("nlpcraft:relation", "nlpcraft:limit").forall(t ⇒ fixIndexesReferences(t, ns, history)) &&
            fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
            fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)

        if (res)
            // Validation (all indexes calculated well)
            require(
                !ns.flatten.
                    exists(n ⇒ ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t ⇒ !t.contains(n))),
                s"Invalid sentence:\n" +
                ns.map(t ⇒
                    // Human readable invalid sentence for debugging.
                    s"${t.origText}{index:${t.index}}[${t.map(n ⇒ s"${n.noteType}, {range:${n.tokenFrom}-${n.tokenTo}}").mkString("|")}]"
                ).mkString("\n")
            )

        res
    }
}
| |
| import org.apache.nlpcraft.common.nlp.NCNlpSentence._ |
| |
/**
 * Parsed NLP sentence is a collection of tokens. Each token is a collection of notes and
 * each note is a collection of KV pairs.
 *
 * @param srvReqId Server request ID.
 * @param text Normalized text.
 * @param weight Weight. NOTE: not part of `equals`/`hashCode` - confirm this is intentional.
 * @param enabledBuiltInToks Enabled built-in tokens.
 * @param tokens Initial buffer.
 */
class NCNlpSentence(
    val srvReqId: String,
    val text: String,
    val weight: Double,
    val enabledBuiltInToks: Set[String],
    override val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](32)
) extends NCNlpSentenceTokenBuffer(tokens) with java.io.Serializable {
    // Lazily computed hash, invalidated by 'fixNote' (tokens are mutable).
    @transient
    private var hash: java.lang.Integer = _

    // Hash over the same fields 'equals' compares ('weight' excluded).
    private def calcHash(): Int =
        Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)

    // Deep copy.
    override def clone(): NCNlpSentence =
        new NCNlpSentence(srvReqId, text, weight, enabledBuiltInToks, tokens.map(_.clone()))

    /**
     * Utility method that gets set of notes for given note type collected from
     * tokens in this sentence. Notes are sorted in the same order they appear
     * in this sentence.
     *
     * @param noteType Note type.
     */
    def getNotes(noteType: String): Seq[NCNlpSentenceNote] = this.flatMap(_.getNotes(noteType)).distinct

    /**
     * Utility method that removes note with given ID from all tokens in this sentence.
     * No-op if such note wasn't found.
     *
     * @param note Note.
     */
    def removeNote(note: NCNlpSentenceNote): Unit = this.foreach(_.remove(note))

    //noinspection HashCodeUsesVar
    override def hashCode(): Int = {
        if (hash == null)
            hash = calcHash()

        hash
    }

    /**
     * Replaces given note on all tokens it spans with a clone carrying the given
     * updated KV pairs, and invalidates the cached hash.
     *
     * @param note Note to replace.
     * @param kvs KV pairs to set on the cloned note.
     */
    def fixNote(note: NCNlpSentenceNote, kvs: (String, java.io.Serializable)*): Unit = {
        val fixed = note.clone(kvs: _*)

        this.filter(t ⇒ t.index >= fixed.tokenIndexes.head && t.index <= fixed.tokenIndexes.last).foreach(t ⇒ {
            t.remove(note)
            t.add(fixed)
        })

        hash = null
    }

    /**
     * This collapser handles several tasks:
     * - "overall" collapsing after all other individual collapsers had their turn.
     * - Special further enrichment of tokens like linking, etc.
     *
     * In all cases of overlap (full or partial) - the "longest" note wins. In case of overlap and equal
     * lengths - the winning note is chosen based on this priority.
     *
     * @return All valid collapsed sentence variants.
     */
    @throws[NCE]
    def collapse(): Seq[NCNlpSentence] = {
        // Always deletes `similar` notes.
        // Some words with same note type can be detected various ways.
        // We keep only one variant - with `best` direct and sparsity parameters,
        // other variants for these words are redundant.
        val redundant: Seq[NCNlpSentenceNote] =
            this.flatten.filter(!_.isNlp).distinct.
                groupBy(_.getKey()).
                map(p ⇒ p._2.sortBy(p ⇒
                    (
                        // System notes don't have such flags.
                        if (p.isUser) {
                            if (p.isDirect) 0 else 1
                        }
                        else
                            0,
                        if (p.isUser) p.sparsity else 0
                    )
                )).
                flatMap(_.drop(1)).
                toSeq

        redundant.foreach(this.removeNote)

        // All distinct non-NLP notes carried by the given tokens.
        def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
            toks.flatten.filter(!_.isNlp).distinct

        // Notes that overlap (are nested inside the span of) some other note -
        // candidates for deletion when generating variants.
        val delCombs: Seq[NCNlpSentenceNote] =
            getNotNlpNotes(this).
                flatMap(note ⇒ getNotNlpNotes(this.slice(note.tokenFrom, note.tokenTo + 1)).filter(_ != note)).
                distinct

        // Deletion candidates grouped by word index, largest overlap groups first.
        val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
            delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
                groupBy { case (idx, _) ⇒ idx }.
                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note } }.
                toSeq.sortBy(-_.size)

        // Smallest deletion-combination size that can leave at most one note per word index.
        val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1

        val sens =
            if (delCombs.nonEmpty) {
                // Deletion combinations already processed (supersets of these are skipped).
                val deleted = mutable.ArrayBuffer.empty[Seq[NCNlpSentenceNote]]

                val sens =
                    (minDelSize to delCombs.size).
                        flatMap(i ⇒
                            delCombs.combinations(i).
                                // Keep only combinations that resolve every word-index conflict.
                                filter(delComb ⇒ !toksByIdx.exists(_.count(note ⇒ !delComb.contains(note)) > 1))
                        ).
                        sortBy(_.size).
                        flatMap(delComb ⇒
                            // Already processed with less subset of same deleted tokens.
                            if (!deleted.exists(_.forall(delComb.contains))) {
                                val nsClone = this.clone()

                                delComb.foreach(nsClone.removeNote)

                                // Has overlapped notes for some tokens.
                                require(!nsClone.exists(_.count(!_.isNlp) > 1))

                                deleted += delComb

                                val notNlpTypes = getNotNlpNotes(nsClone).map(_.noteType).distinct

                                if (collapseSentence(nsClone, notNlpTypes)) Some(nsClone) else None
                            }
                            else
                                None
                        )

                // It removes sentences which have only one difference - 'direct' flag of their user tokens.
                // `Direct` sentences have higher priority.
                case class Key(
                    sysNotes: Seq[Map[String, java.io.Serializable]],
                    userNotes: Seq[Map[String, java.io.Serializable]]
                )
                case class Value(sentence: NCNlpSentence, directCount: Int)

                val m = mutable.HashMap.empty[Key, Value]

                sens.map(sen ⇒ {
                    val notes = sen.flatten

                    val sysNotes = notes.filter(_.isSystem)
                    val nlpNotes = notes.filter(_.isNlp)
                    val userNotes = notes.filter(_.isUser)

                    def get(seq: Seq[NCNlpSentenceNote]): Seq[Map[String, java.io.Serializable]] =
                        seq.map(p ⇒
                            // We have to delete some keys to have possibility to compare sentences.
                            p.clone().filter(_._1 != "direct")
                        )

                    // Third element counts non-direct NLP notes - lower is better.
                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
                }).
                    foreach { case (key, sen, directCnt) ⇒
                        m.get(key) match {
                            case Some(v) ⇒
                                // Best sentence is sentence with `direct` synonyms.
                                if (v.directCount > directCnt)
                                    m += key → Value(sen, directCnt)
                            case None ⇒ m += key → Value(sen, directCnt)
                        }
                    }

                m.values.map(_.sentence).toSeq
            }
            else {
                if (collapseSentence(this, getNotNlpNotes(this).map(_.noteType).distinct)) Seq(this) else Seq.empty
            }.distinct
            // NOTE(review): '.distinct' above binds to the 'else' branch only (a no-op on
            // Seq(this)/Seq.empty); the 'if' branch relies on the map-keyed dedup instead.
            // Confirm whether whole-result distinct was intended.

        // Sanity check: every token carries its NLP note plus at most one other note.
        sens.foreach(sen ⇒
            sen.foreach(tok ⇒
                tok.size match {
                    case 1 ⇒ require(tok.head.isNlp, s"Unexpected non-'nlpcraft:nlp' token: $tok")
                    case 2 ⇒ require(tok.head.isNlp ^ tok.last.isNlp, s"Unexpected token notes: $tok")
                    case _ ⇒ require(requirement = false, s"Unexpected token notes count: $tok")
                }
            )
        )

        // Drops similar sentences (with same tokens structure).
        // Among similar sentences we prefer one with minimal free words count.
        sens.groupBy(_.flatten.filter(!_.isNlp).map(_.getKey(withIndexes = false))).
            map { case (_, seq) ⇒ seq.minBy(_.filter(p ⇒ p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
            toSeq
    }

    /**
     * Returns flag are note notes equal (or similar) or not. Reason of ignored difference can be stopwords tokens.
     *
     * @param n1 First note.
     * @param n2 Second note.
     */
    def notesEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
        if (n1.noteType != n2.noteType)
            false
        else {
            val stopIdxs = this.filter(_.isStopWord).map(_.index)

            // One possible difference - stopwords indexes.
            def wordsEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
                val set1 = n1.wordIndexes.toSet
                val set2 = n2.wordIndexes.toSet

                set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(stopIdxs.contains)
            }

            // Symmetric wrapper: either note may be the subset.
            def wordsEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
                wordsEqualOrSimilar0(n1, n2) || wordsEqualOrSimilar0(n2, n1)

            def tokensEqualOrSimilar0(set1: Set[NCNlpSentenceToken], set2: Set[NCNlpSentenceToken]): Boolean =
                set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(_.isStopWord)

            def tokensEqualOrSimilar(set1: Set[NCNlpSentenceToken], set2: Set[NCNlpSentenceToken]): Boolean =
                tokensEqualOrSimilar0(set1, set2) || tokensEqualOrSimilar0(set2, set1)

            // Resolves a single-dimension reference field to the referenced tokens.
            def getList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
                n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[java.util.List[Int]].asScala.
                    map(this (_)).toSet

            // Resolves a two-dimension reference field to the referenced tokens.
            def getListList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
                n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[java.util.List[java.util.List[Int]]].asScala.
                    flatMap(_.asScala.map(this (_))).toSet

            def referencesEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
                require(n1.noteType == n2.noteType)

                n1.noteType match {
                    case "nlpcraft:sort" ⇒
                        tokensEqualOrSimilar(getListList(n1, "subjindexes"), getListList(n2, "subjindexes")) &&
                        tokensEqualOrSimilar(getListList(n1, "byindexes"), getListList(n2, "byindexes"))
                    case "nlpcraft:limit" ⇒
                        tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes"))
                    case "nlpcraft:reference" ⇒
                        tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes"))

                    // Note types without references are trivially similar here.
                    case _ ⇒ true
                }
            }

            def referencesEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
                referencesEqualOrSimilar0(n1, n2) || referencesEqualOrSimilar0(n2, n1)

            // Compare by key ignoring indexes and references (handled separately above).
            def getUniqueKey0(n: NCNlpSentenceNote): Seq[Any] = n.getKey(withIndexes = false, withReferences = false)

            getUniqueKey0(n1) == getUniqueKey0(n2) && wordsEqualOrSimilar(n1, n2) && referencesEqualOrSimilar(n1, n2)
        }

    // NOTE: 'weight' is deliberately excluded (consistent with 'hashCode').
    override def equals(obj: Any): Boolean = obj match {
        case x: NCNlpSentence ⇒
            tokens == x.tokens &&
            srvReqId == x.srvReqId &&
            text == x.text &&
            enabledBuiltInToks == x.enabledBuiltInToks
        case _ ⇒ false
    }
}