| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * https://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.nlpcraft.probe.mgrs.sentence |
| |
| import io.opencensus.trace.Span |
| import org.apache.nlpcraft.common.nlp.NCNlpSentence.NoteLink |
| import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank |
| import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken} |
| import org.apache.nlpcraft.common.{NCE, NCService, U, _} |
| import org.apache.nlpcraft.model.{NCModel, NCToken} |
| import org.apache.nlpcraft.probe.mgrs.NCTokenPartKey |
| |
| import java.io.{Serializable => JSerializable} |
| import java.util |
| import java.util.{List => JList} |
| import scala.collection.mutable |
| import scala.collection.parallel.CollectionConverters._ |
| import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava, SetHasAsJava} |
| import scala.language.implicitConversions |
| |
| /** |
| * Sentence processing manager. |
| */ |
| object NCSentenceManager extends NCService { |
| @volatile private var pool: java.util.concurrent.ForkJoinPool = _ |
| |
| type CacheKey = Seq[Set[NCNlpSentenceNote]] |
| type CacheValue = Seq[Seq[NCNlpSentenceNote]] |
| private val combCache = mutable.HashMap.empty[String, mutable.HashMap[CacheKey, CacheValue]] |
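| // A hypothetical sketch of the cache shape above (not actual runtime data): per |
| // server request ID, each key (competing note sets per word index) maps to the |
| // deletion combinations previously computed for it, e.g.: |
| // combCache("req-1")(Seq(Set(a, b))) -> Seq(Seq(a), Seq(b)) |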
| |
| /** |
| * Collects note links from 'nlpcraft:limit', 'nlpcraft:references' and 'nlpcraft:sort' notes. |
| * |
| * @param notes Notes to scan for links to other notes. |
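| * |
| * A hypothetical sketch (not actual runtime data): |
| * {{{ |
| * // A 'nlpcraft:limit' note with "note" -> "x:elem" and "indexes" -> [3, 1] |
| * // yields NoteLink("x:elem", Seq(1, 3)) - indexes are sorted. |
| * }}} |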
| */ |
| private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = { |
| val noteLinks = mutable.ArrayBuffer.empty[NoteLink] |
| |
| for (n <- notes.filter(n => n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references")) |
| noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala.toSeq.sorted) |
| |
| for (n <- notes.filter(_.noteType == "nlpcraft:sort")) { |
| def add(noteName: String, idxsName: String): Unit = { |
| val names = n(noteName).asInstanceOf[JList[String]] |
| val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]] |
| |
| require(names.size() == idxsSeq.size()) |
| |
| noteLinks ++= |
| ( |
| for ((name, idxs) <- names.asScala.zip(idxsSeq.asScala.map(_.asScala))) |
| yield NoteLink(name, idxs.sorted.toSeq) |
| ) |
| } |
| |
| if (n.contains("subjnotes")) add("subjnotes", "subjindexes") |
| if (n.contains("bynotes")) add("bynotes", "byindexes") |
| } |
| |
| noteLinks |
| } |
| |
| /** |
| * Collects part keys from user notes. |
| * |
| * @param notes Notes to extract part keys from. |
| */ |
| private def getPartKeys(notes: NCNlpSentenceNote*): Seq[NCTokenPartKey] = |
| notes. |
| filter(_.isUser). |
| flatMap(_.dataOpt[JList[NCTokenPartKey]]("parts")). |
| flatMap(_.asScala). |
| distinct |
| |
| /** |
| * Checks whether the tokens referenced by the given note have the expected note type. |
| * |
| * @param ns Sentence. |
| * @param idxs Indexes of the referenced tokens. |
| * @param notesType Expected note type of the referenced tokens. |
| * @param note Checked note; it is removed when the referenced tokens are ambiguously typed. |
| * @return Relation validity flag. |
| */ |
| private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, note: NCNlpSentenceNote): Boolean = { |
| val types = idxs.flatMap(idx => ns(idx).filter(!_.isNlp).map(_.noteType)).distinct |
| |
| /** |
| * Examples: |
| * 1. Sentence 'maximum x' (function related to a single element): |
| * - 'maximum' is an aggregate function linked to the date element. |
| * - 'x' is defined as 2 elements: date and num. |
| * So the variant 'maximum x (as num)' should be excluded. |
| * |
| * 2. Sentence 'compare x and y' (function related to multiple elements): |
| * - 'compare' is a relation function linked to the date element. |
| * - 'x' and 'y' are defined as 2 elements: date and num. |
| * So the variants 'x (as num) and y (as date)' and 'x (as date) and y (as num)' |
| * should not be excluded, but the invalid relation should be deleted for these combinations. |
| */ |
| types.size match { |
| case 0 => false |
| case 1 => types.head == notesType |
| case _ => |
| // Equal elements should be processed together with the function element. |
| // Note that 'types.size' is greater than 1 here, so the note is always removed. |
| ns.removeNote(note) |
| |
| logger.trace(s"Removed note: $note") |
| |
| true |
| } |
| } |
| |
| /** |
| * Fixes notes containing references to other notes' indexes. |
| * Note that 'idxsField' is 'indexes' and 'noteField' is 'note' for all kinds of references. |
| * |
| * @param noteType Note type. |
| * @param idxsField Indexes field. |
| * @param noteField Note field. |
| * @param ns Sentence. |
| * @param history Indexes transformation history. |
| * @return Valid flag. |
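| * |
| * A hypothetical sketch of the history remapping (not actual runtime data): |
| * {{{ |
| * // With history == Seq(3 -> 2), a stored 'indexes' value of [2, 3] becomes |
| * // [2, 2] and, after 'distinct', collapses to [2]. |
| * }}} |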
| */ |
| private def fixIndexesReferences( |
| noteType: String, |
| idxsField: String, |
| noteField: String, |
| ns: NCNlpSentence, |
| history: Seq[(Int, Int)] |
| ): Boolean = { |
| ns.filter(_.isTypeOf(noteType)).foreach(tok => |
| tok.getNoteOpt(noteType, idxsField) match { |
| case Some(n) => |
| val idxs: Seq[Int] = n.data[JList[Int]](idxsField).asScala.toSeq |
| var fixed = idxs |
| |
| history.foreach { case (idxOld, idxNew) => fixed = fixed.map(i => if (i == idxOld) idxNew else i) } |
| |
| fixed = fixed.distinct |
| |
| if (idxs != fixed) |
| ns.fixNote(n, "indexes" -> fixed.asJava.asInstanceOf[JSerializable]) |
| case None => // No-op. |
| } |
| ) |
| |
| ns.flatMap(_.getNotes(noteType)).forall( |
| n => checkRelation(ns, n.data[JList[Int]]("indexes").asScala.toSeq, n.data[String](noteField), n) |
| ) |
| } |
| |
| /** |
| * Checks indexes-based references for the given note type. |
| * |
| * @param note Note type. |
| * @param idxsField Indexes field. |
| * @param noteField Note field. |
| * @param ns Sentence. |
| */ |
| private def fixNoteIndexes(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit = |
| ns.flatMap(_.getNotes(note)).foreach( |
| n => checkRelation(ns, n.data[JList[Int]](idxsField).asScala.toSeq, n.data[String](noteField), n) |
| ) |
| |
| /** |
| * Checks list-of-indexes-based references for the given note type. |
| * |
| * @param note Note type. |
| * @param idxsField Indexes list field. |
| * @param noteField Notes list field. |
| * @param ns Sentence. |
| */ |
| private def fixNoteIndexesList(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit = |
| ns.flatMap(_.getNotes(note)).foreach(rel => |
| rel.dataOpt[JList[JList[Int]]](idxsField) match { |
| case Some(idxsList) => |
| val notesTypes = rel.data[JList[String]](noteField) |
| |
| require(idxsList.size() == notesTypes.size()) |
| |
| idxsList.asScala.zip(notesTypes.asScala).foreach { |
| case (idxs, notesType) => checkRelation(ns, idxs.asScala.toSeq, notesType, rel) |
| } |
| case None => // No-op. |
| } |
| ) |
| |
| /** |
| * Copies a token, recording the index transformation in 'history'. |
| * |
| * @param ns Sentence. |
| * @param history Indexes transformation history. |
| * @param toksCopy Copied tokens. |
| * @param i Index. |
| */ |
| private def simpleCopy( |
| ns: NCNlpSentence, |
| history: mutable.ArrayBuffer[(Int, Int)], |
| toksCopy: NCNlpSentence, i: Int |
| ): Seq[NCNlpSentenceToken] = { |
| val tokCopy = toksCopy(i) |
| |
| history += tokCopy.index -> ns.size |
| |
| ns += tokCopy.clone(ns.size) |
| } |
| |
| /** |
| * Glues adjacent stop words into compound tokens. |
| * |
| * @param ns Sentence. |
| * @param userNoteTypes Notes types. |
| * @param history Indexes transformation history. |
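| * |
| * A hypothetical sketch (not actual runtime data): |
| * {{{ |
| * // Adjacent non-bracketed stop words at indexes 2 and 3 are merged into one |
| * // compound stop-word token; 'history' records the resulting index shifts so |
| * // that note references can be fixed afterwards. |
| * }}} |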
| */ |
| private def unionStops( |
| ns: NCNlpSentence, |
| userNoteTypes: Seq[String], |
| history: mutable.ArrayBuffer[(Int, Int)] |
| ): Unit = { |
| // A Java collection is used here because the equivalent Scala collection |
| // (mutable.Buffer.empty[mutable.Buffer[NCNlpSentenceToken]]) triggers what appears |
| // to be a Scala compiler internal error. |
| val bufs = new util.ArrayList[mutable.Buffer[NCNlpSentenceToken]]() |
| |
| def last[T](l: JList[T]): T = l.get(l.size() - 1) |
| |
| ns.filter(t => t.isStopWord && !t.isBracketed).foreach(t => |
| if (!bufs.isEmpty && last(bufs).last.index + 1 == t.index) |
| last(bufs) += t |
| else |
| bufs.add(mutable.Buffer.empty[NCNlpSentenceToken] :+ t) |
| ) |
| |
| val idxsSeq = bufs.asScala.filter(_.lengthCompare(1) > 0).map(_.map(_.index)) |
| |
| if (idxsSeq.nonEmpty) { |
| val nsCopyToks = ns.clone() |
| ns.clear() |
| |
| val buf = mutable.Buffer.empty[Int] |
| |
| for (i <- nsCopyToks.indices) |
| idxsSeq.find(_.contains(i)) match { |
| case Some(idxs) => |
| if (!buf.contains(idxs.head)) { |
| buf += idxs.head |
| |
| ns += mkCompound(ns, nsCopyToks.toSeq, idxs.toSeq, stop = true, ns.size, None, history) |
| } |
| case None => simpleCopy(ns, history, nsCopyToks, i) |
| } |
| |
| fixIndexes(ns, userNoteTypes) |
| } |
| } |
| |
| /** |
| * Fixes indexes for all notes after recreating tokens. |
| * |
| * @param ns Sentence. |
| * @param userNoteTypes Notes types. |
| */ |
| private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]): Unit = { |
| // Replaces other notes' indexes. |
| for (t <- userNoteTypes :+ "nlpcraft:nlp"; note <- ns.getNotes(t)) { |
| val toks = ns.filter(_.contains(note)).sortBy(_.index) |
| |
| val newNote = note.clone(toks.map(_.index).toSeq, toks.flatMap(_.wordIndexes).toSeq.sorted) |
| |
| toks.foreach(t => { |
| t.remove(note) |
| t.add(newNote) |
| }) |
| } |
| |
| // Special case - field index of core NLP note. |
| ns.zipWithIndex.foreach { case (tok, idx) => ns.fixNote(tok.getNlpNote, "index" -> idx) } |
| } |
| |
| /** |
| * Zips notes of the same type into compound tokens. |
| * |
| * @param ns Sentence. |
| * @param nType Notes type. |
| * @param userNotesTypes Notes types. |
| * @param history Indexes transformation history. |
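| * |
| * A hypothetical sketch (not actual runtime data): |
| * {{{ |
| * // A note of type 'nType' spanning tokens 1..2 is replaced by one compound |
| * // token; 'history' records the mapping of the old indexes to the new one. |
| * }}} |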
| */ |
| private def zipNotes( |
| ns: NCNlpSentence, |
| nType: String, |
| userNotesTypes: Seq[String], |
| history: mutable.ArrayBuffer[(Int, Int)] |
| ): Unit = { |
| val nts = ns.getNotes(nType).filter(n => n.tokenFrom != n.tokenTo).sortBy(_.tokenFrom) |
| |
| val overlapped = |
| nts.flatMap(n => n.tokenFrom to n.tokenTo).map(ns(_)).exists( |
| t => userNotesTypes.map(pt => t.getNotes(pt).size).sum > 1 |
| ) |
| |
| if (nts.nonEmpty && !overlapped) { |
| val nsCopyToks = ns.clone() |
| ns.clear() |
| |
| val buf = mutable.ArrayBuffer.empty[Int] |
| |
| for (i <- nsCopyToks.indices) |
| nts.find(_.tokenIndexes.contains(i)) match { |
| case Some(n) => |
| if (!buf.contains(n.tokenFrom)) { |
| buf += n.tokenFrom |
| |
| ns += mkCompound(ns, nsCopyToks.toSeq, n.tokenIndexes, stop = false, ns.size, Some(n), history) |
| } |
| case None => simpleCopy(ns, history, nsCopyToks, i) |
| } |
| |
| fixIndexes(ns, userNotesTypes) |
| } |
| } |
| |
| /** |
| * Makes a compound token. |
| * |
| * @param ns Sentence. |
| * @param nsCopyToks Tokens. |
| * @param indexes Indexes of the tokens to glue together. |
| * @param stop Stop-word flag for the new token. |
| * @param idx Index of the new compound token. |
| * @param commonNote Common note. |
| * @param history Indexes transformation history. |
| */ |
| private def mkCompound( |
| ns: NCNlpSentence, |
| nsCopyToks: Seq[NCNlpSentenceToken], |
| indexes: Seq[Int], |
| stop: Boolean, |
| idx: Int, |
| commonNote: Option[NCNlpSentenceNote], |
| history: mutable.ArrayBuffer[(Int, Int)] |
| ): NCNlpSentenceToken = { |
| val t = NCNlpSentenceToken(idx) |
| |
| // Note that this adds stop-words too. |
| val content = nsCopyToks.zipWithIndex.filter(p => indexes.contains(p._2)).map(_._1) |
| |
| content.foreach(t => history += t.index -> idx) |
| |
| def mkValue(get: NCNlpSentenceToken => String): String = { |
| val buf = mutable.Buffer.empty[String] |
| |
| val n = content.size - 1 |
| |
| content.zipWithIndex.foreach(p => { |
| val t = p._1 |
| val idx = p._2 |
| |
| buf += get(t) |
| |
| if (idx < n && t.endCharIndex != content(idx + 1).startCharIndex) |
| buf += " " |
| }) |
| |
| buf.mkString |
| } |
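| // A hypothetical sketch of the joining rule above (not actual runtime data): |
| // tokens "New" [0..3) and "York" [4..8) are not character-contiguous, so a space |
| // is inserted: "New York"; tokens "it" [0..2) and "'s" [2..4) are contiguous, so |
| // no space is added: "it's". |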
| |
| val origText = mkValue((t: NCNlpSentenceToken) => t.origText) |
| |
| val idxs = Seq(idx) |
| val wordIdxs = content.flatMap(_.wordIndexes).sorted |
| |
| val direct = |
| commonNote match { |
| case Some(n) if n.isUser => n.isDirect |
| case _ => content.forall(_.isDirect) |
| } |
| |
| val params = Seq( |
| "index" -> idx, |
| "pos" -> NCPennTreebank.SYNTH_POS, |
| "posDesc" -> NCPennTreebank.SYNTH_POS_DESC, |
| "lemma" -> mkValue((t: NCNlpSentenceToken) => t.lemma), |
| "origText" -> origText, |
| "normText" -> mkValue((t: NCNlpSentenceToken) => t.normText), |
| "stem" -> mkValue((t: NCNlpSentenceToken) => t.stem), |
| "start" -> content.head.startCharIndex, |
| "end" -> content.last.endCharIndex, |
| "charLength" -> origText.length, |
| "quoted" -> false, |
| "stopWord" -> stop, |
| "bracketed" -> false, |
| "direct" -> direct, |
| "dict" -> (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict") else false), |
| "english" -> nsCopyToks.forall(_.getNlpNote.data[Boolean]("english")), |
| "swear" -> nsCopyToks.exists(_.getNlpNote.data[Boolean]("swear")) |
| ) |
| |
| val nlpNote = NCNlpSentenceNote(idxs, wordIdxs, "nlpcraft:nlp", params: _*) |
| |
| t.add(nlpNote) |
| |
| // Adds processed note with fixed indexes. |
| commonNote match { |
| case Some(n) => |
| ns.removeNote(n) |
| t.add(n.clone(idxs, wordIdxs)) |
| case None => // No-op. |
| } |
| |
| t |
| } |
| |
| /** |
| * Fixes notes containing lists of references to other notes' indexes. |
| * |
| * @param noteType Note type. |
| * @param idxsField Indexes field. |
| * @param noteField Note field. |
| * @param ns Sentence. |
| * @param history Indexes transformation history. |
| * @return Valid flag. |
| */ |
| private def fixIndexesReferencesList( |
| noteType: String, |
| idxsField: String, |
| noteField: String, |
| ns: NCNlpSentence, |
| history: Seq[(Int, Int)] |
| ): Boolean = { |
| var ok = true |
| |
| for (tok <- ns.filter(_.isTypeOf(noteType)) if ok) |
| tok.getNoteOpt(noteType, idxsField) match { |
| case Some(n) => |
| val idxs: Seq[Seq[Int]] = |
| n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala.toSeq).toSeq |
| var fixed = idxs |
| |
| history.foreach { |
| case (idxOld, idxNew) => fixed = fixed.map(_.map(i => if (i == idxOld) idxNew else i).distinct) |
| } |
| |
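| // A hypothetical sketch (not actual runtime data): fixed == Seq(Seq(2), Seq(5)) |
| // collapses to Seq(2, 5) below, while fixed == Seq(Seq(2, 3)) keeps its second |
| // dimension and invalidates this variant. |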
| if (fixed.forall(_.size == 1)) |
| // Collapses the two-dimensional array to one dimension, so this method must |
| // always be called regardless of the 'fixIndexesReferences' method. |
| ns.fixNote(n, idxsField -> fixed.map(_.head).asJava.asInstanceOf[JSerializable]) |
| else |
| ok = false |
| case None => // No-op. |
| } |
| |
| ok && |
| ns.flatMap(_.getNotes(noteType)).forall(rel => |
| rel.dataOpt[JList[Int]](idxsField) match { |
| case Some(idxsList) => |
| val notesTypes = rel.data[JList[String]](noteField) |
| |
| require(idxsList.size() == notesTypes.size()) |
| |
| idxsList.asScala.zip(notesTypes.asScala).forall { |
| case (idxs, notesType) => checkRelation(ns, Seq(idxs), notesType, rel) |
| } |
| case None => true |
| } |
| ) |
| } |
| |
| /** |
| * Collapses a single sentence variant: fixes stop-word flags, note references and token positions. |
| * |
| * @param ns Sentence. |
| * @param notNlpTypes Non-NLP note types. |
| * @return Valid flag. |
| */ |
| private def collapseSentence(ns: NCNlpSentence, notNlpTypes: Seq[String]): Boolean = { |
| ns. |
| filter(!_.isNlp). |
| filter(_.isStopWord). |
| flatten. |
| filter(_.isNlp). |
| foreach(n => ns.fixNote(n, "stopWord" -> false)) |
| |
| val all = ns.tokens.flatten |
| val nsNotes: Map[String, Seq[Int]] = all.map(p => p.noteType -> p.tokenIndexes).toMap |
| |
| for ( |
| t <- ns.tokens; stopReason <- t.stopsReasons |
| if all.contains(stopReason) && nsNotes.getOrElse(stopReason.noteType, Seq.empty) == stopReason.tokenIndexes |
| ) |
| ns.fixNote(t.getNlpNote, "stopWord" -> true) |
| |
| val history = mutable.ArrayBuffer.empty[(Int, Int)] |
| |
| fixNoteIndexes("nlpcraft:relation", "indexes", "note", ns) |
| fixNoteIndexes("nlpcraft:limit", "indexes", "note", ns) |
| fixNoteIndexesList("nlpcraft:sort", "subjindexes", "subjnotes", ns) |
| fixNoteIndexesList("nlpcraft:sort", "byindexes", "bynotes", ns) |
| |
| notNlpTypes.foreach(typ => zipNotes(ns, typ, notNlpTypes, history)) |
| unionStops(ns, notNlpTypes, history) |
| |
| val histSeq = history.toSeq |
| |
| val res = |
| fixIndexesReferences("nlpcraft:relation", "indexes", "note", ns, histSeq) && |
| fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, histSeq) && |
| fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, histSeq) && |
| fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, histSeq) |
| |
| if (res) { |
| // Validation (all indexes were calculated correctly). |
| require( |
| !ns.flatten. |
| exists(n => ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t => !t.contains(n))), |
| s"Invalid sentence:\n" + |
| ns.map(t => |
| // Human readable invalid sentence for debugging. |
| s"${t.origText}{index:${t.index}}[${t.map(n => s"${n.noteType}, {range:${n.tokenFrom}-${n.tokenTo}}").mkString("|")}]" |
| ).mkString("\n") |
| ) |
| } |
| |
| res |
| } |
| |
| /** |
| * Drops abstract tokens that are not parts of composite tokens and are not referenced by note links. |
| * |
| * @param mdl Model. |
| * @param ns Sentence. |
| */ |
| private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit = |
| if (!mdl.getAbstractTokens.isEmpty) { |
| val notes = ns.flatten |
| |
| val keys = getPartKeys(notes: _*) |
| val noteLinks = getLinks(notes) |
| |
| notes.filter(n => { |
| val noteToks = ns.tokens.filter(_.contains(n)) |
| |
| mdl.getAbstractTokens.contains(n.noteType) && |
| !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) && |
| !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted)) |
| }).foreach(ns.removeNote) |
| } |
| |
| /** |
| * Collects all distinct non-NLP notes from the given tokens. |
| * |
| * @param toks Tokens. |
| * @return Distinct non-NLP notes. |
| */ |
| private def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] = |
| toks.flatten.filter(!_.isNlp).distinct |
| |
| /** |
| * Saves deleted notes, together with clones of their tokens, into the given sentence. |
| * |
| * @param thisSen Sentence the deleted notes' tokens are taken from. |
| * @param sen Sentence the deleted notes are added to. |
| * @param dels Deleted notes. |
| */ |
| private def addDeleted(thisSen: NCNlpSentence, sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit = |
| sen.addDeletedNotes(dels.map(n => { |
| val savedDelNote = n.clone() |
| val savedDelToks = n.tokenIndexes.map(idx => thisSen(idx).clone()) |
| |
| val mainNotes = savedDelToks.flatten.filter(n => n.noteType != "nlpcraft:nlp" && n != savedDelNote) |
| |
| // Deleted note's tokens should contain only NLP data and deleted notes. |
| for (savedDelTok <- savedDelToks; mainNote <- mainNotes) |
| savedDelTok.remove(mainNote) |
| |
| savedDelNote -> savedDelToks |
| }).toMap) |
| |
| /** |
| * This collapser handles several tasks: |
| * - "overall" collapsing after all other individual collapsers have had their turn; |
| * - special further enrichment of tokens, e.g. linking. |
| * |
| * In all cases of overlap (full or partial) the "longest" note wins. In case of overlap with |
| * equal lengths the winning note is chosen based on note priority. |
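| * |
| * A hypothetical sketch of the overlap rule (not actual runtime data): |
| * {{{ |
| * // Note A spans tokens 0..2 and note B spans tokens 1..2: A is longer, so |
| * // variants keeping A win and B is dropped via the deletion combinations |
| * // computed below. |
| * }}} |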
| */ |
| @throws[NCE] |
| private def collapseSentence(sen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = { |
| def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = { |
| if (lastPhase) |
| dropAbstract(mdl, ns) |
| |
| if (collapseSentence(ns, getNotNlpNotes(ns.toSeq).map(_.noteType).distinct)) Some(ns) else None |
| } |
| |
| // Always deletes `similar` notes. |
| // Some words with the same note type can be detected in various ways. |
| // We keep only one variant - the one with the `best` direct and sparsity parameters - |
| // since the other variants for these words are redundant. |
| val redundant: Seq[NCNlpSentenceNote] = |
| sen.flatten.filter(!_.isNlp).distinct. |
| groupBy(_.getKey()). |
| map(p => p._2.sortBy(p => |
| ( |
| // System notes don't have such flags. |
| if (p.isUser) { |
| if (p.isDirect) |
| 0 |
| else |
| 1 |
| } |
| else |
| 0, |
| if (p.isUser) |
| p.sparsity |
| else |
| 0 |
| ) |
| )). |
| flatMap(_.drop(1)). |
| toSeq |
| |
| redundant.foreach(sen.removeNote) |
| |
| var delCombs: Seq[NCNlpSentenceNote] = |
| getNotNlpNotes(sen.toSeq). |
| flatMap(note => getNotNlpNotes(note.tokenIndexes.sorted.map(i => sen(i))).filter(_ != note)). |
| distinct |
| |
| // Optimization. Deletes all wholly swallowed notes. |
| val links = getLinks(sen.tokens.toSeq.flatten) |
| |
| val swallowed = |
| delCombs. |
| // No links point to this note. |
| filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))). |
| // The note itself has no part keys. |
| filter(getPartKeys(_).isEmpty). |
| flatMap(note => { |
| val noteWordsIdxs = note.wordIndexes.toSet |
| val key = NCTokenPartKey(note, sen) |
| |
| val delCombOthers = |
| delCombs.filter(n => n != note && getPartKeys(n).contains(key)) |
| |
| if ( |
| delCombOthers.exists(o => noteWordsIdxs == o.wordIndexes.toSet) || |
| delCombOthers.nonEmpty && !delCombOthers.exists(o => noteWordsIdxs.subsetOf(o.wordIndexes.toSet)) |
| ) |
| Some(note) |
| else |
| None |
| }) |
| |
| delCombs = delCombs.filter(p => !swallowed.contains(p)) |
| addDeleted(sen, sen, swallowed) |
| swallowed.foreach(sen.removeNote) |
| |
| var sens = |
| if (delCombs.nonEmpty) { |
| val toksByIdx = |
| delCombs.flatMap(note => note.wordIndexes.map(_ -> note)). |
| groupBy { case (idx, _) => idx }. |
| map { case (_, seq) => seq.map { case (_, note) => note }.toSet }. |
| toSeq.sortBy(-_.size) |
| |
| def findCombinations(): Seq[Seq[NCNlpSentenceNote]] = |
| NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq) |
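| // A hedged reading of the helper's contract (hypothetical example): with |
| // toksByIdx == Seq(Set(a, b), Set(b, c)), each returned combination is a set of |
| // notes whose deletion leaves every word index with at most one competing note, |
| // e.g. Seq(b) or Seq(a, c) - see the 'require' on the cloned sentence below. |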
| |
| val seqSens = |
| combCache. |
| getOrElseUpdate(sen.srvReqId, mutable.HashMap.empty[CacheKey, CacheValue]). |
| getOrElseUpdate( |
| toksByIdx, |
| findCombinations() |
| ).par. |
| flatMap(delComb => { |
| val nsClone = sen.clone() |
| |
| // Saves deleted notes for sentence and their tokens. |
| addDeleted(sen, nsClone, delComb) |
| delComb.foreach(nsClone.removeNote) |
| |
| // Has overlapped notes for some tokens. |
| require(!nsClone.exists(_.count(!_.isNlp) > 1)) |
| |
| collapse0(nsClone) |
| }).seq |
| |
| // Removes sentences whose only difference is the 'direct' flag of their user tokens. |
| // `Direct` sentences have higher priority. |
| type Key = Seq[Map[String, JSerializable]] |
| case class Holder(key: Key, sentence: NCNlpSentence, factor: Int) |
| |
| def mkHolder(sen: NCNlpSentence): Holder = { |
| val notes = sen.flatten |
| |
| Holder( |
| // The 'direct' key is deleted so that sentences can be compared. |
| notes.map(_.clone().filter { case (name, _) => name != "direct" }).toSeq, |
| sen, |
| notes.filter(_.isNlp).map(p => if (p.isDirect) 0 else 1).sum |
| ) |
| } |
| |
| seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) => seq.minBy(_.factor).sentence }.toSeq |
| } |
| else |
| collapse0(sen).map(Seq(_)).getOrElse(Seq.empty) |
| |
| sens = sens.distinct |
| |
| sens.par.foreach(sen => |
| sen.foreach(tok => |
| tok.size match { |
| case 1 => require(tok.head.isNlp, s"Unexpected non-'nlpcraft:nlp' token: $tok") |
| case 2 => require(tok.head.isNlp ^ tok.last.isNlp, s"Unexpected token notes: $tok") |
| case _ => require(requirement = false, s"Unexpected token notes count: $tok") |
| } |
| ) |
| ) |
| |
| def notNlpNotes(s: NCNlpSentence): Seq[NCNlpSentenceNote] = s.flatten.filter(!_.isNlp) |
| |
| // Drops similar sentences (with the same note type structure), keeping those with more detected notes. |
| sens = sens.groupBy(notNlpNotes(_).groupBy(_.noteType).keys.toSeq.sorted.distinct). |
| flatMap(p => { |
| val m: Map[NCNlpSentence, Int] = p._2.map(p => p -> notNlpNotes(p).size).toMap |
| |
| val max = m.values.max |
| |
| m.filter(_._2 == max).keys |
| }). |
| toSeq |
| |
| sens = |
| sens.filter(s => { |
| def mkNotNlp(s: NCNlpSentence): Set[NCNlpSentenceNote] = s.flatten.filter(!_.isNlp).toSet |
| |
| val notNlpNotes = mkNotNlp(s) |
| |
| !sens.filter(_ != s).map(mkNotNlp).exists(notNlpNotes.subsetOf) |
| }) |
| |
| // Drops similar sentences (with the same token structure). |
| // Among similar sentences, prefer the one with the minimal free-word count. |
| sens.groupBy(notNlpNotes(_).map(_.getKey(withIndexes = false))). |
| map { case (_, seq) => seq.minBy(_.filter(p => p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }. |
| toSeq |
| } |
| |
| override def start(parent: Span): NCService = { |
| ackStarting() |
| |
| pool = new java.util.concurrent.ForkJoinPool() |
| |
| ackStarted() |
| } |
| |
| override def stop(parent: Span): Unit = { |
| ackStopping() |
| |
| U.shutdownPool(pool) |
| |
| ackStopped() |
| } |
| |
| /** |
| * Collapses the given sentence into a sequence of valid variants. |
| * |
| * @param mdl Model. |
| * @param sen Sentence. |
| * @param lastPhase Last phase flag. |
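| * |
| * A hypothetical usage sketch (variable names assumed for illustration): |
| * {{{ |
| * val variants: Seq[NCNlpSentence] = NCSentenceManager.collapse(mdl, sen, lastPhase = true) |
| * }}} |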
| */ |
| def collapse(mdl: NCModel, sen: NCNlpSentence, lastPhase: Boolean = false): Seq[NCNlpSentence] = |
| collapseSentence(sen, mdl, lastPhase) |
| |
| /** |
| * Clears the combinations cache for the given server request. |
| * |
| * @param srvReqId Server request ID. |
| */ |
| def clearCache(srvReqId: String): Unit = combCache -= srvReqId |
| |
| /** |
| * Fixes 'nlpcraft:sort' token references after conversation tokens substitution. |
| * |
| * @param convTok Conversation token. |
| * @param nonConvToks Non-conversation tokens. |
| * @param allConvToks All conversation tokens. |
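| * |
| * A hedged sketch of the remapping (hypothetical example): |
| * {{{ |
| * // A "bynotes"/"byindexes" reference pointing at a conversation token is |
| * // remapped to a non-conversation token sharing at least one group with it; |
| * // if no such token exists, an NCE is thrown. |
| * }}} |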
| */ |
| def fixMeta(convTok: NCToken, nonConvToks: Seq[NCToken], allConvToks: Seq[NCToken]): Unit = |
| convTok.getId match { |
| case "nlpcraft:sort" => |
| def fix(notesName: String, idxsName: String): Unit = { |
| val notes = convTok.meta[JList[String]](s"nlpcraft:sort:$notesName") |
| val idxs = convTok.meta[JList[Int]](s"nlpcraft:sort:$idxsName") |
| |
| require((notes == null && idxs == null) || (notes != null && idxs != null && notes.size() == idxs.size())) |
| |
| if (notes != null && !notes.isEmpty) { |
| val data: Seq[(String, Int)] = |
| notes.asScala.zip(idxs.asScala).map { case (note, idx) => |
| nonConvToks.find(t => t.getId == note && t.getIndex == idx) match { |
| case Some(_) => (note, idx) |
| case None => |
| val ref = |
| allConvToks. |
| find(t => t.getId == note && t.getIndex == idx). |
| getOrElse( |
| throw new NCE(s"Reference is not found [note=$note, index=$idx]") |
| ) |
| |
| val newRef = |
| nonConvToks. |
| find(t => |
| t.getGroups.asScala.toSet.intersect(ref.getGroups.asScala.toSet).nonEmpty |
| ). |
| getOrElse( |
| throw new NCE(s"New reference is not found [note=$note, index=$idx]") |
| ) |
| |
| (newRef.getId, newRef.getIndex) |
| } |
| } |
| |
| convTok.getMetadata.put(s"nlpcraft:sort:$notesName", data.map(_._1).asJava) |
| convTok.getMetadata.put(s"nlpcraft:sort:$idxsName", data.map(_._2).asJava) |
| } |
| } |
| |
| fix("bynotes", "byindexes") |
| fix("subjnotes", "subjindexes") |
| case _ => // TODO: implement all others. |
| } |
| } |