| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * https://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.nlpcraft.probe.mgrs.sentence |
| |
| import io.opencensus.trace.Span |
| import org.apache.nlpcraft.common.nlp.NCNlpSentence.NoteLink |
| import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank |
| import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken} |
| import org.apache.nlpcraft.common.{NCE, NCService, U, _} |
| import org.apache.nlpcraft.model.NCModel |
| import org.apache.nlpcraft.probe.mgrs.NCTokenPartKey |
| |
| import java.io.{Serializable => JSerializable} |
| import java.util |
| import java.util.{List => JList} |
| import scala.collection.mutable |
| import scala.collection.parallel.CollectionConverters._ |
| import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava, SetHasAsJava, SetHasAsScala} |
| import scala.language.implicitConversions |
| |
/**
 * Sentences processing manager.
 */
object NCSentenceManager extends NCService {
    // Fork-join pool used for parallel note-combination search; created in start(), shut down in stop().
    @volatile private var pool: java.util.concurrent.ForkJoinPool = _

    // Cache key: groups of mutually overlapping notes. Cache value: note combinations to delete.
    type CacheKey = Seq[Set[NCNlpSentenceNote]]
    type CacheValue = Seq[Seq[NCNlpSentenceNote]]
    // Per-request combination cache, keyed by server request ID.
    // Entries are evicted via clearRequestData().
    private val combCache = mutable.HashMap.empty[String, mutable.HashMap[CacheKey, CacheValue]]
| |
| def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = { |
| val noteLinks = mutable.ArrayBuffer.empty[NoteLink] |
| |
| for (n <- notes.filter(n => n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references")) |
| noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala.toSeq.sorted) |
| |
| for (n <- notes.filter(_.noteType == "nlpcraft:sort")) { |
| def add(noteName: String, idxsName: String): Unit = { |
| val names = n(noteName).asInstanceOf[JList[String]] |
| val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]] |
| |
| require(names.size() == idxsSeq.size()) |
| |
| noteLinks ++= |
| ( |
| for ((name, idxs) <- names.asScala.zip(idxsSeq.asScala.map(_.asScala))) |
| yield NoteLink(name, idxs.sorted.toSeq) |
| ) |
| } |
| |
| if (n.contains("subjnotes")) add("subjnotes", "subjindexes") |
| if (n.contains("bynotes")) add("bynotes", "byindexes") |
| } |
| |
| noteLinks |
| } |
| |
| /** |
| * |
| * @param n |
| */ |
| private def getParts(n: NCNlpSentenceNote): Option[Seq[NCTokenPartKey]] = { |
| val res: Option[JList[NCTokenPartKey]] = n.dataOpt("parts") |
| |
| res match { |
| case Some(v) => Some(v.asScala) |
| case None => None |
| } |
| } |
| |
| /** |
| * |
| * @param notes |
| */ |
| def getPartKeys(notes: Seq[NCNlpSentenceNote]): Seq[NCTokenPartKey] = |
| notes.filter(_.isUser).flatMap(getParts).flatten.distinct |
| |
| /** |
| * |
| * @param note |
| * @return |
| */ |
| def getPartKeys(note: NCNlpSentenceNote): Seq[NCTokenPartKey] = |
| if (note.isUser) getParts(note).getOrElse(Seq.empty) else Seq.empty |
| |
| /** |
| * |
| * @param ns |
| * @param idxs |
| * @param notesType |
| * @param note |
| * @return |
| */ |
| private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, note: NCNlpSentenceNote): Boolean = { |
| val types = idxs.flatMap(idx => ns(idx).map(p => p).filter(!_.isNlp).map(_.noteType)).distinct |
| |
| /** |
| * Example: |
| * 1. Sentence 'maximum x' (single element related function) |
| * - maximum is aggregate function linked to date element. |
| * - x defined as 2 elements: date and num. |
| * So, the variant 'maximum x (as num)' should be excluded. |
| * * |
| * 2. Sentence 'compare x and y' (multiple elements related function) |
| * - compare is relation function linked to date element. |
| * - x an y defined as 2 elements: date and num. |
| * So, variants 'x (as num) and x (as date)' and 'x (as date) and x (as num)' |
| * should not be excluded, but invalid relation should be deleted for these combinations. |
| */ |
| types.size match { |
| case 0 => false |
| case 1 => types.head == notesType |
| case _ => |
| // Equal elements should be processed together with function element. |
| if (types.size == 1) |
| false |
| else { |
| ns.removeNote(note) |
| |
| logger.trace(s"Removed note: $note") |
| |
| true |
| } |
| } |
| } |
| |
| /** |
| * Fixes notes with references to other notes indexes. |
| * Note that 'idxsField' is 'indexes' and 'noteField' is 'note' for all kind of references. |
| * |
| * @param noteType Note type. |
| * @param idxsField Indexes field. |
| * @param noteField Note field. |
| * @param ns Sentence. |
| * @param history Indexes transformation history. |
| * @return Valid flag. |
| */ |
| private def fixIndexesReferences( |
| noteType: String, |
| idxsField: String, |
| noteField: String, |
| ns: NCNlpSentence, |
| history: Seq[(Int, Int)] |
| ): Boolean = { |
| ns.filter(_.isTypeOf(noteType)).foreach(tok => |
| tok.getNoteOpt(noteType, idxsField) match { |
| case Some(n) => |
| val idxs: Seq[Int] = n.data[JList[Int]](idxsField).asScala.toSeq |
| var fixed = idxs |
| |
| history.foreach { case (idxOld, idxNew) => fixed = fixed.map(i => if (i == idxOld) idxNew else i) } |
| |
| fixed = fixed.distinct |
| |
| if (idxs != fixed) |
| ns.fixNote(n, "indexes" -> fixed.asJava.asInstanceOf[JSerializable]) |
| case None => // No-op. |
| } |
| ) |
| |
| ns.flatMap(_.getNotes(noteType)).forall( |
| n => checkRelation(ns, n.data[JList[Int]]("indexes").asScala.toSeq, n.data[String](noteField), n) |
| ) |
| } |
| |
| /** |
| * |
| * @param note |
| * @param idxsField |
| * @param noteField |
| * @param ns |
| */ |
| private def fixNoteIndexes(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit = |
| ns.flatMap(_.getNotes(note)).foreach( |
| n => checkRelation(ns, n.data[JList[Int]](idxsField).asScala.toSeq, n.data[String](noteField), n) |
| ) |
| |
| /** |
| * |
| * @param note |
| * @param idxsField |
| * @param noteField |
| * @param ns |
| */ |
| private def fixNoteIndexesList(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit = |
| ns.flatMap(_.getNotes(note)).foreach(rel => |
| rel.dataOpt[JList[JList[Int]]](idxsField) match { |
| case Some(idxsList) => |
| val notesTypes = rel.data[JList[String]](noteField) |
| |
| require(idxsList.size() == notesTypes.size()) |
| |
| idxsList.asScala.zip(notesTypes.asScala).foreach { |
| case (idxs, notesType) => checkRelation(ns, idxs.asScala.toSeq, notesType, rel) |
| } |
| case None => // No-op. |
| } |
| ) |
| |
| /** |
| * Copies token. |
| * |
| * @param ns Sentence. |
| * @param history Indexes transformation history. |
| * @param toksCopy Copied tokens. |
| * @param i Index. |
| */ |
| private def simpleCopy( |
| ns: NCNlpSentence, |
| history: mutable.ArrayBuffer[(Int, Int)], |
| toksCopy: NCNlpSentence, |
| i: Int |
| ): Seq[NCNlpSentenceToken] = { |
| val tokCopy = toksCopy(i) |
| |
| history += tokCopy.index -> ns.size |
| |
| ns += tokCopy.clone(ns.size) |
| } |
| |
    /**
     * Glues runs of sequential stop words into single compound tokens.
     *
     * @param ns Sentence (rebuilt in place).
     * @param userNoteTypes Notes types (used to re-anchor notes afterwards).
     * @param history Indexes transformation history (old index -> new index).
     */
    private def unionStops(
        ns: NCNlpSentence,
        userNoteTypes: Seq[String],
        history: mutable.ArrayBuffer[(Int, Int)]
    ): Unit = {
        // Java collection used because using scala collections (mutable.Buffer.empty[mutable.Buffer[Token]]) is reason
        // Of compilation errors which seems as scala compiler internal error.
        val bufs = new util.ArrayList[mutable.Buffer[NCNlpSentenceToken]]()

        def last[T](l: JList[T]): T = l.get(l.size() - 1)

        // Group non-bracketed stop words into runs of adjacent (by index) tokens.
        ns.filter(t => t.isStopWord && !t.isBracketed).foreach(t =>
            if (!bufs.isEmpty && last(bufs).last.index + 1 == t.index)
                last(bufs) += t
            else
                bufs.add(mutable.Buffer.empty[NCNlpSentenceToken] :+ t)
        )

        // Only runs of 2 or more stop words get glued.
        val idxsSeq = bufs.asScala.filter(_.lengthCompare(1) > 0).map(_.map(_.index))

        if (idxsSeq.nonEmpty) {
            // Rebuild the sentence: one compound token per run, simple copy otherwise.
            val nsCopyToks = ns.clone()
            ns.clear()

            val buf = mutable.Buffer.empty[Int]

            for (i <- nsCopyToks.indices)
                idxsSeq.find(_.contains(i)) match {
                    case Some(idxs) =>
                        // Add the compound token only once - on the run's first index.
                        if (!buf.contains(idxs.head)) {
                            buf += idxs.head

                            ns += mkCompound(ns, nsCopyToks.toSeq, idxs.toSeq, stop = true, ns.size, None, history)
                        }
                    case None => simpleCopy(ns, history, nsCopyToks, i)
                }

            // Token indexes changed - re-anchor all notes.
            fixIndexes(ns, userNoteTypes)
        }
    }
| |
    /**
     * Fixes indexes for all notes after recreating tokens.
     *
     * @param ns Sentence.
     * @param userNoteTypes Notes types.
     */
    private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]): Unit = {
        // Replaces other notes indexes.
        for (t <- userNoteTypes :+ "nlpcraft:nlp"; note <- ns.getNotes(t)) {
            // Tokens that currently carry this note.
            val toks = ns.filter(_.contains(note))

            // Re-clone the note with the tokens' current (post-rebuild) indexes.
            val newNote = note.clone(toks.map(_.index), toks.flatMap(_.wordIndexes).toSeq.sorted)

            // Swap old note for re-indexed clone on every carrying token.
            toks.foreach(t => {
                t.remove(note)
                t.add(newNote)
            })
        }

        // Special case - field index of core NLP note.
        ns.zipWithIndex.foreach { case (tok, idx) => ns.fixNote(tok.getNlpNote, "index" -> idx) }
    }
| |
    /**
     * Zip notes with same type: glues each multi-token note of given type into one compound token.
     *
     * @param ns Sentence (rebuilt in place).
     * @param nType Notes type to zip.
     * @param userNotesTypes Notes types (used to re-anchor notes afterwards).
     * @param history Indexes transformation history (old index -> new index).
     */
    private def zipNotes(
        ns: NCNlpSentence,
        nType: String,
        userNotesTypes: Seq[String],
        history: mutable.ArrayBuffer[(Int, Int)]
    ): Unit = {
        // Multi-token notes of the given type, ordered by their first token.
        val nts = ns.getNotes(nType).filter(n => n.tokenFrom != n.tokenTo).sortBy(_.tokenFrom)

        // Skip zipping if any covered token also carries more than one user note.
        val overlapped =
            nts.flatMap(n => n.tokenFrom to n.tokenTo).map(ns(_)).exists(
                t => userNotesTypes.map(pt => t.getNotes(pt).size).sum > 1
            )

        if (nts.nonEmpty && !overlapped) {
            // Rebuild the sentence: one compound token per multi-token note, simple copy otherwise.
            val nsCopyToks = ns.clone()
            ns.clear()

            val buf = mutable.ArrayBuffer.empty[Int]

            for (i <- nsCopyToks.indices)
                nts.find(_.tokenIndexes.contains(i)) match {
                    case Some(n) =>
                        // Add the compound token only once - on the note's first token index.
                        if (!buf.contains(n.tokenFrom)) {
                            buf += n.tokenFrom

                            ns += mkCompound(ns, nsCopyToks.toSeq, n.tokenIndexes, stop = false, ns.size, Some(n), history)
                        }
                    case None => simpleCopy(ns, history, nsCopyToks, i)
                }

            // Token indexes changed - re-anchor all notes.
            fixIndexes(ns, userNotesTypes)
        }
    }
| |
    /**
     * Makes compound token from several source tokens.
     *
     * @param ns Sentence.
     * @param nsCopyToks Copy of the original sentence tokens.
     * @param indexes Indexes (into 'nsCopyToks') of the tokens being glued.
     * @param stop Stop-word flag of the new token.
     * @param idx Index of the new compound token.
     * @param commonNote Common note to re-attach to the new token, if any.
     * @param history Indexes transformation history (old index -> new index).
     * @return New compound token.
     */
    private def mkCompound(
        ns: NCNlpSentence,
        nsCopyToks: Seq[NCNlpSentenceToken],
        indexes: Seq[Int],
        stop: Boolean,
        idx: Int,
        commonNote: Option[NCNlpSentenceNote],
        history: mutable.ArrayBuffer[(Int, Int)]
    ): NCNlpSentenceToken = {
        val t = NCNlpSentenceToken(idx)

        // Note, it adds stop-words too.
        val content = nsCopyToks.zipWithIndex.filter { case (_, idx) => indexes.contains(idx) }.map { case (tok, _) => tok}

        // All glued tokens map to the single new index.
        content.foreach(t => history += t.index -> idx)

        // Joins per-token values, inserting a space only between tokens
        // that were not directly adjacent in the original text.
        def mkValue(get: NCNlpSentenceToken => String): String = {
            val buf = mutable.Buffer.empty[String]

            val n = content.size - 1

            content.zipWithIndex.foreach { case (t, idx) =>
                buf += get(t)

                if (idx < n && t.endCharIndex != content(idx + 1).startCharIndex)
                    buf += " "
            }

            buf.mkString
        }

        val origText = mkValue((t: NCNlpSentenceToken) => t.origText)

        val idxs = Seq(idx)
        val wordIdxs = content.flatMap(_.wordIndexes).sorted

        // 'direct' flag comes from the user note if present, otherwise all glued tokens must be direct.
        val direct =
            commonNote match {
                case Some(n) if n.isUser => n.isDirect
                case _ => content.forall(_.isDirect)
            }

        // NOTE(review): 'dict', 'english' and 'swear' below are computed over 'nsCopyToks'
        // (the whole copied sentence), not over 'content' (the glued tokens) - confirm
        // this is intended rather than a subtle bug.
        val params = Seq(
            "index" -> idx,
            "pos" -> NCPennTreebank.SYNTH_POS,
            "posDesc" -> NCPennTreebank.SYNTH_POS_DESC,
            "lemma" -> mkValue((t: NCNlpSentenceToken) => t.lemma),
            "origText" -> origText,
            "normText" -> mkValue((t: NCNlpSentenceToken) => t.normText),
            "stem" -> mkValue((t: NCNlpSentenceToken) => t.stem),
            "start" -> content.head.startCharIndex,
            "end" -> content.last.endCharIndex,
            "charLength" -> origText.length,
            "quoted" -> false,
            "stopWord" -> stop,
            "bracketed" -> false,
            "direct" -> direct,
            "dict" -> (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict") else false),
            "english" -> nsCopyToks.forall(_.getNlpNote.data[Boolean]("english")),
            "swear" -> nsCopyToks.exists(_.getNlpNote.data[Boolean]("swear"))
        )

        val nlpNote = NCNlpSentenceNote(idxs, wordIdxs, "nlpcraft:nlp", params: _*)

        t.add(nlpNote)

        // Adds processed note with fixed indexes.
        commonNote match {
            case Some(n) =>
                // Re-home the common note from the old tokens onto the new compound token.
                ns.removeNote(n)
                t.add(n.clone(idxs, wordIdxs))
            case None => // No-op.
        }

        t
    }
| |
    /**
     * Fixes notes with references list to other notes indexes.
     *
     * @param noteType Note type.
     * @param idxsField Indexes field.
     * @param noteField Note field.
     * @param ns Sentence.
     * @param history Indexes transformation history (old index -> new index).
     * @return Valid flag.
     */
    private def fixIndexesReferencesList(
        noteType: String,
        idxsField: String,
        noteField: String,
        ns: NCNlpSentence,
        history: Seq[(Int, Int)]
    ): Boolean = {
        var ok = true

        // Remap each inner index list via the history. After remapping, every inner list
        // must collapse to a single index (its tokens got glued into one compound token);
        // otherwise the sentence variant is invalid.
        for (tok <- ns.filter(_.isTypeOf(noteType)) if ok)
            tok.getNoteOpt(noteType, idxsField) match {
                case Some(n) =>
                    val idxs: Seq[Seq[Int]] = n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala.toSeq).toSeq
                    var fixed = idxs

                    history.foreach {
                        case (idxOld, idxNew) => fixed = fixed.map(_.map(i => if (i == idxOld) idxNew else i).distinct)
                    }

                    if (fixed.forall(_.size == 1))
                        // Fix double dimension array to one dimension,
                        // so it should be called always in spite of 'fixIndexesReferences' method.
                        ns.fixNote(n, idxsField -> fixed.map(_.head).asJava.asInstanceOf[JSerializable])
                    else
                        ok = false
                case None => // No-op.
            }

        // Each (index, type) reference pair must point at a token of the expected type.
        ok &&
        ns.flatMap(_.getNotes(noteType)).forall(rel =>
            rel.dataOpt[JList[Int]](idxsField) match {
                case Some(idxsList) =>
                    val notesTypes = rel.data[JList[String]](noteField)

                    require(idxsList.size() == notesTypes.size())

                    idxsList.asScala.zip(notesTypes.asScala).forall {
                        case (idxs, notesType) => checkRelation(ns, Seq(idxs), notesType, rel)
                    }
                case None => true
            }
        )
    }
| |
    /**
     * Fixes tokens positions: re-evaluates stop-word flags, glues tokens, remaps and
     * validates all inter-note references after the rebuild.
     *
     * @param ns Sentence.
     * @param notNlpTypes Token types.
     * @param lastPhase Phase flag - enables final consistency validation.
     * @return Valid flag.
     */
    private def collapseSentence(ns: NCNlpSentence, notNlpTypes: Seq[String], lastPhase: Boolean): Boolean = {
        // Clear the 'stopWord' flag on NLP notes of stop-word tokens that also carry non-NLP notes.
        ns.
            filter(!_.isNlp).
            filter(_.isStopWord).
            flatten.
            filter(_.isNlp).
            foreach(n => ns.fixNote(n, "stopWord" -> false))

        val all = ns.tokens.flatten
        val nsNotes: Map[String, Seq[Int]] = all.map(p => p.noteType -> p.tokenIndexes).toMap

        // Restore 'stopWord' flag where the original stop-reason note is still present with unchanged indexes.
        for (
            t <- ns.tokens; stopReason <- t.stopsReasons
                if all.contains(stopReason) && nsNotes.getOrElse(stopReason.noteType, Seq.empty) == stopReason.tokenIndexes
        )
            ns.fixNote(t.getNlpNote, "stopWord" -> true)

        val history = mutable.ArrayBuffer.empty[(Int, Int)]

        // Pre-validate references before any token gluing.
        fixNoteIndexes("nlpcraft:relation", "indexes", "note", ns)
        fixNoteIndexes("nlpcraft:limit", "indexes", "note", ns)
        fixNoteIndexesList("nlpcraft:sort", "subjindexes", "subjnotes", ns)
        fixNoteIndexesList("nlpcraft:sort", "byindexes", "bynotes", ns)

        // Glue multi-token notes and adjacent stop words; 'history' collects index remappings.
        notNlpTypes.foreach(typ => zipNotes(ns, typ, notNlpTypes, history))
        unionStops(ns, notNlpTypes, history)

        val histSeq = history.toSeq

        // Remap and re-validate all references with the collected history.
        val res =
            fixIndexesReferences("nlpcraft:relation", "indexes", "note", ns, histSeq) &&
            fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, histSeq) &&
            fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, histSeq) &&
            fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, histSeq)

        // On last phase - just for performance reasons.
        if (res && lastPhase) {
            // Validation (all indexes calculated well)
            require(
                !res ||
                !ns.flatten.exists(n => ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t => !t.contains(n))),
                s"Invalid sentence:\n" +
                ns.map(t =>
                    // Human readable invalid sentence for debugging.
                    s"${t.origText}{index:${t.index}}[${t.map(n => s"${n.noteType}, {range:${n.tokenFrom}-${n.tokenTo}}").mkString("|")}]"
                ).mkString("\n")
            )
        }

        res
    }
| |
    /**
     * Drops notes of the model's abstract token types unless they are referenced
     * (linked, or used as a token part) by other notes.
     *
     * @param mdl Model (provides abstract token types).
     * @param ns Sentence.
     */
    private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit = {
        val abstractToks = mdl.getAbstractTokens

        if (!abstractToks.isEmpty) {
            val notes = ns.flatten.distinct.filter(n => abstractToks.contains(n.noteType))

            val keys = getPartKeys(notes)
            val noteLinks = getLinks(notes)

            // Keep an abstract note only if it is linked by, or a part of, another note.
            notes.filter(n => {
                lazy val noteToks = ns.tokens.filter(t => t.index >= n.tokenFrom && t.index <= n.tokenTo)

                !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted)) &&
                // NOTE(review): the upper bound uses 'startCharIndex' of the LAST token -
                // looks like it may have been intended as 'endCharIndex'; confirm against
                // NCTokenPartKey.intersect() contract.
                !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex))
            }).foreach(ns.removeNote)
        }
    }
| |
| /** |
| * |
| * @param toks |
| * @return |
| */ |
| private def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] = |
| toks.flatten.filter(!_.isNlp).distinct |
| |
    /**
     * Saves deleted notes, together with clones of their carrying tokens, into given sentence.
     *
     * @param thisSen Sentence the tokens are taken (and cloned) from.
     * @param sen Sentence that accumulates the deleted notes.
     * @param dels Notes being deleted.
     */
    private def addDeleted(thisSen: NCNlpSentence, sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
        sen.addDeletedNotes(dels.map(n => {
            // Clone both the note and its tokens so later sentence mutations don't affect the snapshot.
            val savedDelNote = n.clone()
            val savedDelToks = n.tokenIndexes.map(idx => thisSen(idx).clone())

            val mainNotes = savedDelToks.flatten.filter(n => n.noteType != "nlpcraft:nlp" && n != savedDelNote)

            // Deleted note's tokens should contains only nlp data and deleted notes.
            for (savedDelTok <- savedDelToks; mainNote <- mainNotes)
                savedDelTok.remove(mainNote)

            savedDelNote -> savedDelToks
        }).toMap)
| |
    /**
     * Makes sentence variants: for each cached combination of overlapped notes to delete,
     * collapses a sentence copy without them, then deduplicates the results.
     *
     * @param sen Sentence.
     * @param mdl Model.
     * @param lastPhase Last phase flag.
     * @param overlappedNotes Overlapped (conflicting) notes.
     * @return Valid collapsed sentence variants.
     */
    private def mkVariants(
        sen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean, overlappedNotes: Seq[NCNlpSentenceNote]
    ): Seq[NCNlpSentence] = {
        // Collapses one sentence copy; None if the result is invalid.
        def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
            if (lastPhase)
                dropAbstract(mdl, ns)

            if (collapseSentence(ns, getNotNlpNotes(ns.tokens).map(_.noteType).distinct, lastPhase)) Some(ns) else None
        }

        if (overlappedNotes.nonEmpty) {
            // Groups of notes that overlap on the same word index, biggest groups first.
            val overlappedVars: Seq[Set[NCNlpSentenceNote]] =
                overlappedNotes.flatMap(note => note.wordIndexes.map(_ -> note)).
                    groupBy { case (idx, _) => idx }.
                    map { case (_, seq) => seq.map { case (_, note) => note }.toSet }.
                    toSeq.
                    sortBy(-_.size)

            // Note combinations to delete - computed once per request/key and cached.
            val delCombs: Seq[Seq[NCNlpSentenceNote]] =
                combCache.
                    getOrElseUpdate(
                        sen.srvReqId,
                        mutable.HashMap.empty[CacheKey, CacheValue]
                    ).
                    getOrElseUpdate(
                        overlappedVars,
                        NCSentenceHelper.findCombinations(
                            overlappedVars.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq
                        )
                    )

            // Collapse each deletion combination in parallel.
            val seqSens =
                delCombs.
                    par.
                    flatMap(delComb => {
                        val nsClone = sen.clone()

                        // Saves deleted notes for sentence and their tokens.
                        addDeleted(sen, nsClone, delComb)
                        delComb.foreach(nsClone.removeNote)

                        // Has overlapped notes for some tokens.
                        require(!nsClone.exists(_.count(!_.isNlp) > 1))

                        collapse0(nsClone)
                    }).seq

            // It removes sentences which have only one difference - 'direct' flag of their user tokens.
            // `Direct` sentences have higher priority.
            type Key = Seq[Map[String, JSerializable]]
            case class Holder(key: Key, sentence: NCNlpSentence, factor: Int)

            def mkHolder(sen: NCNlpSentence): Holder = {
                val notes = sen.flatten

                Holder(
                    // We have to delete some keys to have possibility to compare sentences.
                    notes.map(_.clone().toMap.filter { case (name, _) => name != "direct" }).toSeq,
                    sen,
                    // Lower factor (more direct NLP notes) wins among equal keys.
                    notes.filter(_.isNlp).map(p => if (p.isDirect) 0 else 1).sum
                )
            }

            seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) => seq.minBy(_.factor).sentence }.toSeq
        }
        else
            // No conflicts - at most one variant: the collapsed sentence itself.
            collapse0(sen).flatMap(p => Option(Seq(p))).getOrElse(Seq.empty)
    }
| |
    /**
     * This collapser handles several tasks:
     * - "overall" collapsing after all other individual collapsers had their turn.
     * - Special further enrichment of tokens like linking, etc.
     *
     * In all cases of overlap (full or partial) - the "longest" note wins. In case of overlap and equal
     * lengths - the winning note is chosen based on this priority.
     *
     * @param mdl Model.
     * @param sen Sentence.
     * @param lastPhase Last phase flag.
     * @return Collapsed sentence variants.
     */
    @throws[NCE]
    def collapse(mdl: NCModel, sen: NCNlpSentence, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
        // Always deletes `similar` notes.
        // Some words with same note type can be detected various ways.
        // We keep only one variant - with `best` direct and sparsity parameters,
        // other variants for these words are redundant.
        val redundant: Seq[NCNlpSentenceNote] =
            sen.flatten.filter(!_.isNlp).distinct.
                groupBy(_.getKey()).
                map(p => p._2.sortBy(p =>
                    (
                        // System notes don't have such flags.
                        if (p.isUser) {
                            if (p.isDirect)
                                0
                            else
                                1
                        }
                        else
                            0,
                        if (p.isUser)
                            p.sparsity
                        else
                            0
                    )
                )).
                flatMap(_.drop(1)).
                toSeq

        redundant.foreach(sen.removeNote)

        // Non-NLP notes that share tokens with other non-NLP notes.
        var overlappedNotes: Seq[NCNlpSentenceNote] =
            getNotNlpNotes(sen.tokens).
                flatMap(note => getNotNlpNotes(note.tokenIndexes.map(sen(_))).filter(_ != note)).
                distinct

        // Optimization. Deletes all wholly swallowed notes.
        val links = getLinks(sen.tokens.toSeq.flatten)

        val swallowed =
            overlappedNotes.
                // There aren't links on it.
                filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
                // It doesn't have links.
                filter(n => getPartKeys(n).isEmpty).
                flatMap(note => {
                    val noteWordsIdxs = note.wordIndexesSet
                    val key = NCTokenPartKey(note, sen)

                    // Other overlapped notes that reference this note as a part.
                    val delCombOthers =
                        overlappedNotes.filter(_ != note).flatMap(n => if (getPartKeys(n).contains(key)) Some(n) else None)

                    // Swallowed if used as a part elsewhere but not fully covered by any of those notes.
                    if (
                        delCombOthers.nonEmpty &&
                        !delCombOthers.exists(o => noteWordsIdxs.subsetOf(o.wordIndexesSet))
                    )
                        Some(note)
                    else
                        None
                })

        overlappedNotes = overlappedNotes.filter(p => !swallowed.contains(p))
        addDeleted(sen, sen, swallowed)
        swallowed.foreach(sen.removeNote)

        var sens = mkVariants(sen, mdl, lastPhase, overlappedNotes)

        // Sanity check: every token must carry exactly one NLP note and at most one non-NLP note.
        sens.par.foreach(sen =>
            sen.foreach(tok =>
                tok.size match {
                    case 1 => require(tok.head.isNlp, s"Unexpected non-'nlpcraft:nlp' token: $tok")
                    case 2 => require(tok.head.isNlp ^ tok.last.isNlp, s"Unexpected token notes: $tok")
                    case _ => require(requirement = false, s"Unexpected token notes count: $tok")
                }
            )
        )

        // There are optimizations below. Similar variants by some criteria deleted.

        // Drops similar sentences with same notes structure based on greedy elements. Keeps with more notes found.
        val notGreedyElems =
            mdl.getElements.asScala.flatMap(e => if (!e.isGreedy.orElse(mdl.isGreedy)) Some(e.getId) else None).toSet

        sens = sens.groupBy(p => getNotNlpNotes(p.tokens).groupBy(_.noteType).keys.toSeq.sorted.distinct).
            flatMap { case (types, sensSeq) =>
                if (types.exists(notGreedyElems.contains))
                    sensSeq
                else {
                    // All element types greedy - keep only variants with the maximum notes count.
                    val m: Map[NCNlpSentence, Int] = sensSeq.map(p => p -> getNotNlpNotes(p.tokens).size).toMap

                    val max = m.values.max

                    m.filter(_._2 == max).keys
                }
            }.toSeq

        var sensWithNotes = sens.map(s => s -> s.flatten.filter(!_.isNlp).toSet)

        var sensWithNotesIdxs = sensWithNotes.zipWithIndex

        // Drops similar sentences if there are other sentences with superset of notes.
        sens =
            sensWithNotesIdxs.filter { case ((_, notNlpNotes1), idx1) =>
                !sensWithNotesIdxs.
                    filter { case (_, idx2) => idx2 != idx1 }.
                    exists { case((_, notNlpNotes2), _) => notNlpNotes1.subsetOf(notNlpNotes2) }
            }.map { case ((sen, _), _) => sen }

        // Drops similar sentences. Among similar sentences we prefer one with minimal free words count.
        sens = sens.groupBy(p => getNotNlpNotes(p.tokens).map(_.getKey(withIndexes = false))).
            map { case (_, seq) => seq.minBy(_.filter(p => p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
            toSeq

        // Drops sentences if they are just subset of another (indexes ignored here)
        sensWithNotes = sensWithNotes.filter { case (sen, _) => sens.contains(sen) }

        sensWithNotesIdxs = sensWithNotes.zipWithIndex

        sens = sensWithNotesIdxs.filter { case ((_, notNlpNotes1), idx1) =>
            !sensWithNotesIdxs.exists { case ((_, notNlpNotes2), idx2) =>
                idx1 != idx2 && {
                    // Strictly larger note set whose notes all match (ignoring indexes).
                    notNlpNotes2.size > notNlpNotes1.size &&
                    notNlpNotes1.forall(t1 => notNlpNotes2.exists(_.equalsWithoutIndexes(t1)))
                }
            }
        }.map { case ((sen, _), _) => sen }

        sens
    }
| |
    /**
     * Starts this service: allocates the fork-join pool used for note-combination search.
     *
     * @param parent Parent tracing span.
     */
    override def start(parent: Span): NCService = {
        ackStarting()

        pool = new java.util.concurrent.ForkJoinPool()

        ackStarted()
    }
| |
    /**
     * Stops this service: shuts down the fork-join pool.
     *
     * @param parent Parent tracing span.
     */
    override def stop(parent: Span): Unit = {
        ackStopping()

        U.shutdownPool(pool)

        ackStopped()
    }
| |
| /** |
| * |
| * @param srvReqId |
| */ |
| def clearRequestData(srvReqId: String): Unit = combCache -= srvReqId |
| } |