| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * https://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model |
| |
| import io.opencensus.trace.Span |
| import org.apache.nlpcraft.common._ |
| import org.apache.nlpcraft.common.nlp.{NCNlpSentence => Sentence, NCNlpSentenceNote => NlpNote, NCNlpSentenceToken => NlpToken} |
| import org.apache.nlpcraft.model._ |
| import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent |
| import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind |
| import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher |
| import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl |
| import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager |
| import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym} |
| |
| import java.io.Serializable |
| import java.util.{List => JList} |
| import scala.collection.mutable |
| import scala.collection.mutable.ArrayBuffer |
| import scala.collection.parallel.CollectionConverters._ |
| import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, MapHasAsScala, SeqHasAsJava} |
| |
| /** |
| * Model elements enricher. |
| */ |
| object NCModelEnricher extends NCProbeEnricher { |
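| // `TokType` pairs a detected token with the kind of synonym chunk that matched it. |
| // `Cache` maps element ID to the continuous token index slices already marked for it. |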
| type TokType = (NCToken, NCSynonymChunkKind) |
| type Cache = mutable.Map[String, ArrayBuffer[Seq[Int]]] |
| |
| object Complex { |
| def apply(t: NCToken): Complex = |
| Complex( |
| data = Left(t), |
| isToken = true, |
| isWord = false, |
| token = t, |
| word = null, |
| origText = t.origText, |
| wordIndexes = t.wordIndexes.toSet, |
| minIndex = t.wordIndexes.head, |
| maxIndex = t.wordIndexes.last |
| ) |
| |
| def apply(t: NlpToken): Complex = |
| Complex( |
| data = Right(t), |
| isToken = false, |
| isWord = true, |
| token = null, |
| word = t, |
| origText = t.origText, |
| wordIndexes = t.wordIndexes.toSet, |
| minIndex = t.wordIndexes.head, |
| maxIndex = t.wordIndexes.last |
| ) |
| } |
| |
| case class Complex( |
| data: NCIdlContent, |
| isToken: Boolean, |
| isWord: Boolean, |
| token: NCToken, |
| word: NlpToken, |
| origText: String, |
| wordIndexes: Set[Int], |
| minIndex: Int, |
| maxIndex: Int |
| ) { |
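| // Precomputed hash: tokens are distinguished by word indexes and element ID, plain words by word indexes only. |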
| private final val hash = if (isToken) Seq(wordIndexes, token.getId).hashCode() else wordIndexes.hashCode() |
| |
| override def hashCode(): Int = hash |
| |
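| // Fast index range rejection before the actual subset test. |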
| def isSubsetOf(minIndex: Int, maxIndex: Int, indexes: Set[Int]): Boolean = |
| if (this.minIndex > maxIndex || this.maxIndex < minIndex) |
| false |
| else |
| wordIndexes.subsetOf(indexes) |
| |
| override def equals(obj: Any): Boolean = obj match { |
| case x: Complex => |
| hash == x.hash && (isToken && x.isToken && token == x.token || isWord && x.isWord && word == x.word) |
| case _ => false |
| } |
| |
| // Added for debugging purposes. |
| override def toString: String = { |
| val idxs = wordIndexes.mkString(",") |
| |
| if (isToken && token.getId != "nlpcraft:nlp") s"'$origText' (${token.getId}) [$idxs]" else s"'$origText' [$idxs]" |
| } |
| } |
| |
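| // One variant combination: its token complexes plus the set of word indexes covered by all its complexes. |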
| object ComplexSeq { |
| def apply(all: Seq[Complex]): ComplexSeq = ComplexSeq(all.filter(_.isToken), all.flatMap(_.wordIndexes).toSet) |
| } |
| |
| case class ComplexSeq(tokensComplexes: Seq[Complex], wordsIndexes: Set[Int]) { |
| private val (idxsSet: Set[Int], minIndex: Int, maxIndex: Int) = { |
| val seq = tokensComplexes.flatMap(_.wordIndexes).distinct.sorted |
| |
| (seq.toSet, seq.head, seq.last) |
| } |
| |
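| // Fast index range rejection before the actual intersection test. |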
| def isIntersect(minIndex: Int, maxIndex: Int, idxsSet: Set[Int]): Boolean = |
| if (this.minIndex > maxIndex || this.maxIndex < minIndex) |
| false |
| else |
| this.idxsSet.exists(idxsSet.contains) |
| |
| override def toString: String = tokensComplexes.mkString(" | ") |
| } |
| |
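| // Holder of word-level complexes for the whole sentence and of all token-based variant combinations. |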
| case class ComplexHolder(complexesWords: Seq[Complex], complexes: Seq[ComplexSeq]) |
| |
| /** |
| * |
| * @param parent Optional parent span. |
| * @return Started service. |
| */ |
| override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ => |
| ackStarting() |
| ackStarted() |
| } |
| |
| /** |
| * |
| * @param parent Optional parent span. |
| */ |
| override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ => |
| ackStopping() |
| ackStopped() |
| } |
| |
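| /** |
| * Tests whether the given model requires complex processing, i.e. has IDL synonyms or custom parsers. |
| * |
| * @param mdl Probe model. |
| */ |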
| def isComplex(mdl: NCProbeModel): Boolean = mdl.hasIdlSynonyms || !mdl.model.getParsers.isEmpty |
| |
| /** |
| * Marks given tokens with a note for the detected model element. |
| * |
| * @param ns Sentence. |
| * @param elem Detected model element. |
| * @param toks Tokens to mark. |
| * @param direct Direct or permuted word order flag. |
| * @param syn Optional matched synonym. |
| * @param parts Parts for composite (IDL-based) tokens. |
| * @param metaOpt Optional note metadata. |
| */ |
| private def mark( |
| ns: Sentence, |
| elem: NCElement, |
| toks: Seq[NlpToken], |
| direct: Boolean, |
| syn: Option[Synonym] = None, |
| parts: Seq[TokType] = Seq.empty, |
| metaOpt: Option[Map[String, Object]] = None |
| ): Unit = { |
| val params = mutable.ArrayBuffer.empty[(String, AnyRef)] |
| |
| // For system elements. |
| params += "direct" -> direct.asInstanceOf[AnyRef] |
| |
| syn match { |
| case Some(s) => |
| if (s.isValueSynonym) |
| params += "value" -> s.value |
| case None => // No-op. |
| } |
| |
| metaOpt match { |
| case Some(meta) => params += "meta" -> meta |
| case None => // No-op. |
| } |
| |
| if (parts.nonEmpty) |
| params += "parts" -> parts.map { case (p, kind) => NCTokenPartKey(p, kind) }.asJava |
| |
| val idxs = toks.map(_.index).sorted |
| |
| val note = NlpNote(idxs, elem.getId, params: _*) |
| |
| toks.foreach(_.add(note)) |
| |
| // For NLP elements. |
| toks.foreach(t => ns.fixNote(t.getNlpNote, "direct" -> direct)) |
| } |
| |
| /** |
| * Runs custom model parsers, if any, and marks the elements they return. |
| * |
| * @param mdl Probe model. |
| * @param ns Sentence. |
| * @param span Parent span. |
| * @param req Request descriptor. |
| */ |
| private def processParsers(mdl: NCProbeModel, ns: Sentence, span: Span, req: NCRequestImpl): Unit = { |
| for (parser <- mdl.model.getParsers.asScala) { |
| parser.onInit() |
| |
| startScopedSpan("customParser", span, |
| "srvReqId" -> ns.srvReqId, |
| "mdlId" -> mdl.model.getId, |
| "txt" -> ns.text |
| ) { _ => |
| def to(t: NlpToken): NCCustomWord = |
| new NCCustomWord { |
| override def getNormalizedText: String = t.normText |
| override def getOriginalText: String = t.origText |
| override def getStartCharIndex: Int = t.startCharIndex |
| override def getEndCharIndex: Int = t.endCharIndex |
| override def getPos: String = t.pos |
| override def getPosDescription: String = t.posDesc |
| override def getLemma: String = t.lemma |
| override def getStem: String = t.stem |
| override def isStopWord: Boolean = t.isStopWord |
| override def isBracketed: Boolean = t.isBracketed |
| override def isQuoted: Boolean = t.isQuoted |
| override def isKnownWord: Boolean = t.isKnownWord |
| override def isSwearWord: Boolean = t.isSwearWord |
| override def isEnglish: Boolean = t.isEnglish |
| } |
| |
| val res = parser.parse( |
| req, |
| mdl.model, |
| ns.map(to).asJava, |
| ns.flatten.distinct.filter(!_.isNlp).map(n => { |
| val noteId = n.noteType |
| val words = ns.filter(t => n.tokenIndexes.contains(t.index)).map(to).asJava |
| val md = n.asMetadata() |
| |
| new NCCustomElement() { |
| override def getElementId: String = noteId |
| override def getWords: JList[NCCustomWord] = words |
| override def getMetadata: JavaMeta = md.map(p => p._1 -> p._2.asInstanceOf[AnyRef]).asJava |
| } |
| }).asJava |
| ) |
| |
| if (res != null) |
| res.asScala.foreach(e => { |
| val elmId = e.getElementId |
| val words = e.getWords |
| |
| if (elmId == null) |
| throw new NCE(s"Custom model parser cannot return 'null' element ID.") |
| |
| if (words == null || words.isEmpty) |
| throw new NCE(s"Custom model parser cannot return empty custom tokens for element: $elmId") |
| |
| val matchedToks = words.asScala.map(w => |
| ns.find(t => |
| t.startCharIndex == w.getStartCharIndex && t.endCharIndex == w.getEndCharIndex |
| ).getOrElse(throw new AssertionError(s"Custom model parser returned an invalid custom token: $w")) |
| ) |
| |
| // Check that the element is not already marked for these tokens. |
| if (!alreadyMarked(ns, elmId, matchedToks, matchedToks.map(_.index).sorted)) |
| mark( |
| ns, |
| elem = mdl.elements.getOrElse(elmId, throw new NCE(s"Custom model parser returned unknown element: $elmId")), |
| toks = matchedToks, |
| direct = true, |
| metaOpt = Some(e.getMetadata.asScala.toMap) |
| ) |
| }) |
| } |
| |
| parser.onDiscard() |
| } |
| } |
| |
| /** |
| * Gets all contiguous sub-sequences (slices) of the given tokens, longest first. |
| * |
| * For example, if the buffer contains tokens "a b c d", then this function will return |
| * the following token sequences in this order: |
| * "a b c d" |
| * "a b c" |
| * "b c d" |
| * "a b" |
| * "b c" |
| * "c d" |
| * "a" |
| * "b" |
| * "c" |
| * "d" |
| * |
| * @param toks Tokens to combine. |
| * @return All contiguous sub-sequences, longest first. |
| */ |
| private def combos[T](toks: Seq[T]): Seq[Seq[T]] = |
| (for (n <- toks.size until 0 by -1) yield toks.sliding(n)).flatten |
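| // Illustrative only, assuming standard Seq semantics: |
| // combos(Seq("a", "b", "c")) == Seq(Seq("a", "b", "c"), Seq("a", "b"), Seq("b", "c"), Seq("a"), Seq("b"), Seq("c")) |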
| |
| /** |
| * Converts matched IDL content to token parts, keeping token (left) entries only. |
| * |
| * @param seq Matched IDL content (tokens or words). |
| * @param s Matched synonym that provides chunk kinds. |
| */ |
| private def toParts(seq: Seq[NCIdlContent], s: Synonym): Seq[TokType] = |
| seq.zip(s.map(_.kind)).flatMap { |
| case (complex, kind) => if (complex.isLeft) Some(complex.swap.toOption.get -> kind) else None |
| } |
| |
| /** |
| * Converts matched IDL content back to sentence tokens, resolving words to the tokens that contain them. |
| * |
| * @param tows Matched IDL content (tokens or words). |
| * @param ns Sentence. |
| */ |
| private def toTokens(tows: Seq[NCIdlContent], ns: Sentence): Seq[NlpToken] = |
| ( |
| tows.filter(_.isRight).map(_.toOption.get) ++ |
| tows.filter(_.isLeft).map(_.swap.toOption.get). |
| flatMap(w => ns.filter(t => t.wordIndexes.intersect(w.wordIndexes).nonEmpty)) |
| ).sortBy(_.startCharIndex) |
| |
| /** |
| * Gets element's synonyms from the given map. |
| * |
| * @param m Element ID to synonyms map. |
| * @param id Element ID. |
| * @return Element's synonyms or an empty sequence if none are found. |
| */ |
| private def get(m: Map[String, Seq[Synonym]], id: String): Seq[Synonym] = m.getOrElse(id, Seq.empty) |
| |
| /** |
| * Gets synonyms sorted in descending order by their weight (already prepared), |
| * i.e. the first synonym in the sequence is the most important one. |
| * |
| * @param fastMap {Element ID -> {Synonym length -> T}} |
| * @param elmId Element ID. |
| * @param len Synonym length in tokens. |
| */ |
| private def fastAccess[T](fastMap: Map[String, Map[Int, T]], elmId: String, len: Int): Option[T] = |
| fastMap.getOrElse(elmId, Map.empty[Int, T]).get(len) |
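| // Illustrative only ('my:elem' is a hypothetical element ID): |
| // fastAccess(Map("my:elem" -> Map(2 -> syns)), "my:elem", 2) == Some(syns), while a miss on either key yields None. |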
| |
| /** |
| * Prepares complexes (token and word combinations) for all variants of the given sentence. |
| * |
| * @param mdl Probe model. |
| * @param ns Sentence. |
| */ |
| private def mkComplexes(mdl: NCProbeModel, ns: Sentence): ComplexHolder = { |
| val complexesWords = ns.map(Complex(_)) |
| |
| val complexes = |
| NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())). |
| map(_.asScala). |
| par. |
| flatMap(sen => |
| // Tokens splitting. |
| // For example, the sentence "A B C D E" (5 words) is processed as 3 tokens on the first phase after collapsing: |
| // 'A B' (2 words), 'C D' (2 words) and 'E' (1 word). |
| // So, the resulting combinations will be: |
| // Token(AB) + Token(CD) + Token(E) |
| // Token(AB) + Word(C) + Word(D) + Token(E) |
| // Word(A) + Word(B) + Token(CD) + Token(E) |
| // Word(A) + Word(B) + Word(C) + Word(D) + Token(E) |
| combos(sen). |
| map(senPartComb => { |
| sen.flatMap(t => |
| // A single-word token, or a token fully contained in this combination, is kept as a token. |
| // Any other token is split into its individual words. |
| if (t.wordIndexes.length == 1 || senPartComb.contains(t)) |
| Seq(Complex(t)) |
| else |
| t.wordIndexes.map(complexesWords) |
| ) |
| // Drop combinations without tokens (the IDL part works with tokens only). |
| }).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct |
| ).seq |
| |
| ComplexHolder(complexesWords, complexes) |
| } |
| |
| /** |
| * Makes IDL combinations for the given slice of tokens. |
| * |
| * @param h Complexes holder. |
| * @param toks Sentence slice tokens. |
| * @param cache Already processed combinations to skip. |
| */ |
| private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: Set[Seq[Complex]]): Seq[Seq[Complex]] = { |
| val idxs = toks.flatMap(_.wordIndexes).toSet |
| |
| h.complexes.par. |
| flatMap(complexSeq => { |
| val rec = complexSeq.tokensComplexes.filter(_.wordIndexes.exists(idxs.contains)) |
| |
| // Drop combinations without tokens (the IDL part works with tokens only). |
| if (rec.nonEmpty) { |
| val data = rec ++ |
| (complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.complexesWords) |
| |
| if (!cache.contains(data)) Some(data) else None |
| } |
| else |
| None |
| }).seq |
| } |
| |
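| /** |
| * Adds a match for the given element if it is not already marked, updating the continuous-match |
| * cache and tracing the result in deep-debug mode. |
| * |
| * @param dbgType Debug type label used in trace logging. |
| * @param ns Sentence. |
| * @param contCache Cache of already marked continuous token index slices per element ID. |
| * @param elem Matched model element. |
| * @param elemToks Tokens matched for the element. |
| * @param sliceToksIdxs Sorted token indexes of the processed sentence slice. |
| * @param syn Matched synonym. |
| * @param parts Parts for composite (IDL-based) tokens. |
| */ |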
| private def add( |
| dbgType: String, |
| ns: Sentence, |
| contCache: Cache, |
| elem: NCElement, |
| elemToks: Seq[NlpToken], |
| sliceToksIdxs: Seq[Int], |
| syn: Synonym, |
| parts: Seq[TokType] = Seq.empty) |
| : Unit = { |
| val resIdxs = elemToks.map(_.index) |
| val resIdxsSorted = resIdxs.sorted |
| |
| if (resIdxsSorted == sliceToksIdxs && U.isContinuous(resIdxsSorted)) |
| contCache(elem.getId) += sliceToksIdxs |
| |
| val ok = !alreadyMarked(ns, elem.getId, elemToks, sliceToksIdxs) |
| |
| if (ok) |
| mark(ns, elem, elemToks, direct = syn.isDirect && U.isIncreased(resIdxs), syn = Some(syn), parts = parts) |
| |
| if (DEEP_DEBUG) |
| logger.trace( |
| s"${if (ok) "Added" else "Skipped"} element [" + |
| s"id=${elem.getId}, " + |
| s"type=$dbgType, " + |
| s"text='${elemToks.map(_.origText).mkString(" ")}', " + |
| s"indexes=${resIdxs.mkString("[", ",", "]")}, " + |
| s"allTokensIndexes=${sliceToksIdxs.mkString("[", ",", "]")}, " + |
| s"synonym=$syn" + |
| s"]" |
| ) |
| } |
| |
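| /** |
| * Enriches the given sentence with model element tokens. |
| * |
| * @param mdl Probe model. |
| * @param ns Sentence to enrich. |
| * @param senMeta Sentence metadata. |
| * @param parent Optional parent span. |
| */ |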
| @throws[NCE] |
| override def enrich(mdl: NCProbeModel, ns: Sentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { |
| require(isStarted) |
| |
| startScopedSpan( |
| "enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text |
| ) { span => |
| val req = NCRequestImpl(senMeta, ns.srvReqId) |
| val combToks = combos(ns.toSeq) |
| lazy val ch = mkComplexes(mdl, ns) |
| |
| def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit = |
| startScopedSpan( |
| "execute", span, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text |
| ) { _ => |
| if (DEEP_DEBUG) |
| logger.trace(s"Execution started [simpleEnabled=$simpleEnabled, idlEnabled=$idlEnabled]") |
| |
| val contCache = mutable.HashMap.empty ++ |
| mdl.elements.keys.map(k => k -> mutable.ArrayBuffer.empty[Seq[Int]]) |
| lazy val idlCache = mutable.HashSet.empty[Seq[Complex]] |
| |
| for ( |
| toks <- combToks; |
| idxs = toks.map(_.index); |
| e <- mdl.elements.values; |
| eId = e.getId |
| if |
| !contCache(eId).exists(_.containsSlice(idxs)) && |
| !alreadyMarked(ns, eId, toks, idxs) |
| ) { |
| // 1. SIMPLE. |
| if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(eId) else !mdl.hasIdlSynonyms(eId))) { |
| lazy val tokStems = toks.map(_.stem).mkString(" ") |
| |
| // 1.1 Continuous. |
| var found = false |
| |
| if (mdl.hasContinuousSynonyms) |
| fastAccess(mdl.continuousSynonyms, eId, toks.length) match { |
| case Some(h) => |
| def tryMap(syns: Map[String, Synonym], notFound: () => Unit): Unit = |
| syns.get(tokStems) match { |
| case Some(s) => |
| found = true |
| add("simple continuous", ns, contCache, e, toks, idxs, s) |
| case None => notFound() |
| } |
| |
| def tryScan(syns: Seq[Synonym]): Unit = |
| for (s <- syns if !found) |
| if (s.isMatch(toks)) { |
| found = true |
| add("simple continuous scan", ns, contCache, e, toks, idxs, s) |
| } |
| |
| tryMap( |
| h.txtDirectSynonyms, |
| () => { |
| tryScan(h.notTxtDirectSynonyms) |
| |
| if (!found) |
| tryMap(h.txtNotDirectSynonyms, () => tryScan(h.notTxtNotDirectSynonyms)) |
| } |
| ) |
| case None => // No-op. |
| } |
| |
| // 1.2 Sparse. |
| if (!found && mdl.hasSparseSynonyms) |
| for (s <- get(mdl.sparseSynonyms, eId)) |
| s.sparseMatch(toks) match { |
| case Some(res) => add("simple sparse", ns, contCache, e, res, idxs, s) |
| case None => // No-op. |
| } |
| } |
| |
| // 2. IDL. |
| if (idlEnabled) { |
| val allSyns = get(mdl.idlSynonyms, eId) |
| lazy val allCombs = mkCombinations(ch, toks, idlCache.toSet) |
| |
| // 2.1 Continuous. |
| |
| if (!mdl.hasSparseSynonyms) { |
| var found = false |
| |
| for ( |
| s <- allSyns; |
| comb <- allCombs |
| if !found; |
| data = comb.map(_.data) |
| ) |
| if (s.isMatch(data, req)) { |
| add("IDL continuous", ns, contCache, e, toks, idxs, s, toParts(data, s)) |
| |
| idlCache += comb |
| |
| found = true |
| } |
| } |
| else |
| // 2.2 Sparse. |
| for ( |
| s <- allSyns; |
| comb <- allCombs |
| ) |
| s.sparseMatch(comb.map(_.data), req) match { |
| case Some(res) => |
| val typ = if (s.sparse) "IDL sparse" else "IDL continuous" |
| |
| add(typ, ns, contCache, e, toTokens(res, ns), idxs, s, toParts(res, s)) |
| |
| idlCache += comb |
| case None => // No-op. |
| } |
| } |
| } |
| } |
| |
| if (ns.firstProbePhase) { |
| ns.firstProbePhase = false |
| |
| if (mdl.hasNoIdlSynonyms) |
| execute(simpleEnabled = true, idlEnabled = false) |
| execute(simpleEnabled = mdl.hasNoIdlSynonyms, idlEnabled = mdl.hasIdlSynonyms) |
| } |
| else if (mdl.hasIdlSynonyms) |
| execute(simpleEnabled = false, idlEnabled = true) |
| |
| processParsers(mdl, ns, span, req) |
| } |
| } |
| |
| // TODO: simplify, add tests, check model properties (sparse etc.) for optimization. |
| /** |
| * Checks whether the given element is already marked on the given tokens. |
| * |
| * @param ns Sentence. |
| * @param elmId Element ID. |
| * @param toks Tokens to check. |
| * @param sliceToksIdxsSorted Sorted token indexes of the processed sentence slice. |
| */ |
| private def alreadyMarked(ns: Sentence, elmId: String, toks: Seq[NlpToken], sliceToksIdxsSorted: Seq[Int]): Boolean = { |
| lazy val toksIdxsSorted = toks.map(_.index).sorted |
| |
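| // Either the whole slice is already covered by continuous notes of this element, |
| // or one of the checked tokens carries a note of this element whose indexes cover, |
| // or are covered by, the checked indexes (with continuity constraints). |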
| sliceToksIdxsSorted.map(ns).forall(_.exists(n => n.noteType == elmId && n.sparsity == 0)) || |
| toks.exists(_.exists(n => |
| n.noteType == elmId && |
| ( |
| (n.sparsity == 0 && |
| (sliceToksIdxsSorted.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted)) |
| ) |
| || |
| ( |
| n.tokenIndexes == toksIdxsSorted || |
| n.tokenIndexes.containsSlice(toksIdxsSorted) && |
| U.isContinuous(toksIdxsSorted) && |
| U.isContinuous(n.tokenIndexes) |
| ) |
| ) |
| )) |
| } |
| } |