Merge branch 'master' into NLPCRAFT-70_NEW
diff --git a/nlpcraft/pom.xml b/nlpcraft/pom.xml
index 42726f7..adfbf52 100644
--- a/nlpcraft/pom.xml
+++ b/nlpcraft/pom.xml
@@ -232,6 +232,11 @@
<groupId>org.jline</groupId>
<artifactId>jline</artifactId>
</dependency>
+ <!-- TODO: add this library license description. -->
+ <dependency>
+ <groupId>org.jibx</groupId>
+ <artifactId>jibx-tools</artifactId>
+ </dependency>
<!-- Test dependencies. -->
<dependency>
diff --git a/nlpcraft/src/main/resources/log4j2.xml b/nlpcraft/src/main/resources/log4j2.xml
index d9a627b..44590c3 100644
--- a/nlpcraft/src/main/resources/log4j2.xml
+++ b/nlpcraft/src/main/resources/log4j2.xml
@@ -36,7 +36,7 @@
<AppenderRef ref="stdout"/>
<AppenderRef ref="stderr"/>
</Root>
- <Logger name="org.apache.nlpcraft" level="INFO" additivity="false">
+ <Logger name="org.apache.nlpcraft" level="${env:NLPCRAFT_LOG_LEVEL:-INFO}" additivity="false">
<AppenderRef ref="stdout"/>
<AppenderRef ref="stderr"/>
</Logger>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index f508745..7b5d058 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -18,6 +18,7 @@
package org.apache.nlpcraft.common.nlp
import org.apache.nlpcraft.common._
+import org.apache.nlpcraft.server.mdo.NCCtxWordCategoriesConfigMdo
import java.io.{Serializable => JSerializable}
import java.util.{Collections, List => JList}
@@ -40,6 +41,7 @@
* @param srvReqId Server request ID.
* @param text Normalized text.
* @param enabledBuiltInToks Enabled built-in tokens.
+ * @param ctxWordConfig Machine learning configuration. Optional.
* @param tokens Initial buffer.
* @param firstProbePhase Processing phase flag.
* @param deletedNotes Deleted overridden notes with their tokens.
@@ -50,6 +52,8 @@
val srvReqId: String,
val text: String,
val enabledBuiltInToks: Set[String],
+ val ctxWordConfig: Option[NCCtxWordCategoriesConfigMdo] = None,
+ var ctxWordCategories: Map[/** Token index*/Int, Map[/** Elements ID*/String, /** Confidence*/Double]] = Map.empty,
override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new mutable.ArrayBuffer[NCNlpSentenceToken](32),
var firstProbePhase: Boolean = true,
private val deletedNotes: mutable.HashMap[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
@@ -67,6 +71,7 @@
srvReqId = srvReqId,
text = text,
enabledBuiltInToks = enabledBuiltInToks,
+ ctxWordConfig = ctxWordConfig,
tokens = tokens.map(_.clone()),
deletedNotes = deletedNotes.map(p => p._1.clone() -> p._2.map(_.clone())),
initNlpNotes = initNlpNotes,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/pos/NCPennTreebank.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/pos/NCPennTreebank.scala
index a61c63a..0c6e0de 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/pos/NCPennTreebank.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/pos/NCPennTreebank.scala
@@ -68,7 +68,9 @@
final val SYNTH_POS_DESC = "Synthetic tag"
// Useful POS tags sets.
- final val NOUNS_POS = Seq("NN", "NNS", "NNP", "NNPS")
+ final val NOUNS_POS_PLURALS = Seq("NNS", "NNPS")
+ final val NOUNS_POS_SINGULAR = Seq("NN", "NNP")
+ final val NOUNS_POS = NOUNS_POS_PLURALS ++ NOUNS_POS_SINGULAR
final val VERBS_POS = Seq("VB", "VBD", "VBG", "VBN", "VBP", "VBZ")
final val WHS_POS = Seq("WDT", "WP", "WP$", "WRB")
final val JJS_POS = Seq("JJ", "JJR", "JJS")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/package.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/package.scala
index da194cc..e2b4bf1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/package.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/package.scala
@@ -37,8 +37,8 @@
final val U = NCUtils
// Internal deep debug flag (more verbose tracing).
- final val DEEP_DEBUG = false
-
+ final val DEEP_DEBUG = U.isSysEnvSet("NLPCRAFT_DEEP_DEBUG")
+
// Model and token **internal** metadata keys.
final val TOK_META_ALIASES_KEY = "__NLPCRAFT_TOK_META_ALIASES"
final val MDL_META_MODEL_CLASS_KEY = "__NLPCRAFT_MDL_CLASS_NAME"
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index 9f5872a..0e119c0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -382,4 +382,9 @@
default Optional<Boolean> isSparse() {
return Optional.empty();
}
+
+ // TODO:
+ default Optional<Double> getCategoryConfidence() {
+ return Optional.empty();
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index c313bf7..469858e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -349,15 +349,20 @@
@Override
public Optional<Boolean> isPermutateSynonyms() {
- return nvl(js.isPermutateSynonyms(), proxy.isPermutateSynonyms());
+ return nvlOpt(js.isPermutateSynonyms(), proxy.isPermutateSynonyms());
}
@Override
public Optional<Boolean> isSparse() {
- return nvl(js.isSparse(), proxy.isSparse());
+ return nvlOpt(js.isSparse(), proxy.isSparse());
}
- private<T> Optional<T> nvl(T t, T dflt) {
+ @Override
+ public Optional<Double> getCategoryConfidence() {
+ return Optional.ofNullable(js.getCategoryConfidence());
+ }
+
+ private<T> Optional<T> nvlOpt(T t, T dflt) {
return Optional.of(t != null ? t : dflt);
}
};
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index 2bbc72a..2a44f39 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -17,10 +17,9 @@
package org.apache.nlpcraft.model.impl
-import java.text.SimpleDateFormat
+import java.text.{DecimalFormat, SimpleDateFormat}
import java.util
import java.util.{List => JList}
-
import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.ascii._
@@ -38,6 +37,8 @@
//noinspection DuplicatedCode
object NCTokenLogger extends LazyLogging {
case class NoteMetadata(noteType: String, filtered: Seq[String], isFull: Boolean)
+
+ private final val FMT_NUM = new DecimalFormat("#0.00000")
// Order and sorting of notes for ASCII output.
private final val NOTE_TYPES = Seq[String](
@@ -617,6 +618,11 @@
if (parts.nonEmpty)
s = s"$s, parts=[$parts]"
+ t.meta(s"${t.getId}:confidence").asInstanceOf[java.lang.Double] match {
+ case null => // No-op.
+ case conf => s = s"$s, confidence=${FMT_NUM.format(conf)}"
+ }
+
s
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index addca45..8217a6a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -36,6 +36,8 @@
private Boolean isPermutateSynonyms;
// Can be null.
private Boolean isSparse;
+ // Can be null.
+ private Double categoryConfidence;
public String getParentId() {
return parentId;
@@ -97,4 +99,10 @@
public void setSparse(Boolean sparse) {
isSparse = sparse;
}
+ public Double getCategoryConfidence() {
+ return categoryConfidence;
+ }
+ public void setCategoryConfidence(Double categoryConfidence) {
+ this.categoryConfidence = categoryConfidence;
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index d2a4619..1c0add2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -32,9 +32,10 @@
import java.net.{InetAddress, NetworkInterface}
import java.util
import java.util.concurrent.CountDownLatch
-import java.util.{Properties, TimeZone}
+import java.util.{Collections, Properties, TimeZone}
import scala.collection.mutable
-import scala.jdk.CollectionConverters.{SetHasAsJava, SetHasAsScala}
+import scala.compat.java8.OptionConverters.RichOptionalGeneric
+import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, SeqHasAsJava, SetHasAsJava, SetHasAsScala}
/**
* Probe down/up link connection manager.
@@ -214,6 +215,51 @@
NCModelManager.getAllModels().map(wrapper => {
val mdl = wrapper.model
+ val (
+ singleValues,
+ corpus,
+ categoriesElements
+ ): (
+ java.util.Map[String, java.util.Map[String, java.util.Set[String]]],
+ java.util.Set[String],
+ java.util.Map[String, java.lang.Double]
+ ) = {
+ val ctxCatElems = mdl.getElements.asScala.flatMap(e =>
+ e.getCategoryConfidence.asScala match {
+ case Some(v) => Some(e.getId -> v)
+ case None => None
+ }
+ ).toMap
+
+ if (ctxCatElems.isEmpty)
+ (Collections.emptyMap(), Collections.emptySet(), Collections.emptyMap())
+ else {
+ val values =
+ mdl.getElements.
+ asScala.
+ filter(p => ctxCatElems.contains(p.getId)).
+ map(e =>
+ e.getId ->
+ e.getValues.asScala.map(p => p.getName -> {
+ val set: util.Set[String] =
+ new util.HashSet(
+ p.getSynonyms.asScala.filter(p => !p.contains(" ")).asJava
+ )
+
+ set.add(p.getName)
+
+ set
+ }).toMap.asJava
+ ).toMap
+
+ (
+ values.asJava,
+ wrapper.samples.flatMap(_._2.flatMap(p => p)).asJava,
+ ctxCatElems.asJava
+ )
+ }
+ }
+
// Model already validated.
// util.HashSet created to avoid scala collections serialization error.
@@ -223,7 +269,10 @@
mdl.getName,
mdl.getVersion,
new util.HashSet[String](mdl.getEnabledBuiltInTokens),
- new util.HashSet[String](mdl.getElements.asScala.map(_.getId).asJava)
+ new util.HashSet[String](mdl.getElements.asScala.map(_.getId).asJava),
+ singleValues,
+ corpus,
+ categoriesElements
)
})
), cryptoKey)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 8d89477..bc7c5ed 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -92,6 +92,9 @@
private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
private final val SUSP_SYNS_CHARS = Seq("?", "*", "+")
+ private final val MAX_CTXWORD_VALS_CNT = 10000
+ private final val MAX_CTXWORD_SAMPLES_CNT = 1000
+
@volatile private var data: mutable.ArrayBuffer[NCProbeModel] = _
@volatile private var mdlFactory: NCModelFactory = _
@@ -445,6 +448,44 @@
s"max=$maxCnt" +
s"]")
+ // Validates context words parameters.
+ val elems = mdl.getElements.asScala
+
+ val ctxCatElems = elems.flatMap(e =>
+ e.getCategoryConfidence.asScala match {
+ case Some(v) => Some(e.getId -> v)
+ case None => None
+ }
+ ).toMap
+
+ if (ctxCatElems.nonEmpty) {
+ val ids = ctxCatElems.filter { case (_, conf) => conf < 0 || conf > 1 }.keys
+
+ if (ids.nonEmpty)
+ // TODO:
+ throw new NCE(s"Context word confidences are out of range (0..1) for elements : ${ids.mkString(", ")}")
+
+ val cnt =
+ elems.map(e =>
+ if (e.getValues != null)
+ e.getValues.asScala.map(
+ p => if (p.getSynonyms != null) p.getSynonyms.asScala.count(!_.contains(" ")) else 0
+ ).sum + 1 // 1 for value name.
+ else
+ 0
+ ).sum
+
+ if (cnt > MAX_CTXWORD_VALS_CNT)
+ // TODO: do we need to print the recommended value?
+ logger.warn(
+ s"Too many values synonyms detected for context words elements [" +
+ s"mdlId=$mdlId, " +
+ s"cnt=$cnt," +
+ s"recommendedMax=$MAX_CTXWORD_VALS_CNT" +
+ s"]"
+ )
+ }
+
// Discard value loaders.
for (elm <- mdl.getElements.asScala)
elm.getValueLoader.ifPresent(_.onDiscard())
@@ -530,11 +571,23 @@
else
logger.warn(s"Model has no intent: $mdlId")
- def toMap(set: Set[SynonymHolder]): Map[String, Seq[NCProbeSynonym]] =
- set.groupBy(_.elmId).map(p => p._1 -> p._2.map(_.syn).toSeq.sorted.reverse)
+ val samples = scanSamples(mdl)
+
+ if (ctxCatElems.nonEmpty && samples.size > MAX_CTXWORD_SAMPLES_CNT)
+ // TODO: do we need to print the recommended value?
+ logger.warn(
+ s"Too many samples detected for context words elements [" +
+ s"mdlId=$mdlId, " +
+ s"cnt=${samples.size}," +
+ s"recommended=$MAX_CTXWORD_SAMPLES_CNT" +
+ s"]"
+ )
val simple = idl(syns.toSet, idl = false)
+ def toMap(set: Set[SynonymHolder]): Map[String, Seq[NCProbeSynonym]] =
+ set.groupBy(_.elmId).map(p => p._1 -> p._2.map(_.syn).toSeq.sorted.reverse)
+
NCProbeModel(
model = mdl,
solver = solver,
@@ -554,7 +607,7 @@
exclStopWordsStems = exclStopWords,
suspWordsStems = suspWords,
elements = mdl.getElements.asScala.map(elm => (elm.getId, elm)).toMap,
- samples = scanSamples(mdl)
+ samples = samples
)
}
@@ -1692,7 +1745,7 @@
s"origin=${mdl.getOrigin}, " +
s"intentIds=${unusedIntents.map(_.id).mkString("(", ", ", ")")}]"
)
-
+
intents.toSet
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 1f81711..67bacf0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -29,6 +29,7 @@
import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
import java.io.Serializable
+import java.lang
import java.util.{List => JList}
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
@@ -447,6 +448,18 @@
startScopedSpan(
"enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text
) { span =>
+ if (ns.firstProbePhase)
+ for ((tokIdx, map) <- ns.ctxWordCategories; (elemId, conf) <- map)
+ mark(
+ ns = ns,
+ elem =
+ mdl.elements.find(_._1 == elemId).
+ getOrElse(throw new NCE(s"Element not found: $elemId"))._2,
+ toks = Seq(ns.tokens(tokIdx)),
+ direct = true,
+ metaOpt = Some(Map("confidence" -> lang.Double.valueOf(conf)))
+ )
+
val req = NCRequestImpl(senMeta, ns.srvReqId)
val combToks = combos(ns.toSeq)
lazy val ch = mkComplexes(mdl, ns)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
index 1b6001b..a727912 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
@@ -19,6 +19,14 @@
import org.apache.nlpcraft.server.mdo.impl._
+@NCMdoEntity(sql = false)
+case class NCCtxWordCategoriesConfigMdo(
+ @NCMdoField probeId: String,
+ @NCMdoField modelId: String,
+ @NCMdoField singleValues: Map[String /*Element ID*/, Map[/*Value*/String, /*Synonym*/Set[String]]],
+ @NCMdoField corpus: Set[String],
+ @NCMdoField elements: Map[String /*Element ID*/, /*Confidence*/ Double]
+)
/**
* Probe model MDO.
*/
@@ -28,7 +36,8 @@
@NCMdoField name: String,
@NCMdoField version: String,
@NCMdoField enabledBuiltInTokens: Set[String],
- @NCMdoField elementIds: Set[String]
+ @NCMdoField elementIds: Set[String],
+ @NCMdoField ctxWordConfig: Option[NCCtxWordCategoriesConfigMdo]
) extends NCAnnotatedMdo[NCProbeModelMdo] {
override def hashCode(): Int = s"$id$name".hashCode()
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 636b263..32e8909 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -26,9 +26,11 @@
import org.apache.nlpcraft.common.{NCService, _}
import org.apache.nlpcraft.server.ignite.NCIgniteHelpers._
import org.apache.nlpcraft.server.ignite.NCIgniteInstance
+import org.apache.nlpcraft.server.mdo.NCCtxWordCategoriesConfigMdo
import org.apache.nlpcraft.server.nlp.core.{NCNlpNerEnricher, NCNlpServerManager}
import org.apache.nlpcraft.server.nlp.enrichers.basenlp.NCBaseNlpEnricher
import org.apache.nlpcraft.server.nlp.enrichers.coordinate.NCCoordinatesEnricher
+import org.apache.nlpcraft.server.nlp.enrichers.ctxword.NCContextWordCategoriesEnricher
import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateEnricher
import org.apache.nlpcraft.server.nlp.enrichers.geo.NCGeoEnricher
import org.apache.nlpcraft.server.nlp.enrichers.numeric.NCNumericEnricher
@@ -90,6 +92,7 @@
* @param srvReqId Server request ID.
* @param normTxt Normalized text.
* @param enabledBuiltInToks Enabled built-in tokens.
+ * @param ctxWordCatConf Machine learning configuration.
* @param parent Optional parent span.
* @return
*/
@@ -97,9 +100,11 @@
srvReqId: String,
normTxt: String,
enabledBuiltInToks: Set[String],
- parent: Span = null): NCNlpSentence =
+ ctxWordCatConf: Option[NCCtxWordCategoriesConfigMdo],
+ parent: Span = null
+ ): NCNlpSentence =
startScopedSpan("process", parent, "srvReqId" -> srvReqId, "txt" -> normTxt) { span =>
- val s = new NCNlpSentence(srvReqId, normTxt, enabledBuiltInToks)
+ val s = new NCNlpSentence(srvReqId, normTxt, enabledBuiltInToks, ctxWordCatConf)
// Server-side enrichment pipeline.
// NOTE: order of enrichers is IMPORTANT.
@@ -121,6 +126,8 @@
NCCoordinatesEnricher.enrich(s, span)
}
+ NCContextWordCategoriesEnricher.enrich(s, span)
+
ner(s, enabledBuiltInToks)
prepareAsciiTable(s).info(logger, Some(s"Server-side enrichment (built-in tokens only) for: '$normTxt'"))
@@ -134,6 +141,7 @@
* @param srvReqId Server request ID.
* @param txt Input text.
* @param enabledBuiltInToks Set of enabled built-in token IDs.
+ * @param ctxWordCatConf Machine learning configuration.
* @param parent Optional parent span.
*/
@throws[NCE]
@@ -141,29 +149,34 @@
srvReqId: String,
txt: String,
enabledBuiltInToks: Set[String],
- parent: Span = null): NCNlpSentence = {
+ ctxWordCatConf: Option[NCCtxWordCategoriesConfigMdo],
+ parent: Span = null
+ ): NCNlpSentence = {
startScopedSpan("enrichPipeline", parent, "srvReqId" -> srvReqId, "txt" -> txt) { span =>
val normTxt = NCPreProcessManager.normalize(txt, spellCheck = true, span)
if (normTxt != txt)
logger.info(s"Sentence normalized: $normTxt")
- val normEnabledBuiltInToks = enabledBuiltInToks.map(_.toLowerCase)
+ def execute(): NCNlpSentence = process(srvReqId, normTxt, enabledBuiltInToks, ctxWordCatConf, span)
- catching(wrapIE) {
- cache(normTxt) match {
- case Some(h) =>
- if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
- prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
+ if (U.isSysEnvSet("NLPCRAFT_DISABLE_SENTENCE_CACHE"))
+ execute()
+ else
+ catching(wrapIE) {
+ cache(normTxt) match {
+ case Some(h) =>
+ if (h.enabledBuiltInTokens == enabledBuiltInToks.map(_.toLowerCase)) {
+ prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
- h.sentence
- }
- else
- process(srvReqId, normTxt, enabledBuiltInToks, span)
- case None =>
- process(srvReqId, normTxt, enabledBuiltInToks, span)
+ h.sentence
+ }
+ else
+ execute()
+ case None =>
+ execute()
+ }
}
- }
}
}
@@ -273,7 +286,8 @@
() => NCDateEnricher.start(span),
() => NCNumericEnricher.start(span),
() => NCGeoEnricher.start(span),
- () => NCCoordinatesEnricher.start(span)
+ () => NCCoordinatesEnricher.start(span),
+ () => NCContextWordCategoriesEnricher.start(span)
)
}
@@ -291,6 +305,7 @@
ackStopping()
if (Config.isBuiltInEnrichers) {
+ NCContextWordCategoriesEnricher.stop(span)
NCCoordinatesEnricher.stop(span)
NCGeoEnricher.stop(span)
NCNumericEnricher.stop(span)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
new file mode 100644
index 0000000..4a827a2
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
@@ -0,0 +1,626 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.server.nlp.enrichers.ctxword
+
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.ascii.NCAsciiTable
+import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager.stem
+import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank._
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceToken}
+import org.apache.nlpcraft.common.{DEEP_DEBUG, NCE, NCService}
+import org.apache.nlpcraft.server.mdo.NCCtxWordCategoriesConfigMdo
+import org.apache.nlpcraft.server.nlp.core.{NCNlpParser, NCNlpServerManager, NCNlpWord}
+import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
+import org.apache.nlpcraft.server.sugsyn.{NCSuggestSynonymManager, NCSuggestionRequest => Request, NCWordSuggestion => Suggestion}
+import org.jibx.schema.codegen.extend.DefaultNameConverter
+
+import java.text.DecimalFormat
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuffer
+import scala.concurrent.Await
+import scala.concurrent.duration.Duration
+
+/**
+ * ContextWord enricher.
+ * When starting the server, set the following environment variables for deep debugging.
+ * - NLPCRAFT_LOG_LEVEL=TRACE
+ * - NLPCRAFT_DEEP_DEBUG=true
+ * - NLPCRAFT_DISABLE_SENTENCE_CACHE=true
+ */
+object NCContextWordCategoriesEnricher extends NCServerEnricher {
+ private final val MAX_CTXWORD_SCORE = 2
+ private final val INCL_MAX_CONFIDENCE = 1.0
+
+ private final val CONVERTER = new DefaultNameConverter
+ private final val FMT = new DecimalFormat("#0.00000")
+
+ private case class Reason(word: String, suggConf: Double, valOrCorpConf: Double) {
+ override def toString: String =
+ s"Word: $word, confidences: suggestion=${FMT.format(suggConf)}, value or corpus=${FMT.format(valOrCorpConf)}"
+ }
+
+ private case class Confidence(value: Double, reason: Option[Reason] = None) {
+ override def toString: String =
+ s"${FMT.format(value)}(${if (reason.isDefined) s"via:'${reason.get}'" else "direct"})}"
+ }
+
+ private case class ModelProbeKey(probeId: String, modelId: String)
+
+ private case class ElementConfidence(elementId: String, confidence: Confidence) {
+ override def toString: String = s"Element [id=$elementId, confidence=$confidence]]"
+ }
+
+ // Maps: Key is word, values are all element IDs.
+ private case class ValuesHolder(normal: Map[String, Set[String]], stems: Map[String, Set[String]]) {
+ private def map2Str(m: Map[String, Set[String]]): String =
+ m.toSeq.flatMap { case (v, elems) =>
+ elems.toSeq.map(_ -> v) }.groupBy { case (v, _) => v }.map { case (v, seq) => v -> toStr(seq.map(_._2))
+ }.mkString(", ")
+
+ override def toString: String = s"Values [normal=${map2Str(normal)}, stems=${map2Str(stems)}]"
+ }
+
+ // Maps: Key is elementID, values are all values synonyms for this element.
+ private case class ElementData(normals: Map[String, Double], stems: Map[String, Double], lemmas: Map[String, Double]) {
+ def get(norm: String, stem: String, lemma: String): Option[Double] =
+ normals.get(norm) match {
+ case Some(v) => Some(v)
+ case None =>
+ stems.get(stem) match {
+ case Some(v) => Some(v)
+ case None => lemmas.get(lemma)
+ }
+ }
+ }
+
+ // Service responsible for all confidence calculations.
+ private object ConfMath {
+ /**
+ * Squeezes the word's confidence values list (result of corpus processing) into a single value.
+ *
+ * @param confs Word's confidence values.
+ * @return Calculated single value. `None` means that this word shouldn't be taken into account for the element.
+ */
+ def squeeze(confs: Seq[Double]): Option[Double] = {
+ // Drops the word if there is not enough data.
+ // For one element we have only a few samples; each word should have been suggested a few times.
+ if (confs.length < 3)
+ None
+ else {
+ // Takes 50% of most important (or first 2 at least) and calculates average value.
+ val n = Math.max((confs.length * 0.5).intValue(), 2)
+
+ val maxN = confs.sortBy(-_).take(n)
+
+ Some(maxN.sum / maxN.length)
+ }
+ }
+
+ /**
+ * Calculates confidence values based on suggested confidence for given word and corpus confidence.
+ *
+ * @param suggConf Suggestion confidence for noun of given sentence.
+ * @param corpusConf Corpus confidence which was found via the suggestion's co-reference.
+ */
+ def calculate(suggConf: Double, corpusConf: Double): Double =
+ // Corpus data is more important. Empirical factors configured.
+ calcWeightedGeoMean(Map(suggConf -> 1, corpusConf -> 2))
+
+ /**
+ * Calculates weighted geometrical mean value.
+ *
+ * @param vals2Weights Values with their weights.
+ */
+ private def calcWeightedGeoMean(vals2Weights: Map[Double, Double]): Double =
+ Math.pow(vals2Weights.map { case (v, weight) => Math.pow(v, weight) }.product, 1.0 / vals2Weights.values.sum)
+ }
+
+ @volatile private var valuesStems: mutable.HashMap[ModelProbeKey, ValuesHolder] = _
+ @volatile private var elemsCorpuses: mutable.HashMap[ModelProbeKey, Map[String, ElementData]] = _
+ @volatile private var parser: NCNlpParser = _
+
+ override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ =>
+ ackStarting()
+
+ valuesStems = mutable.HashMap.empty
+ elemsCorpuses = mutable.HashMap.empty
+ parser = NCNlpServerManager.getParser
+
+ ackStarted()
+ }
+
+ override def stop(parent: Span = null): Unit =
+ startScopedSpan("stop", parent) { _ =>
+ ackStopping()
+
+ parser = null
+ elemsCorpuses = null
+ valuesStems = null
+
+ ackStopped()
+ }
+
+ /**
+ *
+ * @param seq
+ * @return
+ */
+ private def toStr(seq: Seq[String]): String = seq.mkString("{ ", ", ", " }")
+
+ /**
+ *
+ * @param s
+ * @return
+ */
+ private def norm(s: String): String = s.toLowerCase
+
+ /**
+ *
+ * @param awaitable
+ * @tparam T
+ * @return
+ */
+ private def syncExec[T](awaitable: scala.concurrent.Awaitable[T]): T = Await.result(awaitable, Duration.Inf)
+
+ /**
+ *
+ * @param corpusNlpSeq
+ * @param elemSingleVals
+ * @return
+ */
+ private def mkRequests(corpusNlpSeq: Seq[Seq[NCNlpWord]], elemSingleVals: Set[String]): Iterable[Request] =
+ corpusNlpSeq.
+ flatMap {
+ corpusNlp =>
+ lazy val corpusWords = corpusNlp.map(_.word)
+
+ def getIndexes(corpVals: Seq[String], vals: Set[String]): Set[Int] =
+ vals.flatMap(v => {
+ val i = corpVals.indexOf(v)
+
+ if (i >= 0) Some(i) else None
+ })
+
+ val elemSingleValsNorm = elemSingleVals.map(norm)
+ val elemSingleValsStem = elemSingleVals.map(stem)
+
+ val idxs =
+ getIndexes(corpusNlp.map(_.normalWord), elemSingleValsNorm) ++
+ getIndexes(corpusNlp.map(_.stem), elemSingleValsStem) ++
+ // A sample can contain words in plural form.
+ // We can compare them with synonym values (assuming that model synonym values are defined as lemmas).
+ getIndexes(corpusNlp.map(p => norm(p.lemma)), elemSingleValsNorm)
+
+ def mkRequest(idx: Int, syn: String): Request = {
+ var newSen = substitute(corpusWords, syn, idx)
+
+ val nlpWordsNew = parser.parse(newSen.mkString(" "))
+
+ require(corpusWords.size == nlpWordsNew.size)
+
+ val pos = corpusNlp(idx).pos
+ val posNew = nlpWordsNew(idx).pos
+
+ if (NOUNS_POS_SINGULAR.contains(pos) && NOUNS_POS_PLURALS.contains(posNew))
+ newSen = substitute(corpusWords, CONVERTER.depluralize(syn), idx)
+ else if (NOUNS_POS_PLURALS.contains(pos) && NOUNS_POS_SINGULAR.contains(posNew))
+ newSen = substitute(corpusWords, CONVERTER.pluralize(syn), idx)
+
+ Request(newSen, idx)
+ }
+
+ for (idx <- idxs; syn <- elemSingleVals)
+ yield mkRequest(idx, syn)
+ }
+
+ /**
+ * Context word server returned values have confidence in range (0..2).
+ *
+ * @param conf Context word server confidence value.
+ */
+ private def normalizeConf(conf: Double): Double = conf / MAX_CTXWORD_SCORE
+
+ /**
+ *
+ * @param cfg
+ * @param key
+ * @param vh
+ * @param parent
+ * @return
+ */
+ private def getCorpusData(cfg: NCCtxWordCategoriesConfigMdo, key: ModelProbeKey, vh: ValuesHolder, parent: Span = null):
+ Map[/** Element ID */String, ElementData] =
+ elemsCorpuses.synchronized { elemsCorpuses.get(key) } match {
+ case Some(cache) => cache
+ case None =>
+ val res = askSamples(cfg, vh, parent)
+
+ elemsCorpuses.synchronized { elemsCorpuses += key -> res }
+
+ res
+ }
+
+ /**
+ *
+ * @param cfg
+ * @param key
+ * @return
+ */
+ private def getValuesData(cfg: NCCtxWordCategoriesConfigMdo, key: ModelProbeKey): ValuesHolder =
+ valuesStems.synchronized { valuesStems.get(key) } match {
+ case Some(cache) => cache
+ case None =>
+ def mkMap(convert: String => String): Map[String, Set[String]] =
+ cfg.singleValues.
+ flatMap { case (elemId, vals) => vals.map { case (_, vals) => vals.map(convert(_) -> elemId) } }.
+ flatten.
+ groupBy { case (converted, _) => converted }.
+ map { case (converted, map) => converted -> map.map { case (_, elemId) => elemId }.toSet }
+
+ val normsMap = mkMap(norm)
+ val stemsMap = mkMap(stem)
+
+ val h = ValuesHolder(normal = normsMap, stems = stemsMap.filter(p => !normsMap.keySet.contains(p._1)))
+
+ valuesStems.synchronized { valuesStems += key -> h }
+
+ h
+ }
+
+ /**
+ *
+ * @param words
+ * @param word
+ * @param index
+ * @return
+ */
+ private def substitute(words: Seq[String], word: String, index: Int): Seq[String] = {
+ require(index < words.length)
+
+ words.zipWithIndex.map { case (w, i) => if (i != index) w else word }
+ }
+
+ /**
+ *
+ * @param req
+ * @param sugg
+ * @return
+ */
+ private def getLemma(req: Request, sugg: Suggestion): String =
+ parser.parse(substitute(req.words, sugg.word, req.index).mkString(" "))(req.index).lemma
+
+ /**
+ *
+ * @param cfg
+ * @param vh
+ * @param parent
+ */
+ @throws[NCE]
+ private def askSamples(cfg: NCCtxWordCategoriesConfigMdo, vh: ValuesHolder, parent: Span = null):
+ Map[/** Element ID */String, ElementData] = {
+ val corpusNlp = cfg.corpus.toSeq.map(s => parser.parse(s))
+
+ val recs: Map[String, Seq[Request]] =
+ (
+ for (
+ (elemId, elemSingleVals) <- cfg.singleValues.toSeq;
+ elemSingleValsSet = elemSingleVals.flatMap(_._2).toSet;
+ suggReq <- mkRequests(corpusNlp, elemSingleValsSet)
+ ) yield (elemId, suggReq)
+ ).
+ groupBy { case (elemId, _) => elemId }.
+ map { case (elemId, m) => elemId -> m.map(_._2) }
+
+ if (recs.nonEmpty) {
+ val respsSeq: Seq[(Request, Seq[Suggestion])] =
+ syncExec(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2).toSeq, parent = parent)).
+ toSeq.sortBy(p => (p._1.words.mkString, p._1.index))
+
+ if (DEEP_DEBUG) {
+ val t = NCAsciiTable()
+
+ t #= ("Request", "Responses")
+
+ for ((req, resp) <- respsSeq)
+ t += (req, s"${resp.map(p => s"${p.word}=${FMT.format(normalizeConf(p.score))}").mkString(", ")}")
+
+ t.trace(logger, Some("Corpus requests:"))
+ }
+
+ val req2Elem = recs.flatMap { case (elemId, recs) => recs.map(p => p -> elemId) }
+
+ def mkMap(convert: (Request, Suggestion) => String):
+ Map[/** Element ID */ String, /** Word key */ Map[String, /** Confidences */ Seq[Double]]] = {
+ val seq: Seq[(String, Map[String, Double])] =
+ respsSeq.
+ map { case (req, suggs) =>
+ (
+ req2Elem(req),
+ suggs.groupBy(sygg => convert(req, sygg)).
+ // If different word forms have different confidences (`Abc` - 0.9, `abc` - 0.7),
+ // we use the maximum (0.9).
+ map { case (key, suggs) => key -> suggs.map(p => normalizeConf(p.score)).max }
+ )
+ }
+ seq.
+ groupBy { case (elemId, _) => elemId }.
+ map { case (elemId, data) =>
+ elemId ->
+ data.flatMap(_._2).
+ groupBy { case (word, _) => word }.
+ map { case (word, data) => word -> data.map { case (_, confs) => confs } }
+ }
+ }
+
+ val normals = mkMap { (_, sugg) => norm(sugg.word) }
+ val stems = mkMap { (_, sugg) => stem(sugg.word) }
+ val lemmas = mkMap { (req, sugg) => getLemma(req, sugg) }
+
+ def mkTable(): NCAsciiTable =
+ if (DEEP_DEBUG) {
+ val t = NCAsciiTable()
+
+ t #= ("Element", "Confidences for normal forms")
+
+ t
+ }
+ else
+ null
+
+ val (tabAll, tabNorm) = (mkTable(), mkTable())
+
+ val res =
+ (normals.keySet ++ stems.keySet ++ lemmas.keySet).map(elemId =>
+ elemId -> {
+ def get[T, K](m: Map[String, Map[T, K]]): Map[T, K] = m.getOrElse(elemId, Map.empty)
+
+ (get(normals), get(stems), get(lemmas))
+ }
+ ).
+ toMap.
+ map { case (elemId, (normals, stems, lemmas)) =>
+ // Skip suggestions that already exist as values for the element.
+ def dropValues[T](words: Map[String, Seq[Double]], vals: Map[String, Set[String]]):
+ Map[String, Seq[Double]] =
+ words.filter { case (word, _) => vals.get(word) match {
+ case Some(elemIds) => !elemIds.contains(elemId)
+ case None => true
+ }}
+
+ val normalsAll = dropValues(normals, vh.normal)
+ val stemsAll = dropValues(stems -- normalsAll.keySet, vh.stems)
+ val lemmasAll = lemmas -- normals.keySet -- stemsAll.keySet
+
+ def mkDebugElementCell(normsSize: Int, stemsSize: Int, lemmasSize: Int): String =
+ s"Element: $elemId [normals=$normsSize, stems=$stemsSize, lemmas=$lemmasSize]"
+
+ if (DEEP_DEBUG)
+ tabAll += (
+ mkDebugElementCell(normalsAll.size, stemsAll.size, lemmasAll.size),
+ toStr(
+ normalsAll.toSeq.
+ sortBy(p => (-p._2.max, -p._2.size)).map(
+ { case (k, confs) => s"$k=${toStr(confs.sortBy(-_).map(p => FMT.format(p)))}" }
+ )
+ )
+ )
+
+ def squeeze(map: Map[String, Seq[Double]]): Map[String, Double] =
+ map.flatMap { case (wordKey, confs) =>
+ ConfMath.squeeze(confs) match {
+ case Some(conf) => Some(wordKey -> conf)
+ case None => None
+ }
+ }
+
+ val normalsSingle = squeeze(normalsAll)
+ val stemsSingle = squeeze(stemsAll)
+ val lemmasSingle = squeeze(lemmasAll)
+
+ if (DEEP_DEBUG)
+ tabNorm += (
+ mkDebugElementCell(normalsSingle.size, stemsSingle.size, lemmasSingle.size),
+ toStr(
+ normalsSingle.toSeq.sortBy(-_._2).map(
+ { case (k, factor) => s"$k=${FMT.format(factor)}" }
+ )
+ )
+ )
+
+ elemId -> ElementData(normalsSingle, stemsSingle, lemmasSingle)
+ }
+
+ if (DEEP_DEBUG) {
+ tabAll.trace(logger, Some("Model corpus all confidences:"))
+ tabNorm.trace(logger, Some("Model corpus normalized confidences:"))
+ }
+
+ res
+ }
+ else
+ Map.empty[String, ElementData]
+ }
+
+ override def enrich(ns: NCNlpSentence, parent: Span): Unit =
+ startScopedSpan("enrich", parent) { _ =>
+ ns.ctxWordConfig match {
+ case Some(cfg) =>
+ val detected = mutable.HashMap.empty[NCNlpSentenceToken, mutable.HashSet[ElementConfidence]]
+
+ def add(nounTok: NCNlpSentenceToken, elemId: String, conf: Confidence): Unit = {
+ val tokElems = detected.getOrElseUpdate(nounTok, mutable.HashSet.empty[ElementConfidence])
+
+ tokElems.find(_.elementId == elemId) match {
+ case Some(exConf) =>
+ if (conf.value > exConf.confidence.value) {
+ tokElems += ElementConfidence(elemId, conf)
+ tokElems -= exConf
+ }
+ case None =>
+ tokElems += ElementConfidence(elemId, conf)
+ }
+ }
+
+ val nouns = ns.tokens.filter(t => NOUNS_POS.contains(t.pos))
+
+ if (nouns.nonEmpty) {
+ val key = ModelProbeKey(cfg.probeId, cfg.modelId)
+
+ // 1. Values. Direct.
+ val vh = getValuesData(cfg, key)
+
+ val (vNorms, vStems) = (vh.normal, vh.stems)
+
+ if (DEEP_DEBUG)
+ logger.trace(
+ s"Model loaded [" +
+ s"key=$key, elements: " +
+ s"${cfg.elements.mkString(", ")}, " +
+ s"values data=$vh]"
+ )
+
+ def get(m: Map[String, Set[String]], key: String): Set[String] = m.getOrElse(key, Set.empty)
+
+ for (
+ n <- nouns;
+ elemId <- get(vNorms, n.normText) ++ get(vNorms, norm(n.lemma)) ++ get(vStems, n.stem)
+ )
+ add(n, elemId, Confidence(INCL_MAX_CONFIDENCE))
+
+ // 2. Via corpus.
+ val corpusData = getCorpusData(cfg, key, vh, parent)
+
+ for (
+ nounTok <- nouns;
+ (elemId, elemData) <- corpusData;
+ confOpt = elemData.get(nounTok.normText, nounTok.stem, nounTok.lemma)
+ if confOpt.isDefined && confOpt.get >= cfg.elements(elemId)
+ )
+ add(nounTok, elemId, Confidence(confOpt.get))
+
+ // 3. Ask for sentence (via co-references)
+ val idxs = ns.tokens.flatMap(p => if (p.pos.startsWith("N")) Some(p.index) else None).toSeq
+ val reqs = idxs.map(idx => Request(ns.tokens.map(_.origText).toSeq, idx))
+
+ val resps: Map[Suggestion, Request] =
+ syncExec(NCSuggestSynonymManager.suggestWords(reqs, parent = parent)).
+ flatMap { case (req, suggs) => suggs.map(_ -> req) }
+
+ if (DEEP_DEBUG) {
+ val t = NCAsciiTable()
+
+ t #= ("Request", "Responses")
+
+ resps.toSeq.groupBy(_._2.index).foreach { case (_, seq) =>
+ val sorted = seq.sortBy(-_._1.score)
+
+ t += (
+ sorted.head._2,
+ s"${
+ sorted.map(_._1).
+ map(p => s"${p.word}=${FMT.format(normalizeConf(p.score))}").
+ mkString(", ")
+ }"
+ )
+ }
+
+ t.trace(logger, Some(s"Sentence requests processing [key=$key, sentence=${ns.text}]"))
+ }
+
+ case class Key(elementId: String, token: NCNlpSentenceToken)
+
+ val missed = if (DEEP_DEBUG) mutable.HashMap.empty[Key, ArrayBuffer[Confidence]] else null
+
+ def calcConf(elemId: String, data: ElementData, req: Request, s: Suggestion): Option[Double] = {
+ val suggNorm = norm(s.word)
+ val suggStem = stem(s.word)
+
+ if (
+ vh.normal.getOrElse(suggNorm, Set.empty).contains(elemId) ||
+ vh.stems.getOrElse(suggStem, Set.empty).contains(elemId)
+ )
+ Some(1.0)
+ else
+ data.get(norm = suggNorm, stem = suggStem, lemma = getLemma(req, s))
+ }
+
+ for (
+ // Token index (req.index) should be correct because the request is created from the original
+ // words, separated by spaces, and the Suggestion Manager uses a space tokenizer.
+ (sugg, req) <- resps.toSeq.sortBy(_._2.index);
+ suggConf = normalizeConf(sugg.score);
+ (elemId, elemData) <- corpusData;
+ elemConf = cfg.elements(elemId);
+ valOrCorpConfOpt = calcConf(elemId, elemData, req, sugg)
+ if valOrCorpConfOpt.isDefined;
+ valOrCorpConf = valOrCorpConfOpt.get;
+ normConf = ConfMath.calculate(suggConf, valOrCorpConf)
+ ) {
+ def mkConf(): Confidence = Confidence(normConf, Some(Reason(sugg.word, suggConf, valOrCorpConf)))
+ def getToken: NCNlpSentenceToken = ns.tokens(req.index)
+
+ if (normConf >= elemConf)
+ add(getToken, elemId, mkConf())
+ else if (DEEP_DEBUG)
+ missed.getOrElseUpdate(Key(elemId, getToken), mutable.ArrayBuffer.empty) += mkConf()
+ }
+
+ ns.ctxWordCategories = detected.map {
+ case (tok, confs) => tok.index -> confs.map(p => p.elementId -> p.confidence.value).toMap
+ }.toMap
+
+ if (DEEP_DEBUG) {
+ require(missed != null)
+
+ missed.filter { case (key, _) =>
+ !detected.exists {
+ case (tok, confs) => confs.exists(conf => Key(conf.elementId, tok) == key)
+ }
+ }.sortBy { case (key, _) => (key.token.index, key.elementId) }.
+ foreach { case (key, confs) =>
+ logger.trace(
+ s"Unsuccessful attempt [" +
+ s"elementId=${key.elementId}, " +
+ s"tokenWordIndexes=${key.token.wordIndexes.mkString(",")}, " +
+ s"confidences=${confs.sortBy(-_.value).mkString(", ")}" +
+ s"]"
+ )
+ }
+
+ logger.trace("Sentence detected elements:")
+
+ for ((tok, elems) <- detected)
+ logger.trace(s"${tok.origText}: ${elems.mkString(", ")}")
+ }
+ }
+
+ case None => // No-op.
+ }
+ }
+
+ /**
+ * Clears cached values and corpus data for the disconnected probe.
+ * @param probeId Probe ID.
+ * @param parent Optional parent span.
+ */
+ def onDisconnectProbe(probeId: String, parent: Span = null): Unit =
+ startScopedSpan("onDisconnectProbe", parent) { _ =>
+ valuesStems.synchronized { valuesStems --= valuesStems.keySet.filter(_.probeId == probeId) }
+ elemsCorpuses.synchronized { elemsCorpuses --= elemsCorpuses.keySet.filter(_.probeId == probeId) }
+ }
+}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index cd0d5a5..7f8437e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -32,8 +32,9 @@
import org.apache.nlpcraft.common.{NCService, _}
import org.apache.nlpcraft.probe.mgrs.NCProbeMessage
import org.apache.nlpcraft.server.company.NCCompanyManager
-import org.apache.nlpcraft.server.mdo.{NCCompanyMdo, NCProbeMdo, NCProbeModelMdo, NCUserMdo}
+import org.apache.nlpcraft.server.mdo._
import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnrichmentManager
+import org.apache.nlpcraft.server.nlp.enrichers.ctxword.NCContextWordCategoriesEnricher
import org.apache.nlpcraft.server.proclog.NCProcessLogManager
import org.apache.nlpcraft.server.query.NCQueryManager
import org.apache.nlpcraft.server.sql.NCSql
@@ -264,6 +265,9 @@
// Clears unused models.
mdls --= mdls.keys.filter(id => !probes.exists { case (_, p) => p.probe.models.exists(_.id == id) })
+
+ // TODO: add new interface for server enrichers? (services)
+ NCContextWordCategoriesEnricher.onDisconnectProbe(probeKey.probeId)
}
case Some(hld) =>
@@ -603,7 +607,8 @@
s"probeToken=$probeTkn, " +
s"probeId=$probeId, " +
s"proveGuid=$probeGuid" +
- s"]")
+ s"]"
+ )
if (isMultipleProbeRegistrations(probeKey))
respond("S2P_PROBE_MULTIPLE_INSTANCES")
@@ -621,7 +626,10 @@
String,
String,
java.util.Set[String],
- java.util.Set[String]
+ java.util.Set[String],
+ java.util.Map[String, java.util.Map[String, java.util.Set[String]]],
+ java.util.Set[String],
+ java.util.Map[String, Double]
)]]("PROBE_MODELS").
map {
case (
@@ -629,20 +637,44 @@
mdlName,
mdlVer,
enabledBuiltInToks,
- elmIds
+ elmIds,
+ singleValues,
+ corpus,
+ categoriesElements
) =>
require(mdlId != null)
require(mdlName != null)
require(mdlVer != null)
require(enabledBuiltInToks != null)
require(elmIds != null)
+ require(singleValues.isEmpty && corpus.isEmpty || !singleValues.isEmpty && !corpus.isEmpty)
NCProbeModelMdo(
id = mdlId,
name = mdlName,
version = mdlVer,
enabledBuiltInTokens = enabledBuiltInToks.asScala.toSet,
- elementIds = elmIds.asScala.toSet
+ elementIds = elmIds.asScala.toSet,
+ ctxWordConfig =
+ if (!singleValues.isEmpty) {
+ Some(
+ NCCtxWordCategoriesConfigMdo(
+ probeId = probeId,
+ modelId = mdlId,
+ singleValues = singleValues.asScala.map {
+ case (elemId, map) =>
+ elemId ->
+ map.asScala.map {
+ case (value, syns) => value -> syns.asScala.toSet
+ }.toMap
+ }.toMap,
+ corpus = corpus.asScala.toSet,
+ elements = categoriesElements.asScala.toMap
+ )
+ )
+ }
+ else
+ None
)
}.toSet
@@ -710,7 +742,7 @@
else
logger.warn(s"Message ignored: $probeMsg")
}
-
+
/**
* Processes the messages received from the probe.
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
index a12a4e8..301bc59 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
@@ -272,7 +272,7 @@
logger.info(s"New user request received:\n$tbl")
- val enabledBuiltInToks = NCProbeManager.getModel(mdlId, span).enabledBuiltInTokens
+ val mdl = NCProbeManager.getModel(mdlId, span)
@throws[NCE]
def unzipProperties(gzipOpt: Option[String]): Option[JavaMeta] =
@@ -288,7 +288,7 @@
company,
mdlId,
txt0,
- NCServerEnrichmentManager.enrichPipeline(srvReqId, txt0, enabledBuiltInToks),
+ NCServerEnrichmentManager.enrichPipeline(srvReqId, txt0, mdl.enabledBuiltInTokens, mdl.ctxWordConfig),
usrAgent,
rmtAddr,
data,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
index 2bbe409..3530e26 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
@@ -809,7 +809,7 @@
checkModelId(req.mdlId, admUsr.companyId)
- val fut = NCSuggestSynonymManager.suggest(req.mdlId, req.minScore, span)
+ val fut = NCSuggestSynonymManager.suggestModel(req.mdlId, req.minScore, span)
successWithJs(
fut.collect {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index d89ba98..20b8f70 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -24,7 +24,7 @@
import org.apache.http.client.ResponseHandler
import org.apache.http.client.methods.HttpPost
import org.apache.http.entity.StringEntity
-import org.apache.http.impl.client.HttpClients
+import org.apache.http.impl.client.{CloseableHttpClient, HttpClients}
import org.apache.http.util.EntityUtils
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.config.NCConfigurable
@@ -55,7 +55,7 @@
private final val MIN_CNT_MODEL = 20
private final val GSON = new Gson
- private final val TYPE_RESP = new TypeToken[util.List[util.List[Suggestion]]]() {}.getType
+ private final val TYPE_RESP = new TypeToken[util.List[util.List[NCWordSuggestion]]]() {}.getType
private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
private implicit final val ec: ExecutionContext = NCThreadPoolManager.getSystemContext
@@ -64,7 +64,7 @@
val urlOpt: Option[String] = getStringOpt("nlpcraft.server.ctxword.url")
}
- private final val HANDLER: ResponseHandler[Seq[Seq[Suggestion]]] =
+ private final val HANDLER: ResponseHandler[Seq[Seq[NCWordSuggestion]]] =
(resp: HttpResponse) => {
val code = resp.getStatusLine.getStatusCode
val e = resp.getEntity
@@ -76,9 +76,9 @@
code match {
case 200 =>
- val data: util.List[util.List[Suggestion]] = GSON.fromJson(js, TYPE_RESP)
+ val data: util.List[util.List[NCWordSuggestion]] = GSON.fromJson(js, TYPE_RESP)
- data.asScala.map(p => if (p.isEmpty) Seq.empty else p.asScala.tail.toSeq).toSeq
+ data.asScala.map(p => if (p.isEmpty) Seq.empty else p.asScala.toSeq).toSeq
case _ =>
throw new NCE(
@@ -90,9 +90,14 @@
}
}
- case class Suggestion(word: String, score: Double)
+
case class RequestData(sentence: String, ex: String, elmId: String, index: Int)
case class RestRequestSentence(text: String, indexes: util.List[Int])
+ object RestRequestSentence {
+ def apply(text: String, index: Int): RestRequestSentence = new RestRequestSentence(text, Seq(index).asJava)
+
+
+ }
case class RestRequest(sentences: util.List[RestRequestSentence], limit: Int, minScore: Double)
case class Word(word: String, stem: String) {
require(!word.contains(" "), s"Word cannot contains spaces: $word")
@@ -111,6 +116,19 @@
private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
+ @throws[NCE]
+ private def mkUrl = s"${Config.urlOpt.getOrElse(throw new NCE("Context word server is not configured."))}/suggestions"
+
+ private def request(cli: CloseableHttpClient, post: HttpPost): Seq[Seq[NCWordSuggestion]] = {
+ val resps: Seq[Seq[NCWordSuggestion]] =
+ try
+ cli.execute(post, HANDLER)
+ finally
+ post.releaseConnection()
+
+ resps
+ }
+
/**
*
* @param seq1
@@ -131,14 +149,14 @@
}
/**
- *
+ * TODO: refactor async call (waiting should be dropped.)
* @param mdlId
* @param minScoreOpt
* @param parent
* @return
*/
- def suggest(mdlId: String, minScoreOpt: Option[Double], parent: Span = null): Future[NCSuggestSynonymResult] =
- startScopedSpan("inspect", parent, "mdlId" -> mdlId) { _ =>
+ def suggestModel(mdlId: String, minScoreOpt: Option[Double], parent: Span = null): Future[NCSuggestSynonymResult] =
+ startScopedSpan("suggestModel", parent, "mdlId" -> mdlId) { _ =>
val now = U.now()
val promise = Promise[NCSuggestSynonymResult]()
@@ -178,7 +196,7 @@
if (mdlExs.isEmpty)
onError(s"Missed intents samples for: `$mdlId``")
else {
- val url = s"${Config.urlOpt.getOrElse(throw new NCE("Context word server is not configured."))}/suggestions"
+ val url = mkUrl
val allSamplesCnt = mdlExs.map { case (_, samples) => samples.size }.sum
@@ -281,9 +299,9 @@
if (allReqsCnt == 0)
onError(s"Suggestions cannot be generated for model: '$mdlId'")
else {
- val allSgsts = new ConcurrentHashMap[String, util.List[Suggestion]]()
+ val allSgsts = new ConcurrentHashMap[String, util.List[NCWordSuggestion]]()
val cdl = new CountDownLatch(1)
- val debugs = mutable.HashMap.empty[RequestData, Seq[Suggestion]]
+ val debugs = mutable.HashMap.empty[RequestData, Seq[NCWordSuggestion]]
val cnt = new AtomicInteger(0)
val cli = HttpClients.createDefault
@@ -299,7 +317,7 @@
new StringEntity(
GSON.toJson(
RestRequest(
- sentences = batch.map(p => RestRequestSentence(p.sentence, Seq(p.index).asJava)).asJava,
+ sentences = batch.map(p => RestRequestSentence(p.sentence, p.index)).asJava,
minScore = 0,
limit = MAX_LIMIT
)
@@ -308,10 +326,7 @@
)
)
- val resps: Seq[Seq[Suggestion]] = try
- cli.execute(post, HANDLER)
- finally
- post.releaseConnection()
+ val resps = request(cli, post)
require(batch.size == resps.size, s"Batch: ${batch.size}, responses: ${resps.size}")
@@ -322,7 +337,7 @@
logger.debug(s"Executed: $i requests...")
allSgsts.
- computeIfAbsent(elmId, (_: String) => new CopyOnWriteArrayList[Suggestion]()).
+ computeIfAbsent(elmId, (_: String) => new CopyOnWriteArrayList[NCWordSuggestion]()).
addAll(resps.flatten.asJava)
if (i == allReqsCnt)
@@ -441,6 +456,74 @@
}
/**
+ * Requests word suggestions for the given sentence requests.
+ * @param reqs Suggestion requests (sentence words plus target word index).
+ * @param minScoreOpt Optional minimal score filter for returned suggestions.
+ * @param parent Optional parent span.
+ * @return Future with suggestions for each request, sorted by descending score.
+ */
+ def suggestWords(reqs: Seq[NCSuggestionRequest], minScoreOpt: Option[Double] = None, parent: Span = null):
+ Future[Map[NCSuggestionRequest, Seq[NCWordSuggestion]]] =
+ startScopedSpan("suggestWords", parent) { _ =>
+ val promise = Promise[Map[NCSuggestionRequest, Seq[NCWordSuggestion]]]()
+
+ case class Result(request: NCSuggestionRequest, suggestions: Seq[NCWordSuggestion])
+
+ val data = new CopyOnWriteArrayList[Result]()
+ val cli = HttpClients.createDefault
+ val batches = reqs.sliding(BATCH_SIZE, BATCH_SIZE).map(_.toSeq).toSeq
+ val cnt = new AtomicInteger(0)
+
+ for (batch <- batches)
+ U.asFuture(
+ _ => {
+ val post = new HttpPost(mkUrl)
+
+ post.setHeader("Content-Type", "application/json")
+ post.setEntity(
+ new StringEntity(
+ GSON.toJson(
+ RestRequest(
+ sentences = batch.map(p => RestRequestSentence(p.words.mkString(" "), p.index)).asJava,
+ minScore = 0,
+ limit = MAX_LIMIT
+ )
+ ),
+ "UTF-8"
+ )
+ )
+
+ val resps = request(cli, post)
+
+ require(batch.size == resps.size, s"Batch: ${batch.size}, responses: ${resps.size}")
+
+ data.addAll(batch.zip(resps).map { case (req, resp) => Result(req, resp) }.asJava )
+
+ if (cnt.incrementAndGet() == batches.size) {
+ val min = minScoreOpt.getOrElse(DFLT_MIN_SCORE)
+
+ promise.success(
+ data.asScala.groupBy(_.request).map {
+ case (req, ress) =>
+ req -> ress.flatMap(_.suggestions.filter(_.score >= min).toSeq).sortBy(-_.score)
+ }
+ )
+ }
+ ()
+ },
+ (e: Throwable) => {
+ U.prettyError(logger, "Unexpected error:", e)
+
+ promise.failure(e)
+
+ },
+ (_: Unit) => ()
+ )
+
+ promise.future
+ }
+
+ /**
*
* @param parent Optional parent span.
* @return
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionRequest.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionRequest.scala
new file mode 100644
index 0000000..108a5f8
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionRequest.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.server.sugsyn
+
+/**
+ * Word suggestion request.
+ * @param words Sentence words.
+ * @param index Index of the target word within `words`.
+ */
+case class NCSuggestionRequest(words: Seq[String], index: Int) {
+ require(index >= 0 && index < words.length)
+
+ override def toString: String =
+ s"Request: ${words.zipWithIndex.map { case (w, i) => if (i != index) w else s"<$w>" }.mkString(" ")}"
+}
+
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala
new file mode 100644
index 0000000..a09b2ca
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.server.sugsyn
+
+/**
+ * Word suggestion.
+ * @param word Suggested word.
+ * @param score Suggestion score (higher is better).
+ */
+case class NCWordSuggestion(word: String, score: Double)
\ No newline at end of file
diff --git a/nlpcraft/src/test/resources/log4j2.xml b/nlpcraft/src/test/resources/log4j2.xml
index d9a627b..44590c3 100644
--- a/nlpcraft/src/test/resources/log4j2.xml
+++ b/nlpcraft/src/test/resources/log4j2.xml
@@ -36,7 +36,7 @@
<AppenderRef ref="stdout"/>
<AppenderRef ref="stderr"/>
</Root>
- <Logger name="org.apache.nlpcraft" level="INFO" additivity="false">
+ <Logger name="org.apache.nlpcraft" level="${env:NLPCRAFT_LOG_LEVEL:-INFO}" additivity="false">
<AppenderRef ref="stdout"/>
<AppenderRef ref="stderr"/>
</Logger>
diff --git a/nlpcraft/src/test/resources/org/apache/nlpcraft/model/ctxword/lightswitch_model2.yaml b/nlpcraft/src/test/resources/org/apache/nlpcraft/model/ctxword/lightswitch_model2.yaml
new file mode 100644
index 0000000..3b4e07b
--- /dev/null
+++ b/nlpcraft/src/test/resources/org/apache/nlpcraft/model/ctxword/lightswitch_model2.yaml
@@ -0,0 +1,92 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+id: "nlpcraft.lightswitch.ex2"
+name: "Light Switch Example Model 2"
+version: "1.0"
+description: "NLI-powered light switch example model 2."
+macros:
+ - name: "<ACTION>"
+ macro: "{turn|switch|dial|let|set|get|put}"
+ - name: "<KILL>"
+ macro: "{shut|kill|stop|eliminate}"
+enabledBuiltInTokens: [] # This example doesn't use any built-in tokens.
+permutateSynonyms: true
+abstractTokens:
+ - "ls:part:place"
+ - "ls:part:placeFloor"
+ - "ls:part:placeType"
+ - "ls:part:light"
+sparse: true
+elements:
+ - id: "ls:part:place"
+ description: "Abstract element. Used for top level element `ls:loc`"
+ # TODO: Value set for examples set.
+ categoryConfidence: 0.65
+ values:
+ - name: "room"
+ - name: "bedroom"
+
+ # To simplify the example, a concrete floor type can be recognized by these synonym words.
+ - id: "ls:part:placeFloor"
+ description: "Abstract element. Used for top level element `ls:loc`"
+ synonyms:
+ - "{upstairs|downstairs|{1st|first|2nd|second|3rd|third|4th|5th|top|ground} floor|_}"
+
+ # To simplify the example, a concrete place type can be recognized by these synonym words.
+ - id: "ls:part:placeType"
+ description: "Abstract element. Used for top level element `ls:loc`"
+ synonyms:
+ - "{dinning|laundry|play|master|kid|children|child|guest}"
+
+ - id: "ls:part:light"
+ description: "Abstract element. Used for top level elements `ls:on` and `ls:of`"
+ synonyms:
+ - "{light|illumination|lamp|lamplight}"
+
+ - id: "ls:loc"
+ description: "Top level element. Used in intents.`"
+ synonyms:
+ # Parts can be extracted from `ls:loc` to specify a certain location point.
+ # Part `ls:part:place` is mandatory.
+ # Parts `ls:part:placeFloor` and `ls:part:placeType` are optional.
+ - "{^^{tok_id() == 'ls:part:placeFloor'}^^|_} ^^{tok_id() == 'ls:part:place'}^^ {^^{tok_id() == 'ls:part:placeType'}^^|_}"
+ - "{^^{tok_id() == 'ls:part:placeFloor'}^^|_} {^^{tok_id() == 'ls:part:placeType'}^^|_} ^^{tok_id() == 'ls:part:place'}^^"
+
+ - id: "ls:on"
+ groups:
+ - "act"
+ description: "Light switch ON action.`"
+ synonyms:
+ # Its parts help to catch this element; afterwards they can be ignored.
+ - "<ACTION> {on|up|_} ^^{tok_id() == 'ls:part:light'}^^ {on|up|_}"
+ - "^^{tok_id() == 'ls:part:light'}^^ {on|up}"
+
+ - id: "ls:off"
+ groups:
+ - "act"
+ description: "Light switch OFF action.`"
+ synonyms:
+ # Its parts help to catch this element; afterwards they can be ignored.
+ - "<ACTION> ^^{tok_id() == 'ls:part:light'}^^ {off|out}"
+ - "{<ACTION>|<KILL>} {off|out} ^^{tok_id() == 'ls:part:light'}^^"
+ - "<KILL> ^^{tok_id() == 'ls:part:light'}^^"
+ - "^^{tok_id() == 'ls:part:light'}^^ <KILL>"
+ - "no ^^{tok_id() == 'ls:part:light'}^^"
+
+intents:
+ - "intent=ls term(act)={has(tok_groups(), 'act')} term(loc)={tok_id() == 'ls:loc'}"
\ No newline at end of file
diff --git a/nlpcraft/src/test/resources/samples.txt b/nlpcraft/src/test/resources/org/apache/nlpcraft/model/samples.txt
similarity index 100%
rename from nlpcraft/src/test/resources/samples.txt
rename to nlpcraft/src/test/resources/org/apache/nlpcraft/model/samples.txt
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
index 2efbbd2..5ac2f64 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
@@ -39,7 +39,8 @@
@NCIntentSample(Array("unknown", "unknown"))
private def onX1(ctx: NCIntentMatch): NCResult = "OK"
- @NCIntentSampleRef("samples.txt")
+ // Look at resources folder.
+ @NCIntentSampleRef("org/apache/nlpcraft/model/samples.txt")
@NCIntent("intent=intent2 term~{tok_id()=='x2'}")
private def onX2(ctx: NCIntentMatch): NCResult = "OK"
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
new file mode 100644
index 0000000..e9f28f2
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.ctxword
+
+import org.apache.nlpcraft.model.{NCContext, NCElement, NCIntent, NCIntentSample, NCModel, NCResult, NCValue}
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util.{Collections, Optional}
+import java.{lang, util}
+import scala.collection.mutable.ArrayBuffer
+import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava, SetHasAsJava}
+
+object NCContextWordSpecModel {
+ case class Value(name: String, syns: String*) extends NCValue {
+ override def getName: String = name
+ override def getSynonyms: util.List[String] = (Seq(name) ++ syns).asJava
+ }
+
+ case class Element(id: String, level: Double, values: NCValue*) extends NCElement {
+ override def getId: String = id
+ override def getValues: util.List[NCValue] = values.asJava
+ override def getGroups: util.List[String] = Collections.singletonList("testGroup")
+ override def getCategoryConfidence: Optional[lang.Double] = Optional.of(level)
+ }
+
+ var expected: String = _
+}
+
+import org.apache.nlpcraft.model.ctxword.NCContextWordSpecModel._
+
+class NCContextWordSpecModel extends NCModel {
+ override def getId: String = this.getClass.getSimpleName
+ override def getName: String = this.getClass.getSimpleName
+ override def getVersion: String = "1.0.0"
+
+ // Empirically detected confidence for the given model and requests.
+ val MDL_LEVEL: java.lang.Double = 0.68
+
+ @NCIntentSample(
+ Array(
+ "I like drive my new BMW",
+ "BMW has the best engine",
+ "Luxury cars like Mercedes and BMW are prime targets",
+ "BMW will install side air bags up front",
+ "I want to change BMW engine",
+ "I want to try BMW driver dynamics",
+ "BMW has excellent driver protection",
+ "BMW pricing are going up",
+ "BMW drivers have the highest loyalty",
+
+ "A wild cat is very dangerous",
+ "A fox eat hens",
+ "The fox was already in your chicken house",
+
+ "What is the local temperature?",
+ "This is the first day of heavy rain",
+ "It is the beautiful day, the sun is shining"
+ )
+ )
+ @NCIntent("intent=i term(t)={false}")
+ def x(): NCResult = NCResult.text("OK")
+
+ override def getElements: util.Set[NCElement] =
+ Set(
+ Element("class:cars", MDL_LEVEL, Value("BMW")),
+ Element("class:animal", MDL_LEVEL, Value("fox"), Value("cat", "tomcat")),
+ Element("class:weather", MDL_LEVEL, Value("temperature"), Value("rain"), Value("sun"))
+ ).map(p => {
+ val e: NCElement = p
+
+ e
+ }).asJava
+
+ override def onContext(ctx: NCContext): NCResult = {
+ val varRes = ArrayBuffer.empty[String]
+
+ require(ctx.getVariants.size() == 1)
+
+ val v = ctx.getVariants.asScala.head
+
+ val testGroupToks = v.asScala.toSeq.filter(_.getGroups.contains("testGroup"))
+
+ val elemIds = testGroupToks.map(_.getId).distinct.mkString(" ")
+ val words = testGroupToks.map(_.getOriginalText).mkString(" ")
+
+ val res =
+ if (NCContextWordSpecModel.expected == s"$elemIds $words")
+ "OK"
+ else
+ s"ERROR: variant '${NCContextWordSpecModel.expected}' not found. Found: ${varRes.mkString(", ")}"
+
+ NCResult.text(res)
+ }
+
+ override def getEnabledBuiltInTokens: util.Set[String] = Collections.emptySet()
+}
+
+/**
+ * @see NCContextWordSpecModel
+ */
+@NCTestEnvironment(model = classOf[NCContextWordSpecModel], startClient = true)
+class NCContextWordSpec extends NCTestContext {
+ private def checkSingleVariant(txt: String, elemId: String, words: String*): Unit = {
+ NCContextWordSpecModel.expected = s"$elemId ${words.mkString(" ")}"
+
+ val res = getClient.ask(txt).getResult.get()
+
+ require(res == "OK", s"Unexpected: $res")
+ }
+
+ @Test
+ private[ctxword] def test(): Unit = {
+ checkSingleVariant("I want to have dogs and foxes", "class:animal", "dogs", "foxes")
+ checkSingleVariant("I bought dog's meat", "class:animal", "dog")
+ checkSingleVariant("I bought meat dog's", "class:animal", "dog")
+
+ checkSingleVariant("I want to have a dog and fox", "class:animal", "dog", "fox")
+ checkSingleVariant("I fed your fish", "class:animal", "fish")
+
+ checkSingleVariant("I like to drive my Porsche and Volkswagen", "class:cars", "Porsche", "Volkswagen")
+ checkSingleVariant("Peugeot added motorcycles to its range year ago", "class:cars", "Peugeot")
+
+ checkSingleVariant("The frost is possible today", "class:weather", "frost")
+ checkSingleVariant("There's a very strong wind from the east now", "class:weather", "wind")
+ }
+}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
new file mode 100644
index 0000000..57b41e3
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.ctxword
+
+import org.apache.nlpcraft.model.{NCContext, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+/**
+ * Test model.
+ */
class NCContextWordSpecModel2 extends NCContextWordSpecModel {
    // Any request is accepted; results are reviewed manually via the logs.
    override def onContext(ctx: NCContext): NCResult = NCResult.text("OK")

    // NOTE(review): per the spec below this sets the initial confidence to zero — confirm MDL_LEVEL semantics.
    override val MDL_LEVEL = 0
}
+
+/**
+ * Run this test only to manually review all categories found for the given model.
+ * Note that the initial confidence is set to zero.
+ */
@NCTestEnvironment(model = classOf[NCContextWordSpecModel2], startClient = true)
class NCContextWordSpec2 extends NCTestContext {
    // Same sentences as in NCContextWordSpec; here results are only logged, not asserted.
    private final val SENTENCES = Seq(
        // Animal category.
        "I want to have dogs and foxes",
        "I bought dog's meat",
        "I bought meat dog's",
        "I want to have a dog and fox",
        "I fed your fish",
        // Cars category.
        "I like to drive my Porsche and Volkswagen",
        "Peugeot added motorcycles to its range year ago",
        // Weather category.
        "The frost is possible today",
        "There's a very strong wind from the east now"
    )

    @Test
    private[ctxword] def test(): Unit = SENTENCES.foreach(getClient.ask)
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCLightSwitchScalaModel2Spec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCLightSwitchScalaModel2Spec.scala
new file mode 100644
index 0000000..57b076c
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCLightSwitchScalaModel2Spec.scala
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.ctxword
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import com.fasterxml.jackson.module.scala.DefaultScalaModule
+import org.apache.nlpcraft.model.tools.test.NCTestAutoModelValidator
+import org.apache.nlpcraft.model.{NCIntentRef, NCIntentSample, NCIntentTerm, NCModelFileAdapter, NCResult, NCToken}
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.{Assertions, Test}
+
+import scala.jdk.CollectionConverters.ListHasAsScala
+
object NCContextWordSpecModel3Data {
    // Shared JSON mapper, configured once to (de)serialize Scala case classes.
    final val MAPPER: ObjectMapper = {
        val m = new ObjectMapper()

        m.registerModule(DefaultScalaModule)

        m
    }
}
+
// JSON-serializable payload produced by the `NCLightSwitchScalaModel2` intent callback
// and parsed back by the spec for comparison.
case class NCContextWordSpecModel3Data(
    action: String,                      // "on" or "off".
    place: String,                       // Detected place word, lower-cased.
    placeType: Option[String] = None,    // Optional place qualifier (e.g. "guest"), absent when not detected.
    placeFloor: Option[String] = None,   // Optional floor qualifier (e.g. "upstairs"), absent when not detected.
    placeConfidence: java.lang.Double = 0 // Detection confidence, read from the place token's metadata.
)
+
+import org.apache.nlpcraft.model.ctxword.NCContextWordSpecModel3Data._
+
+/**
+ * Test model.
+ */
class NCLightSwitchScalaModel2 extends NCModelFileAdapter("org/apache/nlpcraft/model/ctxword/lightswitch_model2.yaml") {
    @NCIntentRef("ls")
    @NCIntentSample(Array(
        "Turn the lights off in the room.",
        "Set the lights on in in the room.",
        "Lights up in the kitchen.",
        "Please, put the light out in the upstairs bedroom.",
        "Turn the lights off in the guest bedroom.",
        "No lights in the first floor guest washroom, please.",
        "Light up the garage, please!",
        "Kill the illumination now second floor kid closet!"
    ))
    def onMatch(@NCIntentTerm("act") actTok: NCToken, @NCIntentTerm("loc") locTok: NCToken): NCResult = {
        // Single lookup shared by the mandatory and optional accessors below
        // (the original duplicated this `find` in both of them).
        def findPart(id: String): Option[NCToken] = locTok.getPartTokens.asScala.find(_.getId == id)
        // Mandatory part token; a missing token fails the test immediately.
        def getPart(id: String): NCToken =
            findPart(id).getOrElse(throw new AssertionError(s"Token not found: $id"))
        // Optional part token's original text, lower-cased; None when not detected.
        def getPartTextOpt(id: String): Option[String] = findPart(id).map(_.getOriginalText.toLowerCase)

        val place = getPart("ls:part:place")
        // Detection confidence stored in the place token's metadata.
        val conf: Double = place.meta("ls:part:place:confidence")

        // Serialize the match into JSON so the spec can deserialize and compare it.
        NCResult.json(
            MAPPER.writeValueAsString(
                NCContextWordSpecModel3Data(
                    action = if (actTok.getId == "ls:on") "on" else "off",
                    place = place.getOriginalText.toLowerCase,
                    placeConfidence = conf,
                    placeType = getPartTextOpt("ls:part:placeType"),
                    placeFloor = getPartTextOpt("ls:part:placeFloor")
                )
            )
        )
    }
}
+
+/**
+ * Verifies samples set.
+ */
class NCLightSwitchScalaModel2SpecSamples {
    /** Validates the model's `@NCIntentSample` sentences via the standard auto-validator. */
    @Test
    private[ctxword] def testSamplesStandard(): Unit = {
        System.setProperty("NLPCRAFT_TEST_MODELS", classOf[NCLightSwitchScalaModel2].getName)

        val valid = NCTestAutoModelValidator.isValid()

        Assertions.assertTrue(valid, "See error logs above.")
    }
}
+
+/**
+ * Extra values set.
+ */
@NCTestEnvironment(model = classOf[NCLightSwitchScalaModel2], startClient = true)
class NCLightSwitchScalaModel2Spec extends NCTestContext {
    import org.apache.nlpcraft.model.ctxword.{NCContextWordSpecModel3Data => R}

    /**
     * Asks each sentence and compares the deserialized JSON result with the expected payload.
     * Confidence is deliberately excluded from the comparison and only reported on success.
     *
     * @param testsData Pairs of sentence and expected result.
     */
    private def check(testsData: (String, NCContextWordSpecModel3Data)*): Unit = {
        val failures = collection.mutable.HashMap.empty[String, String]
        val passed = collection.mutable.ArrayBuffer.empty[String]

        for ((txt, expected) <- testsData) {
            val res = getClient.ask(txt)

            if (!res.isOk)
                failures += txt -> res.getResultError.get()
            else {
                val actual = MAPPER.readValue(res.getResult.get(), classOf[R])

                // Comparison key without the confidence field.
                def getMainData(d: NCContextWordSpecModel3Data): String =
                    s"Main [action=${d.action}, place=${d.place}, placeType=${d.placeType}, placeFloor=${d.placeFloor}]"

                val actualData = getMainData(actual)
                val expData = getMainData(expected)

                if (expData != actualData)
                    failures += txt -> s"Expected: $expData, actual: $actualData"
                else
                    passed += s"`$txt` processed ok with detected place `${actual.place}` and confidence `${actual.placeConfidence}`."
            }
        }

        println(s"Test passed: ${passed.size}")
        println(s"Test errors: ${failures.size}")

        passed.foreach(println)

        if (failures.nonEmpty)
            throw new AssertionError(failures.mkString("\n"))
    }

    /**
     * `ls:part:place` declares two values: `room` and `bedroom`.
     * The samples also contain `kitchen`, `washroom`, `garage` and `closet`;
     * these words are detected with confidence < 1.
     */
    @Test
    def testSamplesDetailed(): Unit =
        check(
            "Turn the lights off in the room." ->
                R(action = "off", place = "room"),
            "Set the lights on in in the room." ->
                R(action = "on", place = "room"),
            "Lights up in the kitchen." ->
                R(action = "on", place = "kitchen"),
            "Please, put the light out in the upstairs bedroom." ->
                R(action = "off", place = "bedroom", placeFloor = Some("upstairs")),
            "Turn the lights off in the guest bedroom." ->
                R(action = "off", place = "bedroom", placeType = Some("guest")),
            "No lights in the first floor guest washroom, please." ->
                R(action = "off", place = "washroom", placeType = Some("guest"), placeFloor = Some("first floor")),
            "Light up the garage, please!" ->
                R(action = "on", place = "garage"),
            "Kill the illumination now second floor kid closet!" ->
                R(action = "off", place = "closet", placeType = Some("kid"), placeFloor = Some("second floor"))
        )

    /**
     * `ls:part:place` declares two values: `room` and `bedroom`.
     * This test uses `loft`, `hallway`, `library`, `chamber` and `office`,
     * which do not appear in the samples; they are detected with confidence < 1.
     */
    @Test
    def testSynonymsSameCategory(): Unit =
        check(
            "Turn the lights off in the loft." ->
                R(action = "off", place = "loft"),
            "Set the lights on in in the loft." ->
                R(action = "on", place = "loft"),
            "Lights up in the hallway." ->
                R(action = "on", place = "hallway"),
            "Please, put the light out in the upstairs library." ->
                R(action = "off", place = "library", placeFloor = Some("upstairs")),
            "Turn the lights off in the guest office." ->
                R(action = "off", place = "office", placeType = Some("guest")),
            "No lights in the first floor guest chamber, please." ->
                R(action = "off", place = "chamber", placeType = Some("guest"), placeFloor = Some("first floor")),
            "Light up the office, please!" ->
                R(action = "on", place = "office"),
            "Kill the illumination now second floor kid chamber!" ->
                R(action = "off", place = "chamber", placeType = Some("kid"), placeFloor = Some("second floor"))
        )
}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index f06819c..0a459a1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -156,6 +156,7 @@
<lightstep.grpc.ver>0.15.8</lightstep.grpc.ver>
<junit.ver>5.5.1</junit.ver>
<jsonpath.ver>2.4.0</jsonpath.ver>
+ <jibx.tools.ver>1.3.3</jibx.tools.ver>
<!-- Force specific encoding on text resources. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -476,6 +477,12 @@
</dependency>
<dependency>
+ <groupId>org.jibx</groupId>
+ <artifactId>jibx-tools</artifactId>
+ <version>${jibx.tools.ver}</version>
+ </dependency>
+
+ <dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>${stanford.corenlp.ver}</version>