Merge branch 'master' into NLPCRAFT-376

commit: 3a2c0081cac55529969d29b15428a11c8c1bef64 [log] [tgz]
author: Aaron Radzinski <aradzinski@datalingvo.com> Thu Jul 29 12:35:54 2021 -0700
committer: Aaron Radzinski <aradzinski@datalingvo.com> Thu Jul 29 12:35:54 2021 -0700
tree: 218be07f64d81c1d18670e59380cfafc6ba6c621
parent: ac726a5499321206c6dfad3bb861602e012ac3fc [diff]
parent: 64ab9ae208c7acccd3f063c29fedfffc39931272 [diff]
diff --git a/nlpcraft-examples/sql/src/main/java/org/apache/nlpcraft/examples/sql/SqlModel.scala b/nlpcraft-examples/sql/src/main/java/org/apache/nlpcraft/examples/sql/SqlModel.scala
index 181ae8b..2163a75 100644
--- a/nlpcraft-examples/sql/src/main/java/org/apache/nlpcraft/examples/sql/SqlModel.scala
+++ b/nlpcraft-examples/sql/src/main/java/org/apache/nlpcraft/examples/sql/SqlModel.scala

@@ -400,7 +400,6 @@
       * cleared between user questions, except for the obvious clarifying questions. We assume that question is being
       * clarified if its tokens satisfy one of criteria:
       *  - all these tokens are values (What about 'Exotic Liquids')
-      *  - all these tokens are columns (Give me 'last name')
       *  - new token is single date token (What about 'tomorrow')
       *  <p>
       *  If new sentence tokens satisfied any of these criteria,
@@ -418,10 +417,9 @@
                 case Some(col) => col.getValue != null
                 case None => false
             }
-            def isColumn(t: NCToken): Boolean = findAnyColumnTokenOpt(t).isDefined
             def isDate(t: NCToken): Boolean = t.getId == "nlpcraft:date"
 
-            val ok = toks.forall(isValue) || toks.forall(isColumn) || toks.size == 1 && isDate(toks.head)
+            val ok = toks.forall(isValue) || toks.size == 1 && isDate(toks.head)
 
             if (!ok) {
                 m.getContext.getConversation.clearStm(_ => true)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala
index e16e8a5..983c36c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala

@@ -28,9 +28,9 @@
 import org.apache.nlpcraft.model.{NCContext, NCIntentMatch, NCIntentSkip, NCModel, NCRejection, NCResult, NCToken, NCVariant}
 import org.apache.nlpcraft.probe.mgrs.dialogflow.NCDialogFlowManager
 
-import java.util.{List => JList}
-
-import scala.jdk.CollectionConverters.SeqHasAsJava
+import java.util.{Collections, List => JList}
+import scala.collection.mutable
+import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava}
 
 /**
  * Front-end for intent solver.
@@ -95,10 +95,21 @@
         for (res <- results if res != null) {
             try {
                 i += 1
-                
+
+                val allConvToks = ctx.getConversation.getTokens.asScala
+                val vrntNotConvToks = res.variant.tokens.asScala.filterNot(allConvToks.contains)
+
+                val intentToks =
+                    res.groups.map(_.tokens).map(toks => {
+                        toks.filter(allConvToks.contains).foreach(convTok =>
+                            fixBuiltTokenMeta(convTok, vrntNotConvToks, allConvToks))
+
+                        toks.asJava
+                    }).asJava
+
                 val intentMatch: NCIntentMatch = new NCMetadataAdapter with NCIntentMatch {
                     override val getContext: NCContext = ctx
-                    override val getIntentTokens: JList[JList[NCToken]] = res.groups.map(_.tokens.asJava).asJava
+                    override val getIntentTokens: JList[JList[NCToken]] = intentToks
                     override val getVariant: NCVariant = new NCVariantImpl(res.variant.tokens)
                     override val getIntentId: String = res.intentId
                     override def getTermTokens(idx: Int): JList[NCToken] = res.groups(idx).tokens.asJava
@@ -161,4 +172,150 @@
         
         throw new NCRejection("No matching intent found - all intents were skipped.")
     }
+
+    /**
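+      * Fixes the reference metadata of a sort, limit or relation token taken from the conversation.
+      * @param convTok4Fix Conversation token whose reference metadata is updated in place.
+      * @param vrntNotConvToks Tokens of the current variant that are not part of the conversation.
+      * @param allConvToks All tokens currently held in the conversation.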
+      */
+    @throws[NCE]
+    @throws[NCIntentSkip]
+    private def fixBuiltTokenMeta(convTok4Fix: NCToken, vrntNotConvToks: Seq[NCToken], allConvToks: Seq[NCToken]): Unit = {
+        def isReference(tok: NCToken, id: String, idx: Int): Boolean = tok.getId == id && tok.getIndex == idx
+
+        /**
+          * Gets new reference candidates.
+          *
+          * 1. It finds the references in the conversation. They must be there because they were not found among the non-conversation tokens.
+          * 2. Next, it finds a group common to all found conversation references; such a group must exist as well.
+          * 3. Next, for the found group, it tries to find actual tokens with this group among the non-conversation tokens.
+          * If such non-conversation tokens are found, they are validated and returned.
+          * If none are found, the conversation tokens are returned.
+
+          * @param refId Reference token ID.
+          * @param refIdxs Reference indexes.
+          * @param validate Validate predicate.
+          * @throws NCE If the sentence is invalid (internal error).
+          * @throws NCIntentSkip If an invalid variant is being processed and it should be skipped.
+          */
+        @throws[NCE]
+        @throws[NCIntentSkip]
+        def getForRecalc(refId: String, refIdxs: Seq[Int], validate: Seq[NCToken] => Boolean): Seq[NCToken] = {
+            val convRefs = allConvToks.filter(_.getId == refId)
+
+            if (convRefs.map(_.getIndex).sorted != refIdxs.sorted)
+                throw new NCE(s"Conversation references are not found [id=$refId, indexes=${refIdxs.mkString(", ")}]")
+
+            val convGs = convRefs.map(_.getGroups.asScala)
+            val commonConvGs = convGs.foldLeft(convGs.head)((g1, g2) => g1.intersect(g2))
+
+            if (commonConvGs.isEmpty)
+                throw new NCE(s"Conversation references don't have common group [id=$refId]")
+
+            val actNonConvRefs = vrntNotConvToks.filter(_.getGroups.asScala.exists(commonConvGs.contains))
+
+            if (actNonConvRefs.nonEmpty) {
+                if (!validate(actNonConvRefs))
+                    throw new NCIntentSkip(
+                        s"Actual valid variant references are not found for recalculation [" +
+                            s"id=$refId, " +
+                            s"actualNonConvRefs=${actNonConvRefs.mkString(",")}" +
+                            s"]"
+                    )
+
+                actNonConvRefs
+            }
+            else
+                convRefs
+        }
+
+        convTok4Fix.getId match {
+            case "nlpcraft:sort" =>
+                def getNotNullSeq[T](tok: NCToken, name: String): Seq[T] = {
+                    val list = tok.meta[JList[T]](name)
+
+                    if (list == null) Seq.empty else list.asScala
+                }
+
+                def process(notesName: String, idxsName: String): Unit = {
+                    val refIds: Seq[String] = getNotNullSeq(convTok4Fix, s"nlpcraft:sort:$notesName")
+                    val refIdxs: Seq[Int] = getNotNullSeq(convTok4Fix, s"nlpcraft:sort:$idxsName")
+
+                    require(refIds.length == refIdxs.length)
+
+                    // Can be empty section.
+                    if (refIds.nonEmpty) {
+                        var data = mutable.ArrayBuffer.empty[(String, Int)]
+                        val notFound = mutable.ArrayBuffer.empty[(String, Int)]
+
+                        // Sort references can be different types.
+                        // Part of them can be in conversation, part of them - in actual variant.
+                        refIds.zip(refIdxs).map { case (refId, refIdx) =>
+                            val seq =
+                                vrntNotConvToks.find(isReference(_, refId, refIdx)) match {
+                                    case Some(_) => data
+                                    case None => notFound
+                                }
+
+                            seq += refId -> refIdx
+                        }
+
+                        if (notFound.nonEmpty) {
+                            notFound.
+                                groupBy { case (refId, _) => refId }.
+                                map { case (refId, data) =>  refId -> data.map(_._2) }.
+                                foreach { case (refId, refIdxs) =>
+                                    getForRecalc(refId, refIdxs, _.size == refIdxs.size).
+                                        foreach(t => data += t.getId -> t.getIndex)
+                                }
+
+                            data = data.sortBy(_._2)
+
+                            convTok4Fix.getMetadata.put(s"nlpcraft:sort:$notesName", data.map(_._1).asJava)
+                            convTok4Fix.getMetadata.put(s"nlpcraft:sort:$idxsName", data.map(_._2).asJava)
+                        }
+                    }
+                }
+
+                process("bynotes", "byindexes")
+                process("subjnotes", "subjindexes")
+            case "nlpcraft:limit" =>
+                val refId = convTok4Fix.meta[String]("nlpcraft:limit:note")
+                val refIdxs = convTok4Fix.meta[JList[Int]]("nlpcraft:limit:indexes").asScala
+
+                require(refIdxs.size == 1)
+
+                val refIdx = refIdxs.head
+
+                if (!vrntNotConvToks.exists(isReference(_, refId, refIdx))) {
+                    val newRef = getForRecalc(refId, Seq(refIdx), _.size == 1).head
+
+                    convTok4Fix.getMetadata.put(s"nlpcraft:limit:note", newRef.getId)
+                    convTok4Fix.getMetadata.put(s"nlpcraft:limit:indexes", Collections.singletonList(newRef.getIndex))
+                }
+
+            case "nlpcraft:relation" =>
+                val refId = convTok4Fix.meta[String]("nlpcraft:relation:note")
+                val refIdxs = convTok4Fix.meta[JList[Int]]("nlpcraft:relation:indexes").asScala.sorted
+
+                val nonConvRefs = vrntNotConvToks.filter(t => t.getId == refId && refIdxs.contains(t.getIndex))
+
+                if (nonConvRefs.nonEmpty && nonConvRefs.size != refIdxs.size)
+                    throw new NCE(s"References are not found [id=$refId, indexes=${refIdxs.mkString(", ")}]")
+
+                if (nonConvRefs.isEmpty) {
+                    val newRefs = getForRecalc(refId, refIdxs, _.size == refIdxs.size)
+                    val newRefsIds = newRefs.map(_.getId).distinct
+
+                    if (newRefsIds.size != 1)
+                        throw new NCE(s"Valid variant references are not found [id=$refId]")
+
+                    convTok4Fix.getMetadata.put(s"nlpcraft:relation:note", newRefsIds.head)
+                    convTok4Fix.getMetadata.put(s"nlpcraft:relation:indexes", newRefs.map(_.getIndex).asJava)
+                }
+
+            case _ => // No-op.
+        }
+    }
 }

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 965d80d..286c8b4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala

@@ -377,19 +377,21 @@
 
                                     case TYPE_SUBJ_BY =>
                                         require(seq1.nonEmpty)
-                                        require(seq2.nonEmpty)
                                         require(sortToks.nonEmpty)
                                         require(byToks.nonEmpty)
 
-                                        res = Some(
-                                            Match(
-                                                asc = asc,
-                                                main = sortToks,
-                                                stop = byToks ++ orderToks,
-                                                subjSeq = seq1,
-                                                bySeq = seq2
+                                        if (seq2.isEmpty)
+                                            res = None
+                                        else
+                                            res = Some(
+                                                Match(
+                                                    asc = asc,
+                                                    main = sortToks,
+                                                    stop = byToks ++ orderToks,
+                                                    subjSeq = seq1,
+                                                    bySeq = seq2
+                                                )
                                             )
-                                        )
 
                                     case TYPE_BY =>
                                         require(seq1.nonEmpty)

diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestContext.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestContext.scala
index 74e0345..0201315 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestContext.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestContext.scala

@@ -150,6 +150,21 @@
         assertEquals(expResp, res.getResult.get)
     }
 
+    /**
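+      * Sends the request and asserts that the value extracted from a successful result equals the expected one.
+      * @param req Request text to ask.
+      * @param resExtractor Converts the raw result string into the value to compare.
+      * @param expResp Expected extracted value.
+      * @tparam T Type of the extracted value.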
+      */
+    protected def checkResult[T](req: String, resExtractor: String => T, expResp: T): Unit = {
+        val res = getClient.ask(req)
+
+        assertTrue(res.isOk, s"Unexpected result, error=${res.getResultError.orElse(null)}")
+        assertTrue(res.getResult.isPresent)
+        assertEquals(expResp, resExtractor(res.getResult.get))
+    }
+
     final protected def getClient: NCTestClient = {
         if (cli == null)
             throw new IllegalStateException("Client is not started.")

diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCLimitSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCLimitSpec.scala
new file mode 100644
index 0000000..9e1cb8f
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCLimitSpec.scala

@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.models.stm.indexes
+
+import org.apache.nlpcraft.model.{NCIntent, NCIntentMatch, NCResult, _}
+import org.apache.nlpcraft.models.stm.indexes.NCSpecModelAdapter.mapper
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util.{List => JList}
+import scala.jdk.CollectionConverters.ListHasAsScala
+import scala.language.implicitConversions
+
+case class NCLimitSpecModelData(intentId: String, note: String, indexes: Seq[Int])
+
+class NCLimitSpecModel extends NCSpecModelAdapter {
+    private def mkResult(intentId: String, limit: NCToken) =
+        NCResult.json(
+            mapper.writeValueAsString(
+                NCLimitSpecModelData(
+                    intentId = intentId,
+                    note = limit.meta[String]("nlpcraft:limit:note"),
+                    indexes = limit.meta[JList[Int]]("nlpcraft:limit:indexes").asScala.toSeq
+                )
+            )
+        )
+
+    @NCIntent(
+        "intent=limit1 " +
+        "term(limit)~{tok_id() == 'nlpcraft:limit'} " +
+        "term(elem)~{has(tok_groups(), 'G1')}"
+    )
+    private def onLimit1(ctx: NCIntentMatch, @NCIntentTerm("limit") limit: NCToken): NCResult =
+        mkResult(intentId = "limit1", limit = limit)
+
+    // `x` is mandatory (difference with `limit3`)
+    @NCIntent(
+        "intent=limit2 " +
+        "term(x)={tok_id() == 'X'} " +
+        "term(limit)~{tok_id() == 'nlpcraft:limit'} " +
+        "term(elem)~{has(tok_groups(), 'G1')}"
+    )
+    private def onLimit2(ctx: NCIntentMatch, @NCIntentTerm("limit") limit: NCToken): NCResult =
+        mkResult(intentId = "limit2", limit = limit)
+
+    // `y` is optional (difference with `limit2`)
+    @NCIntent(
+        "intent=limit3 " +
+            "term(y)~{tok_id() == 'Y'} " +
+            "term(limit)~{tok_id() == 'nlpcraft:limit'} " +
+            "term(elem)~{has(tok_groups(), 'G1')}"
+    )
+    private def onLimit3(ctx: NCIntentMatch, @NCIntentTerm("limit") limit: NCToken): NCResult =
+        mkResult(intentId = "limit3", limit = limit)
+}
+
+@NCTestEnvironment(model = classOf[NCLimitSpecModel], startClient = true)
+class NCLimitSpec extends NCTestContext {
+    private def extract(s: String): NCLimitSpecModelData = mapper.readValue(s, classOf[NCLimitSpecModelData])
+
+    @Test
+    private[stm] def test1(): Unit = {
+        checkResult(
+            "top 23 a a",
+            extract,
+            // Reference to variant.
+            NCLimitSpecModelData(intentId = "limit1", note = "A2", indexes = Seq(1))
+        )
+        checkResult(
+            "test test b b",
+            extract,
+            // Reference to recalculated variant (new changed indexes).
+            NCLimitSpecModelData(intentId = "limit1", note = "B2", indexes = Seq(2))
+        )
+    }
+
+    @Test
+    private[stm] def test2(): Unit = {
+        checkResult(
+            "x test top 23 a a",
+            extract,
+            // Reference to variant.
+            NCLimitSpecModelData(intentId = "limit2", note = "A2", indexes = Seq(3))
+        )
+        checkResult(
+            "test x",
+            extract,
+            // Reference to conversation (tokens with these IDs and indexes can be found in the conversation).
+            NCLimitSpecModelData(intentId = "limit2", note = "A2", indexes = Seq(3))
+        )
+    }
+
+    @Test
+    private[stm] def test3(): Unit = {
+        checkResult(
+            "y test top 23 a a",
+            extract,
+            // Reference to variant.
+            NCLimitSpecModelData(intentId = "limit3", note = "A2", indexes = Seq(3))
+        )
+        checkResult(
+            "test y",
+            extract,
+            // Reference to conversation (tokens with these IDs and indexes can be found in the conversation).
+            NCLimitSpecModelData(intentId = "limit3", note = "A2", indexes = Seq(3))
+        )
+    }
+}
\ No newline at end of file

diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCRelationSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCRelationSpec.scala
new file mode 100644
index 0000000..4198c77
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCRelationSpec.scala

@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.models.stm.indexes
+
+import org.apache.nlpcraft.model.{NCIntent, NCIntentMatch, NCResult, _}
+import org.apache.nlpcraft.models.stm.indexes.NCSpecModelAdapter.mapper
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util.{List => JList}
+import scala.jdk.CollectionConverters.ListHasAsScala
+import scala.language.implicitConversions
+
+case class NCRelationSpecModelData(intentId: String, note: String, indexes: Seq[Int])
+
+class NCRelationSpecModel extends NCSpecModelAdapter {
+    private def mkResult(intentId: String, rel: NCToken) =
+        NCResult.json(
+            mapper.writeValueAsString(
+                NCRelationSpecModelData(
+                    intentId = intentId,
+                    note = rel.meta[String]("nlpcraft:relation:note"),
+                    indexes = rel.meta[JList[Int]]("nlpcraft:relation:indexes").asScala.toSeq
+                )
+            )
+        )
+
+    @NCIntent(
+        "intent=rel1 " +
+        "term(rel)~{tok_id() == 'nlpcraft:relation'} " +
+        "term(elem)~{has(tok_groups(), 'G1')}*"
+    )
+    private def onRelation1(ctx: NCIntentMatch, @NCIntentTerm("rel") rel: NCToken): NCResult =
+        mkResult(intentId = "rel1", rel = rel)
+
+    // `x` is mandatory (difference with `rel3`)
+    @NCIntent(
+        "intent=rel2 " +
+            "term(x)={tok_id() == 'X'} " +
+            "term(rel)~{tok_id() == 'nlpcraft:relation'} " +
+            "term(elem)~{has(tok_groups(), 'G1')}*"
+    )
+    private def onRelation2(ctx: NCIntentMatch, @NCIntentTerm("rel") rel: NCToken): NCResult =
+        mkResult(intentId = "rel2", rel = rel)
+
+    // `y` is optional (difference with `rel2`)
+    @NCIntent(
+        "intent=rel3 " +
+        "term(y)~{tok_id() == 'Y'} " +
+        "term(rel)~{tok_id() == 'nlpcraft:relation'} " +
+        "term(elem)~{has(tok_groups(), 'G1')}*"
+    )
+    private def onRelation3(ctx: NCIntentMatch, @NCIntentTerm("rel") rel: NCToken): NCResult =
+        mkResult(intentId = "rel3", rel = rel)
+}
+
+@NCTestEnvironment(model = classOf[NCRelationSpecModel], startClient = true)
+class NCRelationSpec extends NCTestContext {
+    private def extract(s: String): NCRelationSpecModelData = mapper.readValue(s, classOf[NCRelationSpecModelData])
+
+    @Test
+    private[stm] def test1(): Unit = {
+        checkResult(
+            "compare a a and a a",
+            extract,
+            // Reference to variant.
+            NCRelationSpecModelData(intentId = "rel1", note = "A2", indexes = Seq(1, 3))
+        )
+        checkResult(
+            "b b and b b",
+            extract,
+            // Reference to recalculated variant (new changed indexes).
+            NCRelationSpecModelData(intentId = "rel1", note = "B2", indexes = Seq(0, 2))
+        )
+    }
+
+    @Test
+    private[stm] def test2(): Unit = {
+        checkResult(
+            "x compare a a and a a",
+            extract,
+            // Reference to variant.
+            NCRelationSpecModelData(intentId = "rel2", note = "A2", indexes = Seq(2, 4))
+        )
+        checkResult(
+            "test x",
+            extract,
+            // Reference to conversation (tokens with these IDs and indexes can be found in the conversation).
+            NCRelationSpecModelData(intentId = "rel2", note = "A2", indexes = Seq(2, 4))
+        )
+    }
+
+    @Test
+    private[stm] def test3(): Unit = {
+        checkResult(
+            "y compare a a and a a",
+            extract,
+            // Reference to variant.
+            NCRelationSpecModelData(intentId = "rel3", note = "A2", indexes = Seq(2, 4))
+        )
+        checkResult(
+            "test y",
+            extract,
+            // Reference to conversation (tokens with these IDs and indexes can be found in the conversation).
+            NCRelationSpecModelData(intentId = "rel3", note = "A2", indexes = Seq(2, 4))
+        )
+    }
+}
\ No newline at end of file

diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCSortSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCSortSpec.scala
new file mode 100644
index 0000000..4658df6
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCSortSpec.scala

@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.models.stm.indexes
+
+import org.apache.nlpcraft.model.{NCIntent, NCIntentMatch, NCResult, _}
+import org.apache.nlpcraft.models.stm.indexes.NCSpecModelAdapter.mapper
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util.{List => JList}
+import scala.jdk.CollectionConverters.ListHasAsScala
+import scala.language.implicitConversions
+
+object NCSortSpecModelData {
+    private def nvl[T](list: JList[T]): Seq[T] = if (list == null) Seq.empty else list.asScala.toSeq
+
+    def apply(
+        intentId: String,
+        subjnotes: JList[String],
+        subjindexes: JList[Int],
+        bynotes: JList[String],
+        byindexes: JList[Int]
+    ): NCSortSpecModelData =
+        new NCSortSpecModelData(
+            intentId = intentId,
+            subjnotes = nvl(subjnotes),
+            subjindexes = nvl(subjindexes),
+            bynotes = nvl(bynotes),
+            byindexes = nvl(byindexes)
+        )
+}
+
+case class NCSortSpecModelData(
+    intentId: String,
+    subjnotes: Seq[String] = Seq.empty,
+    subjindexes: Seq[Int] = Seq.empty,
+    bynotes: Seq[String] = Seq.empty,
+    byindexes: Seq[Int] = Seq.empty
+)
+
+class NCSortSpecModel extends NCSpecModelAdapter {
+    private def mkResult(intentId: String, sort: NCToken) =
+        NCResult.json(
+            mapper.writeValueAsString(
+                NCSortSpecModelData(
+                    intentId = intentId,
+                    subjnotes = sort.meta[JList[String]]("nlpcraft:sort:subjnotes"),
+                    subjindexes = sort.meta[JList[Int]]("nlpcraft:sort:subjindexes"),
+                    bynotes = sort.meta[JList[String]]("nlpcraft:sort:bynotes"),
+                    byindexes = sort.meta[JList[Int]]("nlpcraft:sort:byindexes")
+                )
+            )
+        )
+
+    @NCIntent(
+        "intent=onSort1 " +
+        "term(sort)~{tok_id() == 'nlpcraft:sort'} " +
+        "term(elem)~{has(tok_groups(), 'G1')}*"
+    )
+    private def onSort1(ctx: NCIntentMatch, @NCIntentTerm("sort") sort: NCToken): NCResult =
+        mkResult(intentId = "onSort1", sort = sort)
+
+    // `x` is mandatory (difference with `onSort3`)
+    @NCIntent(
+        "intent=onSort2 " +
+        "term(x)={tok_id() == 'X'} " +
+        "term(sort)~{tok_id() == 'nlpcraft:sort'} " +
+        "term(elem)~{has(tok_groups(), 'G1')}*"
+    )
+    private def onSort2(ctx: NCIntentMatch, @NCIntentTerm("sort") sort: NCToken): NCResult =
+        mkResult(intentId = "onSort2", sort = sort)
+
+    // `y` is optional (difference with `onSort2`)
+    @NCIntent(
+        "intent=onSort3 " +
+        "term(y)~{tok_id() == 'Y'} " +
+        "term(sort)~{tok_id() == 'nlpcraft:sort'} " +
+        "term(elem)~{has(tok_groups(), 'G1')}*"
+    )
+    private def onSort3(ctx: NCIntentMatch, @NCIntentTerm("sort") sort: NCToken): NCResult =
+        mkResult(intentId = "onSort3", sort = sort)
+
+    @NCIntent(
+        "intent=onSort4 " +
+        "term(z)~{tok_id() == 'Z'} " +
+        "term(elem1)~{has(tok_groups(), 'G1')}+ " +
+        "term(elem2)~{has(tok_groups(), 'G2')}+ " +
+        "term(sort)~{tok_id() == 'nlpcraft:sort'}"
+    )
+    private def onSort4(ctx: NCIntentMatch, @NCIntentTerm("sort") sort: NCToken): NCResult =
+        mkResult(intentId = "onSort4", sort = sort)
+}
+
+@NCTestEnvironment(model = classOf[NCSortSpecModel], startClient = true)
+class NCSortSpec extends NCTestContext {
+    private def extract(s: String): NCSortSpecModelData = mapper.readValue(s, classOf[NCSortSpecModelData])
+
+    @Test
+    private[stm] def testOnSort11(): Unit = {
+        checkResult(
+            "test test sort by a a",
+            extract,
+            // Reference to variant.
+            NCSortSpecModelData(intentId = "onSort1", bynotes = Seq("A2"), byindexes = Seq(3))
+        )
+        checkResult(
+            "test b b",
+            extract,
+            // Reference to recalculated variant (new changed indexes).
+            NCSortSpecModelData(intentId = "onSort1", bynotes = Seq("B2"), byindexes = Seq(1))
+        )
+        checkResult(
+            "test test sort a a by a a",
+            extract,
+            // Reference to variant.
+            NCSortSpecModelData(
+                intentId = "onSort1",
+                subjnotes = Seq("A2"),
+                subjindexes = Seq(3),
+                bynotes = Seq("A2"),
+                byindexes = Seq(5)
+            )
+        )
+
+        checkResult(
+            "test test sort a a, a a by a a, a a",
+            extract,
+            // Reference to variant.
+            NCSortSpecModelData(
+                intentId = "onSort1",
+                subjnotes = Seq("A2", "A2"),
+                subjindexes = Seq(3, 5),
+                bynotes = Seq("A2", "A2"),
+                byindexes = Seq(7, 9)
+            )
+        )
+    }
+
+    @Test
+    private[stm] def testOnSort12(): Unit = {
+        checkResult(
+            "test test sort by a a",
+            extract,
+            // Reference to variant.
+            NCSortSpecModelData(intentId = "onSort1", bynotes = Seq("A2"), byindexes = Seq(3))
+        )
+
+        checkResult(
+            "test b b",
+            extract,
+            // Reference to recalculated variant (new changed indexes).
+            NCSortSpecModelData(intentId = "onSort1", bynotes = Seq("B2"), byindexes = Seq(1))
+        )
+    }
+
+    @Test
+    private[stm] def testOnSort2(): Unit = {
+        checkResult(
+            "test test x sort by a a",
+            extract,
+            // Reference to variant.
+            NCSortSpecModelData(intentId = "onSort2", bynotes = Seq("A2"), byindexes = Seq(4))
+        )
+
+        checkResult(
+            "test x",
+            extract,
+            // Reference to conversation (tokens with these IDs and indexes can be found in the conversation).
+            NCSortSpecModelData(intentId = "onSort2", bynotes = Seq("A2"), byindexes = Seq(4))
+        )
+    }
+
+    @Test
+    private[stm] def testOnSort3(): Unit = {
+        checkResult(
+            "test test y sort by a a",
+            extract,
+            // Reference to variant.
+            NCSortSpecModelData(intentId = "onSort3", bynotes = Seq("A2"), byindexes = Seq(4))
+        )
+
+        checkResult(
+            "test y",
+            extract,
+            // Reference to conversation (tokens with these IDs and indexes can be found in the conversation).
+            NCSortSpecModelData(intentId = "onSort3", bynotes = Seq("A2"), byindexes = Seq(4))
+        )
+    }
+
+    // Like `testOnSort11` and `testOnSort12`, but more complex.
+    @Test
+    private[stm] def testOnSort4(): Unit = {
+        checkResult(
+            "test z test sort x by a a",
+            extract,
+            // Reference to variant.
+            NCSortSpecModelData(
+                intentId = "onSort4",
+                subjnotes = Seq("X"),
+                subjindexes = Seq(4),
+                bynotes = Seq("A2"),
+                byindexes = Seq(6)
+            )
+        )
+
+        checkResult(
+            "test z y b b",
+            extract,
+            // Reference to recalculated variant (new changed indexes).
+            NCSortSpecModelData(
+                intentId = "onSort4",
+                subjnotes = Seq("Y"),
+                subjindexes = Seq(2),
+                bynotes = Seq("B2"),
+                byindexes = Seq(3)
+            )
+        )
+    }
+}
\ No newline at end of file

diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCSpecModelAdapter.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCSpecModelAdapter.scala
new file mode 100644
index 0000000..81e5563
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/indexes/NCSpecModelAdapter.scala

@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.models.stm.indexes
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import com.fasterxml.jackson.module.scala.DefaultScalaModule
+import org.apache.nlpcraft.model.{NCElement, NCModelAdapter}
+
+import java.util
+import java.util.Collections
+import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava}
+
+object NCSpecModelAdapter {
+    val mapper = new ObjectMapper() // Jackson mapper exposed as a public member of this object.
+
+    mapper.registerModule(new DefaultScalaModule()) // Registers the Jackson Scala module on the mapper at object initialization.
+}
+
+class NCSpecModelAdapter extends NCModelAdapter("nlpcraft.stm.idxs.test", "STM Indexes Test Model", "1.0") {
+    override def getElements: util.Set[NCElement] = // Five elements spread over the groups G1, G2 and G3.
+        Set(
+            mkElement("A2", "G1", "a a"),
+            mkElement("B2", "G1", "b b"),
+
+            mkElement("X", "G2", "x"),
+            mkElement("Y", "G2", "y"),
+
+            mkElement("Z", "G3", "z")
+        ).asJava
+
+    private def mkElement(id: String, group: String, syns: String*): NCElement = // Builds an element from an ID, one group and its synonyms.
+        new NCElement {
+            override def getId: String = id
+            override def getSynonyms: util.List[String] = {
+                val seq: Seq[String] = syns // Types the varargs explicitly as `Seq[String]` before the Java conversion.
+
+                seq.asJava
+            }
+            override def getGroups: util.List[String] = Collections.singletonList(group) // Exactly one group per element.
+        }
+}

diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index df089e3..100334f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala

@@ -17,10 +17,10 @@
 
 package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort
 
-import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment}
 import org.apache.nlpcraft.model.NCElement
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.NCTestSortTokenType._
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCDefaultTestModel, NCEnricherBaseSpec, NCTestNlpToken => nlp, NCTestSortToken => srt, NCTestUserToken => usr}
+import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment}
 import org.junit.jupiter.api.Test
 
 import java.util
@@ -222,177 +222,6 @@
                 nlp(text = ", asc", isStop = true)
             ),
             _ => checkExists(
-                "sort A",
-                srt(text = "sort", typ = SUBJ_ONLY, note = "A", index = 1),
-                usr("A", "A")
-            ),
-            _ => checkExists(
-                "sort A by A",
-                srt(text = "sort", subjNote = "A", subjIndex = 1, byNote = "A", byIndex = 3),
-                usr(text = "A", id = "A"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A", id = "A")
-            ),
-            _ => checkExists(
-                "sort A, C by A, C",
-                srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes = Seq(1, 3), byNotes = Seq("A", "C"), byIndexes = Seq(5, 7)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "C", id = "C"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "C", id = "C")
-            ),
-            _ => checkExists(
-                "sort A C by A C",
-                srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "C"), byIndexes = Seq(4, 5)),
-                usr(text = "A", id = "A"),
-                usr(text = "C", id = "C"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A", id = "A"),
-                usr(text = "C", id = "C")
-            ),
-            _ => checkExists(
-                "sort A B by A B",
-                srt(text = "sort", subjNotes = Seq("A", "B"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "B"), byIndexes = Seq(4, 5)),
-                usr(text = "A", id = "A"),
-                usr(text = "B", id = "B"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A", id = "A"),
-                usr(text = "B", id = "B")
-            ),
-            _ => checkExists(
-                "sort A B by A B",
-                srt(text = "sort", subjNote = "AB", subjIndex = 1, byNote = "AB", byIndex = 3),
-                usr(text = "A B", id = "AB"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A B", id = "AB")
-            ),
-            _ => checkExists(
-                "A classify",
-                usr(text = "A", id = "A"),
-                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 0)
-            ),
-            _ => checkExists(
-                "the A the classify",
-                nlp(text = "the", isStop = true),
-                usr(text = "A", id = "A"),
-                nlp(text = "the", isStop = true),
-                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 1)
-            ),
-            _ => checkExists(
-                "segment A by top down",
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1, asc = false),
-                usr(text = "A", id = "A"),
-                nlp(text = "by top down", isStop = true)
-            ),
-            _ => checkExists(
-                "segment A in bottom up order",
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1, asc = true),
-                usr(text = "A", id = "A"),
-                nlp(text = "in bottom up order", isStop = true)
-            ),
-            // `by` is redundant word here
-            _ => checkExists(
-                "segment A by in bottom up order",
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1),
-                usr(text = "A", id = "A"),
-                nlp(text = "by"),
-                nlp(text = "in"),
-                nlp(text = "bottom"),
-                nlp(text = "up"),
-                nlp(text = "order")
-            ),
-            _ => checkExists(
-                "the segment the A the in bottom up the order the",
-                nlp(text = "the", isStop = true),
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 3, asc = true),
-                nlp(text = "the", isStop = true),
-                usr(text = "A", id = "A"),
-                nlp(text = "the in bottom up the order the", isStop = true)
-            ),
-            _ => checkExists(
-                "the segment the A the by bottom up the order the",
-                nlp(text = "the", isStop = true),
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 3, asc = true),
-                nlp(text = "the", isStop = true),
-                usr(text = "A", id = "A"),
-                nlp(text = "the by bottom up the order the", isStop = true)
-            ),
-            _ => checkExists(
-                "A classify",
-                usr(text = "A", id = "A"),
-                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 0)
-            ),
-            _ => checkAll(
-                "A B classify",
-                Seq(
-                    usr(text = "A B", id = "AB"),
-                    srt(text = "classify", typ = SUBJ_ONLY, note = "AB", index = 0)
-                ),
-                Seq(
-                    usr(text = "A", id = "A"),
-                    usr(text = "B", id = "B"),
-                    srt(text = "classify", subjNotes = Seq("A", "B"), subjIndexes = Seq(0, 1))
-                ),
-                Seq(
-                    usr(text = "A", id = "A"),
-                    usr(text = "B", id = "B"),
-                    srt(text = "classify", subjNotes = Seq("B"), subjIndexes = Seq(1))
-                )
-            ),
-            _ => checkAll(
-                "D classify",
-                Seq(
-                    usr(text = "D", id = "D1"),
-                    srt(text = "classify", typ = SUBJ_ONLY, note = "D1", index = 0)
-                ),
-                Seq(
-                    usr(text = "D", id = "D2"),
-                    srt(text = "classify", typ = SUBJ_ONLY, note = "D2", index = 0)
-                )
-            ),
-            _ => checkAll(
-                "sort by A",
-                Seq(
-                    srt(text = "sort by", typ = BY_ONLY, note = "A", index = 1),
-                    usr(text = "A", id = "A")
-                )
-            ),
-            _ => checkExists(
-                "organize by A, B top down",
-                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(false)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "B", id = "B"),
-                nlp(text = "top down", isStop = true)
-            ),
-            _ => checkExists(
-                "organize by A, B from bottom up order",
-                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(true)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "B", id = "B"),
-                nlp(text = "from bottom up order", isStop = true)
-            ),
-            _ => checkExists(
-                "organize by A, B the descending",
-                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(false)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "B", id = "B"),
-                nlp(text = "the descending", isStop = true)
-            ),
-            _ => checkExists(
-                "organize by A, B, asc",
-                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(true)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "B", id = "B"),
-                nlp(text = ", asc", isStop = true)
-            ),
-            _ => checkExists(
                 "sort A the A the A",
                 srt(text = "sort", typ = SUBJ_ONLY, note = "wrapperA", index = 1),
                 usr("A A A", "wrapperA"),
commit	3a2c0081cac55529969d29b15428a11c8c1bef64	[log] [tgz]
author	Aaron Radzinski <aradzinski@datalingvo.com>	Thu Jul 29 12:35:54 2021 -0700
committer	Aaron Radzinski <aradzinski@datalingvo.com>	Thu Jul 29 12:35:54 2021 -0700
tree	218be07f64d81c1d18670e59380cfafc6ba6c621
parent	ac726a5499321206c6dfad3bb861602e012ac3fc [diff]
parent	64ab9ae208c7acccd3f063c29fedfffc39931272 [diff]