WIP on NLPCRAFT-369.

commit: 58d85dc1a29da9a8da09587059acdfabca06a822 [log] [tgz]
author: Aaron Radzinski <aradzinski@datalingvo.com> Thu Jul 22 23:22:31 2021 -0700
committer: Aaron Radzinski <aradzinski@datalingvo.com> Thu Jul 22 23:22:31 2021 -0700
tree: 65d6ecc5eb9d3ec53ccd856e0f4f27850a2eccfe
parent: 4418fa774e166f98e19142ed605a608ba723f5f2 [diff]
diff --git a/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt b/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
index 599387c..188d1d6 100644
--- a/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
+++ b/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt

@@ -19,7 +19,7 @@
 # Set of samples (corpus) for automatic unit and regression testing.
 #
 
-Ping me in 3 minutes
+Ping me in 3 minutes tomorrow
 Buzz me in an hour and 15mins
 Set my alarm for 30s
 Please, wake me up in twenty five minutes!
\ No newline at end of file

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
index 262fa1d..4640657 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala

@@ -21,28 +21,20 @@
  * Intent options container.
  */
 class NCIdlIntentOptions {
-    /**
-     * Whether to ignore unused free words for intent match.
-     */
-    var ignoreUnusedFreeWords: Boolean = true
+    var ignoreUnusedFreeWords: Boolean = true // Whether to ignore unused free words for intent match.
+    var ignoreUnusedSystemTokens: Boolean = true // Whether to ignore unused system tokens for intent match.
+    var ignoreUnusedUserTokens: Boolean = false // Whether to ignore unused user tokens for intent match.
+    var allowStmTokenOnly: Boolean = false // Whether or not to allow intent to match if all matching tokens came from STM only.
+    var ordered: Boolean = false // Whether or not the order of terms is important for intent match.
+}
 
-    /**
-     * Whether to ignore unused system tokens for intent match.
-     */
-    var ignoreUnusedSystemTokens: Boolean = true
-
-    /**
-     * Whether to ignore unused user tokens for intent match.
-     */
-    var ignoreUnusedUserTokens: Boolean = false
-
-    /**
-     * Whether or not to allow intent to match if all matching tokens came from STM only.
-     */
-    var allowStmTokenOnly: Boolean = false
-
-    /**
-     * Whether or not the order of term is important for intent match.
-     */
-    var ordered: Boolean = false
+object NCIdlIntentOptions {
+    /*
+    * JSON field names.
+    */
+    final val JSON_UNUSED_FREE_WORDS = "unused_free_words"
+    final val JSON_UNUSED_SYS_TOKS = "unused_sys_toks"
+    final val JSON_UNUSED_USER_TOKS = "unused_user_toks"
+    final val JSON_ALLOW_STM_ONLY = "allow_stm_only"
+    final val JSON_ORDERED = "ordered"
 }

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
index 2066f18..0324f57 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala

@@ -138,16 +138,18 @@
                     case _ => throw newSyntaxError(s"Expecting boolean value for intent option: $k")(ctx)
                 }
 
+            import NCIdlIntentOptions._
+
             for ((k, v) <- json) {
-                if (k == "ordered")
+                if (k == JSON_ORDERED)
                     opts.ordered = boolVal(k, v)
-                else if (k == "unused_free_words")
+                else if (k == JSON_UNUSED_FREE_WORDS)
                     opts.ignoreUnusedFreeWords = boolVal(k, v)
-                else if (k == "unused_sys_toks")
+                else if (k == JSON_UNUSED_SYS_TOKS)
                     opts.ignoreUnusedSystemTokens = boolVal(k, v)
-                else if (k == "unused_user_toks")
+                else if (k == JSON_UNUSED_USER_TOKS)
                     opts.ignoreUnusedUserTokens = boolVal(k, v)
-                else if (k == "allow_stm_only")
+                else if (k == JSON_ALLOW_STM_ONLY)
                     opts.allowStmTokenOnly = boolVal(k, v)
                 else
                     throw newSyntaxError(s"Unknown intent option: $k")(ctx)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
index 8612482..c939a05 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala

@@ -24,13 +24,12 @@
 import org.apache.nlpcraft.common.opencensus.NCOpenCensusTrace
 import org.apache.nlpcraft.model.impl.NCTokenLogger
 import org.apache.nlpcraft.model.impl.NCTokenPimp._
-import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction, NCIdlIntent, NCIdlTerm, NCIdlStackItem => Z}
+import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction, NCIdlIntent, NCIdlIntentOptions, NCIdlTerm, NCIdlStackItem => Z}
 import org.apache.nlpcraft.model.{NCContext, NCDialogFlowItem, NCIntentMatch, NCResult, NCToken}
 import org.apache.nlpcraft.probe.mgrs.dialogflow.NCDialogFlowManager
 
 import java.util.function.Function
 import java.util.{List => JList}
-
 import scala.collection.mutable
 import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsScala, SeqHasAsJava}
 
@@ -548,28 +547,49 @@
 
                 var res: Option[IntentMatch] = None
 
+                import NCIdlIntentOptions._
+
                 if (!opts.allowStmTokenOnly && usedSenToks.isEmpty && usedConvToks.nonEmpty)
-                    logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr.")
+                    logger.info(
+                        s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr. " +
+                        s"See intent '${c(JSON_ALLOW_STM_ONLY)}' option."
+                    )
                 else if (!opts.ignoreUnusedFreeWords && unusedSenToks.exists(_.token.isFreeWord))
-                    logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr.")
+                    NCTokenLogger.prepareTable(
+                        unusedSenToks.filter(_.token.isFreeWord).map(_.token)
+                    ).
+                    info(
+                        logger,
+                        Some(
+                            s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr. " +
+                            s"See intent '${c(JSON_UNUSED_FREE_WORDS)}' option. " +
+                            s"Unused free words:"
+                        )
+                    )
                 else if (!opts.ignoreUnusedUserTokens && unusedSenToks.exists(_.token.isUserDefined))
-                    NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isUserDefined).map(_.token)).
-                        info(
-                            logger,
-                            Some(
-                                s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr." +
-                                s"\nUnused user tokens for intent '$intentId' $varStr:"
-                            )
+                    NCTokenLogger.prepareTable(
+                        unusedSenToks.filter(_.token.isUserDefined).map(_.token)
+                    ).
+                    info(
+                        logger,
+                        Some(
+                            s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr. " +
+                            s"See intent '${c(JSON_UNUSED_USER_TOKS)}' option. " +
+                            s"Unused user tokens:"
                         )
-                else if (!opts.ignoreUnusedSystemTokens && unusedSenToks.exists(_.token.isSystemDefined))
-                    NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isSystemDefined).map(_.token)).
-                        info(
-                            logger,
-                            Some(
-                                s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr." +
-                                s"\nUnused system tokens for intent '$intentId' $varStr:"
-                            )
+                    )
+                else if (!opts.ignoreUnusedSystemTokens && unusedSenToks.exists(tok => !tok.token.isFreeWord && tok.token.isSystemDefined))
+                    NCTokenLogger.prepareTable(
+                        unusedSenToks.filter(tok => !tok.token.isFreeWord && tok.token.isSystemDefined).map(_.token)
+                    ).
+                    info(
+                        logger,
+                        Some(
+                            s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr. " +
+                            s"See intent '${c(JSON_UNUSED_SYS_TOKS)}' option. " +
+                            s"Unused system tokens:"
                         )
+                    )
                 else {
                     if (usedSenToks.isEmpty && usedConvToks.isEmpty)
                         logger.warn(s"Intent '$intentId' ${bo(y("matched"))} but no tokens were used $varStr.")

diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
index d1a624f..3106911 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala

@@ -102,8 +102,11 @@
               | */
               |intent=i1
               |     options={
-              |         'ordered': true,
-              |         'unused_free_words': false
+              |         'ordered': false,
+              |         'unused_free_words': true,
+              |         'unused_sys_toks': true,
+              |         'unused_user_toks': false,
+              |         'allow_stm_only': false
               |     }
               |     flow="a[^0-9]b" // Flow comment.
               |     term(t1)={has(json("{'a': true, 'b\'2': {'arr': [1, 2, 3]}}"), list("موسكو\"", 'v1\'v1', "k2", "v2"))}
commit	58d85dc1a29da9a8da09587059acdfabca06a822	[log] [tgz]
author	Aaron Radzinski <aradzinski@datalingvo.com>	Thu Jul 22 23:22:31 2021 -0700
committer	Aaron Radzinski <aradzinski@datalingvo.com>	Thu Jul 22 23:22:31 2021 -0700
tree	65d6ecc5eb9d3ec53ccd856e0f4f27850a2eccfe
parent	4418fa774e166f98e19142ed605a608ba723f5f2 [diff]