WIP on NLPCRAFT-369.
diff --git a/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt b/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
index 599387c..188d1d6 100644
--- a/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
+++ b/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
@@ -19,7 +19,7 @@
# Set of samples (corpus) for automatic unit and regression testing.
#
-Ping me in 3 minutes
+Ping me in 3 minutes tomorrow
Buzz me in an hour and 15mins
Set my alarm for 30s
Please, wake me up in twenty five minutes!
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
index 262fa1d..4640657 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
@@ -21,28 +21,20 @@
* Intent options container.
*/
class NCIdlIntentOptions {
- /**
- * Whether to ignore unused free words for intent match.
- */
- var ignoreUnusedFreeWords: Boolean = true
+ var ignoreUnusedFreeWords: Boolean = true // Whether to ignore unused free words for intent match.
+ var ignoreUnusedSystemTokens: Boolean = true // Whether to ignore unused system tokens for intent match.
+ var ignoreUnusedUserTokens: Boolean = false // Whether to ignore unused user tokens for intent match.
+ var allowStmTokenOnly: Boolean = false // Whether or not to allow intent to match if all matching tokens came from STM only.
+ var ordered: Boolean = false // Whether or not the order of terms is important for intent match.
+}
- /**
- * Whether to ignore unused system tokens for intent match.
- */
- var ignoreUnusedSystemTokens: Boolean = true
-
- /**
- * Whether to ignore unused user tokens for intent match.
- */
- var ignoreUnusedUserTokens: Boolean = false
-
- /**
- * Whether or not to allow intent to match if all matching tokens came from STM only.
- */
- var allowStmTokenOnly: Boolean = false
-
- /**
- * Whether or not the order of term is important for intent match.
- */
- var ordered: Boolean = false
+object NCIdlIntentOptions {
+ /*
+ * JSON field names.
+ */
+ final val JSON_UNUSED_FREE_WORDS = "unused_free_words"
+ final val JSON_UNUSED_SYS_TOKS = "unused_sys_toks"
+ final val JSON_UNUSED_USER_TOKS = "unused_user_toks"
+ final val JSON_ALLOW_STM_ONLY = "allow_stm_only"
+ final val JSON_ORDERED = "ordered"
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
index 2066f18..0324f57 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
@@ -138,16 +138,18 @@
case _ => throw newSyntaxError(s"Expecting boolean value for intent option: $k")(ctx)
}
+ import NCIdlIntentOptions._
+
for ((k, v) <- json) {
- if (k == "ordered")
+ if (k == JSON_ORDERED)
opts.ordered = boolVal(k, v)
- else if (k == "unused_free_words")
+ else if (k == JSON_UNUSED_FREE_WORDS)
opts.ignoreUnusedFreeWords = boolVal(k, v)
- else if (k == "unused_sys_toks")
+ else if (k == JSON_UNUSED_SYS_TOKS)
opts.ignoreUnusedSystemTokens = boolVal(k, v)
- else if (k == "unused_user_toks")
+ else if (k == JSON_UNUSED_USER_TOKS)
opts.ignoreUnusedUserTokens = boolVal(k, v)
- else if (k == "allow_stm_only")
+ else if (k == JSON_ALLOW_STM_ONLY)
opts.allowStmTokenOnly = boolVal(k, v)
else
throw newSyntaxError(s"Unknown intent option: $k")(ctx)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
index 8612482..c939a05 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
@@ -24,13 +24,12 @@
import org.apache.nlpcraft.common.opencensus.NCOpenCensusTrace
import org.apache.nlpcraft.model.impl.NCTokenLogger
import org.apache.nlpcraft.model.impl.NCTokenPimp._
-import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction, NCIdlIntent, NCIdlTerm, NCIdlStackItem => Z}
+import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction, NCIdlIntent, NCIdlIntentOptions, NCIdlTerm, NCIdlStackItem => Z}
import org.apache.nlpcraft.model.{NCContext, NCDialogFlowItem, NCIntentMatch, NCResult, NCToken}
import org.apache.nlpcraft.probe.mgrs.dialogflow.NCDialogFlowManager
import java.util.function.Function
import java.util.{List => JList}
-
import scala.collection.mutable
import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsScala, SeqHasAsJava}
@@ -548,28 +547,49 @@
var res: Option[IntentMatch] = None
+ import NCIdlIntentOptions._
+
if (!opts.allowStmTokenOnly && usedSenToks.isEmpty && usedConvToks.nonEmpty)
- logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr.")
+ logger.info(
+ s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr. " +
+ s"See intent '${c(JSON_ALLOW_STM_ONLY)}' option."
+ )
else if (!opts.ignoreUnusedFreeWords && unusedSenToks.exists(_.token.isFreeWord))
- logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr.")
+ NCTokenLogger.prepareTable(
+ unusedSenToks.filter(_.token.isFreeWord).map(_.token)
+ ).
+ info(
+ logger,
+ Some(
+ s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr. " +
+ s"See intent '${c(JSON_UNUSED_FREE_WORDS)}' option. " +
+ s"Unused free words:"
+ )
+ )
else if (!opts.ignoreUnusedUserTokens && unusedSenToks.exists(_.token.isUserDefined))
- NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isUserDefined).map(_.token)).
- info(
- logger,
- Some(
- s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr." +
- s"\nUnused user tokens for intent '$intentId' $varStr:"
- )
+ NCTokenLogger.prepareTable(
+ unusedSenToks.filter(_.token.isUserDefined).map(_.token)
+ ).
+ info(
+ logger,
+ Some(
+ s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr. " +
+ s"See intent '${c(JSON_UNUSED_USER_TOKS)}' option. " +
+ s"Unused user tokens:"
)
- else if (!opts.ignoreUnusedSystemTokens && unusedSenToks.exists(_.token.isSystemDefined))
- NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isSystemDefined).map(_.token)).
- info(
- logger,
- Some(
- s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr." +
- s"\nUnused system tokens for intent '$intentId' $varStr:"
- )
+ )
+ else if (!opts.ignoreUnusedSystemTokens && unusedSenToks.exists(tok => !tok.token.isFreeWord && tok.token.isSystemDefined))
+ NCTokenLogger.prepareTable(
+ unusedSenToks.filter(tok => !tok.token.isFreeWord && tok.token.isSystemDefined).map(_.token)
+ ).
+ info(
+ logger,
+ Some(
+ s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr. " +
+ s"See intent '${c(JSON_UNUSED_SYS_TOKS)}' option. " +
+ s"Unused system tokens:"
)
+ )
else {
if (usedSenToks.isEmpty && usedConvToks.isEmpty)
logger.warn(s"Intent '$intentId' ${bo(y("matched"))} but no tokens were used $varStr.")
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
index d1a624f..3106911 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
@@ -102,8 +102,11 @@
| */
|intent=i1
| options={
- | 'ordered': true,
- | 'unused_free_words': false
+ | 'ordered': false,
+ | 'unused_free_words': true,
+ | 'unused_sys_toks': true,
+ | 'unused_user_toks': false,
+ | 'allow_stm_only': false
| }
| flow="a[^0-9]b" // Flow comment.
| term(t1)={has(json("{'a': true, 'b\'2': {'arr': [1, 2, 3]}}"), list("موسكو\"", 'v1\'v1', "k2", "v2"))}