Merge branch 'master' into NLPCRAFT-383
diff --git a/nlpcraft-examples/cargps/src/main/java/org/apache/nlpcraft/examples/cargps/CarGpsModel.scala b/nlpcraft-examples/cargps/src/main/java/org/apache/nlpcraft/examples/cargps/CarGpsModel.scala
index 9b6cf41..2e79a2b 100644
--- a/nlpcraft-examples/cargps/src/main/java/org/apache/nlpcraft/examples/cargps/CarGpsModel.scala
+++ b/nlpcraft-examples/cargps/src/main/java/org/apache/nlpcraft/examples/cargps/CarGpsModel.scala
@@ -27,7 +27,7 @@
*
* @return
*/
- @NCIntentRef("int:navigate")
+ @NCIntentRef("navigate")
@NCIntentSampleRef("samples/cargps_navigate_samples.txt")
def onNavigation(): NCResult = {
NCResult.text(s"")
@@ -37,7 +37,7 @@
*
* @return
*/
- @NCIntentRef("int:cancel")
+ @NCIntentRef("cancel")
@NCIntentSampleRef("samples/cargps_cancel_samples.txt")
def onCancel(): NCResult = {
NCResult.text(s"")
@@ -47,7 +47,7 @@
*
* @return
*/
- @NCIntentRef("int:add:waypoint")
+ @NCIntentRef("add:waypoint")
@NCIntentSampleRef("samples/cargps_add_waypoint_samples.txt")
def onAddWaypoint(): NCResult = {
NCResult.text(s"")
@@ -57,7 +57,7 @@
*
* @return
*/
- @NCIntentRef("int:remove:waypoint")
+ @NCIntentRef("remove:waypoint")
@NCIntentSampleRef("samples/cargps_remove_waypoint_samples.txt")
def onRemoveWaypoint(): NCResult = {
NCResult.text(s"")
diff --git a/nlpcraft-examples/cargps/src/main/resources/cargps_intents.idl b/nlpcraft-examples/cargps/src/main/resources/cargps_intents.idl
index e90b7b5..3eae285 100644
--- a/nlpcraft-examples/cargps/src/main/resources/cargps_intents.idl
+++ b/nlpcraft-examples/cargps/src/main/resources/cargps_intents.idl
@@ -22,13 +22,26 @@
// Reusable fragments.
fragment=hey term={# == "x:hey" && tok_is_first()}
-// Intents.
-intent=int:cancel
- // Ignore any other user or system tokens if we found 'cancel' token.
- options={'unused_usr_toks': true, 'unused_sys_toks': true}
- fragment(hey)
+// Cancel intent.
+intent=cancel
+ fragment(hey) // Always, salutation 1st.
term={# == "x:cancel"}
-intent=int:navigate options={'ordered': true} fragment(hey) term={# == "x:navigate"} term={# == "x:addr"}
-intent=int:add:waypoint fragment(hey) term={# == "x:add-waypoint"}
-intent=int:remove:waypoint fragment(hey) term={# == "x:remove-waypoint"}
\ No newline at end of file
+// Navigate intent.
+intent=navigate
+ options={'ordered': true} // Order of terms is important.
+ fragment(hey) // Always, salutation 1st.
+ term={# == "x:navigate"} // Then word 'navigate'.
+ term={# == "x:addr"} // Then where to navigate.
+
+// Add a waypoint intent.
+intent=add:waypoint
+ fragment(hey) // Always, salutation 1st.
+ term={# == "x:add-waypoint"}
+ term={# == "x:addr"}
+
+// Remove a waypoint intent.
+// Assumes last waypoint.
+intent=remove:waypoint
+ fragment(hey) // Always, salutation 1st.
+ term={# == "x:remove-waypoint"}
\ No newline at end of file
diff --git a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
index 62f45c8..722103b 100644
--- a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
+++ b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
@@ -29,15 +29,18 @@
- name: "<HEY>"
macro: "{hey|hi|howdy}"
- name: "<NAVIGATE>"
- macro: "{navigate|pilot|plot|drive|route|plan|find|head|ride|direct|steer|operate|sail} {out|_} {course|route|destination|drive|_} {to|_}"
+ macro: "{navigate|pilot|journey|plot|drive|route|plan|find|head|ride|direct|steer|operate|sail} {out|_} {course|route|destination|drive|_} {to|_}"
- name: "<CANCEL>"
macro: "{cancel|stop|abort|finish|cease|quit} {off|_}"
- name: "<WAYPOINT>"
- macro: "{waypoint|location|point|stopover|stop over|way station|stop|checkpoint|stop point} {point|station|_}"
+ macro: "{waypoint|stopover|stop over|way station|stop|checkpoint|stop point} {point|station|_}"
+# Make sure 'howdy' is not marked as a stopword.
excludedStopWords:
- howdy
+# Don't surface these tokens as standalone ones. They are used only
+# as building blocks for other tokens.
abstractTokens:
- x:addr:kind
- x:addr:num
@@ -53,31 +56,32 @@
- "{street|drive|court|plaza|avenue|alley|anex|beach|bend|boulevard|bridge|canyon|causeway|way|circle|corner|creek|fork|harbor|highway|expressway|island|lane|lake|loop|motorway|park|path|point|ramp|route|rue|row|skyway|square|station|summit|trail|tunnel|walk|road}"
- "{st|str|dr|crt|plz|ave|blvd|hwy|rd}"
+ # Street number.
- id: "x:addr:num"
synonyms:
+ # Straight number.
- "^^{# == 'nlpcraft:num' && meta_tok('nlpcraft:num:unit') == null && meta_tok('nlpcraft:num:isequalcondition')}^^"
+ # Street name.
- id: "x:addr:st"
greedy: false
synonyms:
+ # Alphanumeric and must be after street number and before street kind.
- "{^^{is_alphanum(tok_txt) && tok_is_between_ids('x:addr:num', 'x:addr:kind') == true}^^}[1,3]"
- id: "x:addr"
synonyms:
- "^^[num]{# == 'x:addr:num'}^^ ^^[name]{# == 'x:addr:st'}^^ ^^[kind]{# == 'x:addr:kind'}^^"
- #
- # Salutation.
- # -----------
- id: "x:hey"
- description: "NLI prompt"
+ description: "NLI prompt, salutation."
synonyms:
- - "<HEY> {car|vehicle|truck}"
+ - "<HEY> {car|vehicle|{pickup|_} truck|pickup|van|sedan|coupe|lorry}"
- id: "x:cancel"
description: "Cancel action."
synonyms:
- - "<CANCEL>"
+ - "<CANCEL> <NAVIGATE>"
- id: "x:navigate"
description: "Start 'navigate' action."
@@ -91,9 +95,11 @@
- "stop by"
- id: "x:remove-waypoint"
- description: "Remove 'waypoint' action."
+ description: "Remove last 'waypoint' action."
synonyms:
+ # NOTE: assumes the LAST waypoint, if any.
- "{skip|remove} {over|_} {last|latest|current|_} <WAYPOINT>"
+ - "<CANCEL> {last|latest|current|_} <WAYPOINT>"
- "<NAVIGATE> without {stopping|<WAYPOINT>}"
intents:
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_add_waypoint_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_add_waypoint_samples.txt
index 4c3f520..4b0c809 100644
--- a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_add_waypoint_samples.txt
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_add_waypoint_samples.txt
@@ -19,3 +19,5 @@
# Set of samples (corpus) for automatic unit and regression testing.
#
+hey car, add a stopover at 21 table rock drive
+howdy, truck - add a waypoint for 2121 5th avenue please
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
index 0e4cb3d..c6b132d 100644
--- a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
@@ -19,10 +19,10 @@
# Set of samples (corpus) for automatic unit and regression testing.
#
-#Hey truck - stop the navigation!
+Hey truck - stop the navigation!
Howdy, car, please cancel the routing now.
-#Hi car - stop the route.
-#Hi car - stop the navigation...
+Hi car - stop the route.
+Hi car - stop the navigation...
Howdy truck - quit navigating.
-#Hi car - finish off the driving.
-#Hi car - cancel the journey.
\ No newline at end of file
+Hi car - finish off the driving.
+Hi car - cancel the journey.
\ No newline at end of file
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_navigate_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_navigate_samples.txt
index 029340e..0e6f244 100644
--- a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_navigate_samples.txt
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_navigate_samples.txt
@@ -18,4 +18,7 @@
#
# Set of samples (corpus) for automatic unit and regression testing.
#
+
hey car, navigate to 21 table rock drive
+howdy, truck - drive to 2121 5th avenue please
+hi lorry, how about a drive to 21 x x drive
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_remove_waypoint_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_remove_waypoint_samples.txt
index 4c3f520..ac13b37 100644
--- a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_remove_waypoint_samples.txt
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_remove_waypoint_samples.txt
@@ -19,3 +19,7 @@
# Set of samples (corpus) for automatic unit and regression testing.
#
+hey truck, cancel the last waypoint
+hey truck, cancel the stopover location
+Hi truck - drive without stopping at the last checkpoint
+
diff --git a/nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCCarGpsModelSpec.scala b/nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCCarGpsModelSpec.scala
index f7f5ca1..a3092ae 100644
--- a/nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCCarGpsModelSpec.scala
+++ b/nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCCarGpsModelSpec.scala
@@ -20,10 +20,26 @@
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
import org.junit.jupiter.api.Test
+/**
+ *
+ */
@NCTestEnvironment(model = classOf[CarGpsModel], startClient = true)
class NCCarGpsModelSpec extends NCTestContext {
@Test
- def test(): Unit = {
- checkIntent("hey truck, drive to 21 x x drive", "int:navigate")
+ def testNavigate(): Unit = {
+ checkIntent("hey truck, drive to 21 x x drive", "navigate")
+ checkIntent("hey car, navigate to 21 table rock drive", "navigate")
+ checkIntent("howdy, truck - drive to 2121 5th avenue please", "navigate")
+ }
+
+ @Test
+ def testCancel(): Unit = {
+ checkIntent("Hey truck - stop the navigation!", "cancel")
+ checkIntent("Howdy, car, please cancel the routing now.", "cancel")
+ checkIntent("Hi car - stop the route.", "cancel")
+ checkIntent("Hi car - stop the navigation...", "cancel")
+ checkIntent("Howdy truck - quit navigating.", "cancel")
+ checkIntent("Hi car - finish off the driving.", "cancel")
+ checkIntent("Hi car - cancel the journey.", "cancel")
}
}
diff --git a/nlpcraft-examples/lightswitch/pom.xml b/nlpcraft-examples/lightswitch/pom.xml
index 0ebbf8a..a26d56b 100644
--- a/nlpcraft-examples/lightswitch/pom.xml
+++ b/nlpcraft-examples/lightswitch/pom.xml
@@ -34,6 +34,7 @@
<properties>
<nlpcraft.server.module>nlpcraft</nlpcraft.server.module>
<nlpcraft.all.deps.jar>apache-${nlpcraft.server.module}-incubating-${project.version}-all-deps.jar</nlpcraft.all.deps.jar>
+ <nlpcraft.lightswitch.deps.jar>apache-nlpcraft-incubating-${project.version}-lightswitch-deps.jar</nlpcraft.lightswitch.deps.jar>
<kotlin.ver>1.5.0</kotlin.ver>
<groovy.ver>3.0.7</groovy.ver>
@@ -206,6 +207,27 @@
</execution>
</executions>
</plugin>
+
+ <!-- Uber jar created here because of kotlin and groovy dependencies. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>${maven.shade.plugin.ver}</version>
+ <executions>
+ <execution>
+ <id>jar.all.deps.lightswitch</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <outputFile>
+ target/${nlpcraft.lightswitch.deps.jar}
+ </outputFile>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
</project>
\ No newline at end of file
diff --git a/nlpcraft-examples/minecraft-mod/assets/nlpcraft-example-minecraft-mod-1.0.jar b/nlpcraft-examples/minecraft-mod/assets/nlpcraft-example-minecraft-mod-1.0.jar
index a72d54c..9bfd7a4 100644
--- a/nlpcraft-examples/minecraft-mod/assets/nlpcraft-example-minecraft-mod-1.0.jar
+++ b/nlpcraft-examples/minecraft-mod/assets/nlpcraft-example-minecraft-mod-1.0.jar
Binary files differ
diff --git a/nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/example/minecraft/NCMinecraftExampleMod.java b/nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/examples/minecraft/NCMinecraftExampleMod.java
similarity index 99%
rename from nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/example/minecraft/NCMinecraftExampleMod.java
rename to nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/examples/minecraft/NCMinecraftExampleMod.java
index c493959..d8f3915 100644
--- a/nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/example/minecraft/NCMinecraftExampleMod.java
+++ b/nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/examples/minecraft/NCMinecraftExampleMod.java
@@ -16,7 +16,7 @@
*
*/
-package org.apache.nlpcraft.example.minecraft;
+package org.apache.nlpcraft.examples.minecraft;
import com.google.gson.Gson;
import net.minecraft.server.MinecraftServer;
diff --git a/nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/example/minecraft/utils/NCMinecraftFilesDump.java b/nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/examples/minecraft/utils/NCMinecraftFilesDump.java
similarity index 98%
rename from nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/example/minecraft/utils/NCMinecraftFilesDump.java
rename to nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/examples/minecraft/utils/NCMinecraftFilesDump.java
index 61b58bd..1b88552 100644
--- a/nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/example/minecraft/utils/NCMinecraftFilesDump.java
+++ b/nlpcraft-examples/minecraft-mod/src/main/java/org/apache/nlpcraft/examples/minecraft/utils/NCMinecraftFilesDump.java
@@ -16,7 +16,7 @@
*
*/
-package org.apache.nlpcraft.example.minecraft.utils;
+package org.apache.nlpcraft.examples.minecraft.utils;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
diff --git a/nlpcraft-examples/minecraft/pom.xml b/nlpcraft-examples/minecraft/pom.xml
index 69c3d40..f0ffcd2 100644
--- a/nlpcraft-examples/minecraft/pom.xml
+++ b/nlpcraft-examples/minecraft/pom.xml
@@ -34,6 +34,7 @@
<properties>
<nlpcraft.server.module>nlpcraft</nlpcraft.server.module>
<nlpcraft.all.deps.jar>apache-${nlpcraft.server.module}-incubating-${project.version}-all-deps.jar</nlpcraft.all.deps.jar>
+ <nlpcraft.minecraft.deps.jar>apache-nlpcraft-incubating-${project.version}-minecraft-deps.jar</nlpcraft.minecraft.deps.jar>
<kotlin.ver>1.5.0</kotlin.ver>
</properties>
@@ -190,6 +191,27 @@
</execution>
</executions>
</plugin>
+
+ <!-- Uber jar created here because of kotlin dependencies. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>${maven.shade.plugin.ver}</version>
+ <executions>
+ <execution>
+ <id>jar.all.deps.minecraft</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <outputFile>
+ target/${nlpcraft.minecraft.deps.jar}
+ </outputFile>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
</project>
\ No newline at end of file
diff --git a/nlpcraft/pom.xml b/nlpcraft/pom.xml
index 42726f7..b2c5d0c 100644
--- a/nlpcraft/pom.xml
+++ b/nlpcraft/pom.xml
@@ -136,7 +136,6 @@
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-langdetect</artifactId>
- <!-- Ignite contains 13 version but it contains 12. -->
<exclusions>
<exclusion>
<groupId>com.intellij</groupId>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 255e086..fbf4f01 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -36,7 +36,6 @@
private lazy val dataWithoutIndexes = this.filter(p => !SKIP_CLONE.contains(p._1))
private lazy val skipNlp = dataWithoutIndexes.filter { case (key, _) => key != "noteType" }
-
@transient
private lazy val hash = values.hashCode()
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index 2d06412..157a3e2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -1241,7 +1241,7 @@
/**
* // TODO: add javadoc
- * @return
+ * @return TBD
*/
default boolean isStopWordsAllowed() {
return DFLT_IS_STOPWORDS_ALLOWED;
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
index 71162db..9b4eebc 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
@@ -945,7 +945,11 @@
throw InvalidParameter(cmd, "lines")
loadServerBeacon() match {
- case Some(beacon) => tailFile(beacon.logPath, lines)
+ case Some(beacon) =>
+ if (beacon.logPath == null)
+ throw new IllegalStateException(s"Server was started outside or its log file cannot be found.")
+ else
+ tailFile(beacon.logPath, lines)
case None => throw NoLocalServer()
}
}
@@ -962,7 +966,11 @@
throw InvalidParameter(cmd, "lines")
loadProbeBeacon() match {
- case Some(beacon) => tailFile(beacon.logPath, lines)
+ case Some(beacon) =>
+ if (beacon.logPath == null)
+ throw new IllegalStateException(s"Probe log file cannot be found.")
+ else
+ tailFile(beacon.logPath, lines)
case None => throw NoLocalProbe()
}
}
@@ -1692,15 +1700,7 @@
* @param args Arguments, if any, for this command.
* @param repl Whether or not executing from REPL.
*/
- private [cmdline] def cmdSqlGen(cmd: Command, args: Seq[Argument], repl: Boolean): Unit = {
- // Mandatory parameters check (unless --help is specified).
- if (!isParam(cmd, args, "help")) {
- getParam(cmd, args, "driver")
- getParam(cmd, args, "schema")
- getParam(cmd, args, "out")
- getParam(cmd, args, "url")
- }
-
+ private [cmdline] def cmdGenSql(cmd: Command, args: Seq[Argument], repl: Boolean): Unit = {
val addCp = getCpParams(args)
val jvmOpts = getParamOpt(args, "jvmopts") match {
case Some(opts) => U.splitTrimFilter(U.trimQuotes(opts), " ")
@@ -2611,16 +2611,28 @@
}
// For 'help' - add additional auto-completion/suggestion candidates.
- if (cmd == HELP_CMD.name)
- candidates.addAll(CMDS.map(c => s"--cmd=${c.name}").map(s =>
- mkCandidate(
- disp = s,
- grp = CMDS_GRP,
- desc = null,
- completed = true
- ))
- .asJava
- )
+ if (cmd == HELP_CMD.name) {
+ if (words.exists(_.contains("-c=")))
+ candidates.addAll(CMDS.map(c => s"-c=${c.name}").map(s =>
+ mkCandidate(
+ disp = s,
+ grp = CMDS_GRP,
+ desc = null,
+ completed = true
+ ))
+ .asJava
+ )
+ else
+ candidates.addAll(CMDS.map(c => s"--cmd=${c.name}").map(s =>
+ mkCandidate(
+ disp = s,
+ grp = CMDS_GRP,
+ desc = null,
+ completed = true
+ ))
+ .asJava
+ )
+ }
// For 'rest' or 'call' - add '--path' auto-completion/suggestion candidates.
if (cmd == REST_CMD.name || cmd == CALL_CMD.name) {
@@ -3128,8 +3140,8 @@
* @param args
* @return
*/
- private def processParameters(cmd: Command, args: Seq[String]): Seq[Argument] =
- args.map { arg =>
+ private def processParameters(cmd: Command, args: Seq[String]): Seq[Argument] = {
+ val seq = args.map { arg =>
val parts = arg.split("=", 2)
def mkError() = new IllegalArgumentException(s"Invalid parameter: ${c(arg)}, type $C'help --cmd=${cmd.name}'$RST to get help.")
@@ -3165,6 +3177,14 @@
}
}
+ for (param <- cmd.params.filter(!_.optional)) {
+ if (!seq.exists(_.parameter.id == param.id))
+ throw MissingParameter(cmd, param.id)
+ }
+
+ seq
+ }
+
/**
*
* @param args
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
index 0c35f08..307c8fd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
@@ -351,10 +351,12 @@
synopsis = s"Generates NLPCraft model stub from SQL databases.",
desc = Some(
s"You can choose database schema, set of tables and columns for which you want to generate NLPCraft " +
- s"model. After the model is generated you can further configure and customize it for your specific needs. " +
+ s"model. Note that required JDBC driver class must be available on the classpath and therefore its " +
+ s"JAR should be added to the classpath. After the model is generated you can further configure and " +
+ s"customize it for your specific needs. " +
s"Find more information at https://nlpcraft.apache.org/tools/sql_model_gen.html"
),
- body = NCCli.cmdSqlGen,
+ body = NCCli.cmdGenSql,
params = Seq(
Parameter(
id = "url",
@@ -508,14 +510,6 @@
desc =
s"Flag on whether to use element's parent relationship for defining " +
s"SQL columns and their containing (i.e. parent) tables. Default is ${y("'false'")}."
- ),
- Parameter(
- id = "help",
- names = Seq("--help", "-h"),
- optional = true,
- desc =
- s"Gets extended help and usage information for the ${y("'gen-sql'")} command. " +
- s"Includes information on how to run this tool standalone in a separate process."
)
),
examples = Seq(
@@ -713,6 +707,7 @@
Parameter(
id = "lines",
names = Seq("--lines", "-l"),
+ optional = true,
value = Some("20"),
desc =
s"Number of the server log lines from the end to display. Default is 20."
@@ -736,6 +731,7 @@
params = Seq(
Parameter(
id = "lines",
+ optional = true,
names = Seq("--lines", "-l"),
value = Some("20"),
desc =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
index 5da9808..d4fc27c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
@@ -26,11 +26,46 @@
* @param word
*/
case class NCProbeIdlToken(token: NCToken, word: NCNlpSentenceToken) {
- val (origText: String, wordIndexes: Set[Int], minIndex: Int, maxIndex: Int, isToken: Boolean, isWord: Boolean) =
+ require(token != null ^ word != null)
+
+ val (
+ origText: String,
+ normText: String,
+ stem: String,
+ wordIndexes: Set[Int],
+ minIndex: Int,
+ maxIndex: Int,
+ startCharIndex: Int,
+ endCharIndex: Int,
+ isToken: Boolean,
+ isWord: Boolean
+ ) =
if (token != null)
- (token.origText, token.wordIndexes.toSet, token.wordIndexes.head, token.wordIndexes.last, true, false)
+ (
+ token.origText,
+ token.normText,
+ token.stem,
+ token.wordIndexes.toSet,
+ token.wordIndexes.head,
+ token.wordIndexes.last,
+ token.getStartCharIndex,
+ token.getEndCharIndex,
+ true,
+ false
+ )
else
- (word.origText, word.wordIndexes.toSet, word.wordIndexes.head, word.wordIndexes.last, false, true)
+ (
+ word.origText,
+ word.normText,
+ word.stem,
+ word.wordIndexes.toSet,
+ word.wordIndexes.head,
+ word.wordIndexes.last,
+ word.startCharIndex,
+ word.endCharIndex,
+ false,
+ true
+ )
private lazy val hash = if (isToken) Seq(wordIndexes, token.getId).hashCode() else wordIndexes.hashCode()
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index ea41793..6b6a8e8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -62,6 +62,9 @@
lazy val hasNoIdlSynonyms: Boolean = continuousSynonyms.nonEmpty || sparseSynonyms.nonEmpty
lazy val hasSparseSynonyms: Boolean = sparseSynonyms.nonEmpty || idlSynonyms.exists(_._2.exists(_.sparse))
lazy val hasContinuousSynonyms: Boolean = continuousSynonyms.nonEmpty || idlSynonyms.exists(_._2.exists(!_.sparse))
+ lazy val isComplex: Boolean = hasIdlSynonyms || !model.getParsers.isEmpty
def hasIdlSynonyms(elemId: String): Boolean = idlSynonyms.contains(elemId)
+
+
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index b3fe3e1..10b2cf7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -233,7 +233,8 @@
val tbl = NCAsciiTable()
- tbl += (s"${b("Text")}", nlpSens.map(s => rv(" " + s.text + " ")))
+ tbl += (s"${b("Text")}", nlpSens.map(s => bo(s.text)))
+ tbl += ("", bo("-") * nlpSens.maxBy(_.text.length).text.length)
tbl += (s"${b("Model ID")}", mdlId)
tbl += (s"${b("User:")}", "")
tbl += (s"${b(" ID")}", usrId)
@@ -353,12 +354,12 @@
val tbl = NCAsciiTable()
if (errMsg.isEmpty) {
- tbl += (s"${gb(w(" SUCCESS "))}", "")
+ tbl += (s"${gb(w("<SUCCESS>"))}", "")
tbl += (s"${g("---------")}", "")
tbl += (s"${b("Result type")}", resType.getOrElse(""))
}
else {
- tbl += (s"${rb(w(" REJECT "))}", "")
+ tbl += (s"${rb(w("<REJECT>"))}", "")
tbl += (s"${r("--------")}", "")
tbl += (s"${r("Error")}", s"${r(errMsg.get)}")
}
@@ -492,7 +493,7 @@
}).toMap
// Loop has sense if model is complex (has user defined parsers or IDL based synonyms)
- continue = NCModelEnricher.isComplex(mdl) && res.exists { case (_, same) => !same }
+ continue = mdl.isComplex && res.exists { case (_, same) => !same }
if (DEEP_DEBUG)
if (continue) {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 7196985..26bda8b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -83,8 +83,6 @@
ackStopped()
}
- def isComplex(mdl: NCProbeModel): Boolean = mdl.hasIdlSynonyms || !mdl.model.getParsers.isEmpty
-
/**
*
* @param ns
@@ -180,7 +178,8 @@
new NCCustomElement() {
override def getElementId: String = noteId
override def getWords: JList[NCCustomWord] = words
- override def getMetadata: JavaMeta = md.map(p => p._1 -> p._2.asInstanceOf[AnyRef]).asJava
+ override def getMetadata: JavaMeta =
+ md.map { case (k, v) => k -> v.asInstanceOf[AnyRef] }.asJava
}
}).asJava
)
@@ -228,7 +227,7 @@
* Example: Piece: 'x1, x2(stopword), x3(stopword), x4' will be expanded into
* {'x1, x2, x3, x4', 'x1, x2, x4', 'x1, x3, x4', 'x1, x4'}
*
- * 3. All variants collected, duplicated deleted, etc.
+ * 3. All variants collected, duplicated sets deleted, etc.
*
* @param toks
*/
@@ -244,7 +243,7 @@
else
slides += mutable.ArrayBuffer.empty :+ stop
- // Too many stopords inside skipped.
+ // Too many stopwords inside skipped.
val bigSlides = slides.filter(_.size > 2)
var stops4Delete: Seq[Seq[NlpToken]] =
@@ -255,7 +254,7 @@
if (stops4AllCombs.nonEmpty)
for (
seq1 <- Range.inclusive(0, stops4AllCombs.size).flatMap(stops4AllCombs.combinations);
- seq2 <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations)
+ seq2 <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations)
)
yield seq1 ++ seq2.flatten
else
@@ -268,11 +267,10 @@
stops4Delete = stops4Delete.filter(seq => !seq.contains(combo.head) && !seq.contains(combo.last))
(Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo).distinct
-
}).
- filter(_._1.nonEmpty).
- groupBy(_._1).
- map(p => p._1 -> p._2.map(_._2).minBy(p => (-p.size, p.head.index))).
+ filter { case (seq, _) => seq.nonEmpty }.
+ groupBy { case (seq, _) => seq }.
+ map { case (toksKey, seq) => toksKey -> seq.map(_._2).minBy(p => (-p.size, p.head.index)) }.
sortBy { case(data, combo) => (-combo.size, -data.size, combo.head.index, data.head.index) }
/**
@@ -297,15 +295,17 @@
/**
*
- * @param tows
+ * @param idlToks
* @param ns
*/
- private def toTokens(tows: Seq[IdlToken], ns: Sentence): Seq[NlpToken] =
- (
- tows.filter(_.isWord).map(_.word) ++
- tows.filter(_.isToken).map(_.token).
- flatMap(w => ns.filter(t => t.wordIndexes.intersect(w.wordIndexes).nonEmpty))
- ).sortBy(_.startCharIndex)
+ private def toNlpTokens(idlToks: Seq[IdlToken], ns: Sentence): Seq[NlpToken] = {
+ val words = idlToks.filter(_.isWord).map(_.word)
+ val suitableToks =
+ idlToks.filter(_.isToken).map(_.token).
+ flatMap(w => ns.filter(t => t.wordIndexes.intersect(w.wordIndexes).nonEmpty))
+
+ (words ++ suitableToks).sortBy(_.startCharIndex)
+ }
/**
*
@@ -378,6 +378,7 @@
}
/**
+ * Prepares IDL tokens based on NLP tokens.
*
* @param h
* @param toks
@@ -391,9 +392,7 @@
// Drops without tokens (IDL part works with tokens).
if (rec.nonEmpty)
- Some(rec ++
- (seq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.tokens)
- )
+ Some(rec ++ (seq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.tokens))
else
None
}).seq
@@ -440,11 +439,11 @@
for (
// 'toksExt' is piece of sentence, 'toks' is the same as 'toksExt' or without some stopwords set.
(toks, toksExt) <- combosTokens(ns.toSeq);
- idxs = toks.map(_.index);
- e <- mdl.elements.values;
- elemId = e.getId;
- greedy = e.isGreedy.orElse(mdl.model.isGreedy)
- if !greedy || !alreadyMarked(ns, elemId, toks, idxs)
+ idxs = toks.map(_.index);
+ e <- mdl.elements.values;
+ elemId = e.getId;
+ greedy = e.isGreedy.orElse(mdl.model.isGreedy)
+ if !greedy || !alreadyMarked(ns, elemId, toks, idxs)
) {
def add(
dbgType: String,
@@ -456,7 +455,7 @@
val ok =
(!greedy || !alreadyMarked(ns, elemId, elemToks, idxs)) &&
- ( parts.isEmpty || !parts.exists { case (t, _) => t.getId == elemId })
+ ( parts.isEmpty || !parts.exists { case (tok, _) => tok.getId == elemId })
if (ok)
mark(
@@ -563,7 +562,7 @@
)
}
else
- // 2.2 Sparse.
+ // 2.2 Sparse.
for (syn <- allSyns; comb <- allCombs)
NCSynonymsManager.onSparseMatch(
ns.srvReqId,
@@ -573,7 +572,7 @@
req,
variantsToks,
res => {
- val toks = getSparsedTokens(toTokens(res, ns), toTokens(comb, ns))
+ val toks = getSparsedTokens(toNlpTokens(res, ns), toNlpTokens(comb, ns))
val parts = toParts(mdl, ns.srvReqId, res, syn)
val typ = if (syn.sparse) "IDL sparse"else "IDL continuous"
@@ -607,6 +606,9 @@
* @param ns
*/
private def normalize(ns: Sentence): Unit = {
+        // Finds and removes user notes if the sentence contains notes with a similar structure but a lower count of swallowed stop-words.
+        // These stop-words can be used for detecting other user tokens, and if they are free words it is harmless too.
+        // Notes with links, and notes with references to them, are ignored.
val usrNotes = ns.flatten.filter(_.isUser).distinct
val links = NCSentenceManager.getLinks(usrNotes)
val parts = NCSentenceManager.getPartKeys(usrNotes)
@@ -638,28 +640,34 @@
// TODO: simplify, add tests, check model properties (sparse etc) for optimization.
/**
*
- * @param elmId
- * @param toks
- * @param sliceToksIdxsSorted
+ * @param elmId Element ID.
+ * @param toks Tokens.
+ * @param idxs Indexes, note that it can be not exactly tokens indexes (sparse case)
*/
- private def alreadyMarked(ns: Sentence, elmId: String, toks: Seq[NlpToken], sliceToksIdxsSorted: Seq[Int]): Boolean = {
+ private def alreadyMarked(ns: Sentence, elmId: String, toks: Seq[NlpToken], idxs: Seq[Int]): Boolean = {
lazy val toksIdxsSorted = toks.map(_.index).sorted
- sliceToksIdxsSorted.map(ns).forall(_.exists(n => n.noteType == elmId && n.sparsity == 0)) ||
- toks.exists(_.exists(n =>
- n.noteType == elmId &&
- (
- (n.sparsity == 0 &&
- (sliceToksIdxsSorted.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted))
- )
- ||
- (
- n.tokenIndexes == toksIdxsSorted ||
- n.tokenIndexes.containsSlice(toksIdxsSorted) &&
- U.isContinuous(toksIdxsSorted) &&
- U.isContinuous(n.tokenIndexes)
- )
- )
- ))
+ // All tokens with given indexes found with zero sparsity.
+ val ok1 = idxs.map(ns).forall(_.exists(n => n.noteType == elmId && n.sparsity == 0))
+
+ lazy val ok2 =
+ toks.exists(_.exists(n =>
+ if (n.noteType == elmId) {
+ val noteOk1 = n.sparsity == 0 &&
+ (idxs.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted))
+
+ lazy val noteOk2 =
+ n.tokenIndexes == toksIdxsSorted ||
+ n.tokenIndexes.containsSlice(toksIdxsSorted) &&
+ U.isContinuous(toksIdxsSorted) &&
+ U.isContinuous(n.tokenIndexes)
+
+ noteOk1 || noteOk2
+ }
+ else
+ false
+ ))
+
+ ok1 || ok2
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 00d6bdf..50137a2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -369,7 +369,7 @@
val t = NCNlpSentenceToken(idx)
// Note, it adds stop-words too.
- val content = nsCopyToks.zipWithIndex.filter(p => indexes.contains(p._2)).map(_._1)
+ val content = nsCopyToks.zipWithIndex.filter { case (_, idx) => indexes.contains(idx) }.map { case (tok, _) => tok}
content.foreach(t => history += t.index -> idx)
@@ -378,15 +378,12 @@
val n = content.size - 1
- content.zipWithIndex.foreach(p => {
- val t = p._1
- val idx = p._2
-
+ content.zipWithIndex.foreach { case (t, idx) =>
buf += get(t)
if (idx < n && t.endCharIndex != content(idx + 1).startCharIndex)
buf += " "
- })
+ }
buf.mkString
}
@@ -459,8 +456,7 @@
for (tok <- ns.filter(_.isTypeOf(noteType)) if ok)
tok.getNoteOpt(noteType, idxsField) match {
case Some(n) =>
- val idxs: Seq[Seq[Int]] =
- n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala.toSeq).toSeq
+ val idxs: Seq[Seq[Int]] = n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala.toSeq).toSeq
var fixed = idxs
history.foreach {
@@ -539,8 +535,7 @@
// Validation (all indexes calculated well)
require(
!res ||
- !ns.flatten.
- exists(n => ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t => !t.contains(n))),
+ !ns.flatten.exists(n => ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t => !t.contains(n))),
s"Invalid sentence:\n" +
ns.map(t =>
// Human readable invalid sentence for debugging.
@@ -604,6 +599,83 @@
}).toMap)
/**
+ *
+ * @param sen
+ * @param mdl
+ * @param lastPhase
+ * @param overlappedNotes
+ */
+ private def mkVariants(
+ sen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean, overlappedNotes: Seq[NCNlpSentenceNote]
+ ): Seq[NCNlpSentence] = {
+ def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
+ if (lastPhase)
+ dropAbstract(mdl, ns)
+
+ if (collapseSentence(ns, getNotNlpNotes(ns.tokens).map(_.noteType).distinct, lastPhase)) Some(ns) else None
+ }
+
+ if (overlappedNotes.nonEmpty) {
+ val overlappedVars: Seq[Set[NCNlpSentenceNote]] =
+ overlappedNotes.flatMap(note => note.wordIndexes.map(_ -> note)).
+ groupBy { case (idx, _) => idx }.
+ map { case (_, seq) => seq.map { case (_, note) => note }.toSet }.
+ toSeq.
+ sortBy(-_.size)
+
+ val delCombs: Seq[Seq[NCNlpSentenceNote]] =
+ combCache.
+ getOrElseUpdate(
+ sen.srvReqId,
+ mutable.HashMap.empty[CacheKey, CacheValue]
+ ).
+ getOrElseUpdate(
+ overlappedVars,
+ NCSentenceHelper.findCombinations(
+ overlappedVars.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq
+ )
+ )
+
+ val seqSens =
+ delCombs.
+ par.
+ flatMap(delComb => {
+ val nsClone = sen.clone()
+
+ // Saves deleted notes for sentence and their tokens.
+ addDeleted(sen, nsClone, delComb)
+ delComb.foreach(nsClone.removeNote)
+
+ // Has overlapped notes for some tokens.
+ require(!nsClone.exists(_.count(!_.isNlp) > 1))
+
+ collapse0(nsClone)
+ }).seq
+
+ // Removes sentences whose only difference is the 'direct' flag of their user tokens.
+ // `Direct` sentences have higher priority.
+ type Key = Seq[Map[String, JSerializable]]
+ case class Holder(key: Key, sentence: NCNlpSentence, factor: Int)
+
+ def mkHolder(sen: NCNlpSentence): Holder = {
+ val notes = sen.flatten
+
+ Holder(
+ // We have to delete some keys to have possibility to compare sentences.
+ notes.map(_.clone().toMap.filter { case (name, _) => name != "direct" }).toSeq,
+ sen,
+ notes.filter(_.isNlp).map(p => if (p.isDirect) 0 else 1).sum
+ )
+ }
+
+ seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) => seq.minBy(_.factor).sentence }.toSeq
+ }
+ else
+ collapse0(sen).flatMap(p => Option(Seq(p))).getOrElse(Seq.empty)
+
+ }
+
+ /**
* This collapser handles several tasks:
* - "overall" collapsing after all other individual collapsers had their turn.
* - Special further enrichment of tokens like linking, etc.
@@ -612,14 +684,7 @@
* lengths - the winning note is chosen based on this priority.
*/
@throws[NCE]
- private def collapseSentence(sen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
- def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
- if (lastPhase)
- dropAbstract(mdl, ns)
-
- if (collapseSentence(ns, getNotNlpNotes(ns.tokens).map(_.noteType).distinct, lastPhase)) Some(ns) else None
- }
-
+ def collapse(mdl: NCModel, sen: NCNlpSentence, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
// Always deletes `similar` notes.
// Some words with same note type can be detected various ways.
// We keep only one variant - with `best` direct and sparsity parameters,
@@ -649,7 +714,7 @@
redundant.foreach(sen.removeNote)
- var delCombs: Seq[NCNlpSentenceNote] =
+ var overlappedNotes: Seq[NCNlpSentenceNote] =
getNotNlpNotes(sen.tokens).
flatMap(note => getNotNlpNotes(note.tokenIndexes.map(sen(_))).filter(_ != note)).
distinct
@@ -658,7 +723,7 @@
val links = getLinks(sen.tokens.toSeq.flatten)
val swallowed =
- delCombs.
+ overlappedNotes.
// There aren't links on it.
filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
// It doesn't have links.
@@ -668,7 +733,7 @@
val key = NCTokenPartKey(note, sen)
val delCombOthers =
- delCombs.filter(_ != note).flatMap(n => if (getPartKeys(n).contains(key)) Some(n) else None)
+ overlappedNotes.filter(_ != note).flatMap(n => if (getPartKeys(n).contains(key)) Some(n) else None)
if (
delCombOthers.nonEmpty &&
@@ -679,61 +744,11 @@
None
})
- delCombs = delCombs.filter(p => !swallowed.contains(p))
+ overlappedNotes = overlappedNotes.filter(p => !swallowed.contains(p))
addDeleted(sen, sen, swallowed)
swallowed.foreach(sen.removeNote)
- var sens =
- if (delCombs.nonEmpty) {
- val toksByIdx =
- delCombs.flatMap(note => note.wordIndexes.map(_ -> note)).
- groupBy { case (idx, _) => idx }.
- map { case (_, seq) => seq.map { case (_, note) => note }.toSet }.
- toSeq.sortBy(-_.size)
-
- def findCombinations(): Seq[Seq[NCNlpSentenceNote]] =
- NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq)
-
- val seqSens =
- combCache.
- getOrElseUpdate(sen.srvReqId, mutable.HashMap.empty[CacheKey, CacheValue]).
- getOrElseUpdate(
- toksByIdx,
- findCombinations()
- ).par.
- flatMap(delComb => {
- val nsClone = sen.clone()
-
- // Saves deleted notes for sentence and their tokens.
- addDeleted(sen, nsClone, delComb)
- delComb.foreach(nsClone.removeNote)
-
- // Has overlapped notes for some tokens.
- require(!nsClone.exists(_.count(!_.isNlp) > 1))
-
- collapse0(nsClone)
- }).seq
-
- // It removes sentences which have only one difference - 'direct' flag of their user tokens.
- // `Direct` sentences have higher priority.
- type Key = Seq[Map[String, JSerializable]]
- case class Holder(key: Key, sentence: NCNlpSentence, factor: Int)
-
- def mkHolder(sen: NCNlpSentence): Holder = {
- val notes = sen.flatten
-
- Holder(
- // We have to delete some keys to have possibility to compare sentences.
- notes.map(_.clone().toMap.filter { case (name, _) => name != "direct" }).toSeq,
- sen,
- notes.filter(_.isNlp).map(p => if (p.isDirect) 0 else 1).sum
- )
- }
-
- seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) => seq.minBy(_.factor).sentence }.toSeq
- }
- else
- collapse0(sen).flatMap(p => Option(Seq(p))).getOrElse(Seq.empty)
+ var sens = mkVariants(sen, mdl, lastPhase, overlappedNotes)
sens.par.foreach(sen =>
sen.foreach(tok =>
@@ -745,18 +760,18 @@
)
)
- def notNlpNotes(s: NCNlpSentence): Seq[NCNlpSentenceNote] = s.flatten.filter(!_.isNlp)
+ // Optimizations below: variants that are similar by various criteria are deleted.
- // Drops similar sentences (with same notes structure). Keeps with more found.
+ // Drops similar sentences with the same notes structure, based on greedy elements. Keeps the one with more notes found.
val notGreedyElems =
mdl.getElements.asScala.flatMap(e => if (!e.isGreedy.orElse(mdl.isGreedy)) Some(e.getId) else None).toSet
- sens = sens.groupBy(notNlpNotes(_).groupBy(_.noteType).keys.toSeq.sorted.distinct).
+ sens = sens.groupBy(p => getNotNlpNotes(p.tokens).groupBy(_.noteType).keys.toSeq.sorted.distinct).
flatMap { case (types, sensSeq) =>
if (types.exists(notGreedyElems.contains))
sensSeq
else {
- val m: Map[NCNlpSentence, Int] = sensSeq.map(p => p -> notNlpNotes(p).size).toMap
+ val m: Map[NCNlpSentence, Int] = sensSeq.map(p => p -> getNotNlpNotes(p.tokens).size).toMap
val max = m.values.max
@@ -768,6 +783,7 @@
var sensWithNotesIdxs = sensWithNotes.zipWithIndex
+ // Drops similar sentences if there are other sentences with superset of notes.
sens =
sensWithNotesIdxs.filter { case ((_, notNlpNotes1), idx1) =>
!sensWithNotesIdxs.
@@ -775,13 +791,12 @@
exists { case((_, notNlpNotes2), _) => notNlpNotes1.subsetOf(notNlpNotes2) }
}.map { case ((sen, _), _) => sen }
- // Drops similar sentences (with same tokens structure).
- // Among similar sentences we prefer one with minimal free words count.
- sens = sens.groupBy(notNlpNotes(_).map(_.getKey(withIndexes = false))).
+ // Drops similar sentences. Among similar sentences we prefer one with minimal free words count.
+ sens = sens.groupBy(p => getNotNlpNotes(p.tokens).map(_.getKey(withIndexes = false))).
map { case (_, seq) => seq.minBy(_.filter(p => p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
toSeq
- // Drops sentences if they are just subset of another.
+ // Drops sentences if they are just a subset of another one (indexes are ignored here).
sensWithNotes = sensWithNotes.filter { case (sen, _) => sens.contains(sen) }
sensWithNotesIdxs = sensWithNotes.zipWithIndex
@@ -816,15 +831,6 @@
/**
*
- * @param mdl
- * @param sen
- * @param lastPhase
- */
- def collapse(mdl: NCModel, sen: NCNlpSentence, lastPhase: Boolean = false): Seq[NCNlpSentence] =
- collapseSentence(sen, mdl, lastPhase)
-
- /**
- *
* @param srvReqId
*/
def clearRequestData(srvReqId: String): Unit = combCache -= srvReqId
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
index e2d59f6..fa31f26 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
@@ -38,7 +38,7 @@
private lazy val cache =
mutable.HashMap.empty[String, mutable.HashMap[Int, mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]]
- def isUnprocessed(elemId: String, s: Synonym, tokens: Seq[T]): Boolean =
+ def isUnprocessed(elemId: String, syn: Synonym, tokens: Seq[T]): Boolean =
cache.
getOrElseUpdate(
elemId,
@@ -51,7 +51,7 @@
getOrElseUpdate(
tokens,
mutable.HashSet.empty[Synonym]
- ).add(s)
+ ).add(syn)
}
private case class SavedIdlKey(id: String, startCharIndex: Int, endCharIndex: Int, other: Map[String, AnyRef] = Map.empty)
@@ -72,24 +72,22 @@
)
}
- private case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction) {
- override def toString: String = variants.toString()
- }
+ private case class SavedIdlValue(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction)
- private val savedIdl = mutable.HashMap.empty[String, mutable.HashMap[SavedIdlKey, mutable.ArrayBuffer[Value]]]
- private val idlChunksCache = mutable.HashMap.empty[String, mutable.HashMap[(IdlToken, NCProbeSynonymChunk), Boolean]]
+ private case class IdlChunkKey(token: IdlToken, chunk: NCProbeSynonymChunk)
+
+ private val savedIdl = mutable.HashMap.empty[String, mutable.HashMap[SavedIdlKey, mutable.ArrayBuffer[SavedIdlValue]]]
+ private val idlChunksCache = mutable.HashMap.empty[String, mutable.HashMap[IdlChunkKey, Boolean]]
private val idlCaches = mutable.HashMap.empty[String, CacheHolder[IdlToken]]
private val tokCaches = mutable.HashMap.empty[String, CacheHolder[Int]]
override def start(parent: Span): NCService = {
ackStarting()
-
ackStarted()
}
override def stop(parent: Span): Unit = {
ackStopping()
-
ackStopped()
}
@@ -120,7 +118,7 @@
/**
*
- * @param s
+ * @param syn
* @param toks
* @param isMatch
* @param getIndex
@@ -128,19 +126,23 @@
* @tparam T
*/
private def sparseMatch0[T](
- s: Synonym,
+ syn: Synonym,
toks: Seq[T],
isMatch: (T, NCProbeSynonymChunk) => Boolean,
getIndex: T => Int,
shouldBeNeighbors: Boolean
): Option[Seq[T]] =
- if (toks.size >= s.size) {
+ if (toks.size >= syn.size) {
lazy val res = mutable.ArrayBuffer.empty[T]
lazy val all = mutable.HashSet.empty[T]
+ // There are 3 states:
+ // 0 - initial working state, first step.
+ // 1 - working state, not first step.
+ // -1 - stop state.
var state = 0
- for (chunk <- s if state != -1) {
+ for (chunk <- syn if state != -1) {
val seq =
if (state == 0) {
state = 1
@@ -153,12 +155,12 @@
if (seq.nonEmpty) {
val head = seq.head
- if (!s.permute && res.nonEmpty && getIndex(head) <= getIndex(res.last))
+ if (!syn.permute && res.nonEmpty && getIndex(head) <= getIndex(res.last))
state = -1
else {
all ++= seq
- if (all.size > s.size)
+ if (all.size > syn.size)
state = -1
else
res += head
@@ -168,7 +170,12 @@
state = -1
}
- if (state != -1 && all.size == res.size && (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).toSeq.sorted)))
+ if (
+ state != -1 && // State ok.
+ all.size == res.size && // There aren't excess processed tokens.
+ // `neighbors` conditions, important for simple not sparse synonyms.
+ (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).toSeq.sorted))
+ )
Some(res.toSeq)
else
None
@@ -186,69 +193,75 @@
private def save(req: NCRequest, tok: NCToken, pred: NCIdlFunction, variantsToks: Seq[Seq[NCToken]]): Unit = {
savedIdl.
getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty).
- getOrElseUpdate(SavedIdlKey(tok), mutable.ArrayBuffer.empty) +=
- Value(req, variantsToks, pred)
+ getOrElseUpdate(SavedIdlKey(tok), mutable.ArrayBuffer.empty) +=
+ SavedIdlValue(req, variantsToks, pred)
}
/**
+ * Checks that the given synonym has not yet been processed with the given NLP token indexes.
*
* @param srvReqId
* @param elemId
- * @param s
+ * @param syn
* @param tokens
*/
- private def isUnprocessedTokens(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[Int]): Boolean =
- tokCaches.getOrElseUpdate(srvReqId, new CacheHolder[Int]).isUnprocessed(elemId, s, tokens)
+ private def isUnprocessedTokens(srvReqId: String, elemId: String, syn: Synonym, tokens: Seq[Int]): Boolean =
+ tokCaches.getOrElseUpdate(srvReqId, new CacheHolder[Int]).isUnprocessed(elemId, syn, tokens)
/**
+ * Checks that the given synonym has not yet been processed with the given IDL tokens.
*
* @param srvReqId
* @param elemId
- * @param s
+ * @param syn
* @param tokens
*/
- private def isUnprocessedIdl(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[IdlToken]): Boolean =
- idlCaches.getOrElseUpdate(srvReqId, new CacheHolder[IdlToken]).isUnprocessed(elemId, s, tokens)
+ private def isUnprocessedIdl(srvReqId: String, elemId: String, syn: Synonym, tokens: Seq[IdlToken]): Boolean =
+ idlCaches.getOrElseUpdate(srvReqId, new CacheHolder[IdlToken]).isUnprocessed(elemId, syn, tokens)
/**
+ * Checks matching IDL token with synonym's chunk.
*
- * @param tow
- * @param chunk
- * @param req
- * @param variantsToks
+ * @param t IDL token.
+ * @param chunk Synonym's chunk.
+ * @param req Request.
+ * @param variantsToks All possible request's variants.
*/
private def isMatch(
- tow: IdlToken, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
+ t: IdlToken, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
): Boolean =
idlChunksCache.
- getOrElseUpdate(req.getServerRequestId,
- mutable.HashMap.empty[(IdlToken, NCProbeSynonymChunk), Boolean]
+ getOrElseUpdate(
+ req.getServerRequestId,
+ mutable.HashMap.empty[IdlChunkKey, Boolean]
).
getOrElseUpdate(
- (tow, chunk),
+ IdlChunkKey(t, chunk),
{
- def get0[T](fromToken: NCToken => T, fromWord: NlpToken => T): T =
- if (tow.isToken) fromToken(tow.token) else fromWord(tow.word)
-
chunk.kind match {
- case TEXT => chunk.wordStem == get0(_.stem, _.stem)
+ case TEXT => chunk.wordStem == t.stem
case REGEX =>
- chunk.regex.matcher(get0(_.origText, _.origText)).matches() ||
- chunk.regex.matcher(get0(_.normText, _.normText)).matches()
+ chunk.regex.matcher(t.origText).matches() || chunk.regex.matcher(t.normText).matches()
case IDL =>
- val ok =
+ val ok = {
+ // IDL condition just for tokens.
+ t.isToken &&
+ // At least one suitable variant (valid NCIdlContext) must be found for the given token.
+ // This variant will be checked again on last processing phase.
variantsToks.par.exists(vrntToks =>
- get0(t =>
- chunk.idlPred.apply(t, NCIdlContext(toks = vrntToks, req = req)).
- value.asInstanceOf[Boolean],
- _ => false
+ chunk.idlPred.apply(
+ t.token,
+ NCIdlContext(toks = vrntToks, req = req)).value.asInstanceOf[Boolean]
)
- )
+ }
+ // Saves all variants for next validation.
+ // All suitable variants can be deleted, so this positive result can be abolished
+ // on last processing phase.
if (ok)
- save(req, tow.token, chunk.idlPred, variantsToks)
+ save(req, t.token, chunk.idlPred, variantsToks)
ok
@@ -270,22 +283,29 @@
require(toks != null)
require(!syn.sparse && !syn.hasIdl)
- if (
- toks.length == syn.length && {
+ if (toks.length == syn.length) { // Same length.
+ val ok =
if (syn.isTextOnly)
- toks.zip(syn).forall(p => p._1.stem == p._2.wordStem)
+ toks.zip(syn).
+ // Checks all synonym chunks with all tokens.
+ forall { case (tok, chunk) => tok.stem == chunk.wordStem }
else
- toks.zip(syn).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
- }
- )
- callback()
+ toks.zip(syn).
+ // Pre-sort by chunk kind for performance: chunks that are easier to compare come first.
+ sortBy { case (_, chunk) => getSort(chunk.kind) }.
+ // Checks all synonym chunks with all tokens.
+ forall { case (tok, chunk) => isMatch(tok, chunk) }
+
+ if (ok)
+ callback(())
+ }
}
/**
*
* @param srvReqId
* @param elemId
- * @param s
+ * @param syn
* @param toks
* @param req
* @param variantsToks
@@ -294,24 +314,26 @@
def onMatch(
srvReqId: String,
elemId: String,
- s: Synonym,
+ syn: Synonym,
toks: Seq[IdlToken],
req: NCRequest,
variantsToks: Seq[Seq[NCToken]],
callback: Unit => Unit
): Unit =
- if (isUnprocessedIdl(srvReqId, elemId, s, toks)) {
+ if (isUnprocessedIdl(srvReqId, elemId, syn, toks)) {
require(toks != null)
if (
- toks.length == s.length &&
- toks.count(_.isToken) >= s.idlChunks && {
- toks.zip(s).sortBy(p => getSort(p._2.kind)).forall {
- case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
- }
+ toks.length == syn.length && // Same length.
+ toks.count(_.isToken) >= syn.idlChunks && // Enough tokens.
+ toks.zip(syn).sortBy { // Pre-sort by chunk kind for performance: easier-to-compare chunks come first.
+ case (_, chunk) => getSort(chunk.kind)
+ }.
+ forall { // Checks all synonym chunks with all tokens.
+ case (idlTok, chunk) => isMatch(idlTok, chunk, req, variantsToks)
}
)
- callback()
+ callback(())
}
/**
@@ -363,7 +385,7 @@
syn,
toks,
(t: IdlToken, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
- (t: IdlToken) => if (t.isToken) t.token.getStartCharIndex else t.word.startCharIndex,
+ (t: IdlToken) => t.startCharIndex,
shouldBeNeighbors = !syn.sparse
) match {
case Some(res) => callback(res)
@@ -372,13 +394,15 @@
}
/**
+ * Checks that a suitable variant was not deleted and that the IDL condition for the token is still valid.
+ * This check is required because the NCIdlContext used in the predicate is based on the variant.
*
* @param srvReqId
- * @param senToks
+ * @param toks
*/
- def isStillValidIdl(srvReqId: String, senToks: Seq[NCToken]): Boolean =
+ def isStillValidIdl(srvReqId: String, toks: Seq[NCToken]): Boolean =
savedIdl.get(srvReqId) match {
- case Some(m) =>
+ case Some(map) =>
lazy val allCheckedSenToks = {
val set = mutable.HashSet.empty[SavedIdlKey]
@@ -388,13 +412,13 @@
t.getPartTokens.asScala.foreach(add)
}
- senToks.foreach(add)
+ toks.foreach(add)
set
}
- senToks.forall(tok =>
- m.get(SavedIdlKey(tok)) match {
+ toks.forall(tok =>
+ map.get(SavedIdlKey(tok)) match {
case Some(vals) =>
vals.exists(
v =>
@@ -415,6 +439,7 @@
}
/**
+ * Called when request processing finished.
*
* @param srvReqId
*/
@@ -425,6 +450,7 @@
}
/**
+ * Called on each request enrichment iteration.
*
* @param srvReqId
*/
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index e8265e1..a892561 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -762,7 +762,7 @@
errCode
)
- logger.info(s"${rb(w(" REJECT "))} result processed [srvReqId=${m(srvReqId)}, error=$err, code=$errCode]")
+ logger.info(s"${rb(w("<REJECT>"))} result processed [srvReqId=${m(srvReqId)}, error=$err, code=$errCode]")
}
else { // OK result.
require(resTypeOpt.isDefined && resBodyOpt.isDefined)
@@ -776,7 +776,7 @@
intentId
)
- logger.info(s"${gb(w(" SUCCESS "))} result processed [srvReqId=${m(srvReqId)}]")
+ logger.info(s"${gb(w("<SUCCESS>"))} result processed [srvReqId=${m(srvReqId)}]")
}
}
catch {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
index 877cf60..dd92ef8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
@@ -262,7 +262,8 @@
startScopedSpan("future", parent, "srvReqId" -> srvReqId) { span =>
val tbl = NCAsciiTable()
- tbl += (s"${b("Text")}", rv(" " + txt0 + " "))
+ tbl += (s"${b("Text")}", bo(txt0))
+ tbl += ("", bo("-") * txt0.length)
tbl += (s"${b("User ID")}", usr.id)
tbl += (s"${b("Model ID")}", mdlId)
tbl += (s"${b("Agent")}", usrAgent.getOrElse("<n/a>"))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index d89ba98..91b195d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -42,8 +42,8 @@
import scala.util.{Failure, Success}
/**
- * Synonym suggestion manager.
- */
+ * Synonym suggestion manager.
+ */
object NCSuggestSynonymManager extends NCService {
// For context word server requests.
private final val MAX_LIMIT: Int = 10000
@@ -82,40 +82,47 @@
case _ =>
throw new NCE(
- s"Unexpected HTTP response from `ctxword` server [" +
- s"code=$code, " +
- s"response=$js" +
- s"]"
- )
+ s"Unexpected HTTP response from `ctxword` server [" +
+ s"code=$code, " +
+ s"response=$js" +
+ s"]"
+ )
}
}
case class Suggestion(word: String, score: Double)
case class RequestData(sentence: String, ex: String, elmId: String, index: Int)
- case class RestRequestSentence(text: String, indexes: util.List[Int])
+ case class RestRequestSentence(text: String, indexes: util.List[Int]) {
+ validate(text, indexes.asScala)
+
+ private def validate(text: String, indexes: Seq[Int]): Unit = {
+ val arr = splitAndNormalize(text)
+
+ require(
+ indexes.forall(i => i >= 0 && i < arr.length),
+ s"Invalid request [text=$text, indexes=${indexes.mkString(",")}"
+ )
+ }
+ }
case class RestRequest(sentences: util.List[RestRequestSentence], limit: Int, minScore: Double)
case class Word(word: String, stem: String) {
require(!word.contains(" "), s"Word cannot contains spaces: $word")
- require(
- word.forall(ch =>
- ch.isLetterOrDigit ||
- ch == '\'' ||
- SEPARATORS.contains(ch)
- ),
- s"Unsupported symbols: $word"
- )
+ require(isSuitable4Suggestion(word), s"Unsupported symbols: $word")
}
case class SuggestionResult(synonym: String, score: Double)
private def split(s: String): Seq[String] = U.splitTrimFilter(s, " ")
private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
+ private def splitAndNormalize(s: String) = s.split(" ").map(_.strip).filter(_.nonEmpty)
+ private def isSuitable4Suggestion(word: String): Boolean =
+ word.forall(ch => ch.isLetterOrDigit || ch == '\'' || SEPARATORS.contains(ch))
/**
- *
- * @param seq1
- * @param seq2
- */
+ *
+ * @param seq1
+ * @param seq2
+ */
private def getAllSlices(seq1: Seq[String], seq2: Seq[String]): Seq[Int] = {
val seq = mutable.Buffer.empty[Int]
@@ -131,12 +138,12 @@
}
/**
- *
- * @param mdlId
- * @param minScoreOpt
- * @param parent
- * @return
- */
+ *
+ * @param mdlId
+ * @param minScoreOpt
+ * @param parent
+ * @return
+ */
def suggest(mdlId: String, minScoreOpt: Option[Double], parent: Span = null): Future[NCSuggestSynonymResult] =
startScopedSpan("inspect", parent, "mdlId" -> mdlId) { _ =>
val now = U.now()
@@ -148,8 +155,8 @@
try {
require(
m.containsKey("macros") &&
- m.containsKey("synonyms") &&
- m.containsKey("samples")
+ m.containsKey("synonyms") &&
+ m.containsKey("samples")
)
val mdlMacros = m.get("macros").
@@ -187,7 +194,7 @@
if (allSamplesCnt < MIN_CNT_MODEL)
warns +=
s"Model has too few ($allSamplesCnt) intents samples. " +
- s"Try to increase overall sample count to at least $MIN_CNT_MODEL."
+ s"Try to increase overall sample count to at least $MIN_CNT_MODEL."
else {
val ids =
@@ -198,7 +205,7 @@
if (ids.nonEmpty)
warns +=
s"Following model intent have too few samples (${ids.mkString(", ")}). " +
- s"Try to increase overall sample count to at least $MIN_CNT_INTENT."
+ s"Try to increase overall sample count to at least $MIN_CNT_INTENT."
}
val parser = new NCMacroParser()
@@ -212,15 +219,18 @@
flatMap { case (_, samples) => samples }.
map(ex => SEPARATORS.foldLeft(ex)((s, ch) => s.replaceAll(s"\\$ch", s" $ch "))).
map(ex => {
- val seq = ex.split(" ")
+ val seq = splitAndNormalize(ex)
seq -> seq.map(toStemWord)
}).
toMap
val elmSyns =
- mdlSyns.map { case (elmId, syns) => elmId -> syns.flatMap(parser.expand) }.
- map { case (id, seq) => id -> seq.map(txt => split(txt).map(p => Word(p, toStemWord(p)))) }
+ mdlSyns.
+ map { case (elmId, syns) => elmId -> syns.flatMap(parser.expand) }.
+ map { case (elmId, syns) => elmId -> syns.filter(isSuitable4Suggestion) }.
+ filter { case (_, syns) => syns.nonEmpty }.
+ map { case (elmId, seq) => elmId -> seq.map(txt => split(txt).map(p => Word(p, toStemWord(p)))) }
val allReqs =
elmSyns.map {
@@ -276,7 +286,7 @@
s"exs=${exs.size}, " +
s"syns=$allSynsCnt, " +
s"reqs=$allReqsCnt" +
- s"]")
+ s"]")
if (allReqsCnt == 0)
onError(s"Suggestions cannot be generated for model: '$mdlId'")
@@ -441,19 +451,19 @@
}
/**
- *
- * @param parent Optional parent span.
- * @return
- */
+ *
+ * @param parent Optional parent span.
+ * @return
+ */
override def start(parent: Span): NCService = startScopedSpan("start", parent) { _ =>
ackStarting()
ackStarted()
}
/**
- *
- * @param parent Optional parent span.
- */
+ *
+ * @param parent Optional parent span.
+ */
override def stop(parent: Span): Unit = startScopedSpan("stop", parent) { _ =>
ackStopping()
ackStopped()
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
index 2bbc9cb..387e8b0 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
@@ -17,7 +17,7 @@
package org.apache.nlpcraft.server.rest
-import org.apache.nlpcraft.model.NCElement
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentSample, NCResult}
import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment}
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.Test
@@ -25,10 +25,22 @@
import java.util
import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, SetHasAsJava, SetHasAsScala}
+class RestTestModelExt1 extends RestTestModel {
+ @NCIntent("intent=onX term(t)={# == 'a'}")
+ @NCIntentSample(Array(
+ "oh, cat will feel happy",
+ "oh , cat will feel happy",
+ "oh cat will feel happy"
+ ))
+ private def x(): NCResult = NCResult.text("OK")
+
+ override def getElements: util.Set[NCElement] =
+ (super.getElements.asScala ++ Set(NCTestElement("cat", "cat", "{^^{is_alphanum(tok_txt)}^^}[1,3]"))).asJava
+}
/**
* Note that context word server should be started.
*/
-@NCTestEnvironment(model = classOf[RestTestModel], startClient = false)
+@NCTestEnvironment(model = classOf[RestTestModelExt1], startClient = false)
class NCRestModelSpec1 extends NCRestSpec {
@Test
def testSugsyn(): Unit = {
@@ -57,13 +69,24 @@
})
)
+ post("model/sugsyn", "mdlId" -> "rest.test.model", "minScore" -> 0.5)(
+ ("$.status", (status: String) => assertEquals("API_OK", status)),
+ ("$.result.suggestions[:1].cat.*", (data: JList[java.util.Map[String, Object]]) => {
+ val scores = extract(data)
+
+ assertTrue(scores.nonEmpty)
+ assertTrue(scores.forall(s => s >= 0.5 && s <= 1))
+ })
+ )
+
+
postError("model/sugsyn", 400, "NC_INVALID_FIELD", "mdlId" -> "UNKNOWN")
postError("model/sugsyn", 400, "NC_INVALID_FIELD", "mdlId" -> "rest.test.model", "minScore" -> 2)
postError("model/sugsyn", 400, "NC_ERROR")
}
}
-class RestTestModelExt extends RestTestModel {
+class RestTestModelExt2 extends RestTestModel {
override def getMacros: util.Map[String, String] = {
Map(
"<M1>" -> "mtest1 {x|_}",
@@ -90,7 +113,7 @@
/**
*
*/
-@NCTestEnvironment(model = classOf[RestTestModelExt], startClient = false)
+@NCTestEnvironment(model = classOf[RestTestModelExt2], startClient = false)
class NCRestModelSpec2 extends NCRestSpec {
@Test
def testSyns(): Unit = {
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
index 0cb519e..8fa5b15 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
@@ -55,15 +55,15 @@
@NCIntent("intent=onA term(t)={# == 'a'}")
@NCIntentSample(Array("My A"))
- private def a(): NCResult = NCResult.text("OK")
+ def a(): NCResult = NCResult.text("OK")
@NCIntent("intent=onB term(t)={# == 'b'}")
@NCIntentSample(Array("My B"))
- private def b(): NCResult = NCResult.text("OK")
+ def b(): NCResult = NCResult.text("OK")
@NCIntent("intent=onMeta term(t)={# == 'meta'}")
@NCIntentSample(Array("meta"))
- private def meta(): NCResult = {
+ def meta(): NCResult = {
val res = NCResult.text("OK")
res.getMetadata.put(K1, V1)
diff --git a/pom.xml b/pom.xml
index bf178ba..c7ba319 100644
--- a/pom.xml
+++ b/pom.xml
@@ -82,8 +82,8 @@
<timestamp>${maven.build.timestamp}</timestamp>
<!-- Versions. -->
- <ignite.ver>2.10.0</ignite.ver>
- <gridgain.agent.ver>2.10.0.0</gridgain.agent.ver>
+ <ignite.ver>2.11.0</ignite.ver>
+ <gridgain.agent.ver>2.11.0.0</gridgain.agent.ver>
<scala.plugin.ver>4.3.1</scala.plugin.ver>
<log4j.ver>2.12.0</log4j.ver>
<scala.ver>${scala.base}.6</scala.ver>
@@ -139,7 +139,7 @@
<vertical.blank.ver>1.0.1</vertical.blank.ver>
<fliptables.ver>1.1.0</fliptables.ver>
<jline.ver>3.20.0</jline.ver>
- <jansi.ver>2.3.3</jansi.ver>
+ <jansi.ver>2.3.4</jansi.ver>
<!--
Following libraries versions are compatible: