Merge branch 'master' into NLPCRAFT-383
# Conflicts:
# nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
diff --git a/nlpcraft-examples/alarm/pom.xml b/nlpcraft-examples/alarm/pom.xml
index 9856392..9420959 100644
--- a/nlpcraft-examples/alarm/pom.xml
+++ b/nlpcraft-examples/alarm/pom.xml
@@ -25,8 +25,8 @@
<artifactId>nlpcraft-example-alarm</artifactId>
<parent>
- <artifactId>nlpcraft-parent</artifactId>
<groupId>org.apache.nlpcraft</groupId>
+ <artifactId>nlpcraft-parent</artifactId>
<version>0.9.0</version>
<relativePath>../../pom.xml</relativePath>
</parent>
@@ -70,6 +70,7 @@
<target>${java.ver}</target>
</configuration>
</plugin>
+
<plugin>
<groupId>com.bazaarvoice.maven.plugins</groupId>
<artifactId>process-exec-maven-plugin</artifactId>
diff --git a/nlpcraft-examples/alarm/src/main/java/org/apache/nlpcraft/examples/alarm/AlarmModel.java b/nlpcraft-examples/alarm/src/main/java/org/apache/nlpcraft/examples/alarm/AlarmModel.java
index 0e15a02..a6034a7 100644
--- a/nlpcraft-examples/alarm/src/main/java/org/apache/nlpcraft/examples/alarm/AlarmModel.java
+++ b/nlpcraft-examples/alarm/src/main/java/org/apache/nlpcraft/examples/alarm/AlarmModel.java
@@ -62,7 +62,7 @@
* @param ctx Intent solver context.
* @return Query result.
*/
- @NCIntentRef("alarm") // Intent is defined in JSON model file (alarm_model.json and intents.idl).
+ @NCIntentRef("alarm") // Intent is defined in JSON model file (alarm_model.json and alarm_intents.idl).
@NCIntentSampleRef("alarm_samples.txt") // Samples supplied in an external file.
NCResult onMatch(
NCIntentMatch ctx,
diff --git a/nlpcraft-examples/alarm/src/main/resources/intents.idl b/nlpcraft-examples/alarm/src/main/resources/alarm_intents.idl
similarity index 84%
rename from nlpcraft-examples/alarm/src/main/resources/intents.idl
rename to nlpcraft-examples/alarm/src/main/resources/alarm_intents.idl
index 3c3934d..087bff9 100644
--- a/nlpcraft-examples/alarm/src/main/resources/intents.idl
+++ b/nlpcraft-examples/alarm/src/main/resources/alarm_intents.idl
@@ -15,15 +15,19 @@
* limitations under the License.
*/
+/*
+ * Read documentation: http://nlpcraft.apache.org/intent-matching.html
+ */
+
// Fragments (mostly for demo purposes here).
fragment=buzz term~{# == 'x:alarm'}
fragment=when
term(nums)~{
// Demonstrating term variables.
@type = meta_tok('nlpcraft:num:unittype')
- @iseq = meta_tok('nlpcraft:num:isequalcondition') // Excludes conditional statements.
+ @isEq = meta_tok('nlpcraft:num:isequalcondition') // Excludes conditional statements.
- # == 'nlpcraft:num' && @type == 'datetime' && @iseq == true
+ # == 'nlpcraft:num' && @type == 'datetime' && @isEq == true
}[1,7]
// Intents (using fragments).
diff --git a/nlpcraft-examples/alarm/src/main/resources/alarm_model.json b/nlpcraft-examples/alarm/src/main/resources/alarm_model.json
index 9060fde..e55fd15 100644
--- a/nlpcraft-examples/alarm/src/main/resources/alarm_model.json
+++ b/nlpcraft-examples/alarm/src/main/resources/alarm_model.json
@@ -34,6 +34,6 @@
}
],
"intents": [
- "import('intents.idl')" // Import intents from external file.
+ "import('alarm_intents.idl')" // Import intents from external file.
]
}
\ No newline at end of file
diff --git a/nlpcraft-examples/cargps/README.md b/nlpcraft-examples/cargps/README.md
new file mode 100644
index 0000000..ce7a565
--- /dev/null
+++ b/nlpcraft-examples/cargps/README.md
@@ -0,0 +1,45 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<img src="https://nlpcraft.apache.org/images/nlpcraft_logo_black.gif" height="80px" alt="">
+<br>
+
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/apache/opennlp/master/LICENSE)
+[![Build](https://github.com/apache/incubator-nlpcraft/workflows/build/badge.svg)](https://github.com/apache/incubator-nlpcraft/actions)
+[![Documentation Status](https://img.shields.io/:docs-latest-green.svg)](https://nlpcraft.apache.org/docs.html)
+[![Gitter](https://badges.gitter.im/apache-nlpcraft/community.svg)](https://gitter.im/apache-nlpcraft/community)
+
+### Car GPS Example
+This example provides a simulation of the in-car GPS-based navigation system with the natural
+language interface.
+
+For any questions, feedback or suggestions:
+
+ * View & run other [examples](https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples)
+ * Read [documentation](https://nlpcraft.apache.org/docs.html), latest [Javadoc](https://nlpcraft.apache.org/apis/latest/index.html) and [REST APIs](https://nlpcraft.apache.org/using-rest.html)
+ * Download & Maven/Grape/Gradle/SBT [instructions](https://nlpcraft.apache.org/download.html)
+ * File a bug or improvement in [JIRA](https://issues.apache.org/jira/projects/NLPCRAFT)
+ * Post a question at [Stack Overflow](https://stackoverflow.com/questions/ask) using <code>nlpcraft</code> tag
+ * Access [GitHub](https://github.com/apache/incubator-nlpcraft) mirror repository.
+ * Join project developers on [dev@nlpcraft.apache.org](mailto:dev-subscribe@nlpcraft.apache.org)
+
+### Copyright
+Copyright (C) 2021 Apache Software Foundation
+
+<img src="https://www.apache.org/img/ASF20thAnniversary.jpg" height="64px" alt="ASF Logo">
+
+
diff --git a/nlpcraft-examples/cargps/pom.xml b/nlpcraft-examples/cargps/pom.xml
new file mode 100644
index 0000000..939d4f3
--- /dev/null
+++ b/nlpcraft-examples/cargps/pom.xml
@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <name>NLPCraft Example Car GPS</name>
+ <artifactId>nlpcraft-example-cargps</artifactId>
+
+ <parent>
+ <artifactId>nlpcraft-parent</artifactId>
+ <groupId>org.apache.nlpcraft</groupId>
+ <version>0.9.0</version>
+ <relativePath>../../pom.xml</relativePath>
+ </parent>
+
+ <properties>
+ <nlpcraft.server.module>nlpcraft</nlpcraft.server.module>
+ <nlpcraft.all.deps.jar>apache-${nlpcraft.server.module}-incubating-${project.version}-all-deps.jar</nlpcraft.all.deps.jar>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>nlpcraft</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <!-- Test dependencies. -->
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-engine</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>nlpcraft</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>${maven.compiler.plugin.ver}</version>
+ <configuration>
+ <source>${java.ver}</source>
+ <target>${java.ver}</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>com.bazaarvoice.maven.plugins</groupId>
+ <artifactId>process-exec-maven-plugin</artifactId>
+ <version>${maven.bazaarvoice.plugin.ver}</version>
+ <executions>
+ <execution>
+ <id>pre-integration-test</id>
+ <phase>pre-integration-test</phase>
+ <goals>
+ <goal>start</goal>
+ </goals>
+ <configuration>
+ <!--
+ Depending on the console config and how maven is run this will produce the output with ANSI colors.
+ To strip out ANSI escape sequences from the log file, see the following:
+ https://stackoverflow.com/questions/17998978/removing-colors-from-output
+ -->
+ <name>server</name>
+ <healthcheckUrl>http://localhost:8081/api/v1/health</healthcheckUrl>
+ <waitAfterLaunch>600</waitAfterLaunch>
+ <processLogFile>${project.build.directory}/server-${timestamp}.log</processLogFile>
+ <arguments>
+ <argument>${java.home}/bin/java</argument>
+ <argument>-Xmx4G</argument>
+ <argument>-Xms4G</argument>
+ <argument>--add-exports=java.base/jdk.internal.misc=ALL-UNNAMED</argument>
+ <argument>--add-exports=java.base/sun.nio.ch=ALL-UNNAMED</argument>
+ <argument>--add-exports=java.management/com.sun.jmx.mbeanserver=ALL-UNNAMED</argument>
+ <argument>--add-exports=jdk.internal.jvmstat/sun.jvmstat.monitor=ALL-UNNAMED</argument>
+ <argument>--add-exports=java.base/sun.reflect.generics.reflectiveObjects=ALL-UNNAMED</argument>
+ <argument>--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED</argument>
+ <argument>--illegal-access=permit</argument>
+ <argument>-DNLPCRAFT_ANSI_COLOR_DISABLED=true</argument> <!-- Remove ANSI at least from NLPCraft output. -->
+ <argument>-Djdk.tls.client.protocols=TLSv1.2</argument>
+ <argument>-jar</argument>
+ <argument>${project.basedir}/../../${nlpcraft.server.module}/target/${nlpcraft.all.deps.jar}</argument>
+ <argument>-server</argument>
+ </arguments>
+ </configuration>
+ </execution>
+ <execution>
+ <id>stop-all</id>
+ <phase>post-integration-test</phase>
+ <goals>
+ <goal>stop-all</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>${maven.surefire.plugin.ver}</version>
+ <configuration>
+ <!-- Skips all tests on phase `test`. -->
+ <skip>true</skip>
+ </configuration>
+ <executions>
+ <!-- All tests are defined as integration. -->
+ <execution>
+ <id>integration-tests</id>
+ <phase>integration-test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <skip>false</skip>
+ <!-- Mandatory part. -->
+ <includes>
+ <include>**/*.*</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
\ No newline at end of file
diff --git a/nlpcraft-examples/cargps/src/main/java/org/apache/nlpcraft/examples/cargps/CarGpsModel.scala b/nlpcraft-examples/cargps/src/main/java/org/apache/nlpcraft/examples/cargps/CarGpsModel.scala
new file mode 100644
index 0000000..9b6cf41
--- /dev/null
+++ b/nlpcraft-examples/cargps/src/main/java/org/apache/nlpcraft/examples/cargps/CarGpsModel.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.examples.cargps
+
+import org.apache.nlpcraft.model._
+
+/**
+ * See 'README.md' file in the same folder for running and testing instructions.
+ */
+class CarGpsModel extends NCModelFileAdapter("cargps_model.yaml") {
+ /**
+ *
+ * @return
+ */
+ @NCIntentRef("int:navigate")
+ @NCIntentSampleRef("samples/cargps_navigate_samples.txt")
+ def onNavigation(): NCResult = {
+ NCResult.text(s"")
+ }
+
+ /**
+ *
+ * @return
+ */
+ @NCIntentRef("int:cancel")
+ @NCIntentSampleRef("samples/cargps_cancel_samples.txt")
+ def onCancel(): NCResult = {
+ NCResult.text(s"")
+ }
+
+ /**
+ *
+ * @return
+ */
+ @NCIntentRef("int:add:waypoint")
+ @NCIntentSampleRef("samples/cargps_add_waypoint_samples.txt")
+ def onAddWaypoint(): NCResult = {
+ NCResult.text(s"")
+ }
+
+ /**
+ *
+ * @return
+ */
+ @NCIntentRef("int:remove:waypoint")
+ @NCIntentSampleRef("samples/cargps_remove_waypoint_samples.txt")
+ def onRemoveWaypoint(): NCResult = {
+ NCResult.text(s"")
+ }
+}
diff --git a/nlpcraft-examples/cargps/src/main/resources/cargps_intents.idl b/nlpcraft-examples/cargps/src/main/resources/cargps_intents.idl
new file mode 100644
index 0000000..e90b7b5
--- /dev/null
+++ b/nlpcraft-examples/cargps/src/main/resources/cargps_intents.idl
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /*
+ * Read documentation: http://nlpcraft.apache.org/intent-matching.html
+ */
+
+// Reusable fragments.
+fragment=hey term={# == "x:hey" && tok_is_first()}
+
+// Intents.
+intent=int:cancel
+ // Ignore any other user or system tokens if we found 'cancel' token.
+ options={'unused_usr_toks': true, 'unused_sys_toks': true}
+ fragment(hey)
+ term={# == "x:cancel"}
+
+intent=int:navigate options={'ordered': true} fragment(hey) term={# == "x:navigate"} term={# == "x:addr"}
+intent=int:add:waypoint fragment(hey) term={# == "x:add-waypoint"}
+intent=int:remove:waypoint fragment(hey) term={# == "x:remove-waypoint"}
\ No newline at end of file
diff --git a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
new file mode 100644
index 0000000..62f45c8
--- /dev/null
+++ b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
@@ -0,0 +1,100 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+id: "nlpcraft.cargps.ex"
+name: "Car GPS Example Model"
+version: "1.0"
+description: "NLI-powered car GPS-based navigation example model."
+enabledBuiltInTokens:
+ - "nlpcraft:num"
+ - "nlpcraft:city"
+ - "nlpcraft:country"
+ - "nlpcraft:region"
+
+macros:
+ - name: "<HEY>"
+ macro: "{hey|hi|howdy}"
+ - name: "<NAVIGATE>"
+ macro: "{navigate|pilot|plot|drive|route|plan|find|head|ride|direct|steer|operate|sail} {out|_} {course|route|destination|drive|_} {to|_}"
+ - name: "<CANCEL>"
+ macro: "{cancel|stop|abort|finish|cease|quit} {off|_}"
+ - name: "<WAYPOINT>"
+ macro: "{waypoint|location|point|stopover|stop over|way station|stop|checkpoint|stop point} {point|station|_}"
+
+excludedStopWords:
+ - howdy
+
+abstractTokens:
+ - x:addr:kind
+ - x:addr:num
+ - x:addr:st
+
+elements:
+ #
+ # Address definition.
+ # -------------------
+ - id: "x:addr:kind"
+ # Short list from https://pe.usps.com/text/pub28/28apc_002.htm
+ synonyms:
+ - "{street|drive|court|plaza|avenue|alley|anex|beach|bend|boulevard|bridge|canyon|causeway|way|circle|corner|creek|fork|harbor|highway|expressway|island|lane|lake|loop|motorway|park|path|point|ramp|route|rue|row|skyway|square|station|summit|trail|tunnel|walk|road}"
+ - "{st|str|dr|crt|plz|ave|blvd|hwy|rd}"
+
+ - id: "x:addr:num"
+ synonyms:
+ - "^^{# == 'nlpcraft:num' && meta_tok('nlpcraft:num:unit') == null && meta_tok('nlpcraft:num:isequalcondition')}^^"
+
+ - id: "x:addr:st"
+ greedy: false
+ synonyms:
+ - "{^^{is_alphanum(tok_txt) && tok_is_between_ids('x:addr:num', 'x:addr:kind') == true}^^}[1,3]"
+
+ - id: "x:addr"
+ synonyms:
+ - "^^[num]{# == 'x:addr:num'}^^ ^^[name]{# == 'x:addr:st'}^^ ^^[kind]{# == 'x:addr:kind'}^^"
+
+ #
+ # Salutation.
+ # -----------
+ - id: "x:hey"
+ description: "NLI prompt"
+ synonyms:
+ - "<HEY> {car|vehicle|truck}"
+
+ - id: "x:cancel"
+ description: "Cancel action."
+ synonyms:
+ - "<CANCEL>"
+
+ - id: "x:navigate"
+ description: "Start 'navigate' action."
+ synonyms:
+ - "<NAVIGATE>"
+
+ - id: "x:add-waypoint"
+ description: "Add 'waypoint' action."
+ synonyms:
+ - "{add|make} <WAYPOINT>"
+ - "stop by"
+
+ - id: "x:remove-waypoint"
+ description: "Remove 'waypoint' action."
+ synonyms:
+ - "{skip|remove} {over|_} {last|latest|current|_} <WAYPOINT>"
+ - "<NAVIGATE> without {stopping|<WAYPOINT>}"
+
+intents:
+ - "import('cargps_intents.idl')"
\ No newline at end of file
diff --git a/nlpcraft-examples/cargps/src/main/resources/probe.conf b/nlpcraft-examples/cargps/src/main/resources/probe.conf
new file mode 100644
index 0000000..e15ac80
--- /dev/null
+++ b/nlpcraft-examples/cargps/src/main/resources/probe.conf
@@ -0,0 +1,148 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# This is a joint configuration file for both the server and the data probes. Note that
+# server and probe configuration can be placed into separate files - each file containing only
+# 'nlpcraft.server' or 'nlpcraft.probe' sub-sections.
+#
+# You can also provide configuration properties or override the default ones via environment variables.
+# To use environment variables override:
+# 1. Set probe or server JVM system property -Dconfig.override_with_env_vars=true
+# 2. For each configuration 'x.y.z' set the environment variable CONFIG_FORCE_x_y_z=some_value
+#
+# Examples of environment variables:
+# -- Overrides 'nlpcraft.server.host' configuration property.
+# CONFIG_FORCE_nlpcraft_server_rest_host="localhost"
+#
+# -- Overrides 'nlpcraft.server.models' configuration property.
+# CONFIG_FORCE_nlpcraft_server_models="com.models.MyModel"
+#
+# See https://nlpcraft.apache.org/server-and-probe.html for more details.
+#
+
+# Common server/probe configuration root.
+nlpcraft {
+ # Basic NLP toolkit to use on both server and probes. Possible values:
+ # - 'opennlp'
+ # - 'stanford'
+ #
+ # NOTE: Stanford CoreNLP requires special installation due to its licensing.
+ # See https://nlpcraft.apache.org/integrations.html#stanford for more details.
+ nlpEngine = "opennlp"
+
+ # External configuration resources.
+ #
+ # NOTE:
+ # ----
+ # Due to licensing restrictions of the official ASF release policy some of the
+ # configuration for NLPCraft cannot be shipped with the official Apache release.
+ # Instead, NLPCraft will attempt to download these configuration files from the
+ # external URL upon the first start.
+ #
+ # NLPCraft will attempt to download the missing configuration files from URL defined
+ # in 'nlpcraft.extConfig.extUrl' property and place them into 'nlpcraft.extConfig.locDir'
+ # folder on the local file system. On subsequent starts, NLPCraft will check if the required
+ # file is already present locally and skip the download in such case. If 'nlpcraft.extConfig.checkMd5'
+ # property is set to 'true' then on each start NLPCraft will check the checksum of each file
+ # locally and remotely and will re-download such file if the MD5 checksums don't match.
+ #
+ # By default, the external configuration is stored in the main Git repository for NLPCraft
+ # project from where it will be downloaded ('/external' folder). See this folder in the Git
+ # repository for more information: https://github.com/apache/incubator-nlpcraft/raw/external_config/external
+ extConfig {
+ # Mandatory.
+ extUrl = "https://github.com/apache/incubator-nlpcraft/raw/external_config/external"
+
+ # Optional.
+ # Default value is $USER_HOME/.nlpcraft/extcfg
+ # locDir = ...
+
+ # If 'true', on each start NLPCraft will check the MD5 checksum of each local and remote
+ # external configuration file and will re-download such file if the checksum doesn't match.
+ # Set it to 'false' to speed up the bootstrap of the NLPCraft server and the data probe if you
+ # are certain that all external configuration files are properly downloaded and available
+ # in 'nlpcraft.extConfig.locDir' local folder.
+ checkMd5 = true
+ }
+
+ # +----------------------+
+ # | Probe configuration. |
+ # +----------------------+
+ probe {
+ # Any arbitrary descriptive name.
+ id = "cargps"
+
+ # This is the default token (as in default company).
+ # Note that this token must match the probe token for the company this probe
+ # associated with. If changed from default, this token must be kept secure.
+ token = "3141592653589793"
+
+ # These are default up-link and down-link endpoints that the probe will connect to.
+ # If changed - they need to be changed on both server and probe.
+ upLink = "0.0.0.0:8201" # Server to probe data pipe.
+ downLink = "0.0.0.0:8202" # Probe to server data pipe.
+
+ # All JARs in this folder will be scanned for models.
+ # Safely ignored if 'null' - but then 'models' should have at least one element.
+ jarsFolder = null
+
+ # Specifies fully qualified model class names for the probe to start with.
+ #
+ # Note that the following models require 'google' on the server side.
+ # See https://nlpcraft.apache.org/integrations.html#nlp for more details
+ # on how to configure 3rd party token providers:
+ models = org.apache.nlpcraft.examples.cargps.CarGpsModel
+
+ # Specify class names for probe life cycle components.
+ # Each class should extend 'NCProbeLifecycle' interface and provide a no-arg constructor.
+ #
+ # The following built-in OpenCensus exporters are supported as lifecycle components:
+ # - org.apache.nlpcraft.model.opencensus.NCJaegerExporter (traces)
+ # - org.apache.nlpcraft.model.opencensus.NCZipkinExporter (traces)
+ # - org.apache.nlpcraft.model.opencensus.NCPrometheusExporter (stats)
+ # - org.apache.nlpcraft.model.opencensus.NCStackdriverTraceExporter (traces)
+ # - org.apache.nlpcraft.model.opencensus.NCStackdriverStatsExporter (stats)
+ lifecycle = ""
+
+ # Properties for built-in OpenCensus exporters.
+ # All configuration properties are optional unless otherwise specified.
+ # opencensus {
+ # jaeger {
+ # thriftUrl = "http://127.0.0.1:14268/api/traces"
+ # serviceName = "nlpcraft-probe"
+ # }
+ # prometheus {
+ # hostPort = "localhost:8889"
+ # namespace = "nlpcraft-probe"
+ # }
+ # stackdriver {
+ # # Mandatory Google project ID.
+ # googleProjectId = "your_google_project_id"
+ # metricsPrefix = "custom.googleapis.com/nlpcraft/probe"
+ # }
+ # zipkin {
+ # v2Url = "http://127.0.0.1:9411/api/v2/spans"
+ # serviceName = "nlpcraft-probe"
+ # }
+ # }
+
+ # Maximum execution result size in bytes. Default value is 1M.
+ # When exceeded the request will be automatically rejected.
+ resultMaxSizeBytes = 1048576
+ }
+}
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_add_waypoint_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_add_waypoint_samples.txt
new file mode 100644
index 0000000..4c3f520
--- /dev/null
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_add_waypoint_samples.txt
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Set of samples (corpus) for automatic unit and regression testing.
+#
+
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
new file mode 100644
index 0000000..0e4cb3d
--- /dev/null
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Set of samples (corpus) for automatic unit and regression testing.
+#
+
+#Hey truck - stop the navigation!
+Howdy, car, please cancel the routing now.
+#Hi car - stop the route.
+#Hi car - stop the navigation...
+Howdy truck - quit navigating.
+#Hi car - finish off the driving.
+#Hi car - cancel the journey.
\ No newline at end of file
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_navigate_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_navigate_samples.txt
new file mode 100644
index 0000000..029340e
--- /dev/null
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_navigate_samples.txt
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Set of samples (corpus) for automatic unit and regression testing.
+#
+hey car, navigate to 21 table rock drive
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_remove_waypoint_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_remove_waypoint_samples.txt
new file mode 100644
index 0000000..4c3f520
--- /dev/null
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_remove_waypoint_samples.txt
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Set of samples (corpus) for automatic unit and regression testing.
+#
+
diff --git a/nlpcraft-examples/alarm/src/main/resources/intents.idl b/nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCCarGpsModelSpec.scala
similarity index 63%
copy from nlpcraft-examples/alarm/src/main/resources/intents.idl
copy to nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCCarGpsModelSpec.scala
index 3c3934d..f7f5ca1 100644
--- a/nlpcraft-examples/alarm/src/main/resources/intents.idl
+++ b/nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCCarGpsModelSpec.scala
@@ -15,18 +15,15 @@
* limitations under the License.
*/
-// Fragments (mostly for demo purposes here).
-fragment=buzz term~{# == 'x:alarm'}
-fragment=when
- term(nums)~{
- // Demonstrating term variables.
- @type = meta_tok('nlpcraft:num:unittype')
- @iseq = meta_tok('nlpcraft:num:isequalcondition') // Excludes conditional statements.
+package org.apache.nlpcraft.examples.cargps
- # == 'nlpcraft:num' && @type == 'datetime' && @iseq == true
- }[1,7]
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.Test
-// Intents (using fragments).
-intent=alarm
- fragment(buzz)
- fragment(when)
\ No newline at end of file
+@NCTestEnvironment(model = classOf[CarGpsModel], startClient = true)
+class NCCarGpsModelSpec extends NCTestContext {
+ @Test
+ def test(): Unit = {
+ checkIntent("hey truck, drive to 21 x x drive", "int:navigate")
+ }
+}
diff --git a/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCModelValidationSpec.scala b/nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCModelValidationSpec.scala
similarity index 93%
rename from nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCModelValidationSpec.scala
rename to nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCModelValidationSpec.scala
index f1b572e..e46e10a 100644
--- a/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCModelValidationSpec.scala
+++ b/nlpcraft-examples/cargps/src/test/java/org/apache/nlpcraft/examples/cargps/NCModelValidationSpec.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.example.minecraft
+package org.apache.nlpcraft.examples.cargps
import org.apache.nlpcraft.model.tools.test.NCTestAutoModelValidator
import org.junit.jupiter.api.{Assertions, Test}
@@ -27,7 +27,7 @@
@Test
def test(): Unit = {
// Instruct auto-validator what models to test.
- System.setProperty("NLPCRAFT_TEST_MODELS", "org.apache.nlpcraft.example.minecraft.MinecraftModel")
+ System.setProperty("NLPCRAFT_TEST_MODELS", "org.apache.nlpcraft.examples.cargps.CarGpsModel")
// Start model auto-validator.
Assertions.assertTrue(NCTestAutoModelValidator.isValid(),"See error logs above.")
diff --git a/nlpcraft-examples/minecraft-mod/build.gradle b/nlpcraft-examples/minecraft-mod/build.gradle
index c6973e4..b759058 100644
--- a/nlpcraft-examples/minecraft-mod/build.gradle
+++ b/nlpcraft-examples/minecraft-mod/build.gradle
@@ -74,14 +74,14 @@
task dumpBlocks(type: JavaExec) {
description = 'Dumps game file objects. Supports 2 types: items and blocks'
classpath sourceSets.main.runtimeClasspath
- main = "org.apache.nlpcraft.example.minecraft.utils.NCMinecraftFilesDump"
+ main = "org.apache.nlpcraft.examples.minecraft.utils.NCMinecraftFilesDump"
args "block", minecraftVersion
}
task dumpItems(type: JavaExec) {
description = 'Dumps game file objects. Supports 2 types: items and blocks'
classpath sourceSets.main.runtimeClasspath
- main = "org.apache.nlpcraft.example.minecraft.utils.NCMinecraftFilesDump"
+ main = "org.apache.nlpcraft.examples.minecraft.utils.NCMinecraftFilesDump"
args "block", minecraftVersion
}
diff --git a/nlpcraft-examples/minecraft/pom.xml b/nlpcraft-examples/minecraft/pom.xml
index 09ee22a..69c3d40 100644
--- a/nlpcraft-examples/minecraft/pom.xml
+++ b/nlpcraft-examples/minecraft/pom.xml
@@ -21,7 +21,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
- <name>NLPCraft example Minecraft</name>
+ <name>NLPCraft Example Minecraft</name>
<artifactId>nlpcraft-example-minecraft</artifactId>
<parent>
diff --git a/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftFillMatchProcessor.kt b/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftFillMatchProcessor.kt
similarity index 96%
rename from nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftFillMatchProcessor.kt
rename to nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftFillMatchProcessor.kt
index 75790e1..779d7d2 100644
--- a/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftFillMatchProcessor.kt
+++ b/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftFillMatchProcessor.kt
@@ -16,9 +16,9 @@
*
*/
-package org.apache.nlpcraft.example.minecraft
+package org.apache.nlpcraft.examples.minecraft
-import org.apache.nlpcraft.example.minecraft.MinecraftValueLoader.Companion.dumps
+import org.apache.nlpcraft.examples.minecraft.MinecraftValueLoader.Companion.dumps
import org.apache.nlpcraft.model.*
import java.util.*
diff --git a/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftModel.kt b/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftModel.kt
similarity index 90%
rename from nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftModel.kt
rename to nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftModel.kt
index 954ab40..6137c5d 100644
--- a/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftModel.kt
+++ b/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftModel.kt
@@ -16,10 +16,10 @@
*
*/
-package org.apache.nlpcraft.example.minecraft
+package org.apache.nlpcraft.examples.minecraft
import org.apache.nlpcraft.common.NCException
-import org.apache.nlpcraft.example.minecraft.MinecraftValueLoader.Companion.dumps
+import org.apache.nlpcraft.examples.minecraft.MinecraftValueLoader.Companion.dumps
import org.apache.nlpcraft.model.*
import java.util.*
@@ -50,7 +50,7 @@
"start a hurricane",
"cast super squall"
)
- fun onWeatherMatch(@Suppress("UNUSED_PARAMETER") ctx: NCIntentMatch, @NCIntentTerm("arg") tok: NCToken): NCResult {
+ fun onWeatherMatch(@NCIntentTerm("arg") tok: NCToken): NCResult {
return NCResult.text("weather ${tok.id}")
}
@@ -64,7 +64,7 @@
"night",
"it's midnight"
)
- fun onTimeMatch(@Suppress("UNUSED_PARAMETER") ctx: NCIntentMatch, @NCIntentTerm("arg") tok: NCToken): NCResult {
+ fun onTimeMatch(@NCIntentTerm("arg") tok: NCToken): NCResult {
val time: Int = when (tok.id) {
"morning" -> 23000
"day" -> 1000
@@ -90,7 +90,6 @@
"give potion to me"
)
fun onGiveMatch(
- @Suppress("UNUSED_PARAMETER") ctx: NCIntentMatch,
@NCIntentTerm("item") item: NCToken,
@NCIntentTerm("action") target: NCToken,
@NCIntentTerm("quantity") quantity: Optional<NCToken>
diff --git a/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftValueLoader.kt b/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftValueLoader.kt
similarity index 97%
rename from nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftValueLoader.kt
rename to nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftValueLoader.kt
index 429adcf..2b658f4 100644
--- a/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/example/minecraft/MinecraftValueLoader.kt
+++ b/nlpcraft-examples/minecraft/src/main/kotlin/org/apache/nlpcraft/examples/minecraft/MinecraftValueLoader.kt
@@ -16,7 +16,7 @@
*
*/
-package org.apache.nlpcraft.example.minecraft
+package org.apache.nlpcraft.examples.minecraft
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
diff --git a/nlpcraft-examples/minecraft/src/main/resources/minecraft.yaml b/nlpcraft-examples/minecraft/src/main/resources/minecraft.yaml
index 9cffd47..32b4632 100644
--- a/nlpcraft-examples/minecraft/src/main/resources/minecraft.yaml
+++ b/nlpcraft-examples/minecraft/src/main/resources/minecraft.yaml
@@ -35,11 +35,11 @@
- id: mc:item
metadata:
mc:type: item
- valueLoader: org.apache.nlpcraft.example.minecraft.MinecraftValueLoader
+ valueLoader: org.apache.nlpcraft.examples.minecraft.MinecraftValueLoader
- id: mc:block
metadata:
mc:type: block
- valueLoader: org.apache.nlpcraft.example.minecraft.MinecraftValueLoader
+ valueLoader: org.apache.nlpcraft.examples.minecraft.MinecraftValueLoader
# Weather intent
- id: weather:action
@@ -100,7 +100,7 @@
# Give intent
- id: give:action
synonyms:
- - "{give ^^[target]{# == 'mc:player'}^^}"
+ - "{give ^^{# == 'mc:player'}^^}"
- id: give:block-word
synonyms:
- "{block|blocks}"
@@ -124,18 +124,18 @@
- "wall"
- id: fill:length
synonyms:
- - "{{size|length|diameter} {of|_} ^^[length]{# == 'nlpcraft:num'}^^}"
+ - "{{size|length|diameter} {of|_} ^^{# == 'nlpcraft:num'}^^}"
- id: position:player
groups:
- fill:position
synonyms:
- - "{{at|near} ^^[player]{# == 'mc:player'}^^ {position|_}|where ^^[player]{# == 'mc:player'}^^}"
+ - "{{at|near} ^^{# == 'mc:player'}^^ {position|_}|where ^^{# == 'mc:player'}^^}"
- id: position:front
groups:
- fill:position
synonyms:
- - "{{^^[distance]{# == 'nlpcraft:num'}^^|_} {in|_} front {of|_} ^^[player]{# == 'mc:player'}^^}"
+ - "{{^^{# == 'nlpcraft:num'}^^|_} {in|_} front {of|_} ^^{# == 'mc:player'}^^}"
abstractTokens:
- mc:player
diff --git a/nlpcraft-examples/minecraft/src/main/resources/probe.conf b/nlpcraft-examples/minecraft/src/main/resources/probe.conf
index 5f8bb7e..5c5e0e4 100644
--- a/nlpcraft-examples/minecraft/src/main/resources/probe.conf
+++ b/nlpcraft-examples/minecraft/src/main/resources/probe.conf
@@ -106,7 +106,7 @@
# Note that following models require 'google' on the server side.
# See https://nlpcraft.apache.org/integrations.html#nlp for more details
# on how to configure 3rd party token providers:
- models = org.apache.nlpcraft.example.minecraft.MinecraftModel
+ models = org.apache.nlpcraft.examples.minecraft.MinecraftModel
# Specify class names for probe life cycle components.
# Each class should extend 'NCProbeLifecycle' interface and provide a no-arg constructor.
diff --git a/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCMinecraftModelSpec.kt b/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/examples/minecraft/NCMinecraftModelSpec.kt
similarity index 96%
rename from nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCMinecraftModelSpec.kt
rename to nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/examples/minecraft/NCMinecraftModelSpec.kt
index 811df6b..49f5e56 100644
--- a/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCMinecraftModelSpec.kt
+++ b/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/examples/minecraft/NCMinecraftModelSpec.kt
@@ -16,7 +16,7 @@
*
*/
-package org.apache.nlpcraft.example.minecraft
+package org.apache.nlpcraft.examples.minecraft
import org.apache.nlpcraft.NCTestContext
import org.apache.nlpcraft.NCTestEnvironment
diff --git a/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCModelValidationSpec.scala b/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/examples/minecraft/NCModelValidationSpec.scala
similarity index 93%
copy from nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCModelValidationSpec.scala
copy to nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/examples/minecraft/NCModelValidationSpec.scala
index f1b572e..96767f4 100644
--- a/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/example/minecraft/NCModelValidationSpec.scala
+++ b/nlpcraft-examples/minecraft/src/test/kotlin/org/apache/nlpcraft/examples/minecraft/NCModelValidationSpec.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.example.minecraft
+package org.apache.nlpcraft.examples.minecraft
import org.apache.nlpcraft.model.tools.test.NCTestAutoModelValidator
import org.junit.jupiter.api.{Assertions, Test}
@@ -27,7 +27,7 @@
@Test
def test(): Unit = {
// Instruct auto-validator what models to test.
- System.setProperty("NLPCRAFT_TEST_MODELS", "org.apache.nlpcraft.example.minecraft.MinecraftModel")
+ System.setProperty("NLPCRAFT_TEST_MODELS", "org.apache.nlpcraft.examples.minecraft.MinecraftModel")
// Start model auto-validator.
Assertions.assertTrue(NCTestAutoModelValidator.isValid(),"See error logs above.")
diff --git a/nlpcraft-examples/time/pom.xml b/nlpcraft-examples/time/pom.xml
index 88c25e2..8da109e 100644
--- a/nlpcraft-examples/time/pom.xml
+++ b/nlpcraft-examples/time/pom.xml
@@ -21,7 +21,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
- <name>NLPCraft example Time</name>
+ <name>NLPCraft Example Time</name>
<artifactId>nlpcraft-example-time</artifactId>
<parent>
diff --git a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/common/nlp/core/stanford/NCStanfordTokenizer.scala b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/common/nlp/core/stanford/NCStanfordTokenizer.scala
index 256d044..c8da891 100644
--- a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/common/nlp/core/stanford/NCStanfordTokenizer.scala
+++ b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/common/nlp/core/stanford/NCStanfordTokenizer.scala
@@ -19,6 +19,8 @@
import java.io.StringReader
import edu.stanford.nlp.process.PTBTokenizer
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.NCService
import org.apache.nlpcraft.common.nlp.core.{NCNlpCoreToken, NCNlpTokenizer}
import scala.jdk.CollectionConverters.ListHasAsScala
@@ -27,6 +29,25 @@
* Stanford tokenizer implementation.
*/
object NCStanfordTokenizer extends NCNlpTokenizer {
+ /**
+ *
+ * @param parent Optional parent span.
+ * @return
+ */
+ override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ =>
+ ackStarting()
+ ackStarted()
+ }
+
+ /**
+ *
+ * @param parent Optional parent span.
+ */
+ override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ =>
+ ackStopping()
+ ackStopped()
+ }
+
override def tokenize(sen: String): Seq[NCNlpCoreToken] = {
PTBTokenizer.newPTBTokenizer(new StringReader(sen)).
tokenize().
diff --git a/nlpcraft/src/main/resources/stopwords/stop_words.txt b/nlpcraft/src/main/resources/stopwords/stop_words.txt
index 3629397..5644efd 100644
--- a/nlpcraft/src/main/resources/stopwords/stop_words.txt
+++ b/nlpcraft/src/main/resources/stopwords/stop_words.txt
@@ -23,13 +23,13 @@
# - Words with wildcard, symbol `*` (processed as lemma)
#
# Words and POSes can me marked as excluded (symbol `~` before word)
-# Word can be marked as case sensitive (symbol `@` before word)
+# Word can be marked as case-sensitive (symbol `@` before word)
#
# Restrictions:
# - POSes list cannot be defined for multiple words.
# - Only one wildcard can be defined in the word.
# - Wildcard cannot be applied to chunks of words.
-# - Only one case sensitive flag can be defined in the word.
+# - Only one case-sensitive flag can be defined in the word.
#
# Examples:
# ========
@@ -63,6 +63,7 @@
# POSES list exceptions.
~may
+~no
# Postfixes list.
*ent | ~NN ~NNS ~NNP ~NNPS
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
index 3b8e292..786e1ea 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
@@ -110,7 +110,7 @@
* @return
*/
private def padDur(ms: Long): String =
- StringUtils.leftPad(s"${U.now() - ms}ms", 6)
+ StringUtils.leftPad(s"${U.now() - ms}ms", 7)
/**
* Acks started service. Should be called at the end of the `start()` method.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index f508745..9d9f4e3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -59,7 +59,7 @@
@transient
private var hash: java.lang.Integer = _
- private def calcHash(): Int = U.mkJavaHash(srvReqId, text, enabledBuiltInToks, tokens)
+ private def calcHash(): Int = U.mkJavaHash(tokens)
// Deep copy.
override def clone(): NCNlpSentence =
@@ -74,6 +74,18 @@
firstProbePhase = firstProbePhase
)
+ def copy(srvReqId: Option[String]): NCNlpSentence =
+ new NCNlpSentence(
+ srvReqId = srvReqId.getOrElse(this.srvReqId),
+ text = this.text,
+ enabledBuiltInToks = this.enabledBuiltInToks,
+ tokens = this.tokens,
+ deletedNotes = this.deletedNotes,
+ initNlpNotes = this.initNlpNotes,
+ nlpTokens = this.nlpTokens,
+ firstProbePhase = this.firstProbePhase
+ )
+
/**
* Utility method that gets set of notes for given note type collected from
* tokens in this sentence. Notes are sorted in the same order they appear
@@ -101,10 +113,11 @@
override def equals(obj: Any): Boolean = obj match {
case x: NCNlpSentence =>
+ tokens.size == x.tokens.size &&
tokens == x.tokens &&
- srvReqId == x.srvReqId &&
- text == x.text &&
- enabledBuiltInToks == x.enabledBuiltInToks
+ srvReqId == x.srvReqId &&
+ text == x.text &&
+ enabledBuiltInToks == x.enabledBuiltInToks
case _ => false
}
@@ -139,8 +152,8 @@
// One possible difference - stopwords indexes.
def wordsEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
- val set1 = n1.wordIndexes.toSet
- val set2 = n2.wordIndexes.toSet
+ val set1 = n1.wordIndexesSet
+ val set2 = n2.wordIndexesSet
set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(stopIdxs.contains)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index c356550..255e086 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -29,11 +29,14 @@
/**
* Sentence token note is a typed map of KV pairs.
- *
*/
class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends JSerializable with NCAsciiLike {
import NCNlpSentenceNote._
+ private lazy val dataWithoutIndexes = this.filter(p => !SKIP_CLONE.contains(p._1))
+ private lazy val skipNlp = dataWithoutIndexes.filter { case (key, _) => key != "noteType" }
+
+
@transient
private lazy val hash = values.hashCode()
@@ -43,6 +46,7 @@
lazy val tokenTo: Int = values("tokMaxIndex").asInstanceOf[Int] // Last index.
lazy val tokenIndexes: Seq[Int] = values("tokWordIndexes").asInstanceOf[JList[Int]].asScala.toSeq // Includes 1st and last indices too.
lazy val wordIndexes: Seq[Int] = values("wordIndexes").asInstanceOf[JList[Int]].asScala.toSeq // Includes 1st and last indices too.
+ lazy val wordIndexesSet: Set[Int] = wordIndexes.toSet
lazy val sparsity: Int = values("sparsity").asInstanceOf[Int]
lazy val isDirect: Boolean = values("direct").asInstanceOf[Boolean]
lazy val isUser: Boolean = {
@@ -68,19 +72,25 @@
/**
* Clones this note.
*/
- def clone(indexes: Seq[Int], wordIndexes: Seq[Int], params: (String, Any)*): NCNlpSentenceNote =
- NCNlpSentenceNote(
+ def clone(indexes: Seq[Int], wordIndexes: Seq[Int], params: (String, JSerializable)*): NCNlpSentenceNote =
+ apply(
indexes,
Some(wordIndexes),
noteType,
- values.filter(p => !SKIP_CLONE.contains(p._1)).toSeq ++ params:_*
+ dataWithoutIndexes ++ params.toMap
)
- override def clone(): NCNlpSentenceNote = {
- val m = mutable.Map.empty[String, JSerializable] ++ values
+ override def clone(): NCNlpSentenceNote = new NCNlpSentenceNote(values)
- new NCNlpSentenceNote(m.toMap)
- }
+ /**
+ *
+ * @param n
+ */
+ def equalsWithoutIndexes(n: NCNlpSentenceNote): Boolean =
+ this.noteType == n.noteType &&
+ this.wordIndexes.size == n.wordIndexes.size &&
+ this.wordIndexes.zip(n.wordIndexes).map(p => p._1 - p._2).distinct.size == 1 &&
+ this.dataWithoutIndexes == n.dataWithoutIndexes
/**
*
@@ -93,12 +103,6 @@
*
* @return
*/
- def skipNlp(): Map[String, JSerializable] =
- values.filter { case (key, _) => !SKIP_CLONE.contains(key) && key != "noteType" }
-
- /**
- *
- */
def asMetadata(): Map[String, JSerializable] =
if (isUser)
values.get("meta") match {
@@ -108,7 +112,7 @@
else {
val md = mutable.Map.empty[String, JSerializable]
- val m = if (noteType != "nlpcraft:nlp") skipNlp() else values
+ val m = if (noteType != "nlpcraft:nlp") skipNlp else values
m.foreach { case (name, value) => md += (name.toLowerCase() -> value)}
@@ -119,13 +123,8 @@
*
* @param kvs
*/
- def clone(kvs : (String, JSerializable)*): NCNlpSentenceNote = {
- val m = mutable.HashMap.empty[String, JSerializable] ++ values
-
- kvs.foreach(kv => m += kv._1 -> kv._2)
-
- new NCNlpSentenceNote(m.toMap)
- }
+ def clone(kvs : (String, JSerializable)*): NCNlpSentenceNote =
+ new NCNlpSentenceNote(values ++ kvs)
/**
*
@@ -134,35 +133,11 @@
* @return
*/
def getKey(withIndexes: Boolean = true, withReferences: Boolean = true): Seq[Any] = {
- def addRefs(names: String*): Seq[String] = if (withReferences) names else Seq.empty
-
- val names: Seq[String] =
- if (isUser)
- Seq.empty
- else
- noteType match {
- case "nlpcraft:continent" => Seq("continent")
- case "nlpcraft:subcontinent" => Seq("continent", "subcontinent")
- case "nlpcraft:country" => Seq("continent", "subcontinent", "country")
- case "nlpcraft:region" => Seq("continent", "subcontinent", "country", "region")
- case "nlpcraft:city" => Seq("continent", "subcontinent", "country", "region", "city")
- case "nlpcraft:metro" => Seq("metro")
- case "nlpcraft:date" => Seq("from", "to")
- case "nlpcraft:relation" => Seq("type", "note") ++ addRefs("indexes")
- case "nlpcraft:sort" => Seq("asc", "subjnotes", "bynotes") ++ addRefs("subjindexes", "byindexes")
- case "nlpcraft:limit" => Seq("limit", "note") ++ addRefs("indexes", "asc") // Asc flag has sense only with references for limit.
- case "nlpcraft:coordinate" => Seq("latitude", "longitude")
- case "nlpcraft:num" => Seq("from", "to", "unit", "unitType")
- case x if x.startsWith("google:") => Seq("meta", "mentionsBeginOffsets", "mentionsContents", "mentionsTypes")
- case x if x.startsWith("stanford:") => Seq("nne")
- case x if x.startsWith("opennlp:") => Seq.empty
- case x if x.startsWith("spacy:") => Seq("vector")
-
- case _ => throw new AssertionError(s"Unexpected note type: $noteType")
- }
-
val seq1 = if (withIndexes) Seq(wordIndexes, noteType) else Seq(noteType)
- val seq2 = names.map(name => this.getOrElse(name, null))
+ val seq2 = if (isUser)
+ Seq.empty
+ else
+ getBuiltProperties(noteType, withReferences).map(name => this.getOrElse(name, null))
seq1 ++ seq2
}
@@ -220,7 +195,7 @@
indexes: Seq[Int],
wordIndexesOpt: Option[Seq[Int]],
typ: String,
- params: (String, Any)*
+ params: Map[String, Any]
): NCNlpSentenceNote = {
def calc(seq: Seq[Int]): (Int, Int, Int, JList[Int], Int) =
(U.calcSparsity(seq), seq.min, seq.max, seq.asJava, seq.length)
@@ -228,18 +203,18 @@
val (sparsity, tokMinIndex, tokMaxIndex, tokWordIndexes, len) = calc(wordIndexesOpt.getOrElse(indexes))
new NCNlpSentenceNote(
- mutable.HashMap[String, JSerializable]((
- params.filter(_._2 != null) :+
- ("noteType" -> typ) :+
- ("tokMinIndex" -> indexes.min) :+
- ("tokMaxIndex" -> indexes.max) :+
- ("tokWordIndexes" -> indexes.asJava) :+
- ("minIndex" -> tokMinIndex) :+
- ("maxIndex" -> tokMaxIndex) :+
- ("wordIndexes" -> tokWordIndexes) :+
- ("wordLength" -> len) :+
- ("sparsity" -> sparsity)
- ).map(p => p._1 -> p._2.asInstanceOf[JSerializable]): _*).toMap
+ params.filter(_._2 != null).map(p => p._1 -> p._2.asInstanceOf[JSerializable]) ++
+ Map[String, JSerializable](
+ "noteType" -> typ,
+ "tokMinIndex" -> indexes.min,
+ "tokMaxIndex" -> indexes.max,
+ "tokWordIndexes" -> indexes.asJava.asInstanceOf[JSerializable],
+ "minIndex" -> tokMinIndex,
+ "maxIndex" -> tokMaxIndex,
+ "wordIndexes" -> tokWordIndexes.asInstanceOf[JSerializable],
+ "wordLength" -> len,
+ "sparsity" -> sparsity
+ )
)
}
@@ -251,7 +226,7 @@
* @param params Parameters.
*/
def apply(indexes: Seq[Int], typ: String, params: (String, Any)*): NCNlpSentenceNote =
- apply(indexes, None, typ, params: _*)
+ apply(indexes, None, typ, params.toMap)
/**
* Creates new note with given parameters.
@@ -261,7 +236,7 @@
* @param params Parameters.
*/
def apply(indexes: mutable.Seq[Int], typ: String, params: (String, Any)*): NCNlpSentenceNote =
- apply(indexes.toSeq, None, typ, params: _*)
+ apply(indexes.toSeq, None, typ, params.toMap)
/**
* Creates new note with given parameters.
@@ -272,7 +247,7 @@
* @param params Parameters.
*/
def apply(indexes: Seq[Int], wordIndexes: Seq[Int], typ: String, params: (String, Any)*): NCNlpSentenceNote =
- apply(indexes, Some(wordIndexes), typ, params: _*)
+ apply(indexes, Some(wordIndexes), typ, params.toMap)
/**
* Creates new note with given parameters.
@@ -283,5 +258,37 @@
* @param params Parameters.
*/
def apply(indexes: mutable.Seq[Int], wordIndexes: mutable.Seq[Int], typ: String, params: (String, Any)*): NCNlpSentenceNote =
- apply(indexes.toSeq, Some(wordIndexes.toSeq), typ, params: _*)
+ apply(indexes.toSeq, Some(wordIndexes.toSeq), typ, params.toMap)
+
+ /**
+ *
+ * @param noteType
+ * @param withReferences
+ */
+ def getBuiltProperties(noteType: String, withReferences: Boolean = true): Seq[String] = {
+ def addRefs(names: String*): Seq[String] = if (withReferences) names else Seq.empty
+
+ noteType match {
+ case "nlpcraft:nlp" => Seq.empty
+
+ case "nlpcraft:continent" => Seq("continent")
+ case "nlpcraft:subcontinent" => Seq("continent", "subcontinent")
+ case "nlpcraft:country" => Seq("continent", "subcontinent", "country")
+ case "nlpcraft:region" => Seq("continent", "subcontinent", "country", "region")
+ case "nlpcraft:city" => Seq("continent", "subcontinent", "country", "region", "city")
+ case "nlpcraft:metro" => Seq("metro")
+ case "nlpcraft:date" => Seq("from", "to")
+ case "nlpcraft:relation" => Seq("type", "note") ++ addRefs("indexes")
+ case "nlpcraft:sort" => Seq("asc", "subjnotes", "bynotes") ++ addRefs("subjindexes", "byindexes")
+ case "nlpcraft:limit" => Seq("limit", "note") ++ addRefs("indexes", "asc") // Asc flag has sense only with references for limit.
+ case "nlpcraft:coordinate" => Seq("latitude", "longitude")
+ case "nlpcraft:num" => Seq("from", "to", "unit", "unitType")
+ case x if x.startsWith("google:") => Seq("meta", "mentionsBeginOffsets", "mentionsContents", "mentionsTypes")
+ case x if x.startsWith("stanford:") => Seq("nne")
+ case x if x.startsWith("opennlp:") => Seq.empty
+ case x if x.startsWith("spacy:") => Seq("vector")
+
+ case _ => throw new AssertionError(s"Unexpected note type: $noteType")
+ }
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
index 4b94b98..1c66da1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
@@ -17,6 +17,7 @@
package org.apache.nlpcraft.common.nlp
+import org.apache.nlpcraft.common.U
import org.apache.nlpcraft.common.nlp.pos._
import java.util.{List => JList}
@@ -56,6 +57,22 @@
def isSwearWord: Boolean = getNlpValue[Boolean]("swear")
def isEnglish: Boolean = getNlpValue[Boolean]("english")
+ @transient
+ private var hash: java.lang.Integer = _
+
+ //noinspection HashCodeUsesVar
+ override def hashCode(): Int = {
+ if (hash == null)
+ hash = U.mkJavaHash(index, notes, stopsReasons)
+
+ hash
+ }
+
+ override def equals(obj: Any): Boolean = obj match {
+ case x: NCNlpSentenceToken => x.index == index && x.notes == notes && x.stopsReasons == stopsReasons
+ case _ => false
+ }
+
/**
*
* @param noteType Note type.
@@ -67,17 +84,7 @@
* Shallow copy.
*/
def clone(index: Int): NCNlpSentenceToken =
- NCNlpSentenceToken(
- index,
- {
- val m = mutable.HashSet.empty[NCNlpSentenceNote]
-
- notes.foreach(n => m += n.clone())
-
- m
- },
- stopsReasons.clone()
- )
+ NCNlpSentenceToken(index, mutable.HashSet.empty[NCNlpSentenceNote] ++ notes.clone(), stopsReasons.clone())
/**
* Clones note.
@@ -90,7 +97,11 @@
*
* @param note Note.
*/
- def remove(note: NCNlpSentenceNote): Unit = notes.remove(note)
+ def remove(note: NCNlpSentenceNote): Unit = {
+ notes.remove(note)
+
+ hash = null
+ }
/**
* Tests whether or not this token contains note.
@@ -172,6 +183,7 @@
* @param note Element.
*/
def add(note: NCNlpSentenceNote): Unit = {
+ hash = null
val added = notes.add(note)
if (added && note.isNlp)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala
index a3d1156..3034a5e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala
@@ -26,12 +26,6 @@
* @param tokens Initial buffer.
*/
class NCNlpSentenceTokenBuffer(val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](16)) extends java.io.Serializable {
- /** Stringified stems. */
- lazy val stems: String = tokens.map(_.stem).mkString(" ")
-
- /** Stem-based hashcode. */
- lazy val stemsHash: Int = stems.hashCode()
-
type SSOT = IndexedSeq[IndexedSeq[Option[NCNlpSentenceToken]]]
type SST = IndexedSeq[IndexedSeq[NCNlpSentenceToken]]
@@ -113,8 +107,7 @@
object NCNlpSentenceTokenBuffer {
implicit def toTokens(x: NCNlpSentenceTokenBuffer): ArrayBuffer[NCNlpSentenceToken] = x.tokens
- implicit def toBuf( toks: Iterable[NCNlpSentenceToken]): NCNlpSentenceTokenBuffer = apply(toks)
- def apply(toks: Iterable[NCNlpSentenceToken]): NCNlpSentenceTokenBuffer =
+ def apply(toks: Seq[NCNlpSentenceToken]): NCNlpSentenceTokenBuffer =
new NCNlpSentenceTokenBuffer(new ArrayBuffer[NCNlpSentenceToken](toks.size) ++ toks)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index d2c2b03..4f3a701 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -869,13 +869,6 @@
}
/**
- * Gets resource existing flag.
- *
- * @param res Resource.
- */
- def hasResource(res: String): Boolean = getClass.getClassLoader.getResourceAsStream(res) != null
-
- /**
* Serializes data.
*
* @param obj Data.
@@ -2329,6 +2322,11 @@
}
}
+ /**
+ *
+ * @param path
+ * @return
+ */
def isFile(path: String): Boolean = {
val f = new File(path)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCAddElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCAddElement.java
new file mode 100644
index 0000000..b6aadfd
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCAddElement.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.Repeatable;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+// TODO: json or yaml
+@Documented
+@Retention(value=RUNTIME)
+@Target(value=METHOD)
+@Repeatable(NCAddElement.NCAddElementList.class)
+public @interface NCAddElement {
+ /**
+ * ID of the intent term.
+ *
+ * @return ID of the intent term.
+ */
+ String value();
+
+ /**
+ *
+ */
+ @Retention(RetentionPolicy.RUNTIME)
+ @Target(value=METHOD)
+ @Documented
+ @interface NCAddElementList {
+ NCAddElement[] value();
+ }
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCAddElementClass.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCAddElementClass.java
new file mode 100644
index 0000000..fa0d345
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCAddElementClass.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.Repeatable;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+// TODO: class name
+@Documented
+@Retention(value=RUNTIME)
+@Target(value=METHOD)
+@Repeatable(NCAddElementClass.NCAddElementClassList.class)
+public @interface NCAddElementClass {
+    /**
+     * Class providing the model element to add (see class-level TODO - exact contract to confirm).
+     *
+     * @return Element-defining class.
+     */
+ Class<?> value();
+
+    /**
+     * Grouping annotation required for {@link NCAddElementClass} to be {@link java.lang.annotation.Repeatable}.
+     */
+ @Retention(RetentionPolicy.RUNTIME)
+ @Target(value=METHOD)
+ @Documented
+ @interface NCAddElementClassList {
+ NCAddElementClass[] value();
+ }
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomParser.java
index 28d0aec..91a3db3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomParser.java
@@ -37,7 +37,7 @@
* Analyses user input provided as a list of {@link NCCustomWord} objects and returns a list
* of {@link NCCustomElement} objects. Note that model elements returned from this method must
* be defined in the model, i.e. this method only provides an additional logic of detecting these
- * elements but they still need to be defined normally in the model.
+ * elements, but they still need to be defined normally in the model.
*
* @param req User request descriptor.
* @param mdl Instance of data model this parser belongs to.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index b5b6cbd..02f06ea 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -383,7 +383,7 @@
return Optional.empty();
}
- // TODO:
+ // TODO: add javadoc
default Optional<Boolean> isGreedy() {
return Optional.empty();
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index efa2b68..61cb84d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -559,6 +559,11 @@
}
@Override
+ public boolean isStopWordsAllowed() {
+ return proxy.isStopWordsAllowed();
+ }
+
+ @Override
public Map<String, Set<String>> getRestrictedCombinations() {
return restrictedCombinations;
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index 30a2b40..2d06412 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -278,6 +278,9 @@
*/
boolean DFLT_IS_NO_USER_TOKENS_ALLOWED = true;
+    /** Default value for {@link #isStopWordsAllowed()} method. */
+ boolean DFLT_IS_STOPWORDS_ALLOWED = true;
+
/**
* Default set of enabled built-in tokens. The following built-in tokens are enabled by default:
* <ul>
@@ -1235,4 +1238,12 @@
default Map<String, Set<String>> getRestrictedCombinations() {
return Collections.emptyMap();
}
+
+ /**
+     * Checks whether stop words are allowed. Default implementation returns {@link #DFLT_IS_STOPWORDS_ALLOWED}.
+     * @return Stop words allowed flag.
+ */
+ default boolean isStopWordsAllowed() {
+ return DFLT_IS_STOPWORDS_ALLOWED;
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 4b2f251..1bd9add 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -17,17 +17,16 @@
package org.apache.nlpcraft.model.impl
-import java.io.{Serializable => JSerializable}
-import java.util.Collections
-import java.util.{List => JList}
-
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.NCProbeModel
+import java.io.{Serializable => JSerializable}
+import java.lang
+import java.util.{Collections, List => JList}
import scala.collection.mutable
-import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava, MapHasAsScala, SeqHasAsJava}
+import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava}
/**
*
@@ -49,9 +48,9 @@
value: String,
startCharIndex: Int,
endCharIndex: Int,
- meta: Map[String, Object],
+ meta: java.util.Map[String, Object],
isAbstractProp: Boolean
-) extends NCMetadataAdapter(new java.util.HashMap(mutable.HashMap(meta.toSeq:_ *).asJava)) with NCToken with JSerializable {
+) extends NCMetadataAdapter(meta) with NCToken with JSerializable {
require(mdl != null)
require(srvReqId != null)
require(id != null)
@@ -105,12 +104,12 @@
// nlpcraft:nlp and some optional (after collapsing).
require(tok.size <= 2, s"Unexpected token [size=${tok.size}, token=$tok]")
- val md = mutable.HashMap.empty[String, JSerializable]
+ val md = new java.util.HashMap[String, AnyRef]()
tok.foreach(n => {
val id = n.noteType.toLowerCase
- n.asMetadata().foreach { case (k, v) => md += s"$id:$k" -> v}
+ n.asMetadata().foreach { case (k, v) => md.put(s"$id:$k", v.asInstanceOf[AnyRef]) }
})
val usrNotes = tok.filter(_.isUser)
@@ -118,8 +117,6 @@
// No overlapping allowed at this point.
require(usrNotes.size <= 1, s"Unexpected elements notes: $usrNotes")
- def convertMeta(): ScalaMeta = md.toMap.map(p => p._1 -> p._2.asInstanceOf[AnyRef])
-
usrNotes.headOption match {
case Some(usrNote) =>
require(mdl.elements.contains(usrNote.noteType), s"Element is not found: ${usrNote.noteType}")
@@ -139,9 +136,9 @@
}
// Special synthetic meta data element.
- md.put("nlpcraft:nlp:freeword", false)
+ md.put("nlpcraft:nlp:freeword", java.lang.Boolean.FALSE)
- elm.getMetadata.asScala.foreach { case (k, v) => md.put(k, v.asInstanceOf[JSerializable]) }
+ md.putAll(elm.getMetadata)
new NCTokenImpl(
mdl.model,
@@ -153,7 +150,7 @@
value = usrNote.dataOpt("value").orNull,
startCharIndex = tok.startCharIndex,
endCharIndex = tok.endCharIndex,
- meta = convertMeta(),
+ meta = md,
isAbstractProp = mdl.model.getAbstractTokens.contains(elm.getId)
)
@@ -162,10 +159,10 @@
val note = tok.toSeq.minBy(n => if (n.isNlp) 1 else 0)
- val isStop: Boolean = md("nlpcraft:nlp:stopword").asInstanceOf[Boolean]
+ val isStop = md.get("nlpcraft:nlp:stopword").asInstanceOf[Boolean]
// Special synthetic meta data element.
- md.put("nlpcraft:nlp:freeword", !isStop && note.isNlp)
+ md.put("nlpcraft:nlp:freeword", lang.Boolean.valueOf(!isStop && note.isNlp))
new NCTokenImpl(
mdl.model,
@@ -177,7 +174,7 @@
value = null,
startCharIndex = tok.startCharIndex,
endCharIndex = tok.endCharIndex,
- meta = convertMeta(),
+ meta = md,
isAbstractProp = mdl.model.getAbstractTokens.contains(note.noteType)
)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index 2bbc72a..b3005ce 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -378,6 +378,13 @@
def prepareTable(toks: Seq[NCToken]): NCAsciiTable = {
val allFree = toks.forall(_.isFreeWord)
+ /**
+     * Shortcut that colorizes given string with ANSI 256 foreground color 183.
+     * @param s String to colorize.
+     * @return String wrapped in the ANSI color escape and reset sequences.
+ */
+ def cc(s: String): String = s"${ansi256Fg(183)}$s$ansiReset"
+
val headers = mutable.ArrayBuffer.empty[String] ++
Seq(
"idx",
@@ -385,15 +392,15 @@
"lemma",
"pos",
"quoted",
- "stopword",
- "freeword",
+ r("stopword"),
+ y("freeword"),
"wordindexes",
"direct",
"sparsity"
)
if (!allFree)
- headers += "token data"
+ headers += cc("token data")
val tbl = NCAsciiTable(headers)
@@ -628,11 +635,11 @@
if (tok.getId == "nlpcraft:nlp")
row.map(_.toString)
else
- row.map(s => s"${ansi256Fg(183)}${s.toString}${ansiReset}")
+ row.map(s => cc(s.toString))
)
++
// Token data.
- Seq(if (tok.getId == "nlpcraft:nlp") "" else s"<<${ansi256Fg(183)}${tok.getId}$ansiReset>> $v") :_*
+ Seq(if (tok.getId == "nlpcraft:nlp") "" else s"<<${cc(tok.getId)}>> $v") :_*
)
}
})
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
index f332e08..043297c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
@@ -63,6 +63,7 @@
private boolean maxSynonymsThresholdError = DFLT_MAX_SYNONYMS_THRESHOLD_ERROR;
private long conversationTimeout = DFLT_CONV_TIMEOUT_MS;
private int conversationDepth = DFLT_CONV_DEPTH;
+ private boolean isStopWordsAllowed = DFLT_IS_STOPWORDS_ALLOWED;
public String getId() {
return id;
@@ -278,4 +279,10 @@
return restrictedCombinations;
}
public void setRestrictedCombinations(Map<String, String[]> restrictedCombinations) { this.restrictedCombinations = restrictedCombinations;}
+ public boolean isStopWordsAllowed() {
+ return isStopWordsAllowed;
+ }
+ public void setStopWordsAllowed(boolean stopWordsAllowed) {
+ isStopWordsAllowed = stopWordsAllowed;
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
index bf3888d..a3f876a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
@@ -633,8 +633,14 @@
line: Int, // 1, 2, ...
charPos: Int, // 1, 2, ...
msg: String,
- e: RecognitionException): Unit =
- throw new NCE(mkSyntaxError(msg, recog.getInputStream.getSourceName, line, charPos - 1, dsl, origin, mdl))
+ e: RecognitionException): Unit = {
+ val aMsg = if ((msg.contains("'\"") && msg.contains("\"'")) || msg.contains("''"))
+ s"${if (msg.last == '.') msg.substring(0, msg.length - 1) else msg} - try removing quotes."
+ else
+ msg
+
+ throw new NCE(mkSyntaxError(aMsg, recog.getInputStream.getSourceName, line, charPos - 1, dsl, origin, mdl))
+ }
}
/**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala
index 88af005..573ac4c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala
@@ -20,6 +20,7 @@
import com.typesafe.scalalogging.LazyLogging
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
+import org.apache.nlpcraft.common.ansi.NCAnsi._
import org.apache.nlpcraft.common.debug.NCLogHolder
import org.apache.nlpcraft.common.opencensus.NCOpenCensusTrace
import org.apache.nlpcraft.common.util.NCUtils
@@ -145,7 +146,7 @@
if (cbRes.getIntentId == null)
cbRes.setIntentId(res.intentId)
- logger.info(s"Intent '${res.intentId}' for variant #${res.variantIdx + 1} selected as the ${g(bo("<|best match|>"))}.")
+ logger.info(s"Intent ${ansi256Fg(183)}'${res.intentId}'$ansiReset for variant #${res.variantIdx + 1} selected as the ${g(bo("<|best match|>"))}.")
NCDialogFlowManager.addMatchedIntent(
intentMatch,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
index 710b9f0..ccff087 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
@@ -37,6 +37,7 @@
* Intent solver that finds the best matching intent given user sentence.
*/
object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
+ private final val DNM = r("did not match")
/**
* NOTE: not thread-safe.
@@ -352,8 +353,10 @@
tbl.info(logger, Some(s"Found ${sorted.size} matching ${if (sorted.size > 1) "intents" else "intent"} (sorted $G${BO}best$RST to worst):"))
}
- else
- logger.info("No matching intent found.")
+ else {
+ logger.info(s"No matching intent found:")
+ logger.info(s" +-- Turn on ${y("DEBUG")} log level to see more details.")
+ }
sorted.map(m =>
NCIntentSolverResult(
@@ -437,7 +440,7 @@
}
if (!flowRegex.get.matcher(str).find(0)) {
- x(s"${bo(r("did not match"))}")
+ x(DNM)
flowMatched = false
}
@@ -470,7 +473,7 @@
}
if (!res) {
- x(s"${bo(r("did not match"))}")
+ x(DNM)
flowMatched = false
}
@@ -532,12 +535,12 @@
s"${y("<")}${w.head}, ${w(1)}, ${w(2)}, ${w(3)}, ${w(4)}, ${w(5)}${y(">")}"
)
- tbl.info(logger, Some("Term match found:"))
+ tbl.debug(logger, Some("Term match found:"))
}
case None =>
// Term is missing. Stop further processing for this intent. This intent cannot be matched.
- logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because of unmatched term '${term.toAnsiString}' $varStr.")
+ logger.debug(s"Intent '$intentId' $DNM because of unmatched term '${term.toAnsiString}' $varStr.")
abort = true
}
@@ -556,7 +559,7 @@
if (!opts.allowStmTokenOnly && usedSenToks.isEmpty && usedConvToks.nonEmpty)
logger.info(
- s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr. " +
+ s"Intent '$intentId' $DNM because all its matched tokens came from STM $varStr. " +
s"See intent '${c(JSON_ALLOW_STM_ONLY)}' option."
)
else if (!opts.ignoreUnusedFreeWords && unusedSenToks.exists(_.token.isFreeWord))
@@ -566,7 +569,7 @@
info(
logger,
Some(
- s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr. " +
+ s"Intent '$intentId' $DNM because of unused free words $varStr. " +
s"See intent '${c(JSON_UNUSED_FREE_WORDS)}' option. " +
s"Unused free words:"
)
@@ -578,7 +581,7 @@
info(
logger,
Some(
- s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr. " +
+ s"Intent '$intentId' $DNM because of unused user tokens $varStr. " +
s"See intent '${c(JSON_UNUSED_USR_TOKS)}' option. " +
s"Unused user tokens:"
)
@@ -590,7 +593,7 @@
info(
logger,
Some(
- s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr. " +
+ s"Intent '$intentId' $DNM because of unused system tokens $varStr. " +
s"See intent '${c(JSON_UNUSED_SYS_TOKS)}' option. " +
s"Unused system tokens:"
)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
index 1f08d4f..71162db 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
@@ -258,13 +258,20 @@
* @param args
* @return
*/
- private def getCpParams(args: Seq[Argument]): String =
- U.splitTrimFilter(
+ private def getCpParams(args: Seq[Argument]): String = {
+ val s = U.splitTrimFilter(
getParams( args, "cp").map(cp => normalizeCp(U.trimQuotes(cp))).mkString(CP_SEP),
CP_SEP
)
.mkString(CP_SEP)
+ // Remove extra '\' from the end, if possible.
+ if (s.last == '\\' && !s.endsWith(":\\"))
+ s.substring(0, s.length - 1)
+ else
+ s
+ }
+
/**
*
* @param args
@@ -3262,8 +3269,8 @@
.system(true)
.nativeSignals(true)
.signalHandler(Terminal.SignalHandler.SIG_IGN)
- .dumb(true)
- .jansi(true)
+ .jansi(SystemUtils.IS_OS_UNIX)
+ .jna(SystemUtils.IS_OS_WINDOWS)
.build()
// Process 'no-ansi' and 'ansi' commands first (before ASCII title is shown).
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliBase.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliBase.scala
index 21c022c..381a663 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliBase.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliBase.scala
@@ -65,7 +65,7 @@
// | MAKE SURE TO UPDATE THIS VAR WHEN NUMBER OF SERVICES IS CHANGED. |
// +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^+
final val NUM_SRV_SERVICES = 31 /*services*/ + 1 /*progress start*/
- final val NUM_PRB_SERVICES = 23 /*services*/ + 1 /*progress start*/
+ final val NUM_PRB_SERVICES = 24 /*services*/ + 1 /*progress start*/
// +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^+
// | MAKE SURE TO UPDATE THIS VAR WHEN NUMBER OF SERVICES IS CHANGED. |
// +==================================================================+
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index ecf7a18..561860f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -50,6 +50,7 @@
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.validate.NCValidateManager
import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
import java.io._
import java.util.concurrent.CompletableFuture
@@ -527,6 +528,7 @@
startedMgrs += NCConnectionManager.start(span)
startedMgrs += NCDialogFlowManager.start(span)
startedMgrs += NCSentenceManager.start(span)
+ startedMgrs += NCSynonymsManager.start(span)
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
new file mode 100644
index 0000000..5da9808
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs
+
+import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
+import org.apache.nlpcraft.model.{NCToken, _}
+
+/**
+ *
+ * @param token Detected model token; mutually exclusive with `word` (exactly one is expected to be non-null).
+ * @param word Raw NLP sentence token; used when `token` is `null`.
+ */
+case class NCProbeIdlToken(token: NCToken, word: NCNlpSentenceToken) {
+ val (origText: String, wordIndexes: Set[Int], minIndex: Int, maxIndex: Int, isToken: Boolean, isWord: Boolean) =
+ if (token != null)
+ (token.origText, token.wordIndexes.toSet, token.wordIndexes.head, token.wordIndexes.last, true, false)
+ else
+ (word.origText, word.wordIndexes.toSet, word.wordIndexes.head, word.wordIndexes.last, false, true)
+
+ private lazy val hash = if (isToken) Seq(wordIndexes, token.getId).hashCode() else wordIndexes.hashCode()
+
+ override def hashCode(): Int = hash
+
+ def isSubsetOf(minIndex: Int, maxIndex: Int, indexes: Set[Int]): Boolean =
+ if (this.minIndex > maxIndex || this.maxIndex < minIndex)
+ false
+ else
+ wordIndexes.subsetOf(indexes)
+
+ override def equals(obj: Any): Boolean = obj match {
+ case x: NCProbeIdlToken =>
+ hash == x.hash && (isToken && x.isToken && token == x.token || isWord && x.isWord && word == x.word)
+ case _ => false
+ }
+
+ // Added for debug reasons.
+ override def toString: String = {
+ val idxs = wordIndexes.mkString(",")
+
+ if (isToken && token.getId != "nlpcraft:nlp") s"'$origText' (${token.getId}) [$idxs]]" else s"'$origText' [$idxs]"
+ }
+}
+
+/**
+ *
+ */
+object NCProbeIdlToken {
+ def apply(t: NCToken): NCProbeIdlToken = NCProbeIdlToken(token = t, word = null)
+ def apply(t: NCNlpSentenceToken): NCProbeIdlToken = NCProbeIdlToken(token = null, word = t)
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index 75ae18b..ea41793 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -45,9 +45,13 @@
solver: NCIntentSolver,
intents: Seq[NCIdlIntent],
callbacks: Map[String /* Intent ID */, NCProbeModelCallback],
- continuousSynonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
+ continuousSynonyms:
+ Map[
+ String /*Element ID*/,
+ /*Fast access map.*/ Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]
+ ],
sparseSynonyms: Map[String /*Element ID*/, Seq[NCProbeSynonym]],
- idlSynonyms: Map[String /*Element ID*/ , Seq[NCProbeSynonym]], // Fast access map.
+ idlSynonyms: Map[String /*Element ID*/ , Seq[NCProbeSynonym]],
addStopWordsStems: Set[String],
exclStopWordsStems: Set[String],
suspWordsStems: Set[String],
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index c370738..2b533b3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -17,11 +17,6 @@
package org.apache.nlpcraft.probe.mgrs
-import org.apache.nlpcraft.common.U
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, NCNlpSentenceTokenBuffer}
-import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.model.intent.NCIdlContext
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind._
import scala.collection.mutable
@@ -54,180 +49,18 @@
lazy val hasIdl: Boolean = idlChunks != 0
lazy val isValueSynonym: Boolean = value != null
lazy val stems: String = map(_.wordStem).mkString(" ")
- lazy val stemsHash: Int = stems.hashCode
- /**
- *
- * @param kind
- * @return
- */
- private def getSort(kind: NCSynonymChunkKind): Int =
- kind match {
- case TEXT => 0
- case IDL => 1
- case REGEX => 2
- case _ => throw new AssertionError(s"Unexpected kind: $kind")
- }
-
- /**
- *
- * @param tok
- * @param chunk
- */
- private def isMatch(tok: NCNlpSentenceToken, chunk: NCProbeSynonymChunk): Boolean =
- chunk.kind match {
- case TEXT => chunk.wordStem == tok.stem
- case REGEX =>
- val regex = chunk.regex
-
- regex.matcher(tok.origText).matches() || regex.matcher(tok.normText).matches()
- case IDL => throw new AssertionError()
- case _ => throw new AssertionError()
- }
-
- /**
- *
- * @param toks
- * @param isMatch
- * @param getIndex
- * @param shouldBeNeighbors
- * @tparam T
- * @return
- */
- private def sparseMatch0[T](
- toks: Seq[T],
- isMatch: (T, NCProbeSynonymChunk) => Boolean,
- getIndex: T => Int,
- shouldBeNeighbors: Boolean
- ): Option[Seq[T]] =
- if (toks.size >= this.size) {
- lazy val res = mutable.ArrayBuffer.empty[T]
- lazy val all = mutable.HashSet.empty[T]
-
- var state = 0
-
- for (chunk <- this if state != -1) {
- val seq =
- if (state == 0) {
- state = 1
-
- toks.filter(t => isMatch(t, chunk))
- }
- else
- toks.filter(t => !res.contains(t) && isMatch(t, chunk))
-
- if (seq.nonEmpty) {
- val head = seq.head
-
- if (!permute && res.nonEmpty && getIndex(head) <= getIndex(res.last))
- state = -1
- else {
- all ++= seq
-
- if (all.size > this.size)
- state = -1
- else
- res += head
- }
- }
- else
- state = -1
- }
-
- if (state != -1 && all.size == res.size && (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).toSeq.sorted)))
- Some(res.toSeq)
- else
- None
- }
- else
- None
-
- /**
- *
- * @param tow
- * @param chunk
- * @param req
- */
- private def isMatch(tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest): Boolean = {
- def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken => T): T =
- if (tow.isLeft) fromToken(tow.swap.toOption.get) else fromWord(tow.toOption.get)
-
- chunk.kind match {
- case TEXT => chunk.wordStem == get0(_.stem, _.stem)
-
- case REGEX =>
- val r = chunk.regex
-
- r.matcher(get0(_.origText, _.origText)).matches() || r.matcher(get0(_.normText, _.normText)).matches()
-
- case IDL =>
- get0(t => chunk.idlPred.apply(t, NCIdlContext(req = req)).value.asInstanceOf[Boolean], _ => false)
-
- case _ => throw new AssertionError()
- }
- }
-
- /**
- *
- * @param toks
- */
- def isMatch(toks: NCNlpSentenceTokenBuffer): Boolean = {
- require(toks != null)
- require(!sparse && !hasIdl)
-
- if (toks.length == length) {
- if (isTextOnly)
- toks.stemsHash == stemsHash && toks.stems == stems
- else
- toks.zip(this).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
- }
- else
- false
- }
-
- /**
- *
- * @param tows
- * @param req
- * @return
- */
- def isMatch(tows: Seq[NCIdlContent], req: NCRequest): Boolean = {
- require(tows != null)
-
- if (tows.length == length && tows.count(_.isLeft) >= idlChunks)
- tows.zip(this).sortBy(p => getSort(p._2.kind)).forall { case (tow, chunk) => isMatch(tow, chunk, req) }
- else
- false
- }
-
- /**
- *
- * @param toks
- */
- def sparseMatch(toks: NCNlpSentenceTokenBuffer): Option[Seq[NCNlpSentenceToken]] = {
- require(toks != null)
- require(sparse && !hasIdl)
-
- sparseMatch0(toks.toSeq, isMatch, (t: NCNlpSentenceToken) => t.startCharIndex, shouldBeNeighbors = false)
- }
-
- /**
- *
- * @param tows
- * @param req
- */
- def sparseMatch(tows: Seq[NCIdlContent], req: NCRequest): Option[Seq[NCIdlContent]] = {
- require(tows != null)
- require(req != null)
- require(hasIdl)
-
- sparseMatch0(
- tows,
- (t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req),
- (t: NCIdlContent) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex else t.toOption.get.startCharIndex,
- shouldBeNeighbors = !sparse
- )
- }
+ private lazy val hash =
+ Seq(
+ super.hashCode(),
+ isTextOnly,
+ regexChunks,
+ idlChunks,
+ isValueSynonym,
+ isElementId,
+ isValueName,
+ value
+ ).map(p => if (p == null) 0 else p.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
override def toString(): String = mkString(" ")
@@ -286,41 +119,23 @@
}
}
- override def canEqual(other: Any): Boolean = other.isInstanceOf[NCProbeSynonym]
-
override def equals(other: Any): Boolean = other match {
case that: NCProbeSynonym =>
- super.equals(that) &&
- (that canEqual this) &&
- isTextOnly == that.isTextOnly &&
- regexChunks == that.regexChunks &&
- idlChunks == that.idlChunks &&
- isValueSynonym == that.isValueSynonym &&
- isElementId == that.isElementId &&
- isValueName == that.isValueName &&
- value == that.value
+ isElementId == that.isElementId &&
+ isTextOnly == that.isTextOnly &&
+ regexChunks == that.regexChunks &&
+ idlChunks == that.idlChunks &&
+ isValueSynonym == that.isValueSynonym &&
+ isValueName == that.isValueName &&
+ value == that.value &&
+ super.equals(that)
case _ => false
}
- override def hashCode(): Int = {
- val state = Seq(
- super.hashCode(),
- isTextOnly,
- regexChunks,
- idlChunks,
- isValueSynonym,
- isElementId,
- isValueName,
- value
- )
-
- state.map(p => if (p == null) 0 else p.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
- }
+ override def hashCode(): Int = hash
}
object NCProbeSynonym {
- type NCIdlContent = Either[NCToken, NCNlpSentenceToken]
-
/**
*
* @param isElementId
@@ -341,9 +156,9 @@
permute: Boolean
): NCProbeSynonym = {
val syn = new NCProbeSynonym(isElementId, isValueName, isDirect, value, sparse, permute)
-
+
syn ++= chunks
-
+
syn
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index bcf2c9c..2b91128 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -22,6 +22,7 @@
import org.apache.nlpcraft.common.{NCE, TOK_META_ALIASES_KEY}
import org.apache.nlpcraft.model.NCVariant
import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCTokenLogger, NCVariantImpl}
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
import java.io.{Serializable => JSerializable}
import java.util
@@ -267,6 +268,8 @@
for ((tok, tokNlp) <- toks.zip(nlpSen) if tokNlp.isUser)
process(tok, tokNlp)
+ ok = ok && NCSynonymsManager.isStillValidIdl(srvReqId, toks.toSeq)
+
if (ok) Some(new NCVariantImpl(toks.asJava)) else None
})
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index c01801e..827846f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -17,6 +17,9 @@
package org.apache.nlpcraft.probe.mgrs.deploy
+import com.fasterxml.jackson.core.JsonParser
+import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.google.common.reflect.ClassPath
import java.io._
@@ -35,17 +38,19 @@
import org.apache.nlpcraft.common.util.NCUtils.{IDL_FIX, REGEX_FIX}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.model.factories.basic.NCBasicModelFactory
+import org.apache.nlpcraft.model.impl.json.NCElementJson
+import org.apache.nlpcraft.model.intent._
import org.apache.nlpcraft.model.intent.compiler.NCIdlCompiler
import org.apache.nlpcraft.model.intent.solver.NCIntentSolver
-import org.apache.nlpcraft.model.intent._
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{IDL, REGEX, TEXT}
import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeModelCallback, NCProbeSynonym, NCProbeSynonymChunk, NCProbeSynonymsWrapper}
import java.lang.annotation.Annotation
+import java.util.Optional
import scala.util.Using
import scala.compat.java8.OptionConverters._
import scala.collection.mutable
-import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, MapHasAsScala, SetHasAsScala}
+import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, MapHasAsScala, SeqHasAsJava, SetHasAsJava, SetHasAsScala}
import scala.util.control.Exception._
/**
@@ -58,11 +63,13 @@
private final val CLS_INTENT = classOf[NCIntent]
private final val CLS_INTENT_REF = classOf[NCIntentRef]
private final val CLS_QRY_RES = classOf[NCResult]
- private final val CLS_SLV_CTX = classOf[NCIntentMatch]
+ private final val CLS_INTENT_MATCH = classOf[NCIntentMatch]
private final val CLS_SAMPLE = classOf[NCIntentSample]
private final val CLS_SAMPLE_REF = classOf[NCIntentSampleRef]
private final val CLS_MDL_CLS_REF = classOf[NCModelAddClasses]
private final val CLS_MDL_PKGS_REF = classOf[NCModelAddPackage]
+ private final val CLS_ELEM_DEF = classOf[NCAddElement]
+ private final val CLS_ELEM_DEF_CLASS = classOf[NCAddElementClass]
// Java and scala lists.
private final val CLS_SCALA_SEQ = classOf[Seq[_]]
@@ -92,6 +99,13 @@
private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
private final val SUSP_SYNS_CHARS = Seq("?", "*", "+")
+ private val MAPPER_YAML = new ObjectMapper(new YAMLFactory)
+ private val MAPPER_JSON = new ObjectMapper
+
+ MAPPER_JSON.enable(JsonParser.Feature.ALLOW_COMMENTS)
+ MAPPER_JSON.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true)
+ MAPPER_YAML.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true)
+
@volatile private var data: mutable.ArrayBuffer[NCProbeModel] = _
@volatile private var mdlFactory: NCModelFactory = _
@@ -112,6 +126,12 @@
*/
case class SynonymHolder(elmId: String, syn: NCProbeSynonym)
+ /**
+ *
+ * @param method
+ * @param objClassName
+ * @param obj
+ */
case class MethodOwner(method: Method, objClassName: String, obj: Any) {
require(method != null)
require(objClassName != null ^ obj != null)
@@ -120,11 +140,7 @@
def getObject: Any = {
if (lazyObj == null)
- try
- lazyObj = U.mkObject(objClassName)
- catch {
- case e: Throwable => throw new NCE(s"Error initializing object of type: $objClassName", e)
- }
+ lazyObj = U.mkObject(objClassName)
lazyObj
}
@@ -155,6 +171,21 @@
for (makro <- macros.keys if !set.exists(_.contains(makro)))
logger.warn(s"Unused macro detected [mdlId=${mdl.getId}, macro=$makro]")
+
+ def isSuspicious(s: String): Boolean =
+ SUSP_SYNS_CHARS.exists(susp => s.contains(susp))
+
+ for (makro <- macros) {
+ val mkName = makro._1
+ val mkVal = makro._2
+
+ // Ignore suspicious chars if regex is used in macro...
+ if (isSuspicious(mkName) || (isSuspicious(mkVal) && !mkVal.contains("//")))
+ logger.warn(s"Suspicious macro definition (use of ${SUSP_SYNS_CHARS.map(s => s"'$s'").mkString(", ")} chars) [" +
+ s"mdlId=${mdl.getId}, " +
+ s"macro=$makro" +
+ s"]")
+ }
}
/**
@@ -170,29 +201,96 @@
val mdlId = mdl.getId
- for (elm <- mdl.getElements.asScala) {
+ val annElems = scanElementsAnnotations(mdl)
+
+ val wrappedMdl =
+ if (annElems.nonEmpty)
+ new NCModel {
+ private val allElems =
+ (annElems ++ (if (mdl.getElements == null) Set.empty else mdl.getElements.asScala)).
+ asJava
+
+ // One wrapped method.
+ override def getElements: util.Set[NCElement] = allElems
+
+ // Other methods delegated.
+ override def getId: String = mdl.getId
+ override def getName: String = mdl.getName
+ override def getVersion: String = mdl.getVersion
+
+ override def onParsedVariant(`var`: NCVariant): Boolean = mdl.onParsedVariant(`var`)
+ override def onContext(ctx: NCContext): NCResult = mdl.onContext(ctx)
+ override def onMatchedIntent(ctx: NCIntentMatch): Boolean = mdl.onMatchedIntent(ctx)
+ override def onResult(ctx: NCIntentMatch, res: NCResult): NCResult = mdl.onResult(ctx, res)
+ override def onRejection(ctx: NCIntentMatch, e: NCRejection): NCResult = mdl.onRejection(ctx, e)
+ override def onError(ctx: NCContext, e: Throwable): NCResult = mdl.onError(ctx, e)
+ override def getDescription: String = mdl.getDescription
+ override def getOrigin: String = mdl.getOrigin
+ override def getMaxUnknownWords: Int = mdl.getMaxUnknownWords
+ override def getMaxFreeWords: Int = mdl.getMaxFreeWords
+ override def getMaxSuspiciousWords: Int = mdl.getMaxSuspiciousWords
+ override def getMinWords: Int = mdl.getMinWords
+ override def getMaxWords: Int = mdl.getMaxWords
+ override def getMinTokens: Int = mdl.getMinTokens
+ override def getMaxTokens: Int = mdl.getMaxTokens
+ override def getMinNonStopwords: Int = mdl.getMinNonStopwords
+ override def isNonEnglishAllowed: Boolean = mdl.isNonEnglishAllowed
+ override def isNotLatinCharsetAllowed: Boolean = mdl.isNotLatinCharsetAllowed
+ override def isSwearWordsAllowed: Boolean = mdl.isSwearWordsAllowed
+ override def isNoNounsAllowed: Boolean = mdl.isNoNounsAllowed
+ override def isPermutateSynonyms: Boolean = mdl.isPermutateSynonyms
+ override def isDupSynonymsAllowed: Boolean = mdl.isDupSynonymsAllowed
+ override def getMaxTotalSynonyms: Int = mdl.getMaxTotalSynonyms
+ override def isNoUserTokensAllowed: Boolean = mdl.isNoUserTokensAllowed
+ override def isSparse: Boolean = mdl.isSparse
+ override def getMetadata: util.Map[String, AnyRef] = mdl.getMetadata
+ override def getAdditionalStopWords: util.Set[String] = mdl.getAdditionalStopWords
+ override def getExcludedStopWords: util.Set[String] = mdl.getExcludedStopWords
+ override def getSuspiciousWords: util.Set[String] = mdl.getSuspiciousWords
+ override def getMacros: util.Map[String, String] = mdl.getMacros
+ override def getParsers: util.List[NCCustomParser] = mdl.getParsers
+ override def getEnabledBuiltInTokens: util.Set[String] = mdl.getEnabledBuiltInTokens
+ override def getAbstractTokens: util.Set[String] = mdl.getAbstractTokens
+ override def getMaxElementSynonyms: Int = mdl.getMaxElementSynonyms
+ override def isMaxSynonymsThresholdError: Boolean = mdl.isMaxSynonymsThresholdError
+ override def getConversationTimeout: Long = mdl.getConversationTimeout
+ override def getConversationDepth: Int = mdl.getConversationDepth
+ override def getRestrictedCombinations: util.Map[String, util.Set[String]] =
+ mdl.getRestrictedCombinations
+
+ override def onInit(): Unit = mdl.onInit()
+ override def onDiscard(): Unit = mdl.onDiscard()
+
+ override def metaOpt[T](prop: String): Optional[T] = mdl.metaOpt(prop)
+ override def meta[T](prop: String): T = mdl.meta(prop)
+ override def metax[T](prop: String): T = mdl.metax(prop)
+ override def meta[T](prop: String, dflt: T): T = mdl.meta(prop, dflt)
+ }
+ else
+ mdl
+
+ for (elm <- wrappedMdl.getElements.asScala)
if (!elm.getId.matches(ID_REGEX))
throw new NCE(
- s"Model element ID does not match regex [" +
+ s"Model element ID does not match regex [" +
s"mdlId=$mdlId, " +
s"elmId=${elm.getId}, " +
s"regex=$ID_REGEX" +
- s"]"
+ s"]"
)
- }
- checkMacros(mdl)
+ checkMacros(wrappedMdl)
val parser = new NCMacroParser
// Initialize macro parser.
- mdl.getMacros.asScala.foreach(t => parser.addMacro(t._1, t._2))
+ wrappedMdl.getMacros.asScala.foreach(t => parser.addMacro(t._1, t._2))
- for (elm <- mdl.getElements.asScala)
- checkElement(mdl, elm)
+ for (elm <- wrappedMdl.getElements.asScala)
+ checkElement(wrappedMdl, elm)
- checkElementIdsDups(mdl)
- checkCyclicDependencies(mdl)
+ checkElementIdsDups(wrappedMdl)
+ checkCyclicDependencies(wrappedMdl)
/**
*
@@ -211,9 +309,9 @@
else
NCNlpCoreManager.stem(word)
- val addStopWords = checkAndStemmatize(mdl.getAdditionalStopWords, "additionalStopWords")
- val exclStopWords = checkAndStemmatize(mdl.getExcludedStopWords, "excludedStopWords")
- val suspWords = checkAndStemmatize(mdl.getSuspiciousWords, "suspiciousWord")
+ val addStopWords = checkAndStemmatize(wrappedMdl.getAdditionalStopWords, "additionalStopWords")
+ val exclStopWords = checkAndStemmatize(wrappedMdl.getExcludedStopWords, "excludedStopWords")
+ val suspWords = checkAndStemmatize(wrappedMdl.getSuspiciousWords, "suspiciousWord")
checkStopwordsDups(mdlId, addStopWords, exclStopWords)
@@ -224,26 +322,26 @@
def sparse(syns: Set[SynonymHolder], sp: Boolean): Set[SynonymHolder] = syns.filter(s => ok(s.syn.sparse, sp))
var cnt = 0
- val maxCnt = mdl.getMaxTotalSynonyms
+ val maxCnt = wrappedMdl.getMaxTotalSynonyms
// Process and check elements.
- for (elm <- mdl.getElements.asScala) {
+ for (elm <- wrappedMdl.getElements.asScala) {
val elmId = elm.getId
// Checks before macros processing.
- val susp = elm.getSynonyms.asScala.filter(syn => SUSP_SYNS_CHARS.exists(susp => syn.contains(susp)))
+ val susp = elm.getSynonyms.asScala.filter(syn => !syn.contains("//") && SUSP_SYNS_CHARS.exists(susp => syn.contains(susp)))
if (susp.nonEmpty)
logger.warn(
- s"Suspicious synonyms detected [" +
+ s"Suspicious synonyms detected (use of ${SUSP_SYNS_CHARS.map(s => s"'$s'").mkString(", ")} chars) [" +
s"mdlId=$mdlId, " +
s"elementId=$elmId, " +
s"synonyms=[${susp.mkString(", ")}]" +
s"]"
)
- val sparseElem = elm.isSparse.orElse(mdl.isSparse)
- val permuteElem = elm.isPermutateSynonyms.orElse(mdl.isPermutateSynonyms)
+ val sparseElem = elm.isSparse.orElse(wrappedMdl.isSparse)
+ val permuteElem = elm.isPermutateSynonyms.orElse(wrappedMdl.isPermutateSynonyms)
def addSynonym(
isElementId: Boolean,
@@ -260,7 +358,7 @@
if (syns.add(holder)) {
cnt += 1
- if (mdl.isMaxSynonymsThresholdError && cnt > maxCnt)
+ if (wrappedMdl.isMaxSynonymsThresholdError && cnt > maxCnt)
throw new NCE(s"Too many total synonyms detected [" +
s"mdlId=$mdlId, " +
s"cnt=$cnt, " +
@@ -348,7 +446,7 @@
chunks ++= U.splitTrimFilter(x.substring(start), " ")
- chunks.map(mkChunk(mdl, _))
+ chunks.map(mkChunk(wrappedMdl, _))
}
/**
@@ -392,10 +490,13 @@
val vals =
(if (elm.getValues != null) elm.getValues.asScala else Seq.empty) ++
(
- elm.getValueLoader.asScala match {
- case Some(ldr) => ldr.load(elm).asScala
- case None => Seq.empty
- }
+ if (elm.getValueLoader == null)
+ Seq.empty
+ else
+ elm.getValueLoader.asScala match {
+ case Some(ldr) => ldr.load(elm).asScala
+ case None => Seq.empty
+ }
)
// Add value synonyms.
@@ -436,7 +537,7 @@
}
}
- if (cnt > maxCnt && !mdl.isMaxSynonymsThresholdError)
+ if (cnt > maxCnt && !wrappedMdl.isMaxSynonymsThresholdError)
logger.warn(
s"Too many total synonyms detected [" +
s"mdlId=$mdlId, " +
@@ -445,7 +546,7 @@
s"]")
// Discard value loaders.
- for (elm <- mdl.getElements.asScala)
+ for (elm <- wrappedMdl.getElements.asScala if elm.getValueLoader != null)
elm.getValueLoader.ifPresent(_.onDiscard())
val allAliases = syns
@@ -463,7 +564,7 @@
s"dups=${allAliases.diff(allAliases.distinct).mkString(", ")}" +
s"]")
- val idAliasDups = mdl.getElements.asScala.map(_.getId).intersect(allAliases.toSet)
+ val idAliasDups = wrappedMdl.getElements.asScala.map(_.getId).intersect(allAliases.toSet)
// Check that IDL aliases don't intersect with element IDs.
if (idAliasDups.nonEmpty)
@@ -486,7 +587,7 @@
}
if (dupSyns.nonEmpty) {
- if (mdl.isDupSynonymsAllowed) {
+ if (wrappedMdl.isDupSynonymsAllowed) {
val tbl = NCAsciiTable("Elements", "Dup Synonym")
dupSyns.foreach(row => tbl += (
@@ -499,14 +600,14 @@
logger.trace(s" ${b("+--")} Model '$mdlId' allows duplicate synonyms but the large number may degrade the performance.")
logger.trace(tbl.toString)
- logger.warn(s"Duplicate synonyms (${dupSyns.size}) found in '$mdlId' model - turn on TRACE logging to see them.")
+ logger.warn(s"Duplicate synonyms (${dupSyns.size}) found in '$mdlId' model - turn on TRACE logging to see if they can be ignored.")
}
else
throw new NCE(s"Duplicated synonyms found and not allowed [mdlId=$mdlId]")
}
// Scan for intent annotations in the model class.
- val intents = scanIntents(mdl)
+ val intents = scanIntents(mdl, wrappedMdl)
var solver: NCIntentSolver = null
@@ -516,7 +617,7 @@
case ids if ids.nonEmpty =>
throw new NCE(s"Duplicate intent IDs [" +
s"mdlId=$mdlId, " +
- s"origin=${mdl.getOrigin}, " +
+ s"origin=${wrappedMdl.getOrigin}, " +
s"ids=${ids.mkString(",")}" +
s"]")
case _ => ()
@@ -535,7 +636,7 @@
val simple = idl(syns.toSet, idl = false)
NCProbeModel(
- model = mdl,
+ model = wrappedMdl,
solver = solver,
intents = intents.map(_._1).toSeq,
callbacks = intents.map(kv => (
@@ -552,8 +653,8 @@
addStopWordsStems = addStopWords,
exclStopWordsStems = exclStopWords,
suspWordsStems = suspWords,
- elements = mdl.getElements.asScala.map(elm => (elm.getId, elm)).toMap,
- samples = scanSamples(mdl)
+ elements = wrappedMdl.getElements.asScala.map(elm => (elm.getId, elm)).toMap,
+ samples = scanSamples(wrappedMdl)
)
}
@@ -1133,9 +1234,9 @@
val allParamTypes = mtd.getParameterTypes.toSeq
- val ctxFirstParam = allParamTypes.nonEmpty && allParamTypes.head == CLS_SLV_CTX
+ val ctxFirstParam = allParamTypes.nonEmpty && allParamTypes.head == CLS_INTENT_MATCH
- def getTokensSeq[T](data: Seq[T]): Seq[T] =
+ def getSeq[T](data: Seq[T]): Seq[T] =
if (data == null)
Seq.empty
else if (ctxFirstParam)
@@ -1144,8 +1245,8 @@
data
val allAnns = mtd.getParameterAnnotations
- val tokParamAnns = getTokensSeq(allAnns.toIndexedSeq).filter(_ != null)
- val tokParamTypes = getTokensSeq(allParamTypes)
+ val tokParamAnns = getSeq(allAnns.toIndexedSeq).filter(_ != null)
+ val tokParamTypes = getSeq(allParamTypes)
// Checks tokens parameters annotations count.
if (tokParamAnns.length != tokParamTypes.length)
@@ -1158,21 +1259,28 @@
// Gets terms IDs.
val termIds = tokParamAnns.toList.zipWithIndex.map {
- case (anns, idx) =>
+ case (annArr, idx) =>
def mkArg(): String = arg2Str(mtd, idx, ctxFirstParam)
- val annsTerms = anns.filter(_.isInstanceOf[NCIntentTerm])
+ val termAnns = annArr.filter(_.isInstanceOf[NCIntentTerm])
// Each method arguments (second and later) must have one NCIntentTerm annotation.
- annsTerms.length match {
- case 1 => annsTerms.head.asInstanceOf[NCIntentTerm].value()
+ termAnns.length match {
+ case 1 => termAnns.head.asInstanceOf[NCIntentTerm].value()
case 0 =>
- throw new NCE(s"Missing @NCIntentTerm annotation for [" +
- s"mdlId=$mdlId, " +
- s"intentId=${intent.id}, " +
- s"arg=${mkArg()}" +
- s"]")
+ if (idx == 0)
+ throw new NCE(s"Missing @NCIntentTerm annotation or wrong type of the 1st parameter (must be 'NCIntentMatch') for [" +
+ s"mdlId=$mdlId, " +
+ s"intentId=${intent.id}, " +
+ s"arg=${mkArg()}" +
+ s"]")
+ else
+ throw new NCE(s"Missing @NCIntentTerm annotation for [" +
+ s"mdlId=$mdlId, " +
+ s"intentId=${intent.id}, " +
+ s"arg=${mkArg()}" +
+ s"]")
case _ =>
throw new NCE(s"Too many @NCIntentTerm annotations for [" +
@@ -1208,7 +1316,7 @@
s"]")
}
- val paramGenTypes = getTokensSeq(mtd.getGenericParameterTypes.toIndexedSeq)
+ val paramGenTypes = getSeq(mtd.getGenericParameterTypes.toIndexedSeq)
require(tokParamTypes.length == paramGenTypes.length)
@@ -1534,17 +1642,130 @@
* @param mdl
*/
@throws[NCE]
- private def scanIntents(mdl: NCModel): Set[Intent] = {
- val cl = Thread.currentThread().getContextClassLoader
+ private def scanElementsAnnotations(mdl: NCModel): Set[NCElement] = {
+ val elems = mutable.HashSet.empty[NCElement]
- val mdlId = mdl.getId
+ def scan(claxx: Class[_]): Unit = {
+ val allClassAnns = mutable.ArrayBuffer.empty[NCAddElement]
+
+ def add(anns: Array[NCAddElement]): Unit = if (anns != null) allClassAnns ++= anns.toSeq
+
+ // Class.
+ add(claxx.getAnnotationsByType(CLS_ELEM_DEF))
+
+ // All class's methods.
+ getAllMethods(claxx).foreach(m => add(m.getAnnotationsByType(CLS_ELEM_DEF)))
+
+ allClassAnns.foreach(a => {
+ val body = a.value().strip
+ val expectedJson = body.head == '{'
+
+ val jsElem =
+ try
+ (if (expectedJson) MAPPER_JSON else MAPPER_YAML).readValue(body, classOf[NCElementJson])
+ catch {
+ case e: Exception =>
+ // TODO: fix text
+ throw new NCE(s"Error parsing element[" +
+ s"modelId=${mdl.getId}, " +
+ s"definitionClass=${claxx.getName}, " +
+ s"element='$body', " +
+ s"expectedFormat=${if (expectedJson) "JSON" else "YAML"}" +
+ s"]",
+ e
+ )
+ }
+
+ val elem =
+ new NCElement {
+ private var loader: NCValueLoader = _
+
+ private def nvl[T](arr: Array[T]): Seq[T] = if (arr == null) Seq.empty else arr.toSeq
+
+ override def getId: String = jsElem.getId
+ override def getDescription: String = jsElem.getDescription
+ override def getParentId: String = jsElem.getParentId
+
+ override def getGroups: util.List[String] = nvl(jsElem.getGroups).asJava
+ override def getMetadata: util.Map[String, AnyRef] = jsElem.getMetadata
+ override def getSynonyms: util.List[String] = nvl(jsElem.getSynonyms).asJava
+
+ override def getValues: util.List[NCValue] =
+ nvl(jsElem.getValues).map(v => new NCValue {
+ override def getName: String = v.getName
+ override def getSynonyms: util.List[String] = nvl(v.getSynonyms).asJava
+ }).asJava
+
+ override def getValueLoader: Optional[NCValueLoader] =
+ if (jsElem.getValueLoader != null) {
+ if (loader == null) {
+ loader = U.mkObject(jsElem.getValueLoader)
+
+ loader.onInit()
+ }
+
+ Optional.of(loader)
+ }
+ else
+ Optional.empty()
+
+ override def isPermutateSynonyms: Optional[java.lang.Boolean] =
+ Optional.ofNullable(jsElem.isPermutateSynonyms)
+ override def isSparse: Optional[java.lang.Boolean] =
+ Optional.ofNullable(jsElem.isSparse)
+ }
+
+ elems += elem
+
+ val allClassDefAnns = mutable.ArrayBuffer.empty[NCAddElementClass]
+
+ def addClass(anns: Array[NCAddElementClass]): Unit = if (anns != null) allClassDefAnns ++= anns.toSeq
+
+ addClass(claxx.getAnnotationsByType(CLS_ELEM_DEF_CLASS))
+ getAllMethods(claxx).foreach(m => addClass(m.getAnnotationsByType(CLS_ELEM_DEF_CLASS)))
+
+ allClassDefAnns.foreach(cl =>
+ try
+ elems += cl.value().getDeclaredConstructor().newInstance().asInstanceOf[NCElement]
+ catch {
+ case e: Exception => throw new NCE(s"Failed to instantiate element for: ${cl.value()}", e)
+ }
+ )
+ })
+ }
+
+ val claxx = Class.forName(mdl.meta[String](MDL_META_MODEL_CLASS_KEY))
+
+ scan(claxx)
+
+ val classesRef = claxx.getAnnotationsByType(CLS_MDL_CLS_REF)
+
+ if (classesRef != null && classesRef.nonEmpty)
+ classesRef.head.value().foreach(scan)
+
+ val packRef = claxx.getAnnotationsByType(CLS_MDL_PKGS_REF)
+
+ if (packRef != null && packRef.nonEmpty)
+ packRef.head.value().flatMap(pack => getPackageClasses(mdl, pack)).foreach(scan)
+
+ elems.toSet
+ }
+
+ /**
+ *
+ * @param mdl
+ * @param wrappedMdl
+ */
+ @throws[NCE]
+ private def scanIntents(mdl: NCModel, wrappedMdl: NCModel): Set[Intent] = {
+ val mdlId = wrappedMdl.getId
val intentDecls = mutable.Buffer.empty[NCIdlIntent]
val intents = mutable.Buffer.empty[Intent]
// First, get intent declarations from the JSON/YAML file, if any.
mdl match {
case adapter: NCModelFileAdapter =>
- intentDecls ++= adapter.getIntents.asScala.flatMap(NCIdlCompiler.compileIntents(_, mdl, mdl.getOrigin))
+ intentDecls ++= adapter.getIntents.asScala.flatMap(NCIdlCompiler.compileIntents(_, wrappedMdl, wrappedMdl.getOrigin))
case _ => ()
}
@@ -1553,12 +1774,12 @@
try
for (
ann <- cls.getAnnotationsByType(CLS_INTENT);
- intent <- NCIdlCompiler.compileIntents(ann.value(), mdl, cls.getName)
+ intent <- NCIdlCompiler.compileIntents(ann.value(), wrappedMdl, cls.getName)
)
if (intentDecls.exists(_.id == intent.id))
throw new NCE(s"Duplicate intent ID [" +
s"mdlId=$mdlId, " +
- s"origin=${mdl.getOrigin}, " +
+ s"origin=${wrappedMdl.getOrigin}, " +
s"class=$cls, " +
s"id=${intent.id}" +
s"]")
@@ -1580,13 +1801,13 @@
if (intents.exists(i => i._1.id == intent.id && i._2.id != cb.id))
throw new NCE(s"The intent cannot be bound to more than one callback [" +
s"mdlId=$mdlId, " +
- s"origin=${mdl.getOrigin}, " +
+ s"origin=${wrappedMdl.getOrigin}, " +
s"class=${mo.objClassName}, " +
s"intentId=${intent.id}" +
s"]")
else {
intentDecls += intent
- intents += (intent -> prepareCallback(mo, mdl, intent))
+ intents += (intent -> prepareCallback(mo, wrappedMdl, intent))
}
def existsForOtherMethod(id: String): Boolean =
@@ -1598,28 +1819,28 @@
// Process inline intent declarations by @NCIntent annotation.
for (
ann <- m.getAnnotationsByType(CLS_INTENT);
- intent <- NCIdlCompiler.compileIntents(ann.value(), mdl, mtdStr)
+ intent <- NCIdlCompiler.compileIntents(ann.value(), wrappedMdl, mtdStr)
)
if (intentDecls.exists(_.id == intent.id && existsForOtherMethod(intent.id)))
throw new NCE(s"Duplicate intent ID [" +
s"mdlId=$mdlId, " +
- s"origin=${mdl.getOrigin}, " +
+ s"origin=${wrappedMdl.getOrigin}, " +
s"callback=$mtdStr, " +
s"id=${intent.id}" +
s"]")
else
- bindIntent(intent, prepareCallback(mo, mdl, intent))
+ bindIntent(intent, prepareCallback(mo, wrappedMdl, intent))
// Process intent references from @NCIntentRef annotation.
for (ann <- m.getAnnotationsByType(CLS_INTENT_REF)) {
val refId = ann.value().trim
intentDecls.find(_.id == refId) match {
- case Some(intent) => bindIntent(intent, prepareCallback(mo, mdl, intent))
+ case Some(intent) => bindIntent(intent, prepareCallback(mo, wrappedMdl, intent))
case None => throw new NCE(
s"""@NCIntentRef("$refId") references unknown intent ID [""" +
s"mdlId=$mdlId, " +
- s"origin=${mdl.getOrigin}, " +
+ s"origin=${wrappedMdl.getOrigin}, " +
s"refId=$refId, " +
s"callback=$mtdStr" +
s"]"
@@ -1648,7 +1869,7 @@
throw new NCE(
s"Additional reference in @${clazz.getSimpleName} annotation is empty [" +
s"mdlId=$mdlId, " +
- s"origin=${mdl.getOrigin}" +
+ s"origin=${wrappedMdl.getOrigin}" +
s"]"
)
@@ -1667,23 +1888,7 @@
// Process @NCModelAddPackages annotation.
scanAdditionalClasses(
CLS_MDL_PKGS_REF,
- (a: NCModelAddPackage) =>
- a.value().toIndexedSeq.flatMap(p => {
- //noinspection UnstableApiUsage
- val res = ClassPath.from(cl).getTopLevelClassesRecursive(p).asScala.map(_.load())
-
- // Check should be after classes loading attempt.
- if (cl.getDefinedPackage(p) == null)
- throw new NCE(
- s"Invalid additional references in @${CLS_MDL_PKGS_REF.getSimpleName} annotation [" +
- s"mdlId=$mdlId, " +
- s"origin=${mdl.getOrigin}, " +
- s"package=$p" +
- s"]"
- )
-
- res
- })
+ (a: NCModelAddPackage) => a.value().toIndexedSeq.flatMap(p => getPackageClasses(wrappedMdl, p))
)
val unusedIntents = intentDecls.filter(i => !intents.exists(_._1.id == i.id))
@@ -1691,14 +1896,40 @@
if (unusedIntents.nonEmpty)
logger.warn(s"Intents are unused (have no callback): [" +
s"mdlId=$mdlId, " +
- s"origin=${mdl.getOrigin}, " +
+ s"origin=${wrappedMdl.getOrigin}, " +
s"intentIds=${unusedIntents.map(_.id).mkString("(", ", ", ")")}]"
)
-
+
intents.toSet
}
/**
+ *
+ * @param mdl
+ * @param pack
+ * @return
+ */
+ @throws[NCE]
+ private def getPackageClasses(mdl: NCModel, pack: String): Set[Class[_]] = {
+ val cl = Thread.currentThread().getContextClassLoader
+
+ //noinspection UnstableApiUsage
+ val res = ClassPath.from(cl).getTopLevelClassesRecursive(pack).asScala.map(_.load())
+
+ // Check should be after classes loading attempt.
+ if (cl.getDefinedPackage(pack) == null)
+ throw new NCE(
+ s"Invalid additional references in @${CLS_MDL_PKGS_REF.getSimpleName} annotation [" +
+ s"mdlId=${mdl.getId}, " +
+ s"origin=${mdl.getOrigin}, " +
+ s"package=$pack" +
+ s"]"
+ )
+
+ res.toSet
+ }
+
+ /**
* Scans given model for intent samples.
*
* @param mdl Model to scan.
@@ -1721,25 +1952,46 @@
if (intAnns.isEmpty && refAnns.isEmpty)
throw new NCE(s"@NCIntentSample or @NCIntentSampleRef annotations without corresponding @NCIntent or @NCIntentRef annotations: $mtdStr")
else {
- def read[T](arr: Array[T], annName: String, getValue: T => Seq[String]): Seq[Seq[String]] = {
- val seq = arr.toSeq.map(getValue).map(_.map(_.strip).filter(s => s.nonEmpty && s.head != '#'))
+ /**
+ *
+ * @param annArr
+ * @param annName
+ * @param getSamples
+ * @param getSource
+ * @tparam T
+ * @return
+ */
+ def read[T](
+ annArr: Array[T],
+ annName: String,
+ getSamples: T => Seq[String],
+ getSource: Option[T => String]): Seq[Seq[String]] = {
+ for (ann <- annArr.toSeq) yield {
+ val samples = getSamples(ann).map(_.strip).filter(s => s.nonEmpty && s.head != '#')
- if (seq.exists(_.isEmpty))
- logger.warn(s"$annName annotation has no samples: $mtdStr")
+ if (samples.isEmpty) {
+ getSource match {
+ case None => logger.warn(s"$annName annotation has no samples: $mtdStr")
+ case Some(f) => logger.warn(s"$annName annotation references '${f(ann)}' file that has no samples: $mtdStr")
+ }
- seq
- }
+ Seq.empty
+ }
+ else
+ samples
+ }
+ }.filter(_.nonEmpty)
val seqSeq =
read[NCIntentSample](
- smpAnns, "@NCIntentSample", _.value().toSeq
+ smpAnns, "@NCIntentSample", _.value().toSeq, None
) ++
read[NCIntentSampleRef](
- smpAnnsRef, "@NCIntentSampleRef", a => U.readAnySource(a.value())
+ smpAnnsRef, "@NCIntentSampleRef", a => U.readAnySource(a.value()), Some(_.value())
)
if (U.containsDups(seqSeq.flatMap(_.toSeq).toList))
- logger.warn(s"@NCIntentSample and @NCIntentSampleRef annotations have duplicates (safely ignoring): $mtdStr")
+ logger.warn(s"@NCIntentSample and @NCIntentSampleRef annotations have duplicates: $mtdStr")
val distinct = seqSeq.map(_.distinct).distinct
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/dialogflow/NCDialogFlowManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/dialogflow/NCDialogFlowManager.scala
index b7e667d..c90f918 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/dialogflow/NCDialogFlowManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/dialogflow/NCDialogFlowManager.scala
@@ -177,7 +177,7 @@
}
val tbl = NCAsciiTable(
- "",
+ "#",
"Intent ID",
"Sever Request ID",
"Text",
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index e0febb3..4f93936 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -54,6 +54,13 @@
logger.info(s"Models deployed: ${data.size}")
+ if (data.isEmpty) {
+ logger.error(s"To start the probe provide $BO${R}at least one model:$RST")
+ logger.error(" +-- Check probe configuration ('probe.conf' file) or -DNLPCRAFT_TEST_MODELS system property if using embedded probe.")
+
+ throw new NCE(s"Probe requires at least one model deployed to start.")
+ }
+
data.values.foreach(pm => {
val mdl = pm.model
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 4b6c697..b3fe3e1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -44,6 +44,7 @@
import org.apache.nlpcraft.probe.mgrs.nlp.impl._
import org.apache.nlpcraft.probe.mgrs.nlp.validate._
import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
import org.apache.nlpcraft.probe.mgrs.{NCProbeMessage, NCProbeVariants}
import java.io.Serializable
@@ -232,7 +233,7 @@
val tbl = NCAsciiTable()
- tbl += (s"${b("Text")}", nlpSens.map(s => rv(s.text)))
+ tbl += (s"${b("Text")}", nlpSens.map(s => rv(" " + s.text + " ")))
tbl += (s"${b("Model ID")}", mdlId)
tbl += (s"${b("User:")}", "")
tbl += (s"${b(" ID")}", usrId)
@@ -294,6 +295,9 @@
): Unit = {
require(errMsg.isDefined || (resType.isDefined && resBody.isDefined))
+ NCSentenceManager.clearRequestData(srvReqId)
+ NCSynonymsManager.clearRequestData(srvReqId)
+
val msg = NCProbeMessage(msgName)
msg.addData("srvReqId", srvReqId)
@@ -346,28 +350,23 @@
val durMs = U.now() - startMs.get
- if (errMsg.isEmpty)
- logger.info(s"" +
- s"\n" +
- s"${g("|>")}\n" +
- s"${g("|>")} ${bo(g("SUCCESS"))} result sent back to server [" +
- s"srvReqId=${m(srvReqId)}, " +
- s"type=${resType.getOrElse("")}, " +
- s"dur=${durMs}ms" +
- s"]\n" +
- s"${g("|>")}"
- )
- else
- logger.info(s"" +
- s"\n" +
- s"${r("|X")}\n" +
- s"${r("|X")} ${bo(r("REJECT"))} result sent back to server [" +
- s"srvReqId=${m(srvReqId)}, " +
- s"response=${errMsg.get}, " +
- s"dur=${durMs}ms" +
- s"]\n" +
- s"${r("|X")}"
- )
+ val tbl = NCAsciiTable()
+
+ if (errMsg.isEmpty) {
+ tbl += (s"${gb(w(" SUCCESS "))}", "")
+ tbl += (s"${g("---------")}", "")
+ tbl += (s"${b("Result type")}", resType.getOrElse(""))
+ }
+ else {
+ tbl += (s"${rb(w(" REJECT "))}", "")
+ tbl += (s"${r("--------")}", "")
+ tbl += (s"${r("Error")}", s"${r(errMsg.get)}")
+ }
+
+ tbl += (s"${b("Probe duration")}", s"${durMs}ms")
+ tbl += (s"${b("Server Request ID")}", m(srvReqId))
+
+ logger.info(s"\n$tbl")
}
val mdl = NCModelManager.getModel(mdlId, span)
@@ -526,8 +525,6 @@
)
})
- NCSentenceManager.clearCache(srvReqId)
-
// Final validation before execution.
try
sensSeq.foreach(NCValidateManager.postValidate(mdl, _, span))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 6e6f7d1..7196985 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,19 +19,20 @@
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
+import org.apache.nlpcraft.common.nlp.NCNlpSentence.NoteLink
import org.apache.nlpcraft.common.nlp.{NCNlpSentence => Sentence, NCNlpSentenceNote => NlpNote, NCNlpSentenceToken => NlpToken}
import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
+import org.apache.nlpcraft.model.impl.NCTokenImpl
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
+import org.apache.nlpcraft.probe.mgrs.{NCProbeIdlToken => IdlToken, NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
import java.io.Serializable
import java.util.{List => JList}
import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
import scala.collection.parallel.CollectionConverters._
import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, MapHasAsScala, SeqHasAsJava}
@@ -40,78 +41,14 @@
*/
object NCModelEnricher extends NCProbeEnricher {
type TokType = (NCToken, NCSynonymChunkKind)
- type Cache = mutable.Map[String, ArrayBuffer[Seq[Int]]]
- object Complex {
- def apply(t: NCToken): Complex =
- Complex(
- data = Left(t),
- isToken = true,
- isWord = false,
- token = t,
- word = null,
- origText = t.origText,
- wordIndexes = t.wordIndexes.toSet,
- minIndex = t.wordIndexes.head,
- maxIndex = t.wordIndexes.last
- )
-
- def apply(t: NlpToken): Complex =
- Complex(
- data = Right(t),
- isToken = false,
- isWord = true,
- token = null,
- word = t,
- origText = t.origText,
- wordIndexes = t.wordIndexes.toSet,
- minIndex = t.wordIndexes.head,
- maxIndex = t.wordIndexes.last
- )
+ object IdlTokensSeq {
+ def apply(all: Seq[IdlToken]): IdlTokensSeq = IdlTokensSeq(all.filter(_.isToken), all.flatMap(_.wordIndexes).toSet)
}
- case class Complex(
- data: NCIdlContent,
- isToken: Boolean,
- isWord: Boolean,
- token: NCToken,
- word: NlpToken,
- origText: String,
- wordIndexes: Set[Int],
- minIndex: Int,
- maxIndex: Int
- ) {
- private final val hash = if (isToken) Seq(wordIndexes, token.getId).hashCode() else wordIndexes.hashCode()
-
- override def hashCode(): Int = hash
-
- def isSubsetOf(minIndex: Int, maxIndex: Int, indexes: Set[Int]): Boolean =
- if (this.minIndex > maxIndex || this.maxIndex < minIndex)
- false
- else
- wordIndexes.subsetOf(indexes)
-
- override def equals(obj: Any): Boolean = obj match {
- case x: Complex =>
- hash == x.hash && (isToken && x.isToken && token == x.token || isWord && x.isWord && word == x.word)
- case _ => false
- }
-
- // Added for debug reasons.
- override def toString: String = {
- val idxs = wordIndexes.mkString(",")
-
- if (isToken && token.getId != "nlpcraft:nlp") s"'$origText' (${token.getId}) [$idxs]]" else s"'$origText' [$idxs]"
- }
- }
-
- object ComplexSeq {
- def apply(all: Seq[Complex]): ComplexSeq = ComplexSeq(all.filter(_.isToken), all.flatMap(_.wordIndexes).toSet)
- }
-
- case class ComplexSeq(tokensComplexes: Seq[Complex], wordsIndexes: Set[Int]) {
+ case class IdlTokensSeq(tokens: Seq[IdlToken], wordsIndexes: Set[Int]) {
private val (idxsSet: Set[Int], minIndex: Int, maxIndex: Int) = {
- val seq = tokensComplexes.flatMap(_.wordIndexes).distinct.sorted
+ val seq = tokens.flatMap(_.wordIndexes).distinct.sorted
(seq.toSet, seq.head, seq.last)
}
@@ -122,10 +59,10 @@
else
this.idxsSet.exists(idxsSet.contains)
- override def toString: String = tokensComplexes.mkString(" | ")
+ override def toString: String = tokens.mkString(" | ")
}
- case class ComplexHolder(complexesWords: Seq[Complex], complexes: Seq[ComplexSeq])
+ case class IdlTokensHolder(tokens: Seq[IdlToken], seqs: Seq[IdlTokensSeq])
/**
*
@@ -282,23 +219,65 @@
}
/**
- * Gets all sequential permutations of given tokens.
*
- * For example, if buffer contains "a b c d" tokens, then this function will return the
- * sequence of following token sequences in this order:
- * "a b c d"
- * "a b c"
- * "b c d"
- * "a b"
- * "b c"
- * "c d"
- * "a"
- * "b"
- * "c"
- * "d"
+ * 1. Prepares combination of tokens (sliding).
+ * Example: 'A B C D' -> {'A B C', 'A B', 'B C', 'A', 'B', 'C'}
+ * One sentence converted to 4 pieces.
+ *
+ * 2. Additionally, each piece converted into set of elements with all possible its stopwords permutations.
+ * Example: Piece: 'x1, x2(stopword), x3(stopword), x4' will be expanded into
+ * {'x1, x2, x3, x4', 'x1, x2, x4', 'x1, x3, x4', 'x1, x4'}
+ *
+ * 3. All variants collected, duplicated deleted, etc.
*
* @param toks
- * @return
+ */
+ private def combosTokens(toks: Seq[NlpToken]): Seq[(Seq[NlpToken], Seq[NlpToken])] =
+ combos(toks).flatMap(combo => {
+ val stops = combo.filter(s => s.isStopWord && s != combo.head && s != combo.last)
+
+ val slides = mutable.ArrayBuffer.empty[mutable.ArrayBuffer[NlpToken]]
+
+ for (stop <- stops)
+ if (slides.nonEmpty && slides.last.last.index + 1 == stop.index)
+ slides.last += stop
+ else
+ slides += mutable.ArrayBuffer.empty :+ stop
+
+ // Too many stopords inside skipped.
+ val bigSlides = slides.filter(_.size > 2)
+
+ var stops4Delete: Seq[Seq[NlpToken]] =
+ if (bigSlides.nonEmpty) {
+ val allBig = bigSlides.flatten
+ val stops4AllCombs = stops.filter(p => !allBig.contains(p))
+
+ if (stops4AllCombs.nonEmpty)
+ for (
+ seq1 <- Range.inclusive(0, stops4AllCombs.size).flatMap(stops4AllCombs.combinations);
+ seq2 <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations)
+ )
+ yield seq1 ++ seq2.flatten
+ else
+ for (seq <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations))
+ yield seq.toSeq.flatten
+ }
+ else
+ Range.inclusive(1, stops.size).flatMap(stops.combinations)
+
+ stops4Delete = stops4Delete.filter(seq => !seq.contains(combo.head) && !seq.contains(combo.last))
+
+ (Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo).distinct
+
+ }).
+ filter(_._1.nonEmpty).
+ groupBy(_._1).
+ map(p => p._1 -> p._2.map(_._2).minBy(p => (-p.size, p.head.index))).
+ sortBy { case(data, combo) => (-combo.size, -data.size, combo.head.index, data.head.index) }
+
+ /**
+ *
+ * @param toks
*/
private def combos[T](toks: Seq[T]): Seq[Seq[T]] =
(for (n <- toks.size until 0 by -1) yield toks.sliding(n)).flatten.map(p => p)
@@ -308,9 +287,12 @@
* @param seq
* @param s
*/
- private def toParts(seq: Seq[NCIdlContent], s: Synonym): Seq[TokType] =
+ private def toParts(mdl: NCProbeModel, stvReqId: String, seq: Seq[IdlToken], s: Synonym): Seq[TokType] =
seq.zip(s.map(_.kind)).flatMap {
- case (complex, kind) => if (complex.isLeft) Some(complex.swap.toOption.get -> kind) else None
+ case (idlTok, kind) =>
+ val t = if (idlTok.isToken) idlTok.token else mkNlpToken(mdl, stvReqId, idlTok.word)
+
+ Some(t -> kind)
}
/**
@@ -318,10 +300,10 @@
* @param tows
* @param ns
*/
- private def toTokens(tows: Seq[NCIdlContent], ns: Sentence): Seq[NlpToken] =
+ private def toTokens(tows: Seq[IdlToken], ns: Sentence): Seq[NlpToken] =
(
- tows.filter(_.isRight).map(_.toOption.get) ++
- tows.filter(_.isLeft).map(_.swap.toOption.get).
+ tows.filter(_.isWord).map(_.word) ++
+ tows.filter(_.isToken).map(_.token).
flatMap(w => ns.filter(t => t.wordIndexes.intersect(w.wordIndexes).nonEmpty))
).sortBy(_.startCharIndex)
@@ -329,7 +311,6 @@
*
* @param m
* @param id
- * @return
*/
private def get(m: Map[String , Seq[Synonym]], id: String): Seq[Synonym] = m.getOrElse(id, Seq.empty)
@@ -349,10 +330,10 @@
* @param mdl
* @param ns
*/
- private def mkComplexes(mdl: NCProbeModel, ns: Sentence): ComplexHolder = {
- val complexesWords = ns.map(Complex(_))
+ private def mkHolder(mdl: NCProbeModel, ns: Sentence): IdlTokensHolder = {
+ val toks = ns.map(IdlToken(_))
- val complexes =
+ val seqs =
NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())).
map(_.asScala).
par.
@@ -371,15 +352,29 @@
// Single word token is not split as words - token.
// Partly (not strict in) token - word.
if (t.wordIndexes.length == 1 || senPartComb.contains(t))
- Seq(Complex(t))
+ Seq(IdlToken(t))
else
- t.wordIndexes.map(complexesWords)
+ t.wordIndexes.map(toks)
)
// Drops without tokens (IDL part works with tokens).
- }).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
+ }).filter(_.exists(_.isToken)).map(IdlTokensSeq(_)).distinct
).seq
- ComplexHolder(complexesWords, complexes)
+ IdlTokensHolder(toks, seqs)
+ }
+
+ /**
+ *
+ * @param mdl
+ * @param srvReqId
+ * @param t
+ */
+ private def mkNlpToken(mdl: NCProbeModel, srvReqId: String, t: NlpToken): NCToken = {
+ val notes = mutable.HashSet.empty[NlpNote]
+
+ notes += t.getNlpNote
+
+ NCTokenImpl(mdl, srvReqId, NlpToken(t.index, notes, t.stopsReasons))
}
/**
@@ -387,60 +382,37 @@
* @param h
* @param toks
*/
- private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: Set[Seq[Complex]]): Seq[Seq[Complex]] = {
+ private def mkCombinations(h: IdlTokensHolder, toks: Seq[NlpToken]): Seq[Seq[IdlToken]] = {
val idxs = toks.flatMap(_.wordIndexes).toSet
- h.complexes.par.
- flatMap(complexSeq => {
- val rec = complexSeq.tokensComplexes.filter(_.wordIndexes.exists(idxs.contains))
+ h.seqs.par.
+ flatMap(seq => {
+ val rec = seq.tokens.filter(_.wordIndexes.exists(idxs.contains))
// Drops without tokens (IDL part works with tokens).
- if (rec.nonEmpty) {
- val data = rec ++
- (complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.complexesWords)
-
- if (!cache.contains(data)) Some(data) else None
- }
+ if (rec.nonEmpty)
+ Some(rec ++
+ (seq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.tokens)
+ )
else
None
}).seq
}
- private def add(
- dbgType: String,
- ns: Sentence,
- contCache: Cache,
- elemId: String,
- greedy: Boolean,
- elemToks: Seq[NlpToken],
- sliceToksIdxs: Seq[Int],
- syn: Synonym,
- parts: Seq[TokType] = Seq.empty
- ): Unit = {
- val resIdxs = elemToks.map(_.index)
- val resIdxsSorted = resIdxs.sorted
+ /**
+ *
+ * @param matched
+ * @param toks2Match
+ */
+ private def getSparsedTokens(matched: Seq[NlpToken], toks2Match: Seq[NlpToken]): Seq[NlpToken] = {
+ require(matched.nonEmpty)
- if (resIdxsSorted == sliceToksIdxs && U.isContinuous(resIdxsSorted))
- contCache(elemId) += sliceToksIdxs
+ // Matched tokens should be already sorted.
+ val stopsInside = toks2Match.filter(t =>
+ t.isStopWord && !matched.contains(matched) && t.index > matched.head.index && t.index < matched.last.index
+ )
- val ok =
- (!greedy || !alreadyMarked(ns, elemId, elemToks, sliceToksIdxs)) &&
- ( parts.isEmpty || !parts.exists { case (t, _) => t.getId == elemId })
-
- if (ok)
- mark(ns, elemId, elemToks, direct = syn.isDirect && U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
-
- if (DEEP_DEBUG)
- logger.trace(
- s"${if (ok) "Added" else "Skipped"} element [" +
- s"id=$elemId, " +
- s"type=$dbgType, " +
- s"text='${elemToks.map(_.origText).mkString(" ")}', " +
- s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
- s"allTokensIndexes=${sliceToksIdxs.mkString("[", ",", "]")}, " +
- s"synonym=$syn" +
- s"]"
- )
+ if (stopsInside.nonEmpty) (matched ++ stopsInside).sortBy(_.index) else matched
}
@throws[NCE]
@@ -451,8 +423,12 @@
"enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text
) { span =>
val req = NCRequestImpl(senMeta, ns.srvReqId)
- val combToks = combos(ns.toSeq)
- lazy val ch = mkComplexes(mdl, ns)
+
+ lazy val ch = mkHolder(mdl, ns)
+ lazy val variantsToks =
+ ch.seqs.map(
+ p => p.tokens.map(p => if (p.isToken) p.token else mkNlpToken(mdl, ns.srvReqId, p.word))
+ )
def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
startScopedSpan(
@@ -461,44 +437,80 @@
if (DEEP_DEBUG)
logger.trace(s"Execution started [simpleEnabled=$simpleEnabled, idlEnabled=$idlEnabled]")
- val contCache = mutable.HashMap.empty ++
- mdl.elements.keys.map(k => k -> mutable.ArrayBuffer.empty[Seq[Int]])
- lazy val idlCache = mutable.HashSet.empty[Seq[Complex]]
-
for (
- toks <- combToks;
+ // 'toksExt' is piece of sentence, 'toks' is the same as 'toksExt' or without some stopwords set.
+ (toks, toksExt) <- combosTokens(ns.toSeq);
idxs = toks.map(_.index);
e <- mdl.elements.values;
- eId = e.getId;
+ elemId = e.getId;
greedy = e.isGreedy.orElse(mdl.model.isGreedy)
- if
- !greedy ||
- !contCache(eId).exists(_.containsSlice(idxs)) && !alreadyMarked(ns, eId, toks, idxs)
+ if !greedy || !alreadyMarked(ns, elemId, toks, idxs)
) {
+ def add(
+ dbgType: String,
+ elemToks: Seq[NlpToken],
+ syn: Synonym,
+ parts: Seq[TokType] = Seq.empty
+ ): Unit = {
+ val resIdxs = elemToks.map(_.index)
+
+ val ok =
+ (!greedy || !alreadyMarked(ns, elemId, elemToks, idxs)) &&
+ ( parts.isEmpty || !parts.exists { case (t, _) => t.getId == elemId })
+
+ if (ok)
+ mark(
+ ns,
+ elemId,
+ elemToks,
+ direct = syn.isDirect && U.isIncreased(resIdxs),
+ syn = Some(syn),
+ parts = parts
+ )
+
+ if (DEEP_DEBUG)
+ logger.trace(
+ s"${if (ok) "Added" else "Skipped"} element [" +
+ s"id=$elemId, " +
+ s"type=$dbgType, " +
+ s"text='${elemToks.map(_.origText).mkString(" ")}', " +
+ s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
+ s"allTokensIndexes=${idxs.mkString("[", ",", "]")}, " +
+ s"synonym=$syn" +
+ s"]"
+ )
+ }
+
// 1. SIMPLE.
- if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(eId) else !mdl.hasIdlSynonyms(eId))) {
+ if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(elemId) else !mdl.hasIdlSynonyms(elemId))) {
lazy val tokStems = toks.map(_.stem).mkString(" ")
// 1.1 Continuous.
var found = false
if (mdl.hasContinuousSynonyms)
- fastAccess(mdl.continuousSynonyms, eId, toks.length) match {
+ fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
case Some(h) =>
def tryMap(syns: Map[String, Synonym], notFound: () => Unit): Unit =
syns.get(tokStems) match {
case Some(s) =>
found = true
- add("simple continuous", ns, contCache, eId, greedy, toks, idxs, s)
+ add("simple continuous", toksExt, s)
case None => notFound()
}
def tryScan(syns: Seq[Synonym]): Unit =
- for (s <- syns if !found)
- if (s.isMatch(toks)) {
- found = true
- add("simple continuous scan", ns, contCache, eId, greedy, toks, idxs, s)
- }
+ for (syn <- syns if !found)
+ NCSynonymsManager.onMatch(
+ ns.srvReqId,
+ elemId,
+ syn,
+ toks,
+ _ => {
+ found = true
+ add("simple continuous scan", toksExt, syn)
+ }
+ )
tryMap(
h.txtDirectSynonyms,
@@ -514,52 +526,60 @@
// 1.2 Sparse.
if (!found && mdl.hasSparseSynonyms)
- for (s <- get(mdl.sparseSynonyms, eId))
- s.sparseMatch(toks) match {
- case Some(res) => add("simple sparse", ns, contCache, eId, greedy, res, idxs, s)
- case None => // No-op.
- }
+ for (syn <- get(mdl.sparseSynonyms, elemId))
+ NCSynonymsManager.onSparseMatch(
+ ns.srvReqId,
+ elemId,
+ syn,
+ toks,
+ res => add("simple sparse", getSparsedTokens(res, toks), syn)
+ )
}
// 2. IDL.
if (idlEnabled) {
- val allSyns = get(mdl.idlSynonyms, eId)
- lazy val allCombs = mkCombinations(ch, toks, idlCache.toSet)
+ val allSyns = get(mdl.idlSynonyms, elemId)
+ lazy val allCombs = mkCombinations(ch, toks)
// 2.1 Continuous.
-
if (!mdl.hasSparseSynonyms) {
var found = false
- for (
- s <- allSyns;
- comb <- allCombs
- if !found;
- data = comb.map(_.data)
- )
- if (s.isMatch(data, req)) {
- add("IDL continuous", ns, contCache, eId, greedy, toks, idxs, s, toParts(data, s))
+ for (syn <- allSyns; comb <- allCombs; if !found)
+ NCSynonymsManager.onMatch(
+ ns.srvReqId,
+ elemId,
+ syn,
+ comb,
+ req,
+ variantsToks,
+ _ => {
+ val parts = toParts(mdl, ns.srvReqId, comb, syn)
- idlCache += comb
+ add("IDL continuous", toksExt, syn, parts)
- found = true
- }
+ found = true
+ }
+ )
}
else
// 2.2 Sparse.
- for (
- s <- allSyns;
- comb <- allCombs
- )
- s.sparseMatch(comb.map(_.data), req) match {
- case Some(res) =>
- val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
+ for (syn <- allSyns; comb <- allCombs)
+ NCSynonymsManager.onSparseMatch(
+ ns.srvReqId,
+ elemId,
+ syn,
+ comb,
+ req,
+ variantsToks,
+ res => {
+ val toks = getSparsedTokens(toTokens(res, ns), toTokens(comb, ns))
+ val parts = toParts(mdl, ns.srvReqId, res, syn)
+ val typ = if (syn.sparse) "IDL sparse"else "IDL continuous"
- add(typ, ns, contCache, eId, greedy, toTokens(res, ns), idxs, s, toParts(res, s))
-
- idlCache += comb
- case None => // No-op.
- }
+ add(typ, toks, syn, parts)
+ }
+ )
}
}
}
@@ -576,6 +596,43 @@
processParsers(mdl, ns, span, req)
}
+
+ NCSynonymsManager.clearIteration(ns.srvReqId)
+
+ normalize(ns)
+ }
+
+ /**
+ *
+ * @param ns
+ */
+ private def normalize(ns: Sentence): Unit = {
+ val usrNotes = ns.flatten.filter(_.isUser).distinct
+ val links = NCSentenceManager.getLinks(usrNotes)
+ val parts = NCSentenceManager.getPartKeys(usrNotes)
+
+ val usrNotesIdxs = usrNotes.
+ filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
+ filter(n => !parts.contains(NCTokenPartKey(n, ns))).
+ zipWithIndex
+
+ usrNotesIdxs.
+ foreach { case (n, idx) =>
+ usrNotesIdxs.find { case (candidate, candidateIdx) =>
+ candidateIdx != idx &&
+ candidate.noteType == n.noteType &&
+ candidate.dataOpt("parts") == n.dataOpt("parts") &&
+ candidate.wordIndexesSet.subsetOf(n.wordIndexesSet) &&
+ n.wordIndexes.filter(n => !candidate.wordIndexes.contains(n)).
+ forall(wordIdx => ns.tokens.exists(t => t.wordIndexes.contains(wordIdx) && t.isStopWord))
+ } match {
+ case Some(better) =>
+ ns.removeNote(n)
+
+ logger.trace(s"Element removed: $n, better: $better")
+ case None => // No-op.
+ }
+ }
}
// TODO: simplify, add tests, check model properties (sparse etc) for optimization.
@@ -598,11 +655,11 @@
||
(
n.tokenIndexes == toksIdxsSorted ||
- n.tokenIndexes.containsSlice(toksIdxsSorted) &&
- U.isContinuous(toksIdxsSorted) &&
- U.isContinuous(n.tokenIndexes)
+ n.tokenIndexes.containsSlice(toksIdxsSorted) &&
+ U.isContinuous(toksIdxsSorted) &&
+ U.isContinuous(n.tokenIndexes)
)
)
))
}
-}
\ No newline at end of file
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 286c8b4..6e0780e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -17,7 +17,6 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort
-import java.io.Serializable
import io.opencensus.trace.Span
import org.apache.nlpcraft.common.NCService
import org.apache.nlpcraft.common.makro.NCMacroParser
@@ -26,6 +25,7 @@
import org.apache.nlpcraft.probe.mgrs.NCProbeModel
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import java.io.Serializable
import java.util.{List => JList}
import scala.collection.mutable
import scala.jdk.CollectionConverters._
@@ -187,59 +187,50 @@
*
* @param toksNoteData
*/
- private def split(toks: Seq[NCNlpSentenceToken], othersRefs: Seq[NCNlpSentenceToken], toksNoteData: Seq[NoteData], nullable: Boolean): Seq[Seq[NoteData]] = {
- val res =
- if (toksNoteData.nonEmpty) {
- val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
+ private def split(
+ toks: Seq[NCNlpSentenceToken],
+ othersRefs: Seq[NCNlpSentenceToken],
+ toksNoteData: Seq[NoteData]
+ ): Seq[Seq[NoteData]] =
+ if (toksNoteData.nonEmpty) {
+ val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
- /**
- * Returns flag which indicates are token contiguous or not.
- *
- * @param tok1Idx First token index.
- * @param tok2Idx Second token index.
- */
- def contiguous(tok1Idx: Int, tok2Idx: Int): Boolean = {
- val between = toks.filter(t => t.index > tok1Idx && t.index < tok2Idx)
+ /**
+ * Returns flag which indicates are token contiguous or not.
+ *
+ * @param tok1Idx First token index.
+ * @param tok2Idx Second token index.
+ */
+ def contiguous(tok1Idx: Int, tok2Idx: Int): Boolean = {
+ val between = toks.filter(t => t.index > tok1Idx && t.index < tok2Idx)
- between.isEmpty || between.forall(p => p.isStopWord || p.stem == stemAnd)
- }
-
- val toks2 = toks.filter(othersRefs.contains)
-
- val minIdx = toks2.dropWhile(t => !isUserNotValue(t)).head.index
- val maxIdx = toks2.reverse.dropWhile(t => !isUserNotValue(t)).head.index
-
- require(minIdx <= maxIdx)
-
- def fill(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
- seq += nd
-
- toksNoteData.
- filter(p => nd.indexes.last < p.indexes.head && contiguous(nd.indexes.last, p.indexes.head)).
- foreach(fill(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
-
- if (seq.nonEmpty && seq.head.indexes.head == minIdx && seq.last.indexes.last == maxIdx)
- res += seq
- }
-
- toksNoteData.filter(_.indexes.head == minIdx).foreach(p => fill(p))
-
- res
+ between.isEmpty || between.forall(p => p.isStopWord || p.stem == stemAnd)
}
- else
- Seq.empty
- if (res.isEmpty && !nullable)
- throw new AssertionError(s"Invalid empty result " +
- s"[tokensTexts=[${toks.map(_.origText).mkString("|")}]" +
- s", notes=[${toks.flatten.map(n => s"${n.noteType}:[${n.tokenIndexes.mkString(",")}]").mkString("|")}]" +
- s", tokensIndexes=[${toks.map(_.index).mkString("|")}]" +
- s", allData=[${toksNoteData.mkString("|")}]" +
- s"]"
- )
+ val toks2 = toks.filter(othersRefs.contains)
- res.toSeq
- }
+ val minIdx = toks2.dropWhile(t => !isUserNotValue(t)).head.index
+ val maxIdx = toks2.reverse.dropWhile(t => !isUserNotValue(t)).head.index
+
+ require(minIdx <= maxIdx)
+
+ def fill(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
+ seq += nd
+
+ toksNoteData.
+ filter(p => nd.indexes.last < p.indexes.head && contiguous(nd.indexes.last, p.indexes.head)).
+ foreach(fill(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
+
+ if (seq.nonEmpty && seq.head.indexes.head == minIdx && seq.last.indexes.last == maxIdx)
+ res += seq
+ }
+
+ toksNoteData.filter(_.indexes.head == minIdx).foreach(p => fill(p))
+
+ res
+ }
+ else
+ Seq.empty
/**
*
@@ -346,71 +337,75 @@
if (data1.nonEmpty || data2.nonEmpty) {
val seq1 =
if (data1.nonEmpty)
- split(part1, othersRefs, data1, nullable = false)
+ split(part1, othersRefs, data1)
else
- split(part2, othersRefs, data2, nullable = false)
- val seq2 =
- if (data1.nonEmpty && data2.nonEmpty)
- split(part2, othersRefs, data2, nullable = true)
- else
- Seq.empty
- val asc = orderOpt.flatMap(o => Some(order(o.synonymIndex)._2))
+ split(part2, othersRefs, data2)
- typ match {
- case TYPE_SUBJ =>
- require(seq1.nonEmpty)
- require(seq2.isEmpty)
- require(sortToks.nonEmpty)
+ if (seq1.nonEmpty) {
+ val seq2 =
+ if (data1.nonEmpty && data2.nonEmpty)
+ split(part2, othersRefs, data2)
+ else
+ Seq.empty
- // Ignores invalid cases.
- if (byToks.isEmpty)
- res =
- Some(
+ val asc = orderOpt.flatMap(o => Some(order(o.synonymIndex)._2))
+
+ typ match {
+ case TYPE_SUBJ =>
+ require(seq1.nonEmpty)
+ require(seq2.isEmpty)
+ require(sortToks.nonEmpty)
+
+ // Ignores invalid cases.
+ if (byToks.isEmpty)
+ res =
+ Some(
+ Match(
+ asc = asc,
+ main = sortToks,
+ stop = orderToks,
+ subjSeq = seq1,
+ bySeq = Seq.empty
+ )
+ )
+
+ case TYPE_SUBJ_BY =>
+ require(seq1.nonEmpty)
+ require(sortToks.nonEmpty)
+ require(byToks.nonEmpty)
+
+ if (seq2.isEmpty)
+ res = None
+ else
+ res = Some(
Match(
asc = asc,
main = sortToks,
- stop = orderToks,
+ stop = byToks ++ orderToks,
subjSeq = seq1,
- bySeq = Seq.empty
+ bySeq = seq2
)
)
- case TYPE_SUBJ_BY =>
- require(seq1.nonEmpty)
- require(sortToks.nonEmpty)
- require(byToks.nonEmpty)
+ case TYPE_BY =>
+ require(seq1.nonEmpty)
+ require(seq2.isEmpty)
+ require(sortToks.nonEmpty)
+ require(byToks.nonEmpty)
- if (seq2.isEmpty)
- res = None
- else
+ // `Sort by` as one element, see validation.
res = Some(
Match(
asc = asc,
- main = sortToks,
- stop = byToks ++ orderToks,
- subjSeq = seq1,
- bySeq = seq2
+ main = sortToks ++ byToks,
+ stop = orderToks,
+ subjSeq = Seq.empty,
+ bySeq = seq1
)
)
- case TYPE_BY =>
- require(seq1.nonEmpty)
- require(seq2.isEmpty)
- require(sortToks.nonEmpty)
- require(byToks.nonEmpty)
-
- // `Sort by` as one element, see validation.
- res = Some(
- Match(
- asc = asc,
- main = sortToks ++ byToks,
- stop = orderToks,
- subjSeq = Seq.empty,
- bySeq = seq1
- )
- )
-
- case _ => throw new AssertionError(s"Unexpected type: $typ")
+ case _ => throw new AssertionError(s"Unexpected type: $typ")
+ }
}
}
case None => // No-op.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
index fc904d2..c0abd73 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -17,8 +17,6 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword
-import java.io.Serializable
-
import io.opencensus.trace.Span
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceToken}
@@ -26,6 +24,7 @@
import org.apache.nlpcraft.probe.mgrs.NCProbeModel
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import java.io.Serializable
import scala.annotation.tailrec
/**
@@ -225,12 +224,20 @@
startScopedSpan(
"enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text
) { _ =>
- mark(mdl.exclStopWordsStems, f = false)
- mark(mdl.addStopWordsStems, f = true)
- processGeo(ns)
- processDate(ns)
- processNums(ns)
- processCommonStops(mdl, ns)
+ if (mdl.model.isStopWordsAllowed) {
+ mark(mdl.exclStopWordsStems, f = false)
+ mark(mdl.addStopWordsStems, f = true)
+
+ // If stop word swallowed by any built token (numeric, date etc) - it's stop word marking dropped.
+ ns.filter(t => t.isStopWord && !t.isNlp).foreach(t => ns.fixNote(t.getNlpNote, "stopWord" -> false))
+
+ processGeo(ns)
+ processDate(ns)
+ processNums(ns)
+ processCommonStops(mdl, ns)
+ }
+ else
+ ns.filter(_.isStopWord).foreach(t => ns.fixNote(t.getNlpNote, "stopWord" -> false))
}
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index d5dfc1e..00d6bdf 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -43,12 +43,7 @@
type CacheValue = Seq[Seq[NCNlpSentenceNote]]
private val combCache = mutable.HashMap.empty[String, mutable.HashMap[CacheKey, CacheValue]]
-
- /**
- *
- * @param notes
- */
- private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
+ def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
for (n <- notes.filter(n => n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references"))
@@ -77,16 +72,31 @@
/**
*
+ * @param n
+ */
+ private def getParts(n: NCNlpSentenceNote): Option[Seq[NCTokenPartKey]] = {
+ val res: Option[JList[NCTokenPartKey]] = n.dataOpt("parts")
+
+ res match {
+ case Some(v) => Some(v.asScala)
+ case None => None
+ }
+ }
+
+ /**
+ *
* @param notes
*/
- private def getPartKeys(notes: NCNlpSentenceNote*): Seq[NCTokenPartKey] =
- notes.
- filter(_.isUser).
- flatMap(n => {
- val optList: Option[JList[NCTokenPartKey]] = n.dataOpt("parts")
+ def getPartKeys(notes: Seq[NCNlpSentenceNote]): Seq[NCTokenPartKey] =
+ notes.filter(_.isUser).flatMap(getParts).flatten.distinct
- optList
- }).flatMap(_.asScala).distinct
+ /**
+ *
+ * @param note
+ * @return
+ */
+ def getPartKeys(note: NCNlpSentenceNote): Seq[NCTokenPartKey] =
+ if (note.isUser) getParts(note).getOrElse(Seq.empty) else Seq.empty
/**
*
@@ -213,7 +223,8 @@
private def simpleCopy(
ns: NCNlpSentence,
history: mutable.ArrayBuffer[(Int, Int)],
- toksCopy: NCNlpSentence, i: Int
+ toksCopy: NCNlpSentence,
+ i: Int
): Seq[NCNlpSentenceToken] = {
val tokCopy = toksCopy(i)
@@ -279,9 +290,9 @@
private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]): Unit = {
// Replaces other notes indexes.
for (t <- userNoteTypes :+ "nlpcraft:nlp"; note <- ns.getNotes(t)) {
- val toks = ns.filter(_.contains(note)).sortBy(_.index)
+ val toks = ns.filter(_.contains(note))
- val newNote = note.clone(toks.map(_.index).toSeq, toks.flatMap(_.wordIndexes).toSeq.sorted)
+ val newNote = note.clone(toks.map(_.index), toks.flatMap(_.wordIndexes).toSeq.sorted)
toks.foreach(t => {
t.remove(note)
@@ -486,8 +497,9 @@
*
* @param ns Sentence.
* @param notNlpTypes Token types.
+ * @param lastPhase Phase.
*/
- private def collapseSentence(ns: NCNlpSentence, notNlpTypes: Seq[String]): Boolean = {
+ private def collapseSentence(ns: NCNlpSentence, notNlpTypes: Seq[String], lastPhase: Boolean): Boolean = {
ns.
filter(!_.isNlp).
filter(_.isStopWord).
@@ -522,7 +534,8 @@
fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, histSeq) &&
fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, histSeq)
- if (res) {
+ // On last phase - just for performance reasons.
+ if (res && lastPhase) {
// Validation (all indexes calculated well)
require(
!res ||
@@ -544,21 +557,23 @@
* @param mdl
* @param ns
*/
- private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit =
- if (!mdl.getAbstractTokens.isEmpty) {
- val notes = ns.flatten
+ private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit = {
+ val abstractToks = mdl.getAbstractTokens
- val keys = getPartKeys(notes: _*)
+ if (!abstractToks.isEmpty) {
+ val notes = ns.flatten.distinct.filter(n => abstractToks.contains(n.noteType))
+
+ val keys = getPartKeys(notes)
val noteLinks = getLinks(notes)
notes.filter(n => {
- val noteToks = ns.tokens.filter(_.contains(n))
+ lazy val noteToks = ns.tokens.filter(t => t.index >= n.tokenFrom && t.index <= n.tokenTo)
- mdl.getAbstractTokens.contains(n.noteType) &&
- !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
- !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
+ !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted)) &&
+ !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex))
}).foreach(ns.removeNote)
}
+ }
/**
*
@@ -602,7 +617,7 @@
if (lastPhase)
dropAbstract(mdl, ns)
- if (collapseSentence(ns, getNotNlpNotes(ns.toSeq).map(_.noteType).distinct)) Some(ns) else None
+ if (collapseSentence(ns, getNotNlpNotes(ns.tokens).map(_.noteType).distinct, lastPhase)) Some(ns) else None
}
// Always deletes `similar` notes.
@@ -635,8 +650,8 @@
redundant.foreach(sen.removeNote)
var delCombs: Seq[NCNlpSentenceNote] =
- getNotNlpNotes(sen.toSeq).
- flatMap(note => getNotNlpNotes(note.tokenIndexes.sorted.map(i => sen(i))).filter(_ != note)).
+ getNotNlpNotes(sen.tokens).
+ flatMap(note => getNotNlpNotes(note.tokenIndexes.map(sen(_))).filter(_ != note)).
distinct
// Optimization. Deletes all wholly swallowed notes.
@@ -647,9 +662,9 @@
// There aren't links on it.
filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
// It doesn't have links.
- filter(getPartKeys(_).isEmpty).
+ filter(n => getPartKeys(n).isEmpty).
flatMap(note => {
- val noteWordsIdxs = note.wordIndexes.toSet
+ val noteWordsIdxs = note.wordIndexesSet
val key = NCTokenPartKey(note, sen)
val delCombOthers =
@@ -657,7 +672,7 @@
if (
delCombOthers.nonEmpty &&
- !delCombOthers.exists(o => noteWordsIdxs.subsetOf(o.wordIndexes.toSet))
+ !delCombOthers.exists(o => noteWordsIdxs.subsetOf(o.wordIndexesSet))
)
Some(note)
else
@@ -675,7 +690,7 @@
groupBy { case (idx, _) => idx }.
map { case (_, seq) => seq.map { case (_, note) => note }.toSet }.
toSeq.sortBy(-_.size)
-
+
def findCombinations(): Seq[Seq[NCNlpSentenceNote]] =
NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq)
@@ -709,7 +724,7 @@
Holder(
// We have to delete some keys to have possibility to compare sentences.
- notes.map(_.clone().filter { case (name, _) => name != "direct" }).toSeq,
+ notes.map(_.clone().toMap.filter { case (name, _) => name != "direct" }).toSeq,
sen,
notes.filter(_.isNlp).map(p => if (p.isDirect) 0 else 1).sum
)
@@ -720,8 +735,6 @@
else
collapse0(sen).flatMap(p => Option(Seq(p))).getOrElse(Seq.empty)
- sens = sens.distinct
-
sens.par.foreach(sen =>
sen.foreach(tok =>
tok.size match {
@@ -732,7 +745,6 @@
)
)
-
def notNlpNotes(s: NCNlpSentence): Seq[NCNlpSentenceNote] = s.flatten.filter(!_.isNlp)
// Drops similar sentences (with same notes structure). Keeps with more found.
@@ -752,20 +764,38 @@
}
}.toSeq
+ var sensWithNotes = sens.map(s => s -> s.flatten.filter(!_.isNlp).toSet)
+
+ var sensWithNotesIdxs = sensWithNotes.zipWithIndex
+
sens =
- sens.filter(s => {
- def mkNotNlp(s: NCNlpSentence): Set[NCNlpSentenceNote] = s.flatten.filter(!_.isNlp).toSet
-
- val notNlpNotes = mkNotNlp(s)
-
- !sens.filter(_ != s).map(mkNotNlp).exists(notNlpNotes.subsetOf)
- })
+ sensWithNotesIdxs.filter { case ((_, notNlpNotes1), idx1) =>
+ !sensWithNotesIdxs.
+ filter { case (_, idx2) => idx2 != idx1 }.
+ exists { case((_, notNlpNotes2), _) => notNlpNotes1.subsetOf(notNlpNotes2) }
+ }.map { case ((sen, _), _) => sen }
// Drops similar sentences (with same tokens structure).
// Among similar sentences we prefer one with minimal free words count.
- sens.groupBy(notNlpNotes(_).map(_.getKey(withIndexes = false))).
+ sens = sens.groupBy(notNlpNotes(_).map(_.getKey(withIndexes = false))).
map { case (_, seq) => seq.minBy(_.filter(p => p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
toSeq
+
+ // Drops sentences if they are just subset of another.
+ sensWithNotes = sensWithNotes.filter { case (sen, _) => sens.contains(sen) }
+
+ sensWithNotesIdxs = sensWithNotes.zipWithIndex
+
+ sens = sensWithNotesIdxs.filter { case ((_, notNlpNotes1), idx1) =>
+ !sensWithNotesIdxs.exists { case ((_, notNlpNotes2), idx2) =>
+ idx1 != idx2 && {
+ notNlpNotes2.size > notNlpNotes1.size &&
+ notNlpNotes1.forall(t1 => notNlpNotes2.exists(_.equalsWithoutIndexes(t1)))
+ }
+ }
+ }.map { case ((sen, _), _) => sen }
+
+ sens
}
override def start(parent: Span): NCService = {
@@ -797,5 +827,5 @@
*
* @param srvReqId
*/
- def clearCache(srvReqId: String): Unit = combCache -= srvReqId
+ def clearRequestData(srvReqId: String): Unit = combCache -= srvReqId
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
new file mode 100644
index 0000000..e2d59f6
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
@@ -0,0 +1,436 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.synonyms
+
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceNote => NlpNote, NCNlpSentenceToken => NlpToken}
+import org.apache.nlpcraft.common.{NCService, U}
+import org.apache.nlpcraft.model._
+import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{IDL, NCSynonymChunkKind, REGEX, TEXT}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeIdlToken => IdlToken, NCProbeSynonymChunk, NCProbeSynonym => Synonym}
+
+import scala.collection.mutable
+import scala.collection.parallel.CollectionConverters.ImmutableIterableIsParallelizable
+import scala.compat.java8.OptionConverters._
+import scala.jdk.CollectionConverters.ListHasAsScala
+
+/**
+ *
+ */
+object NCSynonymsManager extends NCService {
+ private class CacheHolder[T] {
+ private lazy val cache =
+ mutable.HashMap.empty[String, mutable.HashMap[Int, mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]]
+
+ def isUnprocessed(elemId: String, s: Synonym, tokens: Seq[T]): Boolean =
+ cache.
+ getOrElseUpdate(
+ elemId,
+ mutable.HashMap.empty[Int, mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]
+ ).
+ getOrElseUpdate(
+ tokens.length,
+ mutable.HashMap.empty[Seq[T], mutable.HashSet[Synonym]]
+ ).
+ getOrElseUpdate(
+ tokens,
+ mutable.HashSet.empty[Synonym]
+ ).add(s)
+ }
+
+ private case class SavedIdlKey(id: String, startCharIndex: Int, endCharIndex: Int, other: Map[String, AnyRef] = Map.empty)
+
+ private object SavedIdlKey {
+ def apply(t: NCToken): SavedIdlKey =
+ if (t.isUserDefined)
+ SavedIdlKey(t.getId, t.getStartCharIndex, t.getEndCharIndex)
+ else
+ SavedIdlKey(
+ t.getId,
+ t.getStartCharIndex,
+ t.getEndCharIndex,
+ NlpNote.getBuiltProperties(t.getId).flatMap(p => t.metaOpt(p).asScala match {
+ case Some(v) => Some(p -> v)
+ case None => None
+ }).toMap
+ )
+ }
+
+ private case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction) {
+ override def toString: String = variants.toString()
+ }
+
+ private val savedIdl = mutable.HashMap.empty[String, mutable.HashMap[SavedIdlKey, mutable.ArrayBuffer[Value]]]
+ private val idlChunksCache = mutable.HashMap.empty[String, mutable.HashMap[(IdlToken, NCProbeSynonymChunk), Boolean]]
+ private val idlCaches = mutable.HashMap.empty[String, CacheHolder[IdlToken]]
+ private val tokCaches = mutable.HashMap.empty[String, CacheHolder[Int]]
+
+ override def start(parent: Span): NCService = {
+ ackStarting()
+
+ ackStarted()
+ }
+
+ override def stop(parent: Span): Unit = {
+ ackStopping()
+
+ ackStopped()
+ }
+
+ /**
+ *
+ * @param tok
+ * @param chunk
+ */
+ private def isMatch(tok: NlpToken, chunk: NCProbeSynonymChunk): Boolean =
+ chunk.kind match {
+ case TEXT => chunk.wordStem == tok.stem
+ case REGEX => chunk.regex.matcher(tok.origText).matches() || chunk.regex.matcher(tok.normText).matches()
+ case IDL => throw new AssertionError()
+ case _ => throw new AssertionError()
+ }
+
+ /**
+ *
+ * @param kind
+ */
+ private def getSort(kind: NCSynonymChunkKind): Int =
+ kind match {
+ case TEXT => 0
+ case IDL => 1
+ case REGEX => 2
+ case _ => throw new AssertionError(s"Unexpected kind: $kind")
+ }
+
+ /**
+ *
+ * @param s
+ * @param toks
+ * @param isMatch
+ * @param getIndex
+ * @param shouldBeNeighbors
+ * @tparam T
+ */
+ private def sparseMatch0[T](
+ s: Synonym,
+ toks: Seq[T],
+ isMatch: (T, NCProbeSynonymChunk) => Boolean,
+ getIndex: T => Int,
+ shouldBeNeighbors: Boolean
+ ): Option[Seq[T]] =
+ if (toks.size >= s.size) {
+ lazy val res = mutable.ArrayBuffer.empty[T]
+ lazy val all = mutable.HashSet.empty[T]
+
+ var state = 0
+
+ for (chunk <- s if state != -1) {
+ val seq =
+ if (state == 0) {
+ state = 1
+
+ toks.filter(t => isMatch(t, chunk))
+ }
+ else
+ toks.filter(t => !res.contains(t) && isMatch(t, chunk))
+
+ if (seq.nonEmpty) {
+ val head = seq.head
+
+ if (!s.permute && res.nonEmpty && getIndex(head) <= getIndex(res.last))
+ state = -1
+ else {
+ all ++= seq
+
+ if (all.size > s.size)
+ state = -1
+ else
+ res += head
+ }
+ }
+ else
+ state = -1
+ }
+
+ if (state != -1 && all.size == res.size && (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).toSeq.sorted)))
+ Some(res.toSeq)
+ else
+ None
+ }
+ else
+ None
+
+ /**
+ *
+ * @param req
+ * @param tok
+ * @param pred
+ * @param variantsToks
+ */
+ private def save(req: NCRequest, tok: NCToken, pred: NCIdlFunction, variantsToks: Seq[Seq[NCToken]]): Unit = {
+ savedIdl.
+ getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty).
+ getOrElseUpdate(SavedIdlKey(tok), mutable.ArrayBuffer.empty) +=
+ Value(req, variantsToks, pred)
+ }
+
+ /**
+ *
+ * @param srvReqId
+ * @param elemId
+ * @param s
+ * @param tokens
+ */
+ private def isUnprocessedTokens(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[Int]): Boolean =
+ tokCaches.getOrElseUpdate(srvReqId, new CacheHolder[Int]).isUnprocessed(elemId, s, tokens)
+
+ /**
+ *
+ * @param srvReqId
+ * @param elemId
+ * @param s
+ * @param tokens
+ */
+ private def isUnprocessedIdl(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[IdlToken]): Boolean =
+ idlCaches.getOrElseUpdate(srvReqId, new CacheHolder[IdlToken]).isUnprocessed(elemId, s, tokens)
+
+ /**
+ *
+ * @param tow
+ * @param chunk
+ * @param req
+ * @param variantsToks
+ */
+ private def isMatch(
+ tow: IdlToken, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
+ ): Boolean =
+ idlChunksCache.
+ getOrElseUpdate(req.getServerRequestId,
+ mutable.HashMap.empty[(IdlToken, NCProbeSynonymChunk), Boolean]
+ ).
+ getOrElseUpdate(
+ (tow, chunk),
+ {
+ def get0[T](fromToken: NCToken => T, fromWord: NlpToken => T): T =
+ if (tow.isToken) fromToken(tow.token) else fromWord(tow.word)
+
+ chunk.kind match {
+ case TEXT => chunk.wordStem == get0(_.stem, _.stem)
+
+ case REGEX =>
+ chunk.regex.matcher(get0(_.origText, _.origText)).matches() ||
+ chunk.regex.matcher(get0(_.normText, _.normText)).matches()
+
+ case IDL =>
+ val ok =
+ variantsToks.par.exists(vrntToks =>
+ get0(t =>
+ chunk.idlPred.apply(t, NCIdlContext(toks = vrntToks, req = req)).
+ value.asInstanceOf[Boolean],
+ _ => false
+ )
+ )
+
+ if (ok)
+ save(req, tow.token, chunk.idlPred, variantsToks)
+
+ ok
+
+ case _ => throw new AssertionError()
+ }
+ }
+ )
+
+ /**
+ *
+ * @param srvReqId
+ * @param elemId
+ * @param syn
+ * @param toks
+ * @param callback
+ */
+ def onMatch(srvReqId: String, elemId: String, syn: Synonym, toks: Seq[NlpToken], callback: Unit => Unit): Unit =
+ if (isUnprocessedTokens(srvReqId, elemId, syn, toks.map(_.index))) {
+ require(toks != null)
+ require(!syn.sparse && !syn.hasIdl)
+
+ if (
+ toks.length == syn.length && {
+ if (syn.isTextOnly)
+ toks.zip(syn).forall(p => p._1.stem == p._2.wordStem)
+ else
+ toks.zip(syn).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
+ }
+ )
+ callback()
+ }
+
+ /**
+ *
+ * @param srvReqId
+ * @param elemId
+ * @param s
+ * @param toks
+ * @param req
+ * @param variantsToks
+ * @param callback
+ */
+ def onMatch(
+ srvReqId: String,
+ elemId: String,
+ s: Synonym,
+ toks: Seq[IdlToken],
+ req: NCRequest,
+ variantsToks: Seq[Seq[NCToken]],
+ callback: Unit => Unit
+ ): Unit =
+ if (isUnprocessedIdl(srvReqId, elemId, s, toks)) {
+ require(toks != null)
+
+ if (
+ toks.length == s.length &&
+ toks.count(_.isToken) >= s.idlChunks && {
+ toks.zip(s).sortBy(p => getSort(p._2.kind)).forall {
+ case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
+ }
+ }
+ )
+ callback()
+ }
+
+ /**
+ *
+ * @param srvReqId
+ * @param elemId
+ * @param syn
+ * @param toks
+ * @param callback
+ */
+ def onSparseMatch(
+ srvReqId: String, elemId: String, syn: Synonym, toks: Seq[NlpToken], callback: Seq[NlpToken] => Unit
+ ): Unit =
+ if (isUnprocessedTokens(srvReqId, elemId, syn, toks.map(_.index))) {
+ require(toks != null)
+ require(syn.sparse && !syn.hasIdl)
+
+ sparseMatch0(syn, toks, isMatch, (t: NlpToken) => t.startCharIndex, shouldBeNeighbors = false) match {
+ case Some(res) => callback(res)
+ case None => // No-op.
+ }
+ }
+
+ /**
+ *
+ * @param srvReqId
+ * @param elemId
+ * @param syn
+ * @param toks
+ * @param req
+ * @param variantsToks
+ * @param callback
+ */
+ def onSparseMatch(
+ srvReqId: String,
+ elemId: String,
+ syn: Synonym,
+ toks: Seq[IdlToken],
+ req: NCRequest,
+ variantsToks: Seq[Seq[NCToken]],
+ callback: Seq[IdlToken] => Unit
+ ): Unit =
+ if (isUnprocessedIdl(srvReqId, elemId, syn, toks)) {
+ require(toks != null)
+ require(req != null)
+ require(syn.hasIdl)
+
+ sparseMatch0(
+ syn,
+ toks,
+ (t: IdlToken, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
+ (t: IdlToken) => if (t.isToken) t.token.getStartCharIndex else t.word.startCharIndex,
+ shouldBeNeighbors = !syn.sparse
+ ) match {
+ case Some(res) => callback(res)
+ case None => // No-op.
+ }
+ }
+
+ /**
+ *
+ * @param srvReqId
+ * @param senToks
+ */
+ def isStillValidIdl(srvReqId: String, senToks: Seq[NCToken]): Boolean =
+ savedIdl.get(srvReqId) match {
+ case Some(m) =>
+ lazy val allCheckedSenToks = {
+ val set = mutable.HashSet.empty[SavedIdlKey]
+
+ def add(t: NCToken): Unit = {
+ set += SavedIdlKey(t)
+
+ t.getPartTokens.asScala.foreach(add)
+ }
+
+ senToks.foreach(add)
+
+ set
+ }
+
+ senToks.forall(tok =>
+ m.get(SavedIdlKey(tok)) match {
+ case Some(vals) =>
+ vals.exists(
+ v =>
+ v.variants.exists(winHistVariant =>
+ v.predicate.apply(
+ tok, NCIdlContext(toks = winHistVariant, req = v.request)
+ ).value.asInstanceOf[Boolean] &&
+ winHistVariant.map(SavedIdlKey(_)).forall(t =>
+ t.id == "nlpcraft:nlp" || allCheckedSenToks.contains(t)
+ )
+ )
+ )
+
+ case None => true
+ })
+
+ case None => true
+ }
+
+ /**
+ *
+ * @param srvReqId
+ */
+ def clearRequestData(srvReqId: String): Unit = {
+ clearIteration(srvReqId)
+
+ savedIdl -= srvReqId
+ }
+
+ /**
+ *
+ * @param srvReqId
+ */
+ def clearIteration(srvReqId: String): Unit = {
+ idlChunksCache -= srvReqId
+ idlCaches -= srvReqId
+ tokCaches -= srvReqId
+ }
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 636b263..df745a0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -156,7 +156,7 @@
if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
- h.sentence
+ h.sentence.copy(Some(srvReqId))
}
else
process(srvReqId, normTxt, enabledBuiltInToks, span)
@@ -206,7 +206,10 @@
(x._1 * 100) + x._2.indexOf(hdr.noteName)
})
- val tbl = NCAsciiTable(headers.map(_.header))
+ val tbl = NCAsciiTable(headers.map(hdr => {
+ val s = hdr.header
+ if (s == "nlp:stopWord") s"${r(s)}" else s
+ }))
/**
*
@@ -214,15 +217,23 @@
* @param hdr
* @return
*/
- def mkNoteValue(tok: NCNlpSentenceToken, hdr: Header): Seq[String] =
+ def mkNoteValue(tok: NCNlpSentenceToken, hdr: Header): Seq[String] = {
+ val isStopWord = tok.isStopWord
+
tok
.getNotes(hdr.noteType)
.filter(_.contains(hdr.noteName))
- .map(_(hdr.noteName).toString())
- .toSeq
+ .map(note => {
+ val s = note(hdr.noteName).toString
- for (tok <- s)
+ if (isStopWord) s"${r(s)}" else s
+ })
+ .toSeq
+ }
+
+ for (tok <- s) {
tbl += (headers.map(mkNoteValue(tok, _)): _*)
+ }
tbl
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
index 670a4dc..cf39575 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
@@ -207,7 +207,7 @@
toIncl: Boolean,
toFractional: Boolean,
unitDataOpt: Option[NCNumericUnitData],
- ): Seq[NCNlpSentenceNote] = {
+ ): Unit= {
val params =
mutable.ArrayBuffer.empty[(String, Any)] ++
Seq(
@@ -223,7 +223,7 @@
"isToPositiveInfinity" -> (to == MAX_VALUE)
)
- def mkAndAssign(toks: Seq[NCNlpSentenceToken], typ: String, params: (String, Any)*):NCNlpSentenceNote = {
+ def mkAndAssign(toks: Seq[NCNlpSentenceToken], params: (String, Any)*):NCNlpSentenceNote = {
val note = NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*)
toks.foreach(_.add(note))
@@ -241,17 +241,17 @@
}
if (unitData.tokens == toks)
- Seq(mkAndAssign(toks, "nlpcraft:num", extend():_*))
+ Seq(mkAndAssign(toks, extend():_*))
else {
Seq(
mkAndAssign(
- toks.filter(t => !unitData.tokens.contains(t)), "nlpcraft:num", params.clone():_*
+ toks.filter(t => !unitData.tokens.contains(t)), params.clone():_*
),
- mkAndAssign(toks, "nlpcraft:num", extend():_*)
+ mkAndAssign(toks, extend():_*)
)
}
- case None => Seq(mkAndAssign(toks, "nlpcraft:num", params:_*))
+ case None => Seq(mkAndAssign(toks, params:_*))
}
}
@@ -316,7 +316,7 @@
Some(NCNumericUnitData(num1.unitData.get.unit, num1.tokens ++ num2.tokens))
}
- val notes = p._2 match {
+ p._2 match {
case BETWEEN_EXCLUSIVE =>
mkNotes(
prepToks,
@@ -364,79 +364,75 @@
processed ++= toks
- val notes =
- prep.prepositionType match {
- case MORE =>
- mkNotes(
- toks,
- num.value,
- fromIncl = false,
- fromFractional = num.isFractional,
- to = MAX_VALUE,
- toIncl = true,
- toFractional = num.isFractional,
- num.unitData
- )
- case MORE_OR_EQUAL =>
- mkNotes(
- toks,
- num.value,
- fromIncl = true,
- fromFractional = num.isFractional,
- to = MAX_VALUE,
- toIncl = true,
- toFractional = num.isFractional,
- num.unitData
- )
- case LESS =>
- mkNotes(
- toks,
- MIN_VALUE,
- fromIncl = true,
- fromFractional = num.isFractional,
- to = num.value,
- toIncl = false,
- toFractional = num.isFractional,
- num.unitData
- )
- case LESS_OR_EQUAL =>
- mkNotes(
- toks,
- MIN_VALUE,
- fromIncl = true,
- fromFractional = num.isFractional,
- to = num.value,
- toIncl = true,
- toFractional = num.isFractional,
- num.unitData
- )
- case EQUAL =>
- mkNotes(
- toks,
- num.value,
- fromIncl = true,
- fromFractional = num.isFractional,
- to = num.value,
- toIncl = true,
- toFractional = num.isFractional,
- num.unitData
- )
- case NOT_EQUAL =>
- mkNotes(
- toks,
- num.value,
- fromIncl = false,
- fromFractional = num.isFractional,
- to = num.value,
- toIncl = false,
- toFractional = num.isFractional,
- num.unitData
- )
- case _ => throw new AssertionError(s"Illegal note type: ${prep.prepositionType}.")
- }
-
- for (note <- notes)
- toks.foreach(_.add(note))
+ prep.prepositionType match {
+ case MORE =>
+ mkNotes(
+ toks,
+ num.value,
+ fromIncl = false,
+ fromFractional = num.isFractional,
+ to = MAX_VALUE,
+ toIncl = true,
+ toFractional = num.isFractional,
+ num.unitData
+ )
+ case MORE_OR_EQUAL =>
+ mkNotes(
+ toks,
+ num.value,
+ fromIncl = true,
+ fromFractional = num.isFractional,
+ to = MAX_VALUE,
+ toIncl = true,
+ toFractional = num.isFractional,
+ num.unitData
+ )
+ case LESS =>
+ mkNotes(
+ toks,
+ MIN_VALUE,
+ fromIncl = true,
+ fromFractional = num.isFractional,
+ to = num.value,
+ toIncl = false,
+ toFractional = num.isFractional,
+ num.unitData
+ )
+ case LESS_OR_EQUAL =>
+ mkNotes(
+ toks,
+ MIN_VALUE,
+ fromIncl = true,
+ fromFractional = num.isFractional,
+ to = num.value,
+ toIncl = true,
+ toFractional = num.isFractional,
+ num.unitData
+ )
+ case EQUAL =>
+ mkNotes(
+ toks,
+ num.value,
+ fromIncl = true,
+ fromFractional = num.isFractional,
+ to = num.value,
+ toIncl = true,
+ toFractional = num.isFractional,
+ num.unitData
+ )
+ case NOT_EQUAL =>
+ mkNotes(
+ toks,
+ num.value,
+ fromIncl = false,
+ fromFractional = num.isFractional,
+ to = num.value,
+ toIncl = false,
+ toFractional = num.isFractional,
+ num.unitData
+ )
+ case _ => throw new AssertionError(s"Illegal note type: ${prep.prepositionType}.")
+ }
}
}
@@ -448,7 +444,7 @@
// Numeric without conditions.
for (num <- nums if !processed.exists(num.tokens.contains)) {
- val notes = mkNotes(
+ mkNotes(
num.tokens,
num.value,
fromIncl = true,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index d5b3ba9..e8265e1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -762,10 +762,10 @@
errCode
)
- logger.trace(s"Error result processed [srvReqId=${m(srvReqId)}, error=$err, code=$errCode]")
+ logger.info(s"${rb(w(" REJECT "))} result processed [srvReqId=${m(srvReqId)}, error=$err, code=$errCode]")
}
else { // OK result.
- require(resTypeOpt.isDefined && resBodyOpt.isDefined, "Result defined")
+ require(resTypeOpt.isDefined && resBodyOpt.isDefined)
NCQueryManager.setResult(
srvReqId,
@@ -776,7 +776,7 @@
intentId
)
- logger.trace(s"OK result processed [srvReqId=${m(srvReqId)}]")
+ logger.info(s"${gb(w(" SUCCESS "))} result processed [srvReqId=${m(srvReqId)}]")
}
}
catch {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
index a12a4e8..877cf60 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
@@ -262,7 +262,7 @@
startScopedSpan("future", parent, "srvReqId" -> srvReqId) { span =>
val tbl = NCAsciiTable()
- tbl += (s"${b("Text")}", rv(txt0))
+ tbl += (s"${b("Text")}", rv(" " + txt0 + " "))
tbl += (s"${b("User ID")}", usr.id)
tbl += (s"${b("Model ID")}", mdlId)
tbl += (s"${b("Agent")}", usrAgent.getOrElse("<n/a>"))
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
index daf1ab0..9d4c746 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
@@ -36,10 +36,12 @@
var permutateSynonyms: Optional[lang.Boolean] = super.isPermutateSynonyms
var sparse: Optional[lang.Boolean] = super.isSparse
var greedy: Optional[lang.Boolean] = super.isGreedy
+ var groups: Seq[String] = Seq(id)
override def getId: String = id
override def getSynonyms: util.List[String] = (syns :+ id).asJava
override def getValues: util.List[NCValue] = values
+ override def getGroups: util.List[String] = groups.asJava
override def getMetadata: util.Map[String, AnyRef] = metadata
override def getDescription: String = description
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stm/indexes/NCSpecModelAdapter.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stm/indexes/NCSpecModelAdapter.scala
index c0a8ac4..f9911f6 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stm/indexes/NCSpecModelAdapter.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stm/indexes/NCSpecModelAdapter.scala
@@ -19,11 +19,12 @@
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
+import org.apache.nlpcraft.NCTestElement
import org.apache.nlpcraft.model.{NCElement, NCModelAdapter}
import java.util
-import java.util.Collections
-import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava}
+import java.util.Optional
+import scala.jdk.CollectionConverters.SetHasAsJava
object NCSpecModelAdapter {
val mapper = new ObjectMapper()
@@ -34,8 +35,8 @@
class NCSpecModelAdapter extends NCModelAdapter("nlpcraft.stm.idxs.test", "STM Indexes Test Model", "1.0") {
override def getElements: util.Set[NCElement] =
Set(
- mkElement("A2", "G1", "a a"),
- mkElement("B2", "G1", "b b"),
+ mkElement("A2", "G1", "a a", greedy = false),
+ mkElement("B2", "G1", "b b", greedy = false),
mkElement("X", "G2", "x"),
mkElement("Y", "G2", "y"),
@@ -43,14 +44,12 @@
mkElement("Z", "G3", "z")
).asJava
- private def mkElement(id: String, group: String, syns: String*): NCElement =
- new NCElement {
- override def getId: String = id
- override def getSynonyms: util.List[String] = {
- val seq: Seq[String] = syns
+ private def mkElement(id: String, group: String, syns: String, greedy: Boolean = true): NCElement = {
+ val e = NCTestElement(id, syns)
- seq.asJava
- }
- override def getGroups: util.List[String] = Collections.singletonList(group)
- }
+ e.greedy = Optional.of(greedy)
+ e.groups = Seq(group)
+
+ e
+ }
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsAllowedSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsAllowedSpec.scala
new file mode 100644
index 0000000..3f87f35
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsAllowedSpec.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.stop
+
+import org.apache.nlpcraft.model.{NCContext, NCElement, NCModelAdapter, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.jdk.CollectionConverters.CollectionHasAsScala
+import scala.language.implicitConversions
+
+/**
+ *
+ */
+class NCStopWordsAllowedModelAdapter extends NCModelAdapter("nlpcraft.test", "Test Model", "1.0") {
+ override def getElements: util.Set[NCElement] = Set(NCTestElement("a", "the test"))
+
+ override def onContext(ctx: NCContext): NCResult = {
+ ctx.getVariants.asScala.forall(t => t.asScala.exists(_.isStopWord) == isStopWordsAllowed)
+
+ NCResult.text("OK")
+ }
+}
+/**
+ *
+ */
+class NCStopWordsAllowedModel extends NCStopWordsAllowedModelAdapter {
+ override def isStopWordsAllowed: Boolean = true
+}
+
+/**
+ *
+ */
+class NCStopWordsNotAllowedModel extends NCStopWordsAllowedModelAdapter {
+ override def isStopWordsAllowed: Boolean = false
+}
+
+/**
+ *
+ */
+@NCTestEnvironment(model = classOf[NCStopWordsAllowedModel], startClient = true)
+class NCStopWordsAllowedSpec extends NCTestContext {
+ @Test
+ def test(): Unit = {
+ checkResult("the", "OK")
+ checkResult("the test", "OK")
+ checkResult("the the test", "OK")
+ checkResult("test the the test", "OK")
+ }
+}
+
+/**
+ *
+ */
+@NCTestEnvironment(model = classOf[NCStopWordsNotAllowedModel], startClient = true)
+class NCStopWordsNotAllowedSpec extends NCStopWordsAllowedSpec
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsBaseSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsBaseSpec.scala
new file mode 100644
index 0000000..07ca216
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsBaseSpec.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.stop
+
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCModelAdapter, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.language.implicitConversions
+
+/**
+ *
+ */
+class NCStopWordsBaseModel extends NCModelAdapter("nlpcraft.test", "Test Model", "1.0") {
+ override def getElements: util.Set[NCElement] = Set(
+ NCTestElement("a"),
+ NCTestElement("b"),
+ NCTestElement("xy", "x y"),
+ )
+
+ @NCIntent(
+ "intent=twoWords " +
+ " term(a)~{# == 'a'}" +
+ " term(b)~{# == 'b'}"
+ )
+ def onTwoWords(): NCResult = NCResult.text("OK")
+
+ @NCIntent(
+ "intent=oneWord " +
+ " term(xt)~{# == 'xy'}"
+ )
+ def onOneWord(): NCResult = NCResult.text("OK")
+}
+
+/**
+ *
+ */
+@NCTestEnvironment(model = classOf[NCStopWordsBaseModel], startClient = true)
+class NCStopWordsBaseSpec extends NCTestContext {
+ @Test
+ def testTwoWords(): Unit = {
+ checkIntent("a b", "twoWords")
+ checkIntent("a the b", "twoWords")
+ checkIntent("a the the b", "twoWords")
+ checkIntent("the a the b", "twoWords")
+ checkIntent("the a the b the the", "twoWords")
+ }
+
+ @Test
+ def testOneWord(): Unit = {
+ checkIntent("x y", "oneWord")
+ checkIntent("x the y", "oneWord")
+ checkIntent("x the the y", "oneWord")
+ checkIntent("the x the y", "oneWord")
+ checkIntent("the x the y the the", "oneWord")
+ }
+}
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsInsideSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsInsideSpec.scala
new file mode 100644
index 0000000..b51207c
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/stop/NCStopWordsInsideSpec.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.stop
+
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch, NCModelAdapter, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.jdk.CollectionConverters.CollectionHasAsScala
+import scala.language.implicitConversions
+
+/**
+ *
+ */
+class NCStopWordsInsideModel extends NCModelAdapter("nlpcraft.test", "Test Model", "1.0") {
+ override def getElements: util.Set[NCElement] = Set(NCTestElement("complex", "a b"))
+
+ @NCIntent("intent=i term={# == 'complex'}")
+ def onI(ctx: NCIntentMatch): NCResult = {
+ require(ctx.getContext.getVariants.size() == 1)
+ require(ctx.getContext.getVariants.asScala.head.asScala.size == 1)
+ require(ctx.getContext.getVariants.asScala.head.asScala.head.getNormalizedText == ctx.getContext.getRequest.getNormalizedText)
+
+ NCResult.text("OK")
+ }
+}
+
+/**
+ *
+ */
+@NCTestEnvironment(model = classOf[NCStopWordsInsideModel], startClient = true)
+class NCStopWordsInsideSpec extends NCTestContext {
+ @Test
+ def test(): Unit = {
+ checkIntent("a b", "i")
+ checkIntent("a the b", "i")
+ checkIntent("a , b", "i")
+ checkIntent("a, b", "i")
+ checkIntent("a, the b", "i")
+ checkIntent("a, the, b", "i")
+ }
+}
+
+class NCStopWordsInsideSparseModel extends NCStopWordsInsideModel {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+}
+
+@NCTestEnvironment(model = classOf[NCStopWordsInsideSparseModel], startClient = true)
+class NCStopWordsInsideSparseSpec extends NCStopWordsInsideSpec {
+ @Test
+ def test2(): Unit = {
+ // TODO: extend it.
+ }
+}
+
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy/readme.txt b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy/readme.txt
new file mode 100644
index 0000000..32b1050
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy/readme.txt
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+Do not add new classes here. This package is scanned by test models.
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/NCElementAnnotationsSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/NCElementAnnotationsSpec.scala
new file mode 100644
index 0000000..88ff99f
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/NCElementAnnotationsSpec.scala
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.deploy1
+
+import org.apache.nlpcraft.model.{NCElement, NCAddElement, NCAddElementClass, NCIntent, NCModelAdapter, NCModelAddClasses, NCModelAddPackage, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+
+@NCAddElement("""{ "id": "e5" }""")
+@NCAddElement("""{ "id": "e6", "synonyms": ["e66"] }""")
+class NCElementAnnotations1 {
+ @NCAddElement("""{ "id": "e7" }""")
+ @NCAddElement("""{ "id": "e8" }""")
+ def x(): Unit = ()
+}
+
+class NCElementAnn1 extends NCElement {
+ override def getId: String = "e12"
+}
+/**
+ * Test model checking element registration via annotations:
+ * class/method `@NCAddElement` (JSON), `@NCAddElementClass`,
+ * `@NCModelAddClasses` and `@NCModelAddPackage`.
+ */
+@NCModelAddClasses(Array(classOf[NCElementAnnotations1]))
+@NCModelAddPackage(Array("org.apache.nlpcraft.probe.mgrs.deploy1.pack"))
+@NCAddElement("""{ "id": "e3" }""")
+@NCAddElement("""{ "id": "e4" }""")
+class NCElementAnnotationsSpecModel extends NCModelAdapter("nlpcraft.intents.idl.test", "IDL Test Model", "1.0") {
+ override def getElements: util.Set[NCElement] = Set(NCTestElement("e1"))
+
+ @NCIntent("intent=onE1 term={# == 'e1'}")
+ def onE1(): NCResult = NCResult.text("OK")
+
+ @NCAddElement("""{ "id": "e2" }""")
+ @NCIntent("intent=onE2 term={# == 'e2'}")
+ def onE2(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE3 term={# == 'e3'}")
+ def onE3(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE4 term={# == 'e4'}")
+ def onE4(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE5 term={# == 'e5'}")
+ def onE5(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE6 term={# == 'e6'}")
+ def onE6(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE7 term={# == 'e7'}")
+ def onE7(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE8 term={# == 'e8'}")
+ def onE8(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE9 term={# == 'e9'}")
+ def onE9(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE10 term={# == 'e10'}")
+ def onE10(): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=onE11 term={# == 'e11'}")
+ def onE11(): NCResult = NCResult.text("OK")
+
+ @NCAddElementClass(classOf[NCElementAnn1])
+ @NCIntent("intent=onE12 term={# == 'e12'}")
+ def onE12(): NCResult = NCResult.text("OK")
+}
+/**
+ *
+ */
+@NCTestEnvironment(model = classOf[NCElementAnnotationsSpecModel], startClient = true)
+class NCElementAnnotationsSpec extends NCTestContext {
+ /**
+ *
+ */
+ @Test
+ def test(): Unit = {
+ // Defined in model.
+ checkIntent("e1", "onE1")
+
+ // Added via annotation to model method.
+ checkIntent("e2", "onE2")
+
+ // Added via annotation to model class.
+ checkIntent("e3", "onE3")
+ checkIntent("e4", "onE4")
+
+ // Added via annotation to class and methods, where class added via NCModelAddClasses.
+ // Multiple annotation tested.
+ // Complex JSON tested.
+ checkIntent("e5", "onE5")
+ checkIntent("e66", "onE6")
+ checkIntent("e7", "onE7")
+ checkIntent("e8", "onE8")
+
+ // Added via annotation to class and method, where class added via NCModelAddPackage.
+ // Complex YAML tested.
+ checkIntent("e9", "onE9")
+ checkIntent("e101", "onE10")
+ checkIntent("e11", "onE11")
+
+ // Added via class annotation (second approach).
+ checkIntent("e12", "onE12")
+ }
+}
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/pack/NCElementAnnotations2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/pack/NCElementAnnotations2.scala
new file mode 100644
index 0000000..a3e3d0d
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/pack/NCElementAnnotations2.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.deploy1.pack
+
+import org.apache.nlpcraft.model.NCAddElement
+
+@NCAddElement("""{ "id": "e9" }""")
+@NCAddElement(
+ """---
+ id: "e10"
+ synonyms:
+ - "e100"
+ - "e101"
+ """
+)
+class NCElementAnnotations2 {
+ @NCAddElement("""{ "id": "e11" }""")
+ def x(): Unit = ()
+}
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/readme.txt b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/readme.txt
new file mode 100644
index 0000000..32b1050
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/deploy1/readme.txt
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+Do not add new classes here. This package is scanned by test models.
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
index 503e093..3aee776 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
@@ -68,12 +68,6 @@
lim(text = "handful of", limit = 5, index = 1, note = "A", asc = false),
usr(text = "A", id = "A"),
usr(text = "B", id = "B")
- ),
- Seq(
- nlp("handful"),
- nlp("of"),
- usr(text = "A", id = "A"),
- usr(text = "B", id = "B")
)
)
)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index 4d5d991..8b25e87 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -94,8 +94,7 @@
),
_ => checkExists(
"y the y",
- usr(text = "y y", id = "y3"),
- nlp(text = "the", isStop = true)
+ usr(text = "y the y", id = "y3")
),
_ => checkExists(
"y xxx y",
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
index 27082f1..758171f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
@@ -17,17 +17,15 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
-import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch, NCModelAdapter, NCResult}
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCModelAdapter, NCResult}
import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
import org.junit.jupiter.api.Test
import java.util
import scala.jdk.CollectionConverters.SetHasAsJava
-/**
- * Nested Elements test model.
- */
-class NCNestedTestModel41 extends NCModelAdapter("nlpcraft.nested4.test.mdl", "Nested Test Model", "1.0") {
+// It shouldn't be too slow.
+class NCNestedTestModel4Adapter extends NCModelAdapter("nlpcraft.nested4.test.mdl", "Nested Test Model", "1.0") {
override def getElements: util.Set[NCElement] =
Set(
NCTestElement("e1", "//[a-zA-Z0-9]+//"),
@@ -36,16 +34,22 @@
override def getAbstractTokens: util.Set[String] = Set("e1").asJava
override def getEnabledBuiltInTokens: util.Set[String] = Set.empty[String].asJava
+}
- @NCIntent("intent=onE2 term(t1)={# == 'e2'}[8, 100]")
- def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+/**
+ * Greedy(one element expected) + not permuted.
+ */
+class NCNestedTestModel41 extends NCNestedTestModel4Adapter {
+ @NCIntent("intent=onE2 term(t1)={# == 'e2'}")
+ def onAB(): NCResult = NCResult.text("OK")
+ override def isGreedy: Boolean = true
override def isPermutateSynonyms: Boolean = false
override def isSparse: Boolean = false
}
/**
- * It shouldn't be too slow.
+ *
*/
@NCTestEnvironment(model = classOf[NCNestedTestModel41], startClient = true)
class NCEnricherNestedModelSpec41 extends NCTestContext {
@@ -53,17 +57,66 @@
def test(): Unit = checkIntent("the a " * 11, "onE2")
}
-class NCNestedTestModel42 extends NCNestedTestModel41 {
+/**
+ * Not-greedy(few elements expected) + not permuted.
+ */
+class NCNestedTestModel42 extends NCNestedTestModel4Adapter {
+ @NCIntent("intent=onE2 term(t1)={# == 'e2'}[3, 100]")
+ def onAB(): NCResult = NCResult.text("OK")
+
+ override def isGreedy: Boolean = false
+ override def isPermutateSynonyms: Boolean = false
+ override def isSparse: Boolean = false
+}
+
+/**
+ *
+ */
+@NCTestEnvironment(model = classOf[NCNestedTestModel42], startClient = true)
+class NCEnricherNestedModelSpec42 extends NCTestContext {
+ @Test
+ def test(): Unit = checkIntent("the a " * 11, "onE2")
+}
+
+/**
+ * Greedy(one element expected) + permuted.
+ */
+class NCNestedTestModel43 extends NCNestedTestModel4Adapter {
+ @NCIntent("intent=onE2 term(t1)={# == 'e2'}[1, 100]")
+ def onAB(): NCResult = NCResult.text("OK")
+
+ override def isGreedy: Boolean = true
override def isPermutateSynonyms: Boolean = true
override def isSparse: Boolean = true
}
/**
- * It shouldn't be too slow.
+ *
*/
-@NCTestEnvironment(model = classOf[NCNestedTestModel42], startClient = true)
-class NCEnricherNestedModelSpec42 extends NCTestContext {
+@NCTestEnvironment(model = classOf[NCNestedTestModel43], startClient = true)
+class NCEnricherNestedModelSpec43 extends NCTestContext {
@Test
- def test(): Unit = checkIntent("the a " * 8, "onE2")
+ def test(): Unit = checkIntent("the a " * 4, "onE2")
+}
+
+/**
+ * Not-greedy(few elements expected) + permuted.
+ */
+class NCNestedTestModel44 extends NCNestedTestModel4Adapter {
+ @NCIntent("intent=onE2 term(t1)={# == 'e2'}[3, 100]")
+ def onAB(): NCResult = NCResult.text("OK")
+
+ override def isGreedy: Boolean = false
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+}
+
+/**
+ *
+ */
+@NCTestEnvironment(model = classOf[NCNestedTestModel44], startClient = true)
+class NCEnricherNestedModelSpec44 extends NCTestContext {
+ @Test
+ def test(): Unit = checkIntent("the a " * 4, "onE2")
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index 228885d..7b8d858 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -224,8 +224,7 @@
_ => checkExists(
"sort A the A the A",
srt(text = "sort", typ = SUBJ_ONLY, note = "wrapperA", index = 1),
- usr("A A A", "wrapperA"),
- nlp("the the", isStop = true)
+ usr("A the A the A", "wrapperA")
)
)
}
diff --git a/pom.xml b/pom.xml
index dc05c48..bf178ba 100644
--- a/pom.xml
+++ b/pom.xml
@@ -155,7 +155,7 @@
<prometheus.ver>0.9.0</prometheus.ver>
<lightstep.ver>0.0.3</lightstep.ver>
<lightstep.grpc.ver>0.15.8</lightstep.grpc.ver>
- <junit.ver>5.5.1</junit.ver>
+ <junit.ver>5.7.2</junit.ver>
<jsonpath.ver>2.4.0</jsonpath.ver>
<!-- Force specific encoding on text resources. -->
@@ -636,6 +636,7 @@
<profile>
<id>examples</id>
<modules>
+ <module>nlpcraft-examples/cargps</module>
<module>nlpcraft-examples/alarm</module>
<module>nlpcraft-examples/echo</module>
<module>nlpcraft-examples/helloworld</module>