// src/main/java/org/apache/nlpcraft/client/NCModelInfo.java - incubator-nlpcraft-java-client - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.nlpcraft.client;

 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

 /**
  * A model view descriptor.
  * <p>
  * This interface declares only accessor methods that describe a model: its identity
  * (ID, name, version), input validation limits, synonym handling, stopwords, elements
  * and conversation settings.
  *
  * @see NCClient#getModelInfo(String)
  * @see NCModel
  */
 public interface NCModelInfo {
     /**
      * Gets unique, <i>immutable</i> ID of this model.
      * <p>
      * Note that <b>model IDs are immutable</b> while name and version
      * can be changed freely. Changing model ID is equal to creating a completely new model.
      * Model IDs (unlike name and version) are not exposed to the end user and only serve a
      * technical purpose. ID's max length is 32 characters.
      *
      * @return Unique, <i>immutable</i> ID of this model.
      */
     String getId();

     /**
      * Gets descriptive name of this model. Name's max length is 64 characters.
      *
      * @return Descriptive name for this model.
      */
     String getName();

     /**
      * Gets the version of this model using semantic versioning. Version's max length is 16 characters.
      *
      * @return A version compatible with (<a href="http://www.semver.org">www.semver.org</a>) specification.
      */
     String getVersion();

     /**
      * Gets optional short model description. This can be displayed by the management tools.
      *
      * @return Optional short model description.
      */
     String getDescription();

     /**
      * Gets the origin of this model like name of the class, file path or URL.
      *
      * @return Origin of this model like name of the class, file path or URL.
      */
     String getOrigin();

     /**
      * Gets maximum number of unknown words until automatic rejection. An unknown word is a word
      * that is not part of Princeton WordNet database. If you expect a very formalized and well-defined
      * input without uncommon slang and abbreviations you can set this to a small number
      * like one or two. However, in most cases we recommend leaving it as default or setting it to a larger
      * number like five or more.
      *
      * @return Maximum number of unknown words until automatic rejection.
      */
     int getMaxUnknownWords();

     /**
      * Gets maximum number of free words until automatic rejection. A free word is a known word that is
      * not part of any recognized token. In other words, a word that is present in the user input
      * but won't be used to understand its meaning. Setting it to a non-zero value risks the misunderstanding
      * of the user input, while setting it to zero often makes understanding logic too rigid. In most
      * cases we recommend setting it to between one and three. If you expect the user input to contain
      * many <i>noisy</i> idioms, slang or colloquials - you can set it to a larger number.
      *
      * @return Maximum number of free words until automatic rejection.
      */
     int getMaxFreeWords();

     /**
      * Gets maximum number of suspicious words until automatic rejection. A suspicious word is a word
      * that is defined by the model and that should not appear in a valid user input under any circumstances.
      * A typical example of suspicious words would be words "sex" or "porn" when processing
      * queries about children books. In most cases this should be set to zero (default) to automatically
      * reject any such suspicious words in the user input.
      *
      * @return Maximum number of suspicious words until automatic rejection.
      */
     int getMaxSuspiciousWords();

     /**
      * Gets minimum word count (<i>including</i> stopwords) below which user input will be automatically
      * rejected as too short. In almost all cases this value should be greater than or equal to one.
      *
      * @return Minimum word count (<i>including</i> stopwords) below which user input will be automatically
      * rejected as too short.
      */
     int getMinWords();

     /**
      * Gets maximum word count (<i>including</i> stopwords) above which user input will be automatically
      * rejected as too long. In almost all cases this value should be greater than or equal to one.
      *
      * @return Maximum word count (<i>including</i> stopwords) above which user input will be automatically
      * rejected as too long.
      */
     int getMaxWords();

     /**
      * Gets minimum number of all tokens (system and user defined) below which user input will be
      * automatically rejected as too short. In almost all cases this value should be greater than or equal to one.
      *
      * @return Minimum number of all tokens.
      */
     int getMinTokens();

     /**
      * Gets maximum number of all tokens (system and user defined) above which user input will be
      * automatically rejected as too long. Note that sentences with a large number of tokens can result
      * in significant processing delay and substantial memory consumption.
      *
      * @return Maximum number of all tokens.
      */
     int getMaxTokens();

     /**
      * Gets minimum word count (<i>excluding</i> stopwords) below which user input will be automatically rejected
      * as ambiguous sentence.
      *
      * @return Minimum word count (<i>excluding</i> stopwords) below which user input will be automatically
      * rejected as too short.
      */
     int getMinNonStopwords();

     /**
      * Whether to allow non-English language in user input.
      * Currently, only English language is supported. However, model can choose whether
      * to automatically reject user input that is detected to be non-English. Note that current
      * algorithm only works reliably on longer user input (10+ words). On short sentences it will
      * often produce an incorrect result.
      *
      * @return Whether to allow non-English language in user input.
      */
     boolean isNonEnglishAllowed();

     /**
      * Whether to allow non-Latin charset in user input. Currently, only
      * Latin charset is supported. However, model can choose whether to automatically reject user
      * input with characters outside of Latin charset. If {@code false} such user input will be automatically
      * rejected.
      *
      * @return Whether to allow non-Latin charset in user input.
      */
     boolean isNotLatinCharsetAllowed();

     /**
      * Whether to allow known English swear words in user input. If {@code false} - user input with
      * detected known English swear words will be automatically rejected.
      *
      * @return Whether to allow known swear words in user input.
      */
     boolean isSwearWordsAllowed();

     /**
      * Whether to allow user input without a single noun. If {@code false} such user input
      * will be automatically rejected. Typically, for strict command or query-oriented models this should be set to
      * {@code false} as any command or query should have at least one noun subject. However, for conversational
      * models this can be set to {@code true} to allow for smalltalk and one-liners.
      *
      * @return Whether to allow user input without a single noun.
      */
     boolean isNoNounsAllowed();

     /**
      * Whether to permutate multi-word synonyms. Automatic multi-word synonyms permutations greatly
      * increase the total number of synonyms in the system and allow for better multi-word synonym detection.
      * For example, if permutation is allowed the synonym "a b c" will be automatically converted into a
      * sequence of synonyms of "a b c", "b a c", "a c b". This property is closely related to {@link #isSparse()}
      * and the two are typically changed together. Note that individual model elements can override this
      * property using {@link NCElement#isPermutateSynonyms()} method.
      *
      * @return Whether to permutate multi-word synonyms.
      * @see NCElement#isPermutateSynonyms()
      * @see NCElement#isSparse()
      * @see #isSparse()
      */
     boolean isPermutateSynonyms();

     /**
      * Whether duplicate synonyms are allowed. If {@code true} - the model will pick a random
      * model element when multiple elements are found due to duplicate synonyms. If {@code false} - model
      * will print error message and will not deploy.
      *
      * @return Whether to allow duplicate synonyms.
      */
     boolean isDupSynonymsAllowed();

     /**
      * Total number of synonyms allowed per model. Model won't deploy if total number of synonyms exceeds this
      * number.
      *
      * @return Total number of synonyms allowed per model.
      * @see #getMaxElementSynonyms()
      */
     int getMaxTotalSynonyms();

     /**
      * Whether to allow the user input with no user token detected. If {@code false} such user
      * input will be automatically rejected. Note that this property only applies to user-defined
      * tokens (i.e. model elements). Even if there are no user defined tokens, the user input may still
      * contain system tokens like <code>nlpcraft:city</code> or <code>nlpcraft:date</code>. In many cases models
      * should be built to allow user input without user tokens. However, set it to {@code false} if presence
      * of at least one user token is mandatory.
      *
      * @return Whether to allow the user input with no user token detected.
      */
     boolean isNoUserTokensAllowed();

     /**
      * Whether this model's elements allow non-stopword gaps in their multi-word synonyms.
      * This property is closely related to {@link #isPermutateSynonyms()} and the two are typically changed
      * together. Note that individual model elements can override this property using
      * {@link NCElement#isSparse()} method.
      *
      * @return Optional multi-word synonym sparsity model property.
      * @see NCElement#isSparse()
      * @see NCElement#isPermutateSynonyms()
      * @see #isPermutateSynonyms()
      */
     boolean isSparse();

     /**
      * Gets optional user defined model metadata that can be set by the developer and accessed later.
      * By default, it returns an empty map. Note that this metadata is mutable and can be
      * changed at runtime by the model's code.
      *
      * @return Optional user defined model metadata. By default, returns an empty map. Never returns {@code null}.
      */
     Map<String, Object> getMetadata();

     /**
      * Gets an optional set of stopwords to add to the built-in ones.
      * <p>
      * Stopword is an individual word (i.e. sequence of characters excluding whitespaces) that contributes no
      * semantic meaning to the sentence. For example, 'the', 'wow', or 'hm' provide no semantic meaning to the
      * sentence and can be safely excluded from semantic analysis.
      * <p>
      * NLPCraft comes with a carefully selected list of English stopwords which should be sufficient
      * for a majority of use cases. However, you can add additional stopwords to this list. The typical
      * use for user-defined stopwords are jargon parasite words that are specific to the model's domain.
      *
      * @return Potentially empty set of additional stopwords.
      */
     Set<String> getAdditionalStopWords();

     /**
      * Gets an optional set of stopwords to exclude from the built-in list of stopwords.
      * <p>
      * Just like you can add additional stopwords via {@link #getAdditionalStopWords()} you can exclude
      * certain words from the list of stopwords. This can be useful in rare cases when a built-in
      * stopword has a specific meaning in your model. In order to process such words you need to exclude them
      * from the list of stopwords.
      *
      * @return Potentially empty set of excluded stopwords.
      */
     Set<String> getExcludedStopWords();

     /**
      * Gets an optional set of suspicious words. A suspicious word is a word that generally should not appear in user
      * sentence when used with this model. For example, if a particular model is for children oriented book search,
      * the words "sex" and "porn" should probably NOT appear in the user input and can be automatically rejected
      * when added here and model's metadata {@code MAX_SUSPICIOUS_WORDS} property set to zero.
      *
      * @return Potentially empty set of suspicious words in their lemma form.
      */
     Set<String> getSuspiciousWords();

     /**
      * Gets an optional map of macros to be used in this model. Macros and option groups are instrumental
      * in defining model's elements.
      *
      * @return Potentially empty map of macros.
      */
     Map<String, String> getMacros();

     /**
      * Gets a set of model elements or named entities. Model can have zero or more user defined elements.
      *
      * @return Set of model elements, potentially empty.
      */
     Set<NCElement> getElements();

     /**
      * Gets a set of IDs for built-in named entities (tokens) that should be enabled and detected for this model.
      * Unless model requests (i.e. enables) the built-in tokens in this method the NLP subsystem will not attempt
      * to detect them. Explicit enablement of the token significantly improves the overall performance by avoiding
      * unnecessary token detection. Note that you don't have to specify your own user elements here as they are
      * always enabled.
      *
      * @return Set of built-in tokens, potentially empty but never {@code null}, that should be enabled
      *      and detected for this model.
      */
     Set<String> getEnabledBuiltInTokens();

     /**
      * Gets a set of named entity (token) IDs that will be considered as abstract tokens.
      * An abstract token is only detected when it is either a constituent part of some other non-abstract token
      * or referenced by built-in tokens. In other words, an abstract token will not be detected in a standalone
      * unreferenced position. By default (unless returned by this method), all named entities are considered to be
      * non-abstract.
      * <p>
      * Declaring tokens as abstract is important to minimize number of parsing variants automatically
      * generated as permutation of all possible parsing compositions. For example, if it is known that a particular
      * named entity will only be used as a constituent part of some other token - declaring such named entity as
      * abstract can significantly reduce the number of parsing variants leading to a better performance,
      * and often simpler corresponding intent definition and callback logic.
      *
      * @return Set of abstract token IDs. Can be empty but never {@code null}.
      */
     Set<String> getAbstractTokens();

     /**
      * Gets maximum number of unique synonyms per model element after which either warning or error will be
      * triggered. Note that there is no technical limit on how many synonyms a model element can have apart
      * from memory consumption and performance considerations. However, in cases where synonyms are auto-generated
      * (i.e. from database) this property can serve as a courtesy notification that a model element has too many
      * synonyms. Also, in general, too many synonyms can potentially lead to a performance degradation.
      *
      * @return Maximum number of unique synonyms per model element after which either warning or
      *      error will be triggered.
      * @see #isMaxSynonymsThresholdError()
      * @see #getMaxTotalSynonyms()
      */
     int getMaxElementSynonyms();

     /**
      * Whether exceeding {@link #getMaxElementSynonyms()} will trigger a warning log or throw an exception.
      * Note that throwing an exception will prevent data probe from starting.
      *
      * @return Whether exceeding {@link #getMaxElementSynonyms()} will trigger a warning log or
      *      throw an exception.
      * @see #getMaxElementSynonyms()
      */
     boolean isMaxSynonymsThresholdError();

     /**
      * Gets timeout in ms after which the unused conversation element is automatically "forgotten".
      * <p>
      * Just like in a normal human conversation if we talk about, say, "Chicago", and then don't mention it
      * for a certain period of time during further dialog, the conversation participants subconsciously "forget"
      * about it and exclude it from conversation context. In other words, the term "Chicago" is no longer in
      * conversation's short-term-memory.
      * <p>
      * Note that both conversation timeout and {@link #getConversationDepth() depth}
      * combined define the expiration policy for the conversation management. These two properties allow fine-tuning
      * for different types of dialogs. For example, setting a longer timeout and smaller depth mimics
      * a slow-moving but topic-focused conversation. Alternatively, setting a shorter timeout and longer depth better
      * supports a fast-moving wide-ranging conversation that may cover multiple topics.
      *
      * @return Timeout in ms after which the unused conversation element is automatically "forgotten".
      * @see #getConversationDepth()
      */
     long getConversationTimeout();

     /**
      * Gets maximum number of requests after which the unused conversation element is automatically "forgotten".
      * <p>
      * Just like in a normal human conversation if we talk about, say, "Chicago", and then don't mention it
      * for a certain number of utterances during further dialog, the conversation participants subconsciously "forget"
      * about it and exclude it from conversation context. In other words, the term "Chicago" is no longer in
      * conversation's short-term-memory.
      * <p>
      * Note that both conversation {@link #getConversationTimeout() timeout} and depth
      * combined define the expiration policy for the conversation management. These two properties allow fine-tuning
      * for different types of dialogs. For example, setting a longer timeout and smaller depth mimics
      * a slow-moving but topic-focused conversation. Alternatively, setting a shorter timeout and longer depth better
      * supports a fast-moving wide-ranging conversation that may cover multiple topics.
      *
      * @return Maximum number of requests after which the unused conversation element is automatically "forgotten".
      * @see #getConversationTimeout()
      */
     int getConversationDepth();

     /**
      * Gets an optional map of restricted named entity combinations (linkage). Returned map is a map of entity ID to a set
      * of other entity IDs, with each key-value pair defining the restricted combination. Restricting certain entities
      * from being linked (or referenced) by some other entities allows reducing "wasteful" parsing variant
      * generation. For example, if we know that entity with ID "adjective" cannot be sorted, we can restrict it
      * from being linked with <code>nlpcraft:limit</code> and <code>nlpcraft:sort</code> entities to reduce the
      * amount of parsing variants being generated.
      * <p>
      * Only the following built-in entities can be restricted (i.e., to be the keys in the returned map):
      * <ul>
      *     <li><code>nlpcraft:limit</code></li>
      *     <li><code>nlpcraft:sort</code></li>
      *     <li><code>nlpcraft:relation</code></li>
      * </ul>
      * Note that an entity cannot be restricted to itself (entity ID cannot appear as key as well as a
      * part of the value's set).
      *
      * @return Optional map of restricted named entity combinations. Can be empty but never {@code null}.
      */
     Map<String, Set<String>> getRestrictedCombinations();
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.nlpcraft.client;

	import java.util.Collections;
	import java.util.List;
	import java.util.Map;
	import java.util.Set;

	/**
	* A model view descriptor.
	*
	* @see NCClient#getModelInfo(String)
	* @see NCModel
	*/
	public interface NCModelInfo {
	/**
	* Gets unique, <i>immutable</i> ID of this model.
	* <p>
	* Note that <b>model IDs are immutable</b> while name and version
	* can be changed freely. Changing model ID is equal to creating a completely new model.
	* Model IDs (unlike name and version) are not exposed to the end user and only serve a
	* technical purpose. ID's max length is 32 characters.
	*
	* @return Unique, <i>immutable</i> ID of this model.
	*/
	String getId();

	/**
	* Gets descriptive name of this model. Name's max length is 64 characters.
	*
	* @return Descriptive name for this model.
	*/
	String getName();

	/**
	* Gets the version of this model using semantic versioning. Version's max length is 16 characters.
	*
	* @return A version compatible with (<a href="http://www.semver.org">www.semver.org</a>) specification.
	*/
	String getVersion();

	/**
	* Gets optional short model description. This can be displayed by the management tools.
	*
	* @return Optional short model description.
	*/
	String getDescription();

	/**
	* Gets the origin of this model like name of the class, file path or URL.
	*
	* @return Origin of this model like name of the class, file path or URL.
	*/
	String getOrigin();

	/**
	* Gets maximum number of unknown words until automatic rejection. An unknown word is a word
	* that is not part of Princeton WordNet database. If you expect a very formalized and well-defined
	* input without uncommon slang and abbreviations you can set this to a small number
	* like one or two. However, in most cases we recommend leaving it as default or setting it to a larger
	* number like five or more.
	*
	* @return Maximum number of unknown words until automatic rejection.
	*/
	int getMaxUnknownWords();

	/**
	* Gets maximum number of free words until automatic rejection. A free word is a known word that is
	* not part of any recognized token. In other words, a word that is present in the user input
	* but won't be used to understand its meaning. Setting it to a non-zero risks the misunderstanding
	* of the user input, while setting it to zero often makes understanding logic too rigid. In most
	* cases we recommend setting to between one and three. If you expect the user input to contain
	* many <i>noisy</i> idioms, slang or colloquials - you can set it to a larger number.
	*
	* @return Maximum number of free words until automatic rejection.
	*/
	int getMaxFreeWords();

	/**
	* Gets maximum number of suspicious words until automatic rejection. A suspicious word is a word
	* that is defined by the model and that should not appear in a valid user input under any circumstances.
	* A typical example of suspicious words would be words "sex" or "porn" when processing
	* queries about children books. In most cases this should be set to zero (default) to automatically
	* reject any such suspicious words in the user input.
	*
	* @return Maximum number of suspicious words until automatic rejection.
	*/
	int getMaxSuspiciousWords();

	/**
	* Gets minimum word count (<i>including</i> stopwords) below which user input will be automatically
	* rejected as too short. In almost all cases this value should be greater than or equal to one.
	*
	* @return Minimum word count (<i>including</i> stopwords) below which user input will be automatically
	* rejected as too short.
	*/
	int getMinWords();

	/**
	* Gets maximum word count (<i>including</i> stopwords) above which user input will be automatically
	* rejected as too long. In almost all cases this value should be greater than or equal to one.
	*
	* @return Maximum word count (<i>including</i> stopwords) above which user input will be automatically
	* rejected as too long.
	*/
	int getMaxWords();

	/**
	* Gets minimum number of all tokens (system and user defined) below which user input will be
	* automatically rejected as too short. In almost all cases this value should be greater than or equal to one.
	*
	* @return Minimum number of all tokens.
	*/
	int getMinTokens();

	/**
	* Gets maximum number of all tokens (system and user defined) above which user input will be
	* automatically rejected as too long. Note that sentences with large number of token can result
	* in significant processing delay and substantial memory consumption.
	*
	* @return Maximum number of all tokens.
	*/
	int getMaxTokens();

	/**
	* Gets minimum word count (<i>excluding</i> stopwords) below which user input will be automatically rejected
	* as ambiguous sentence.
	*
	* @return Minimum word count (<i>excluding</i> stopwords) below which user input will be automatically
	* rejected as too short.
	*/
	int getMinNonStopwords();

	/**
	* Whether to allow non-English language in user input.
	* Currently, only English language is supported. However, model can choose whether
	* to automatically reject user input that is detected to be a non-English. Note that current
	* algorithm only works reliably on longer user input (10+ words). On short sentences it will
	* often produce an incorrect result.
	*
	* @return Whether to allow non-English language in user input.
	*/
	boolean isNonEnglishAllowed();

	/**
	* Whether to allow non-Latin charset in user input. Currently, only
	* Latin charset is supported. However, model can choose whether to automatically reject user
	* input with characters outside of Latin charset. If {@code false} such user input will be automatically
	* rejected.
	*
	* @return Whether to allow non-Latin charset in user input.
	*/
	boolean isNotLatinCharsetAllowed();

	/**
	* Whether to allow known English swear words in user input. If {@code false} - user input with
	* detected known English swear words will be automatically rejected.
	*
	* @return Whether to allow known swear words in user input.
	*/
	boolean isSwearWordsAllowed();

	/**
	* Whether to allow user input without a single noun. If {@code false} such user input
	* will be automatically rejected. Typically, for strict command or query-oriented models this should be set to
	* {@code false} as any command or query should have at least one noun subject. However, for conversational
	* models this can be set to {@code true} to allow for smalltalk and one-liners.
	*
	* @return Whether to allow user input without a single noun.
	*/
	boolean isNoNounsAllowed();

	/**
	* Whether to permutate multi-word synonyms. Automatic multi-word synonyms permutations greatly
	* increase the total number of synonyms in the system and allows for better multi-word synonym detection.
	* For example, if permutation is allowed the synonym "a b c" will be automatically converted into a
	* sequence of synonyms of "a b c", "b a c", "a c b". This property is closely related to {@link #isSparse()}
	* which are typically changed together. Note that individual model elements can override this property using
	* {@link NCElement#isPermutateSynonyms()} method.
	*
	* @return Whether to permutate multi-word synonyms.
	* @see NCElement#isPermutateSynonyms()
	* @see NCElement#isSparse()
	* @see #isSparse()
	*/
	boolean isPermutateSynonyms();

	/**
	* Whether duplicate synonyms are allowed. If {@code true} - the model will pick the random
	* model element when multiple elements found due to duplicate synonyms. If {@code false} - model
	* will print error message and will not deploy.
	*
	* @return Whether to allow duplicate synonyms.
	*/
	boolean isDupSynonymsAllowed();

	/**
	* Total number of synonyms allowed per model. Model won't deploy if total number of synonyms exceeds this
	* number.
	*
	* @return Total number of synonyms allowed per model.
	* @see #getMaxElementSynonyms()
	*/
	int getMaxTotalSynonyms();

	/**
	* Whether to allow the user input with no user token detected. If {@code false} such user
	* input will be automatically rejected. Note that this property only applies to user-defined
	* token (i.e. model element). Even if there are no user defined tokens, the user input may still
	* contain system token like <code>nlpcraft:city</code> or <code>nlpcraft:date</code>. In many cases models
	* should be built to allow user input without user tokens. However, set it to {@code false} if presence
	* of at least one user token is mandatory.
	*
	* @return Whether to allow the user input with no user token detected.
	*/
	boolean isNoUserTokensAllowed();

	/**
	* Whether this model elements allow non-stop words gaps in their multi-word synonyms.
	* This property is closely related to {@link #isPermutateSynonyms()} which are typically changed together.
	* Note that individual model elements can override this property using {@link NCElement#isSparse()}
	* method.
	*
	* @return Optional multi-word synonym sparsity model property.
	* @see NCElement#isSparse()
	* @see NCElement#isPermutateSynonyms()
	* @see #isPermutateSynonyms()
	*/
	boolean isSparse();

	/**
	* Gets optional user defined model metadata that can be set by the developer and accessed later.
	* By default, it returns an empty map. Note that this metadata is mutable and can be
	* changed at runtime by the model's code.
	*
	* @return Optional user defined model metadata. By default, returns an empty map. Never returns {@code null}.
	*/
	Map<String, Object> getMetadata();

	/**
	* Gets an optional list of stopwords to add to the built-in ones.
	* <p>
	* Stopword is an individual word (i.e. sequence of characters excluding whitespaces) that contribute no
	* semantic meaning to the sentence. For example, 'the', 'wow', or 'hm' provide no semantic meaning to the
	* sentence and can be safely excluded from semantic analysis.
	* <p>
	* NLPCraft comes with a carefully selected list of English stopwords which should be sufficient
	* for a majority of use cases. However, you can add additional stopwords to this list. The typical
	* use for user-defined stopwords are jargon parasite words that are specific to the model's domain.
	*
	* @return Potentially empty list of additional stopwords.
	*/
	Set<String> getAdditionalStopWords();

	/**
	* Gets an optional list of stopwords to exclude from the built-in list of stopwords.
	* <p>
	* Just like you can add additional stopwords via {@link #getAdditionalStopWords()} you can exclude
	* certain words from the list of stopwords. This can be useful in rare cases when built-in
	* stopword has specific meaning of your model. In order to process them you need to exclude them
	* from the list of stopwords.
	*
	* @return Potentially empty list of excluded stopwords.
	*/
	Set<String> getExcludedStopWords();

	/**
	* Gets an optional set of suspicious words. A suspicious word is a word that generally should not appear in
	* a user sentence when used with this model. For example, if a particular model is for children-oriented
	* book search, the words "sex" and "porn" should probably NOT appear in the user input and can be
	* automatically rejected when added here and the model's metadata {@code MAX_SUSPICIOUS_WORDS} property
	* is set to zero.
	*
	* @return Potentially empty set of suspicious words in their lemma form.
	*/
	Set<String> getSuspiciousWords();

	/**
	* Gets an optional map of macros to be used in this model. Macros and option groups are instrumental
	* in defining the model's elements.
	*
	* @return Potentially empty map of macros.
	*/
	Map<String, String> getMacros();

	/**
	* Gets a set of model elements or named entities. A model can have zero or more user-defined elements.
	*
	* @return Set of model elements, potentially empty.
	*/
	Set<NCElement> getElements();

	/**
	* Gets a set of IDs for built-in named entities (tokens) that should be enabled and detected for this model.
	* Unless the model requests (i.e. enables) the built-in tokens in this method, the NLP subsystem will not
	* attempt to detect them. Explicit enablement of the tokens significantly improves the overall performance
	* by avoiding unnecessary token detection. Note that you don't have to specify your own user elements here
	* as they are always enabled.
	*
	* @return Set of built-in tokens, potentially empty but never {@code null}, that should be enabled
	* and detected for this model.
	*/
	Set<String> getEnabledBuiltInTokens();

	/**
	* Gets a set of named entity (token) IDs that will be considered as abstract tokens.
	* An abstract token is only detected when it is either a constituent part of some other non-abstract token
	* or referenced by built-in tokens. In other words, an abstract token will not be detected in a standalone
	* unreferenced position. By default (unless returned by this method), all named entities are considered
	* to be non-abstract.
	* <p>
	* Declaring tokens as abstract is important to minimize the number of parsing variants automatically
	* generated as permutations of all possible parsing compositions. For example, if it is known that a
	* particular named entity will only be used as a constituent part of some other token, declaring such
	* a named entity as abstract can significantly reduce the number of parsing variants, leading to better
	* performance and often simpler corresponding intent definition and callback logic.
	*
	* @return Set of abstract token IDs. Can be empty but never {@code null}.
	*/
	Set<String> getAbstractTokens();

	/**
	* Gets the maximum number of unique synonyms per model element after which either a warning or an error
	* will be triggered. Note that there is no technical limit on how many synonyms a model element can have
	* apart from memory consumption and performance considerations. However, in cases where synonyms are
	* auto-generated (e.g. from a database) this property can serve as a courtesy notification that a model
	* element has too many synonyms. Also, in general, too many synonyms can potentially lead to
	* performance degradation.
	*
	* @return Maximum number of unique synonyms per model element after which either a warning or
	* an error will be triggered.
	* @see #isMaxSynonymsThresholdError()
	* @see #getMaxTotalSynonyms()
	*/
	int getMaxElementSynonyms();

	/**
	* Whether exceeding {@link #getMaxElementSynonyms()} triggers a warning log or throws an exception.
	* Note that throwing an exception will prevent the data probe from starting.
	* <p>
	* NOTE(review): the mapping of the returned value to the outcome is not spelled out here. Judging by
	* the method name, {@code true} presumably selects the exception and {@code false} the warning log -
	* confirm against the implementation.
	*
	* @return Whether exceeding {@link #getMaxElementSynonyms()} triggers a warning log or
	* throws an exception.
	* @see #getMaxElementSynonyms()
	*/
	boolean isMaxSynonymsThresholdError();

	/**
	* Gets the timeout in ms after which an unused conversation element is automatically "forgotten".
	* <p>
	* Just like in a normal human conversation, if we talk about, say, "Chicago", and then don't mention it
	* for a certain period of time during further dialog, the conversation participants subconsciously "forget"
	* about it and exclude it from the conversation context. In other words, the term "Chicago" is no longer in
	* the conversation's short-term memory.
	* <p>
	* Note that both the conversation timeout and {@link #getConversationDepth() depth}
	* combined define the expiration policy for the conversation management. These two properties allow
	* fine-tuning for different types of dialogs. For example, setting a longer timeout and a smaller depth
	* mimics a slow-moving but topic-focused conversation. Alternatively, setting a shorter timeout and
	* a larger depth better supports a fast-moving, wide-ranging conversation that may cover multiple topics.
	*
	* @return Timeout in ms after which an unused conversation element is automatically "forgotten".
	* @see #getConversationDepth()
	*/
	long getConversationTimeout();

	/**
	* Gets the maximum number of requests after which an unused conversation element is automatically
	* "forgotten".
	* <p>
	* Just like in a normal human conversation, if we talk about, say, "Chicago", and then don't mention it
	* for a certain number of utterances during further dialog, the conversation participants subconsciously
	* "forget" about it and exclude it from the conversation context. In other words, the term "Chicago" is
	* no longer in the conversation's short-term memory.
	* <p>
	* Note that both the conversation {@link #getConversationTimeout() timeout} and depth
	* combined define the expiration policy for the conversation management. These two properties allow
	* fine-tuning for different types of dialogs. For example, setting a longer timeout and a smaller depth
	* mimics a slow-moving but topic-focused conversation. Alternatively, setting a shorter timeout and
	* a larger depth better supports a fast-moving, wide-ranging conversation that may cover multiple topics.
	*
	* @return Maximum number of requests after which an unused conversation element is automatically
	* "forgotten".
	* @see #getConversationTimeout()
	*/
	int getConversationDepth();

	/**
	* Gets an optional map of restricted named entity combinations (linkage). The returned map is a map of
	* an entity ID to a set of other entity IDs, with each key-value pair defining a restricted combination.
	* Restricting certain entities from being linked (or referenced) by some other entities allows reducing
	* "wasteful" parsing variant generation. For example, if we know that the entity with ID "adjective"
	* cannot be sorted, we can restrict it from being linked with <code>nlpcraft:limit</code> and
	* <code>nlpcraft:sort</code> entities to reduce the amount of parsing variants being generated.
	* <p>
	* Only the following built-in entities can be restricted (i.e., to be the keys in the returned map):
	* <ul>
	* <li><code>nlpcraft:limit</code></li>
	* <li><code>nlpcraft:sort</code></li>
	* <li><code>nlpcraft:relation</code></li>
	* </ul>
	* Note that an entity cannot be restricted to itself (an entity ID cannot appear as a key as well as
	* a part of the value's set).
	*
	* @return Optional map of restricted named entity combinations. Can be empty but never {@code null}.
	*/
	Map<String, Set<String>> getRestrictedCombinations();
	}