Update NCToken.java

commit: 14c8ef19508af6b040e39bdfef67634fd908cf21 [log] [tgz]
author: Aaron Radzinski <aradiznski@apache.org> Tue Mar 23 18:39:26 2021 -0700
committer: Aaron Radzinski <aradiznski@apache.org> Tue Mar 23 18:39:26 2021 -0700
tree: ba90ae2756fdddf664d10633783f6c4216ae9ae6
parent: aa4b13124e84367d109db8e277f2771bf7059cbd [diff]
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index a362992..38afc21 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java

@@ -236,6 +236,7 @@
      * <pre class="brush: java">
      *     return meta("nlpcraft:nlp:stopword");
      * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
      * 
      * @return Whether or not this token is a stopword.
      */
@@ -251,6 +252,7 @@
      * <pre class="brush: java">
      *     return meta("nlpcraft:nlp:freeword");
      * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
      *
      * @return Whether or not this token is a freeword.
      */
@@ -265,6 +267,7 @@
      * <pre class="brush: java">
      *     return meta("nlpcraft:nlp:origtext");
      * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
      *
      * @return Original user input text for this token.
      */
@@ -279,6 +282,7 @@
      * <pre class="brush: java">
      *     return meta("nlpcraft:nlp:index");
      * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
      *
      * @return Index of this token in the sentence.
      */
@@ -287,6 +291,161 @@
     }
 
     /**
+     * A shortcut method that gets normalized user input text for this token.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:normtext");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Normalized user input text for this token.
+     */
+    default String getNormalizedText() { return meta("nlpcraft:nlp:normtext"); }
+
+    /**
+     * A shortcut method on whether or not this token was matched on a direct (not permuted) synonym.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:direct");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Whether or not this token was matched on a direct (not permuted) synonym.
+     */
+    default boolean isDirect() { return meta("nlpcraft:nlp:direct"); }
+
+    /**
+     * A shortcut method on whether this token represents an English word. Note that this only
+     * checks that the token's text consists of characters of the English alphabet, i.e. the text
+     * doesn't necessarily have to be a known valid English word.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:english");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Whether this token represents an English word.
+     */
+    default boolean isEnglish() { return meta("nlpcraft:nlp:english"); }
+
+    /**
+     * A shortcut method on whether or not this token is a swear word. NLPCraft has a built-in list of
+     * common English swear words.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:swear");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Whether or not this token is a swear word.
+     */
+    default boolean isSwear() { return meta("nlpcraft:nlp:swear"); }
+
+    /**
+     * A shortcut method on whether or not this token is surrounded by single or double quotes.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:quoted");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Whether or not this token is surrounded by single or double quotes.
+     */
+    default boolean isQuoted() { return meta("nlpcraft:nlp:quoted"); }
+
+    /**
+     * A shortcut method on whether or not this token is surrounded by any of '[', ']', '{', '}', '(', ')' brackets.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:bracketed");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Whether or not this token is surrounded by any of '[', ']', '{', '}', '(', ')' brackets.
+     */
+    default boolean isBracketed() { return meta("nlpcraft:nlp:bracketed"); }
+
+    /**
+     * A shortcut method on whether or not this token is found in the Princeton WordNet database.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:dict");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Whether or not this token is found in the Princeton WordNet database.
+     */
+    default boolean isWordnet() { return meta("nlpcraft:nlp:dict"); }
+
+    /**
+     * A shortcut method to get lemma of this token, i.e. a canonical form of this word. Note that
+     * stemming and lemmatization allow reducing inflectional forms and sometimes derivationally related
+     * forms of a word to a common base form. Lemmatization refers to the use of a vocabulary and
+     * morphological analysis of words, normally aiming to remove inflectional endings only and to
+     * return the base or dictionary form of a word, which is known as the lemma.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:lemma");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Lemma of this token, i.e. a canonical form of this word.
+     */
+    default String getLemma() { return meta("nlpcraft:nlp:lemma"); }
+
+    /**
+     * A shortcut method to get stem of this token. Note that stemming and lemmatization allow reducing
+     * inflectional forms and sometimes derivationally related forms of a word to a common base form.
+     * Unlike lemma, stemming is a basic heuristic process that chops off the ends of words in the
+     * hope of achieving this goal correctly most of the time, and often includes the removal of derivational affixes.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:stem");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Stem of this token.
+     */
+    default String getStem() { return meta("nlpcraft:nlp:stem"); }
+
+    /**
+     * A shortcut method to get numeric value of how sparse the token is. Sparsity zero means that all
+     * individual words in the token follow each other.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:sparsity");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Numeric value of how sparse the token is.
+     */
+    default int getSparsity() { return meta("nlpcraft:nlp:sparsity"); }
+
+    /**
+     * A shortcut method to get Penn Treebank POS tag for this token. Note that in addition to the standard Penn
+     * Treebank POS tags, NLPCraft introduces a synthetic '---' tag to indicate a POS tag for multiword tokens.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:pos");
+     * </pre>
+     * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
+     *
+     * @return Penn Treebank POS tag for this token.
+     */
+    default String getPos() { return meta("nlpcraft:nlp:pos"); }
+
+    /**
      * A shortcut method that gets internal globally unique system ID of the token.
      * <p>
      * This method is equivalent to:
commit	14c8ef19508af6b040e39bdfef67634fd908cf21	[log] [tgz]
author	Aaron Radzinski <aradiznski@apache.org>	Tue Mar 23 18:39:26 2021 -0700
committer	Aaron Radzinski <aradiznski@apache.org>	Tue Mar 23 18:39:26 2021 -0700
tree	ba90ae2756fdddf664d10633783f6c4216ae9ae6
parent	aa4b13124e84367d109db8e277f2771bf7059cbd [diff]