WIP.
diff --git a/custom-components.html b/custom-components.html
index 71cced3..9e0030e 100644
--- a/custom-components.html
+++ b/custom-components.html
@@ -24,13 +24,171 @@
<div class="col-md-8 second-column">
<section id="overview">
<h2 class="section-title">Custom components <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+
+ <p>
+ NLPCraft provides a number of useful built-in components for the English language.
+ You can use them to prepare a <code>Pipeline</code> for your <code>Model</code>.
+ You can also use the provided wrappers for the NER components of the <a href="https://opennlp.apache.org/">Apache OpenNLP</a> and
+ <a href="https://nlp.stanford.edu/">Stanford NLP</a> projects.
+ Their models work with English and some other languages.
+ </p>
+ <p>
+ However, you may need to extend the provided functionality and develop your own components.
+ Let's review these components step by step.
+ </p>
</section>
+ <section id="token-parser">
+ <h2 class="section-title">Token parser <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+ <p>
+ You have to implement the <a href="apis/latest/org/apache/nlpcraft/NCTokenParser.html">NCTokenParser</a> trait.
+ </p>
+ <p>
+ It is not common to need your own language tokenizer.
+ Mostly, it becomes necessary when you want to work with a new language.
+ You have to prepare a new implementation only once and can then use it for all projects in this language.
+ Usually, you should just find an open source solution and wrap it by implementing the
+ <a href="apis/latest/org/apache/nlpcraft/NCTokenParser.html">NCTokenParser</a> trait.
+ </p>
+ <pre class="brush: scala, highlight: [2, 6]">
+ import org.apache.nlpcraft.*
+ import org.languagetool.tokenizers.fr.FrenchWordTokenizer
+ import scala.jdk.CollectionConverters.*
+
+ class NCFrTokenParser extends NCTokenParser: // Token parser backed by the LanguageTool French word tokenizer.
+ private val tokenizer = new FrenchWordTokenizer
+
+ override def tokenize(text: String): List[NCToken] = // Turns the tokenizer output into NCToken instances, skipping blank pieces.
+ val toks = collection.mutable.ArrayBuffer.empty[NCToken]
+ var sumLen = 0 // Total length of the pieces processed so far, used as the running character offset.
+
+ for ((word, idx) <- tokenizer.tokenize(text).asScala.zipWithIndex)
+ val start = sumLen
+ val end = sumLen + word.length // Offset just past the last character of this piece.
+
+ if word.strip.nonEmpty then // Blank pieces produce no token but still advance the offset below.
+ toks += new NCPropertyMapAdapter with NCToken:
+ override def getText: String = word
+ override def getIndex: Int = idx // Position in the raw tokenizer output, so skipped blank pieces are counted too.
+ override def getStartCharIndex: Int = start
+ override def getEndCharIndex: Int = end
+
+ sumLen = end
+
+ toks.toList
+ </pre>
+ <ul>
+ <li>
+ <code>NCFrTokenParser</code> is a simple wrapper which implements <code>NCTokenParser</code> based on
+ the open source <a href="https://languagetool.org">LanguageTool</a> library.
+ </li>
+ </ul>
+ </section>
+
+ <section id="token-enricher">
+ <h2 class="section-title">Token enricher <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+ <p>
+ You have to implement the <a href="apis/latest/org/apache/nlpcraft/NCTokenEnricher.html">NCTokenEnricher</a> trait.
+ </p>
+ <p>
+ <a href="apis/latest/org/apache/nlpcraft/NCToken.html">NCToken</a> instances are used in
+ <a href="intent-matching.html">Intent matching</a>. NLPCraft provides a number of built-in token enricher
+ implementations for the English language.
+ You may want to create your own or extend an existing one. Look at the following example:
+ </p>
+ <pre class="brush: scala, highlight: [25, 26]">
+ import org.apache.nlpcraft.*
+ import org.languagetool.AnalyzedToken
+ import org.languagetool.tagging.ru.RussianTagger
+ import scala.jdk.CollectionConverters.*
+
+ class NCRuLemmaPosTokenEnricher extends NCTokenEnricher: // Adds "pos" and "lemma" properties to tokens using the LanguageTool Russian tagger.
+ private def nvl(v: String, dflt : => String): String = if v != null then v else dflt // Returns v unless it is null; dflt is by-name and evaluated only in that case.
+
+ override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
+ val tags = RussianTagger.INSTANCE.tag(toks.map(_.getText).asJava).asScala
+
+ require(toks.size == tags.size) // Fail fast on a size mismatch; the zip below would silently truncate otherwise.
+
+ toks.zip(tags).foreach { case (tok, tag) =>
+ val readings = tag.getReadings.asScala
+
+ val (lemma, pos) = readings.size match
+ // No data. Lemma is word as is, POS is undefined.
+ case 0 => (tok.getText, "")
+ // Takes first. Other variants ignored.
+ case _ =>
+ val aTok: AnalyzedToken = readings.head
+ (nvl(aTok.getLemma, tok.getText), nvl(aTok.getPOSTag, "")) // Null lemma falls back to the token text, null POS tag to an empty string.
+
+ tok.put("pos", pos)
+ tok.put("lemma", lemma)
+
+ () // Otherwise NPE.
+ }
+ </pre>
+ <ul>
+ <li>
+ <code>Lines 25 and 26</code> enrich <a href="apis/latest/org/apache/nlpcraft/NCToken.html">NCToken</a>
+ with two new properties which can be used for <a href="intent-matching.html">Intent matching</a> later.
+ </li>
+ </ul>
+ </section>
+
+ <section id="token-validator">
+ <h2 class="section-title">Token validator <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+ <p>
+ You have to implement the <a href="apis/latest/org/apache/nlpcraft/NCTokenValidator.html">NCTokenValidator</a> trait.
+ </p>
+ </section>
+
+ <section id="entity-parser">
+ <h2 class="section-title">Entity parser <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+ <p>
+ You have to implement the <a href="apis/latest/org/apache/nlpcraft/NCEntityParser.html">NCEntityParser</a> trait.
+ </p>
+ </section>
+
+ <section id="entity-enricher">
+ <h2 class="section-title">Entity enricher <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+ <p>
+ You have to implement the <a href="apis/latest/org/apache/nlpcraft/NCEntityEnricher.html">NCEntityEnricher</a> trait.
+ </p>
+ </section>
+
+ <section id="entity-mapper">
+ <h2 class="section-title">Entity mapper <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+ <p>
+ You have to implement the <a href="apis/latest/org/apache/nlpcraft/NCEntityMapper.html">NCEntityMapper</a> trait.
+ </p>
+ </section>
+
+ <section id="entity-validator">
+ <h2 class="section-title">Entity validator <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+ <p>
+ You have to implement the <a href="apis/latest/org/apache/nlpcraft/NCEntityValidator.html">NCEntityValidator</a> trait.
+ </p>
+ </section>
+
+ <section id="variant-filter">
+ <h2 class="section-title">Variant filter <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2>
+ <p>
+ You have to implement the <a href="apis/latest/org/apache/nlpcraft/NCVariantFilter.html">NCVariantFilter</a> trait.
+ </p>
+ </section>
+
</div>
<div class="col-md-2 third-column">
<ul class="side-nav">
<li class="side-nav-title">On This Page</li>
<li><a href="#overview">Overview</a></li>
-
+ <li><a href="#token-parser">Token parser</a></li>
+ <li><a href="#token-enricher">Token enricher</a></li>
+ <li><a href="#token-validator">Token validator</a></li>
+ <li><a href="#entity-parser">Entity parser</a></li>
+ <li><a href="#entity-enricher">Entity enricher</a></li>
+ <li><a href="#entity-mapper">Entity mapper</a></li>
+ <li><a href="#entity-validator">Entity validator</a></li>
+ <li><a href="#variant-filter">Variant filter</a></li>
{% include quick-links.html %}
</ul>
</div>