blob: d0004e8ee350d106e8ca8525db46f89495a23797 [file] [log] [blame]
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!doctype html><html lang="en"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="An open source API to convert natural language into actions."><meta name="author" content="NLPCraft."><title>Apache NLPCraft - Natural Language Interface</title><link href="//netdna.bootstrapcdn.com/bootstrap/4.1.0/css/bootstrap.min.css" rel="stylesheet"><link href="//use.fontawesome.com/releases/v5.7.1/css/all.css" integrity="sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr" rel="stylesheet" crossorigin="anonymous"><link href="/ext/syntaxhighlighter/styles/shCoreNLPCraft.css" rel="stylesheet" type="text/css"><link href="/ext/syntaxhighlighter/styles/shThemeNLPCraft.css" rel="stylesheet" type="text/css"><link href="//fonts.googleapis.com/css?family=Amatic+SC|Roboto+Mono" rel="stylesheet"> <script src="/ext/syntaxhighlighter/scripts/XRegExp.js" type="text/javascript"></script> <script src="/ext/syntaxhighlighter/scripts/shCore.js" type="text/javascript"></script> <script src="/ext/syntaxhighlighter/scripts/shBrushXml.js" type="text/javascript"></script> <script src="/ext/syntaxhighlighter/scripts/shBrushPlain.js" type="text/javascript"></script> <script src="/ext/syntaxhighlighter/scripts/shBrushJava.js" type="text/javascript"></script> <script src="/ext/syntaxhighlighter/scripts/shBrushScala.js" type="text/javascript"></script> <script src="/ext/syntaxhighlighter/scripts/shBrushPython.js" type="text/javascript"></script> <script src="/ext/syntaxhighlighter/scripts/shBrushJScript.js" type="text/javascript"></script> <script async defer src="https://buttons.github.io/buttons.js"></script><link rel="stylesheet" type="text/css" href="/assets/css/style.css"/> <script type="text/javascript" src="//use.typekit.net/pso2adz.js"></script> <script type="text/javascript"> try { Typekit.load(); } catch(e) { // Ignore. 
} </script><nav class="navbar navbar-expand-lg navbar-light bg-light" id="top-header"> <a class="navbar-brand mr-4" href="/index.html"> <img src="/images/nlpcraft_logo_white.gif" height="24px"> </a> <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation"> <span class="navbar-toggler-icon"></span> </button><div class="collapse navbar-collapse" id="navbarSupportedContent"><ul class="navbar-nav mr-auto"><li class="nav-item"> <a class="nav-link" href="/index.html#features">Features</a><li class="nav-item"> <a class="nav-link" href="/docs.html">Docs</a><li class="nav-item"> <a class="nav-link" href="/download.html">Downloads <i class="fas fa-download"></i></a><li class="nav-item"> <a class="nav-link" href="/community.html">Community</a><li class="nav-item"> <a class="nav-link" href="/use-cases.html">Use Cases</a></ul><ul class="navbar-nav ml-auto"><li class="nav-item mr-2"> <a class="nav-link" href="/download.html">v.0.5.0</a><li class="nav-item"> <a class="nav-link fork-link" target="github" href="https://github.com/apache/incubator-nlpcraft">GitHub <img height="20px" src="/images/github_logo_white.png"></a></ul></div></nav><div class="container-fluid"><div class="navbar-aligned"><ol class="breadcrumb"><li class="mr-1"><a href="/index.html">Home</a><li class="mr-1 active">Basic Concepts</ol><h1 class="page-title"> <span><i class="fas fa-fw fa-book"></i> Basic Concepts</span></h1><div class="three-cols-container"><div class="col-md-2 first-column"><ul class="side-nav"><li class="side-nav-title">Introduction<li> <a href="/docs.html">Overview</a><li> <a href="/installation.html">Installation</a><li> <a href="/getting-started.html">Getting Started</a><li class="side-nav-title">Developer Guide<li> <a class="active" href="/basic-concepts.html">Basic Concepts</a><li> <a href="/first-example.html">First Example</a><li> <a 
href="/data-model.html">Data Model</a><li> <a href="/intent-matching.html">Intent Matching</a><li> <a href="/using-rest.html">REST API</a><li> <a href="/server-and-probe.html">Server <span class="amp">&amp;</span> Probe</a><li> <a href="/metrics-and-tracing.html">Metrics <span class="amp">&amp;</span> Tracing</a><li> <a href="/integrations.html">Integrations</a><li class="side-nav-title">Examples<li> <a href="/examples/alarm_clock.html">Alarm Clock</a><li> <a href="/examples/light_switch.html">Light Switch</a></ul></div><div id="basic-concepts" class="col-md-8 second-column"><section id="overview"><h2 class="section-title">Basic Concepts</h2><p> Below we’ll cover some of the key concepts that are important for NLPCraft:</p><ul><li><a href="#model">Data Model</a><li><a href="#ne">Named Entities</a><li><a href="#intent">Intent Matching</a><li><a href="#stm">Conversation <span class="amp">&amp;</span> STM</a></ul></section><section id="model"><h3 class="section-sub-title">Data Model</h3><p> The data model is a central concept in NLPCraft. It defines a natural language interface to your public or private data sources like an on-premise database or a cloud SaaS application. NLPCraft employs a <em>model-as-a-code</em> approach where the entire data model is an implementation of the <a target="javadoc" href="/apis/latest/org/apache/nlpcraft/model/NCModel.html">NCModel</a> interface which can be developed using any JVM programming language like Java, Scala, Kotlin, or Groovy.</p><p> A data model defines:</p><ul><li>Set of model <a href="data-model.html">elements</a> (a.k.a. <em>named entities</em>) to be detected in the user input.<li>How to query a particular data source based on detected named entities.<li>Common model configuration and <a href="data-model.html">life-cycle</a> callbacks.</ul><p> Note that the model-as-a-code approach allows you to use any software lifecycle tools and frameworks like various build tools, CI/SCM tools, IDEs, etc. to develop and maintain your data model. 
You don't have to use additional web-based tools to manage some aspects of your data models - your entire model and all of its components are part of your project source code.</p><p> Read more about data models <a href="data-model.html">here</a>.</p></section><section id="ne"><h3 class="section-sub-title">Named Entities</h3><p> A named entity, also known as a model element or a token, is a main component defined by the NLPCraft data model. A named entity is one or more individual words that have a consistent semantic meaning and typically denote a real-world object, such as persons, locations, numbers, date and time, organizations, products, etc. Such an object can be abstract or have a physical existence.</p><p> For example, in the following sentence:</p><p> <i class="fa fa-fw fa-angle-right"></i><code>Meeting is set for 12pm today in San Francisco.</code></p><p> the following named entities can be detected:</p><table class="gradient-table"><thead><tr><th>Words<th>Type<th>Normalized Value<tbody><tr><td><code>Meeting</code><td>CUSTOM_OBJ<td>meeting<tr><td><code>set</code><td>CUSTOM_ACT<td>set<tr><td><code>12pm today</code><td>DATE_TIME<td>12:00 September 1, 2019 GMT<tr><td><code>San Francisco</code><td>GEO_CITY<td>San Francisco, CA USA</table><p> In most cases named entities will have an associated <em>normalized value</em>. It is especially important for named entities that have many different notational forms such as time and date, currency, geographical locations, etc. For example, <code>New York</code>, <code>New York City</code> and <code>NYC</code> all refer to the same "New York City, NY USA" location which is a valid normalized form.</p><p> The process of detecting named entities is called Named Entity Recognition (NER). There are many different ways of how a certain named entity can be detected: through a list of synonyms, by name, rule-based or by using statistical techniques like neural networks with a large corpus of predefined data. 
NLPCraft allows you to define named entities through a powerful DSL and also supports named entities that can be composed from other named entities including named entities from external projects such as OpenNLP, spaCy or Stanford CoreNLP.</p><p> Named entities allow you to abstract from basic linguistic forms like nouns and verbs to deal with the higher level semantic abstractions like geographical location or time when you are trying to understand the meaning of the sentence. One of the main goals of named entities is to act as input ingredients for intent matching.</p><p> Read more in-depth about named entities <a href="data-model.html">here</a>.</p></section><section id="intent"><h3 class="section-sub-title">Intent Matching</h3><p> You can think of intent matching as regular expression matching where instead of characters you deal with detected named entities. An intent defines a pattern in terms of detected named entities (or tokens) and a callback to call when a submitted sentence matches that pattern.</p><p> Intents can also match on the <em>dialog flow</em> in addition to matching on the current user sentence. Dialog flow matching means matching an intent based on what intents were matched previously for the same user and data model, i.e. the flow of the dialog. Note that you should not confuse dialog flow intent matching with conversational STM that is used to fill in missing tokens from memory.</p><div class="bq success"><p> You can think of the NLPCraft data model as a mechanism to define named entities and intents that use these named entities to pattern match the user input.</p></div><p> Learn more details about intent matching <a href="intent-matching.html">here</a>.</p></section><section id="stm"><h3 class="section-sub-title">Conversation <span class="amp">&amp;</span> STM</h3><p> NLPCraft provides automatic conversation management right out of the box. Conversation management is based on the idea of short-term memory (STM). 
STM is automatically maintained by NLPCraft per user and data model. Essentially, NLPCraft "remembers" the context of the conversation and can supply the currently missing elements from its memory (i.e. from STM). STM implementation is also fully integrated with intent matching.</p><p> Maintaining conversation state is necessary for effective context resolution, so that users could ask, for example, the following sequence of questions using an example weather model:</p><dl class="stm-example"><dd><i class="fa fa-fw fa-angle-right"></i>What’s the weather in London today?<dt><p> User gets the current weather for London. STM is empty at this moment so NLPCraft expects to get all necessary information from the user sentence. Meaningful parts of the sentence get stored in STM.</p><div class="stm-state"><div class="stm"> <label>STM Before:</label> <span>&nbsp;</span></div><div class="stm"> <label>STM After:</label> <span>weather</span> <span>London</span> <span>today</span></div></div><dd><i class="fa fa-fw fa-angle-right"></i>And what about Berlin?<dt><p> User gets the current weather for Berlin. The only useful data in the user sentence is the name of the city <code>Berlin</code>. But since NLPCraft now has data from the previous question in its STM it can safely deduce that we are asking about <code>weather</code> for <code>today</code>. <code>Berlin</code> overrides <code>London</code> in STM.</p><div class="stm-state"><div class="stm"> <label>STM Before:</label> <span>weather</span> <span>London</span> <span>today</span></div><div class="stm"> <label>STM After:</label> <span>weather</span> <span><b>Berlin</b></span> <span>today</span></div></div><dd><i class="fa fa-fw fa-angle-right"></i>Next week forecast?<dt><p> User gets the next week’s forecast for Berlin. Again, the only useful data in the user sentence is <code>next week</code> and <code>forecast</code>. STM supplies <code>Berlin</code>. 
<code>Next week</code> overrides <code>today</code>, and <code>forecast</code> overrides <code>weather</code> in STM.</p><div class="stm-state"><div class="stm"> <label>STM Before:</label> <span>weather</span> <span>Berlin</span> <span>today</span></div><div class="stm"> <label>STM After:</label> <span><b>forecast</b></span> <span>Berlin</span> <span><b>Next week</b></span></div></div></dl><p> Note that STM is maintained per user and per data model. Conversation management implementation is also smart enough to clear STM after a certain period of time, i.e. it “forgets” the conversational context after a few minutes of inactivity. Note that conversational context can also be cleared explicitly via the <a href="https://nlpcraft.docs.apiary.io" target="apiary">REST API</a>.</p></section></div><div class="col-md-2 third-column"><ul class="side-nav"><li class="side-nav-title">On This Page<li><a href="#model">Data Model</a><li><a href="#ne">Named Entities</a><li><a href="#intent">Intent Matching</a><li><a href="#stm">Conversation <span class="amp">&amp;</span> STM</a><li class="side-nav-title">Quick Links<li> <i class="fab fa-fw fa-github mr-2"></i><a target="github" href="https://github.com/apache/incubator-nlpcraft/tree/master/src/main/scala/org/apache/nlpcraft/examples">Examples</a><li> <i class="fab fa-fw fa-java mr-2"></i><a target="_" href="/apis/latest/index.html">Javadoc</a><li> <i class="fas fa-fw fa-code mr-2"></i><a href="https://github.com/apache/incubator-nlpcraft/blob/master/openapi/nlpcraft_swagger.yml" target="github">REST API</a><li> <i class="fas fa-fw fa-download mr-2"></i><a href="/download.html">Download</a><li class="side-nav-title">Support<li> <nobr> <i class="fab fa-fw fa-jira mr-2"></i><a target="jira" href="https://issues.apache.org/jira/projects/NLPCRAFT/issues">JIRA</a> </nobr><li> <nobr> <i class="far fa-fw fa-envelope mr-2"></i><a href="http://mail-archives.apache.org/mod_mbox/nlpcraft-dev/">Dev List</a> </nobr><li> <nobr> <i class="fab fa-fw 
fa-stack-overflow mr-2"></i><a target="so" href="https://stackoverflow.com/questions/ask">Stack Overflow</a> </nobr><li> <nobr> <i class="fab fa-fw fa-github mr-2"></i><a target="github" href="https://github.com/apache/incubator-nlpcraft">GitHub</a> </nobr></ul></div></div></div></div><div id="footer"><div class="container"><div class="text-muted text-center"> <span>Copyright &copy; 2020 Apache Software Foundation</span> <span> <a target=_new href="https://apache.org"><img alt="asf" src="/images/asf_logo.png" height="24px"></a> </span> <a target="asf" href="http://apache.org/foundation/policies/privacy.html" class="btn btn-link ml-4">Privacy</a> <span class="sep"></span> <a href="/index.html#news" class="btn btn-link">News</a> <span class="sep"></span> <a href="/docs.html" class="btn btn-link">Docs</a> <span class="ml-4">release: <a href="/download.html"><code>0.5.0</code></a></span> <span class="ml-2"> <a target="jenkins" href="https://builds.apache.org/view/Incubator%20Projects/job/incubator-nlpcraft/"><img src="https://img.shields.io/jenkins/build?jobUrl=https%3A%2F%2Fbuilds.apache.org%2Fview%2FIncubator%2520Projects%2Fjob%2Fincubator-nlpcraft%2F"></a> <a target=_ href="https://gitter.im/apache-nlpcraft/community"><img alt="Gitter" src="https://badges.gitter.im/apache-nlpcraft/community.svg"></a> </span></div></div></div><script src="//code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script> <script src="//cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.0/umd/popper.min.js" integrity="sha384-cs/chFZiN24E4KMATLdqdvsezGxaGsi4hLGOzlXwp5UZB1LY//20VyM2taTB4QvJ" crossorigin="anonymous"></script> <script src="//stackpath.bootstrapcdn.com/bootstrap/4.1.0/js/bootstrap.min.js" integrity="sha384-uefMccjFJAIv6A+rW+L4AHf99KvxDjWSu1z9VI8SKNVmz4sk7buKt/6v9KI65qnm" crossorigin="anonymous"></script> <script src="//cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.4/lodash.min.js" 
type="text/javascript" ></script> <script src="//cdnjs.cloudflare.com/ajax/libs/moment.js/2.12.0/moment.min.js" type="text/javascript" ></script> <script src="//cdnjs.cloudflare.com/ajax/libs/moment-timezone/0.5.5/moment-timezone-with-data.min.js" type="text/javascript" ></script> <script type="text/javascript"> SyntaxHighlighter.defaults["auto-links"] = false; SyntaxHighlighter.defaults["tab-size"] = 2; SyntaxHighlighter.all(); </script>