// blob: db41dd09be4398bd854bcb4a21d4c327f7d32a4f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nlpcraft.model.intent.impl
import java.util.function.Function
import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.ascii.NCAsciiTable
import org.apache.nlpcraft.common.debug.{NCLogGroupToken, NCLogHolder}
import org.apache.nlpcraft.common.opencensus.NCOpenCensusTrace
import org.apache.nlpcraft.model.intent.utils.{NCDslFlowItem, NCDslIntent, NCDslTerm}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.model.impl.NCTokenLogger
import org.apache.nlpcraft.probe.mgrs.dialogflow.NCDialogFlowManager
import collection.convert.ImplicitConversions._
import scala.collection.mutable
/**
* Intent solver that finds the best matching intent given user sentence.
*/
object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
    /**
     * Sortable 3-component match weight. Components are compared left to right,
     * i.e. component 0 is the most significant.
     *
     * NOTE: mutable and not thread-safe.
     */
    private class Weight extends Ordered[Weight] {
        private val weights: Array[Int] = new Array(3)

        /**
         * Creates a weight with all three components set.
         *
         * @param w0 Most significant weight component.
         * @param w1 Middle weight component.
         * @param w2 Least significant weight component.
         */
        def this(w0: Int, w1: Int, w2: Int) = {
            this()

            weights(0) = w0
            weights(1) = w1
            weights(2) = w2
        }

        /**
         * Sets specific weight at a given index.
         *
         * @param idx Component index (0..2).
         * @param w New weight value.
         */
        def setWeight(idx: Int, w: Int): Unit =
            weights(idx) = w

        /**
         * Adds given weight to this weight, component by component.
         *
         * @param that Weight to add.
         * @return This instance for call chaining.
         */
        def ++=(that: Weight): Weight = {
            for (i ← 0 until 3)
                this.setWeight(i, this.weights(i) + that.weights(i))

            this
        }

        /**
         * Lexicographic comparison — the first differing component decides.
         *
         * @param that Weight to compare with.
         * @return Negative, zero or positive value per `Ordered` contract.
         */
        override def compare(that: Weight): Int = {
            var res = 0

            for ((i1, i2) ← this.weights.zip(that.weights) if res == 0)
                res = Integer.compare(i1, i2)

            res
        }

        /**
         * Gets weight components as an immutable sequence.
         */
        def get: Seq[Int] = weights.toSeq

        override def toString: String = s"Weight (${weights.mkString(", ")})"
    }

    /**
     * Token with mutable usage flags.
     *
     * @param used Whether this token was already consumed by some term.
     * @param conv Whether this token came from the conversation (STM) rather than
     *     from the current sentence.
     * @param token Token itself.
     */
    private case class UsedToken(
        var used: Boolean,
        var conv: Boolean,
        token: NCToken
    )

    /**
     * Result of matching a single intent term.
     *
     * @param termId Term ID (can be `null` for unnamed terms).
     * @param usedTokens Tokens consumed by this term.
     * @param weight Term match weight.
     */
    private case class TermMatch(
        termId: String,
        usedTokens: List[UsedToken],
        weight: Weight
    ) {
        // Maximum sentence index among used tokens (drives the 'ordered' intent check).
        lazy val maxIndex: Int = usedTokens.maxBy(_.token.index).token.index
    }

    /**
     * Tokens grouped by the term that consumed them.
     *
     * @param termId Term ID (can be `null` for unnamed terms).
     * @param usedTokens Tokens consumed by the term.
     */
    private case class TermTokensGroup(
        termId: String,
        usedTokens: List[UsedToken]
    )

    /**
     * Single intent match descriptor.
     *
     * @param tokenGroups Per-term groups of consumed tokens.
     * @param weight Overall intent match weight.
     * @param intent Matched intent.
     * @param exactMatch Whether match is exact (no unused non-free-word sentence tokens).
     */
    private case class IntentMatch(
        tokenGroups: List[TermTokensGroup],
        weight: Weight,
        intent: NCDslIntent,
        exactMatch: Boolean
    )

    /**
     * Main entry point for intent engine.
     *
     * @param ctx Query context.
     * @param intents Set of intents to match for, each paired with its callback.
     * @param logHldr Log holder, can be `null`.
     * @return All matching intents sorted best to worst.
     */
    @throws[NCE]
    def solve(
        ctx: NCContext,
        intents: List[(NCDslIntent/*Intent*/, NCIntentMatch ⇒ NCResult)/*Callback*/],
        logHldr: NCLogHolder
    ): List[NCIntentSolverResult] = {
        case class MatchHolder(
            intentMatch: IntentMatch, // Match.
            callback: Function[NCIntentMatch, NCResult], // Callback function.
            variant: NCIntentSolverVariant, // Variant used for the match.
            variantIdx: Int // Variant index.
        )

        val req = ctx.getRequest

        startScopedSpan("solve",
            "srvReqId" → req.getServerRequestId,
            "userId" → req.getUser.getId,
            "mdlId" → ctx.getModel.getId,
            "normText" → req.getNormalizedText) { _ ⇒
            val matches = mutable.ArrayBuffer.empty[MatchHolder]

            // Find all matches across all intents and sentence variants.
            for ((vrn, vrnIdx) ← ctx.getVariants.zipWithIndex) {
                val availToks = vrn.filter(t ⇒ !t.isStopWord)

                matches.appendAll(
                    intents.flatMap(pair ⇒ {
                        val intent = pair._1
                        val callback = pair._2

                        // Isolated sentence tokens.
                        val senToks = Seq.empty[UsedToken] ++ availToks.map(UsedToken(false, false, _))
                        val senTokGroups = availToks.map(t ⇒ if (t.getGroups != null) t.getGroups.sorted else Seq.empty)

                        // Isolated conversation tokens.
                        val convToks =
                            if (intent.terms.exists(_.isConversational))
                                Seq.empty[UsedToken] ++
                                    // We shouldn't mix tokens with same group from conversation
                                    // history and processed sentence.
                                    ctx.getConversation.getTokens.
                                        filter(t ⇒ {
                                            val convTokGroups = t.getGroups.sorted

                                            !senTokGroups.exists(convTokGroups.containsSlice)
                                        }).
                                        map(UsedToken(used = false, conv = true, _))
                            else
                                Seq.empty[UsedToken]

                        // Solve intent in isolation.
                        solveIntent(ctx, intent, senToks, convToks, vrnIdx) match {
                            case Some(intentMatch) ⇒ Some(MatchHolder(intentMatch, callback, NCIntentSolverVariant(vrn), vrnIdx))
                            case None ⇒ None
                        }
                    })
                )
            }

            val sorted =
                matches.sortWith((m1: MatchHolder, m2: MatchHolder) ⇒
                    // 1. First with maximum weight.
                    m1.intentMatch.weight.compare(m2.intentMatch.weight) match {
                        case x1 if x1 < 0 ⇒ false
                        case x1 if x1 > 0 ⇒ true
                        case x1 ⇒
                            require(x1 == 0)

                            // 2. First with maximum variant.
                            m1.variant.compareTo(m2.variant) match {
                                case x2 if x2 < 0 ⇒ false
                                case x2 if x2 > 0 ⇒ true
                                case x2 ⇒
                                    require(x2 == 0)

                                    def calcHash(m: MatchHolder): Int =
                                        m.variant.tokens.map(t ⇒
                                            s"${t.getId}${t.getGroups}${t.getValue}${t.normText}"
                                        ).mkString("").hashCode

                                    // Order doesn't make sense here.
                                    // It is just to provide deterministic result for the matches with the same weight.
                                    calcHash(m1) > calcHash(m2)
                            }
                    }
                )

            if (sorted.nonEmpty) {
                val tbl = NCAsciiTable("Variant", "Intent", "Term Tokens")

                sorted.foreach(m ⇒ {
                    val im = m.intentMatch

                    if (m == sorted.head)
                        tbl += (
                            Seq(
                                s"#${m.variantIdx + 1}",
                                r("'best match'")
                            ),
                            Seq(
                                im.intent.id,
                                r("'best match'")
                            ),
                            mkPickTokens(im)
                        )
                    else
                        tbl += (
                            s"#${m.variantIdx + 1}",
                            im.intent.id,
                            mkPickTokens(im)
                        )

                    if (logHldr != null)
                        logHldr.addIntent(
                            im.intent.id,
                            im.exactMatch,
                            im.weight.get,
                            im.tokenGroups.map(g ⇒
                                (if (g.termId == null) "" else g.termId) →
                                    g.usedTokens.map(t ⇒ NCLogGroupToken(t.token, t.conv, t.used))
                            ).toMap
                        )
                })

                tbl.info(logger, Some(s"Found matching intents (sorted ${r("best")} to worst):"))
            }
            else
                logger.info("No matching intent found.")

            sorted.map(m ⇒
                NCIntentSolverResult(
                    m.intentMatch.intent.id,
                    m.callback,
                    m.intentMatch.tokenGroups.map(grp ⇒ NCIntentTokensGroup(grp.termId, grp.usedTokens.map(_.token))),
                    m.intentMatch.exactMatch,
                    m.variant,
                    m.variantIdx
                )
            ).toList
        }
    }

    /**
     * Renders the given intent match as printable lines for the ASCII log table.
     *
     * @param im Intent match to render.
     * @return Lines describing the intent, its terms and their consumed tokens.
     */
    private def mkPickTokens(im: IntentMatch): List[String] = {
        val buf = mutable.ListBuffer.empty[String]

        buf += im.intent.toString

        var grpIdx = 0

        for (grp ← im.tokenGroups) {
            val termId = if (grp.termId == null) s"#$grpIdx" else s"'${grp.termId}'"

            buf += s"  Term $termId"

            grpIdx += 1

            if (grp.usedTokens.nonEmpty) {
                var tokIdx = 0

                for (tok ← grp.usedTokens) {
                    val conv = if (tok.conv) "(conv) " else ""

                    buf += s"    #$tokIdx: $conv${tok.token}"

                    tokIdx += 1
                }
            }
            else
                buf += "    <empty>"
        }

        buf.toList
    }

    /**
     * Checks whether the dialog flow history matches the intent's flow template.
     * Each flow item must match between `min` and `max` consecutive history entries.
     *
     * @param flow Intent flow template items.
     * @param hist Dialog flow history (intent IDs).
     * @return `true` if history satisfies the flow template.
     */
    private[impl] def matchFlow(flow: Array[NCDslFlowItem], hist: Seq[String]): Boolean = {
        var flowIdx = 0
        var histIdx = 0
        var abort = false

        while (flowIdx < flow.length && !abort) {
            val item = flow(flowIdx)

            val intents = item.intents
            val min = item.min
            val max = item.max

            var i = 0

            // Check min first.
            while (i < min && histIdx < hist.length && !abort) {
                abort = !intents.contains(hist(histIdx))

                histIdx += 1
                i += 1
            }

            if (!abort && i < min)
                abort = true // Need at least min.

            if (!abort) {
                var ok = true

                // Grab up until max, if available.
                while (i < max && histIdx < hist.length && ok) {
                    ok = intents.contains(hist(histIdx))

                    if (ok)
                        histIdx += 1

                    i += 1
                }
            }

            flowIdx += 1
        }

        !abort
    }

    /**
     * Attempts to match a single intent against one sentence variant.
     *
     * @param ctx Query context.
     * @param intent Intent to solve.
     * @param senToks Tokens from the current sentence variant (minus stopwords).
     * @param convToks Tokens from the conversation (STM), if applicable.
     * @param varIdx Variant index (for logging only).
     * @return Intent match, or `None` if the intent didn't match.
     */
    private def solveIntent(
        ctx: NCContext,
        intent: NCDslIntent,
        senToks: Seq[UsedToken],
        convToks: Seq[UsedToken],
        varIdx: Int
    ): Option[IntentMatch] = {
        val intentId = intent.id
        val hist = NCDialogFlowManager.getDialogFlow(ctx.getRequest.getUser.getId, ctx.getModel.getId)
        val varStr = s"(variant #${varIdx + 1})"

        // Check dialog flow first.
        if (!intent.flow.isEmpty && !matchFlow(intent.flow, hist)) {
            logger.info(s"Intent '$intentId' didn't match because of dialog flow $varStr.")

            None
        }
        else {
            val intentW = new Weight()
            val intentGrps = mutable.ListBuffer.empty[TermTokensGroup]
            var abort = false
            val ordered = intent.ordered
            var lastTermMatch: TermMatch = null

            // Check terms.
            for (term ← intent.terms if !abort) {
                solveTerm(
                    term,
                    senToks,
                    if (term.isConversational) convToks else Seq.empty
                ) match {
                    case Some(termMatch) ⇒
                        if (ordered && lastTermMatch != null && lastTermMatch.maxIndex > termMatch.maxIndex)
                            abort = true
                        else {
                            // Term is found.
                            // Add its weight and grab its tokens.
                            intentW ++= termMatch.weight
                            intentGrps += TermTokensGroup(termMatch.termId, termMatch.usedTokens)

                            lastTermMatch = termMatch
                        }

                    case None ⇒
                        // Term is missing. Stop further processing for this intent.
                        // This intent cannot be matched.
                        logger.trace(s"Term '$term' is missing for intent '$intentId' (stopping further processing).")

                        abort = true
                }
            }

            if (abort) {
                logger.info(s"Intent '$intentId' didn't match because of unmatched term $varStr.")

                None
            }
            else if (senToks.exists(tok ⇒ !tok.used && tok.token.isUserDefined)) {
                logger.info(s"Intent '$intentId' didn't match because of remaining unused user tokens $varStr.")

                NCTokenLogger.prepareTable(senToks.filter(tok ⇒ !tok.used && tok.token.isUserDefined).map(_.token)).
                    info(
                        logger,
                        Some(s"Unused user tokens for intent '$intentId' $varStr:")
                    )

                None
            }
            else if (!senToks.exists(tok ⇒ tok.used && !tok.conv)) {
                logger.info(s"Intent '$intentId' didn't match because all its matched tokens came from STM $varStr.")

                None
            }
            else {
                // Exact match calculation DOES NOT include tokens from conversation, if any.
                val exactMatch = !senToks.exists(tok ⇒ !tok.used && !tok.token.isFreeWord)

                val mainWeight = {
                    // Best weight if the match is exact and conversation WAS NOT used.
                    if (exactMatch && convToks.isEmpty)
                        2
                    // Second best weight if the match is exact and conversation WAS used.
                    else if (exactMatch)
                        1
                    // Third best (i.e. worst) weight if match WAS NOT EXACT.
                    else
                        0
                }

                intentW.setWeight(0, mainWeight)

                Some(IntentMatch(
                    tokenGroups = intentGrps.toList,
                    weight = intentW,
                    intent = intent,
                    exactMatch = exactMatch
                ))
            }
        }
    }

    /**
     * Attempts to match a single term against available tokens.
     *
     * @param term Term to solve.
     * @param senToks Available sentence tokens.
     * @param convToks Available conversation tokens (empty for non-conversational terms).
     * @return Term match, or `None` if the term's predicate couldn't be satisfied.
     */
    @throws[NCE]
    private def solveTerm(
        term: NCDslTerm,
        senToks: Seq[UsedToken],
        convToks: Seq[UsedToken]
    ): Option[TermMatch] = {
        var termToks = List.empty[UsedToken]
        var termWeight = new Weight()

        solvePredicate(term.getPredicate, term.getMin, term.getMax, senToks, convToks) match {
            case Some(t) ⇒
                termToks = termToks ::: t._1
                termWeight ++= t._2

                Some(TermMatch(term.getId, termToks, termWeight))

            case None ⇒
                None
        }
    }

    /**
     * Collects tokens satisfying the term's predicate, honoring `min`/`max` quantifiers.
     * Collected tokens are marked as used.
     *
     * @param pred Token predicate.
     * @param min Minimum number of tokens the predicate requires.
     * @param max Maximum number of tokens the predicate allows.
     * @param senToks Available sentence tokens (preferred source).
     * @param convToks Available conversation tokens (used only to reach `max`).
     * @return Collected tokens with their weight, or `None` if fewer than `min` matched.
     */
    @throws[NCE]
    private def solvePredicate(
        pred: Function[NCToken, java.lang.Boolean],
        min: Int,
        max: Int,
        senToks: Seq[UsedToken],
        convToks: Seq[UsedToken]
    ): Option[(List[UsedToken], Weight)] = {
        // Algorithm is "hungry", i.e. it will fetch all tokens satisfying item's predicate
        // in entire sentence even if these tokens are separated by other already used tokens
        // and conversation will be used only to get to the 'max' number of the item.
        var combToks = List.empty[UsedToken]
        var predW = 0

        /**
         * Appends unused tokens satisfying the predicate until `maxLen` is reached.
         *
         * @param from Collection to collect tokens from.
         * @param maxLen Maximum number of tokens to collect.
         */
        def collect(from: Iterable[UsedToken], maxLen: Int): Unit =
            for (tok ← from.filter(!_.used) if combToks.lengthCompare(maxLen) < 0) {
                if (pred.apply(tok.token)) {
                    combToks :+= tok
                    predW += 1
                }
            }

        // Collect to the 'max', if possible.
        collect(senToks, max)
        collect(convToks, max)

        if (combToks.lengthCompare(min) < 0) // We couldn't collect even 'min' tokens.
            None
        else if (combToks.isEmpty) { // Item is optional and no tokens collected (valid result).
            require(min == 0)

            Some(combToks → new Weight())
        }
        else { // We've collected some tokens.
            // Youngest first.
            val convSrvReqIds = convToks.map(_.token.getServerRequestId).distinct

            // Specificity weight ('1' if conversation wasn't used, -'index of conversation depth' if wasn't).
            // (It is better to be not from conversation or be youngest tokens from conversation)
            val convW = -combToks.map(t ⇒ convSrvReqIds.indexOf(t.token.getServerRequestId)).sum

            combToks.foreach(_.used = true) // Mark tokens as used.

            Some(combToks → new Weight(0/* set later */, convW, predW))
        }
    }
}