/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nlpcraft.server.nlp.enrichers.numeric
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp._
import org.apache.nlpcraft.common.nlp.numeric._
import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
import scala.collection.mutable
/**
* Numeric enricher.
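*
* For example, for the text "more than 25" this enricher adds a 'nlpcraft:num' note with
* 'from' = 25, 'fromIncl' = false, 'to' = Double.MaxValue and 'toIncl' = true
* (see the MORE case in 'enrich' below).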
*/
object NCNumericEnricher extends NCServerEnricher {
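// Sentinel values used to model open-ended ranges (reported via the
// 'isFromNegativeInfinity' and 'isToPositiveInfinity' note parameters below).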
val MAX_VALUE: Double = Double.MaxValue
val MIN_VALUE: Double = Double.MinValue
object T extends Enumeration {
type T = Value
val MORE: Value = Value
val MORE_OR_EQUAL: Value = Value
val LESS: Value = Value
val LESS_OR_EQUAL: Value = Value
val EQUAL: Value = Value
val NOT_EQUAL: Value = Value
val BETWEEN_INCLUSIVE: Value = Value
val BETWEEN_EXCLUSIVE: Value = Value
}
import T._
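// Preposition descriptor: its text, its length in words and the condition type it maps to.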
case class P(text: String, length: Int, prepositionType: T)
// Note that some sequences are duplicated (like '==' and '= =') to stay independent of the tokenizer.
// Note also that we account for frequent misspelling of 'then' vs. 'than'.
private val BEFORE_PREPS: Map[String, P] =
mkMap(Seq(
"more than",
"more then",
"more",
"greater than",
"greater then",
"greater",
"larger than",
"larger then",
"larger",
"bigger than",
"bigger then",
"bigger",
">"
), MORE) ++
mkMap(Seq(
"more or equal to",
"more or equal",
"more than or equal",
"more than or equal to",
"more then or equal",
"more then or equal to",
"greater or equal to",
"greater or equal",
"greater than or equal",
"greater than or equal to",
"greater then or equal",
"greater then or equal to",
"larger or equal to",
"larger or equal",
"larger than or equal",
"larger than or equal to",
"larger then or equal",
"larger then or equal to",
"bigger or equal to",
"bigger or equal",
"bigger than or equal",
"bigger than or equal to",
"bigger then or equal",
"bigger then or equal to",
"not less than",
"not less then",
"not less",
"no less than",
"no less then",
"no less",
">=",
"> ="
), MORE_OR_EQUAL) ++
mkMap(Seq(
"less than",
"less then",
"less",
"<"
), LESS) ++
mkMap(Seq(
"less or equal to",
"less than or equal to",
"less than or equal",
"less then or equal to",
"less then or equal",
"less or equal",
"smaller or equal to",
"smaller than or equal to",
"smaller than or equal",
"smaller then or equal to",
"smaller then or equal",
"smaller or equal",
"no more than",
"no more then",
"no more",
"no greater than",
"no greater then",
"no greater",
"not greater than",
"not greater then",
"not greater",
"<=",
"< ="
), LESS_OR_EQUAL) ++
mkMap(Seq(
"same as",
"equal to",
"equal",
"= =",
"==",
"="
), EQUAL) ++
mkMap(Seq(
"not",
"not equal to",
"not equal",
"not same as",
"!=",
"! =",
"<>",
"< >"
), NOT_EQUAL)
// Supported 'between' prepositions; each part consists of a single word only.
private val BETWEEN_PREPS: Map[(String, String), T] = Map(
("between", "and") -> BETWEEN_EXCLUSIVE,
("from", "to") -> BETWEEN_INCLUSIVE,
("since", "to") -> BETWEEN_INCLUSIVE,
("since", "till") -> BETWEEN_INCLUSIVE,
("from", "till") -> BETWEEN_INCLUSIVE
)
/**
*
* @param parent Optional parent span.
* @return Started service.
*/
override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ =>
ackStarting()
ackStarted()
}
/**
*
* @param parent Optional parent span.
*/
override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ =>
ackStopping()
ackStopped()
}
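// Maps each preposition string to its descriptor with the given condition type.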
private def mkMap(seq: Seq[String], c: T): Map[String, P] =
seq.map(s => s -> P(s, s.split(" ").length, c)).toMap
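// Joins the given tokens into a single string using either stems or normalized text.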
private def toString(seq: Seq[NCNlpSentenceToken], sep: String = " ", stem: Boolean = false) =
seq.map(t => if (stem) t.stem else t.normText).mkString(sep)
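// Creates 'nlpcraft:num' notes for the given tokens and range parameters and assigns them
// to the tokens. If unit data is present but does not cover all tokens, an additional note
// without unit information is created for the tokens outside of the unit span.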
private def mkNotes(
toks: Seq[NCNlpSentenceToken],
from: Double,
fromIncl: Boolean,
fromFractional: Boolean,
to: Double,
toIncl: Boolean,
toFractional: Boolean,
unitDataOpt: Option[NCNumericUnitData]
): Seq[NCNlpSentenceNote] = {
val params =
mutable.ArrayBuffer.empty[(String, Any)] ++
Seq(
"from" -> from,
"fromIncl" -> fromIncl,
"to" -> to,
"toIncl" -> toIncl,
"isFractional" -> (fromFractional || toFractional),
"isRangeCondition" -> (from != to),
"isEqualCondition" -> (from == to && fromIncl && toIncl),
"isNotEqualCondition" -> (from == to && !fromIncl && !toIncl),
"isFromNegativeInfinity" -> (from == MIN_VALUE),
"isToPositiveInfinity" -> (to == MAX_VALUE)
)
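// Creates a note of the given type with the given parameters and adds it to each token.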
def mkAndAssign(toks: Seq[NCNlpSentenceToken], typ: String, params: (String, Any)*): NCNlpSentenceNote = {
val note = NCNlpSentenceNote(toks.map(_.index), typ, params:_*)
toks.foreach(_.add(note))
note
}
unitDataOpt match {
case Some(unitData) =>
def extend(): Seq[(String, Any)] = {
params += "unit" -> unitData.unit.name
params += "unitType" -> unitData.unit.unitType
params
}
if (unitData.tokens == toks)
Seq(mkAndAssign(toks, "nlpcraft:num", extend():_*))
else {
Seq(
mkAndAssign(
toks.filter(t => !unitData.tokens.contains(t)), "nlpcraft:num", params.clone():_*
),
mkAndAssign(toks, "nlpcraft:num", extend():_*)
)
}
case None => Seq(mkAndAssign(toks, "nlpcraft:num", params:_*))
}
}
/**
*
* @param ns NLP sentence to enrich.
* @param parent Optional parent span.
* @throws NCE
*/
@throws[NCE]
override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
require(isStarted)
startScopedSpan("enrich", parent, "srvReqId" -> ns.srvReqId, "txt" -> ns.text) { _ =>
val nums = NCNumericManager.find(ns)
val toksStopFree = ns.filter(!_.isStopWord)
val processed = mutable.Buffer.empty[NCNlpSentenceToken]
// Adds complex 'condition' notes.
// Note that all parts of complex prepositions consist of single words only.
for (ps <- nums.sliding(2) if !processed.exists(p => ps.flatMap(_.tokens).contains(p)); p <- BETWEEN_PREPS) {
val num1 = ps.head
val num2 = ps.last
val ts1 = num1.tokens
val d1 = num1.value
val isF1 = num1.isFractional
val ts2 = num2.tokens
val d2 = num2.value
val isF2 = num2.isFractional
// Part of the sentence starting from the first part of the preposition.
val subLine = toString(toksStopFree.takeRight(toksStopFree.length - toksStopFree.indexOf(ts1.head) + 1))
// Line representing a complex condition (from num1 to num2).
val condTxt = Seq(p._1._1, toString(ts1), p._1._2, toString(ts2)).mkString(" ")
if (subLine.startsWith(condTxt)) {
def getBefore(ts: Seq[NCNlpSentenceToken]): NCNlpSentenceToken = toksStopFree(toksStopFree.indexOf(ts.head) - 1)
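// Preposition words (the tokens right before each numeric) together with both numerics' tokens.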
val prepToks = Seq(getBefore(ts1)) ++ ts1 ++ Seq(getBefore(ts2)) ++ ts2
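// The range is invalid if both numerics carry units but the units differ.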
val badRange =
num1.unitData.isDefined &&
num2.unitData.isDefined &&
num1.unitData.get.unit != num2.unitData.get.unit
if (!badRange) {
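// Unit data for the combined condition: taken from whichever numeric has it;
// if both have it, the units are equal (checked above) and the combined token span is used.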
val unit =
if (num1.unitData.isDefined && num2.unitData.isEmpty)
num1.unitData
else if (num1.unitData.isEmpty && num2.unitData.isDefined)
num2.unitData
else if (num1.unitData.isEmpty && num2.unitData.isEmpty)
None
else {
require(num1.unitData.get.unit == num2.unitData.get.unit)
Some(NCNumericUnitData(num1.unitData.get.unit, num1.tokens ++ num2.tokens))
}
val notes = p._2 match {
case BETWEEN_EXCLUSIVE =>
mkNotes(
prepToks,
d1,
fromIncl = false,
fromFractional = isF1,
to = d2,
toIncl = false,
toFractional = isF2,
unit
)
case BETWEEN_INCLUSIVE =>
mkNotes(
prepToks,
d1,
fromIncl = true,
fromFractional = isF1,
to = d2,
toIncl = true,
toFractional = isF2,
unit
)
case _ => throw new AssertionError(s"Illegal condition type: ${p._2}.")
}
processed ++= ts1
processed ++= ts2
}
}
}
// Special case: also process words which were previously marked as stop words.
// Example: the symbol '!' in the condition '! ='.
// Adds simple 'condition' notes.
for (num <- nums) {
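// Checks whether the tokens preceding the numeric end with a known preposition
// and, if so, adds the corresponding condition note.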
def process(candidates: Seq[NCNlpSentenceToken]): Unit =
if (!processed.exists(num.tokens.contains)) {
val strBuf = toString(candidates)
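// Pick the longest matching preposition (e.g. 'more or equal to' over 'equal to').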
val preps: Seq[(String, P)] = BEFORE_PREPS.filter(p => strBuf.endsWith(p._1)).toSeq.sortBy(-_._2.length)
if (preps.nonEmpty) {
val prep = preps.head._2
val toks = candidates.takeRight(prep.length) ++ num.tokens
processed ++= toks
val notes =
prep.prepositionType match {
case MORE =>
mkNotes(
toks,
num.value,
fromIncl = false,
fromFractional = num.isFractional,
to = MAX_VALUE,
toIncl = true,
toFractional = num.isFractional,
num.unitData
)
case MORE_OR_EQUAL =>
mkNotes(
toks,
num.value,
fromIncl = true,
fromFractional = num.isFractional,
to = MAX_VALUE,
toIncl = true,
toFractional = num.isFractional,
num.unitData
)
case LESS =>
mkNotes(
toks,
MIN_VALUE,
fromIncl = true,
fromFractional = num.isFractional,
to = num.value,
toIncl = false,
toFractional = num.isFractional,
num.unitData
)
case LESS_OR_EQUAL =>
mkNotes(
toks,
MIN_VALUE,
fromIncl = true,
fromFractional = num.isFractional,
to = num.value,
toIncl = true,
toFractional = num.isFractional,
num.unitData
)
case EQUAL =>
mkNotes(
toks,
num.value,
fromIncl = true,
fromFractional = num.isFractional,
to = num.value,
toIncl = true,
toFractional = num.isFractional,
num.unitData
)
case NOT_EQUAL =>
mkNotes(
toks,
num.value,
fromIncl = false,
fromFractional = num.isFractional,
to = num.value,
toIncl = false,
toFractional = num.isFractional,
num.unitData
)
case _ => throw new AssertionError(s"Illegal condition type: ${prep.prepositionType}.")
}
for (note <- notes)
toks.foreach(_.add(note))
}
}
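// Try the tokens preceding the numeric as-is first (this also covers symbols marked
// as stop words, e.g. '!'), then fall back to the stop-word-free prefix.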
val toks = ns.takeWhile(_ != num.tokens.head).toSeq
process(toks)
process(toks.filter(!_.isStopWord))
}
// Numerics without any condition preposition.
for (num <- nums if !processed.exists(num.tokens.contains)) {
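// Note creation and assignment to the tokens happens inside mkNotes.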
val notes = mkNotes(
num.tokens,
num.value,
fromIncl = true,
num.isFractional,
num.value,
toIncl = true,
num.isFractional,
num.unitData
)
processed ++= num.tokens
}
}
}
}