// blob: b4d2f71dd630c54a6c4fc96cbbfd1aca69abc8a6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
import org.apache.nlpcraft.model.NCToken
import scala.util.Using
import java.util.{List => JList}
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.charset.StandardCharsets.UTF_8
import java.util
import java.util.{Base64, Optional}
import scala.compat.java8.OptionConverters._
import scala.jdk.CollectionConverters.ListHasAsScala
/**
  * Test infrastructure beans.
  */
sealed trait NCTestToken {
    /** Token text. Case-sensitive. */
    def text: String

    /** Token ID. */
    def id: String

    /** Stop-word flag, `false` unless an implementation overrides it. */
    def isStop: Boolean = false
}
// Simplified token data: only the fields needed for validation are included.
// Server enrichers.
/**
  * Test bean with ID `nlpcraft:nlp`.
  *
  * @param text Token text, must not be `null`.
  * @param isStop Stop-word flag, `false` by default.
  */
case class NCTestNlpToken(text: String, override val isStop: Boolean = false) extends NCTestToken {
    require(text ne null)

    override def toString: String = s"${text}(nlp)<isStop=${isStop}>"

    override def id: String = "nlpcraft:nlp"
}
// Verification of non-deterministic properties is skipped.
/**
  * Test bean with ID `nlpcraft:date`. Carries the token text only.
  *
  * @param text Token text, must not be `null`.
  */
case class NCTestDateToken(text: String) extends NCTestToken {
    require(text ne null)

    override def toString: String = s"${text}(date)"

    override def id: String = "nlpcraft:date"
}
/**
  * Test bean with ID `nlpcraft:coordinate`.
  *
  * @param text Token text, must not be `null`.
  * @param latitude Latitude value.
  * @param longitude Longitude value.
  */
case class NCTestCoordinateToken(text: String, latitude: Double, longitude: Double) extends NCTestToken {
    require(text != null)

    override def id: String = "nlpcraft:coordinate"

    // 'lat' must render the latitude field; it previously rendered the longitude twice.
    override def toString: String = s"$text(coordinate)<lon=$longitude, lat=$latitude>"
}
/**
  * Test bean with ID `nlpcraft:num`.
  *
  * @param text Token text, must not be `null`.
  * @param from Lower bound of the numeric range.
  * @param to Upper bound of the numeric range.
  * @param unit Optional unit, `None` by default.
  */
case class NCTestNumericToken(text: String, from: Double, to: Double, unit: Option[String] = None) extends NCTestToken {
    require(text ne null)

    override def id: String = "nlpcraft:num"

    override def toString: String = {
        val range = s"$text(num)<from=$from, to=$to>"

        // The unit suffix is appended only when a unit is defined.
        unit match {
            case Some(u) => s"$range($u)"
            case None => range
        }
    }
}
/**
  * Test bean with ID `nlpcraft:city`.
  *
  * @param text Token text, must not be `null`.
  * @param city City value, must not be `null`.
  */
case class NCTestCityToken(text: String, city: String) extends NCTestToken {
    require(text ne null)
    require(city ne null)

    // NOTE(review): square brackets are used here while sibling beans use angle brackets - confirm it is intended.
    override def toString: String = s"${text}(city)[city=${city}]"

    override def id: String = "nlpcraft:city"
}
/**
  * Test bean with ID `nlpcraft:country`.
  *
  * @param text Token text, must not be `null`.
  * @param country Country value, must not be `null`.
  */
case class NCTestCountryToken(text: String, country: String) extends NCTestToken {
    require(text ne null)
    require(country ne null)

    override def toString: String = s"${text}(country)<country=${country}>"

    override def id: String = "nlpcraft:country"
}
/**
  * Test bean with ID `nlpcraft:region`.
  *
  * @param text Token text, must not be `null`.
  * @param region Region value, must not be `null`.
  */
case class NCTestRegionToken(text: String, region: String) extends NCTestToken {
    require(text ne null)
    require(region ne null)

    override def toString: String = s"${text}(region)<region=${region}>"

    override def id: String = "nlpcraft:region"
}
/**
  * Test bean with ID `nlpcraft:continent`.
  *
  * @param text Token text, must not be `null`.
  * @param continent Continent value, must not be `null`.
  */
case class NCTestContinentToken(text: String, continent: String) extends NCTestToken {
    require(text ne null)
    require(continent ne null)

    override def toString: String = s"${text}(continent)<continent=${continent}>"

    override def id: String = "nlpcraft:continent"
}
/**
  * Test bean with ID `nlpcraft:subcontinent`.
  *
  * @param text Token text, must not be `null`.
  * @param subcontinent Subcontinent value, must not be `null`.
  */
case class NCTestSubcontinentToken(text: String, subcontinent: String) extends NCTestToken {
    require(text ne null)
    require(subcontinent ne null)

    override def toString: String = s"${text}(subcontinent)<subcontinent=${subcontinent}>"

    override def id: String = "nlpcraft:subcontinent"
}
/**
  * Test bean with ID `nlpcraft:metro`.
  *
  * @param text Token text, must not be `null`.
  * @param metro Metro value, must not be `null`.
  */
case class NCTestMetroToken(text: String, metro: String) extends NCTestToken {
    require(text ne null)
    require(metro ne null)

    override def toString: String = s"${text}(metro)<metro=${metro}>"

    override def id: String = "nlpcraft:metro"
}
// Probe enrichers.
/**
  * Test bean with ID `nlpcraft:sort`.
  *
  * At least one of the 'subj' or 'by' pairs must be given, and within each pair
  * notes and indexes must be either both empty or both non-empty.
  *
  * @param text Token text, must not be `null`.
  * @param subjNotes Subject notes, empty by default.
  * @param subjIndexes Subject indexes, empty by default.
  * @param byNotes 'By' notes, empty by default.
  * @param byIndexes 'By' indexes, empty by default.
  * @param asc Optional ascending flag, `None` by default.
  */
case class NCTestSortToken(
    text: String,
    subjNotes: Seq[String] = Seq.empty,
    subjIndexes: Seq[Int] = Seq.empty,
    byNotes: Seq[String] = Seq.empty,
    byIndexes: Seq[Int] = Seq.empty,
    asc: Option[Boolean] = None
) extends NCTestToken {
    require(text ne null)
    require(subjNotes ne null)
    require(subjIndexes ne null)
    require(byNotes ne null)
    require(byIndexes ne null)
    require(asc ne null)

    // At least one side (subject or 'by') has to be present.
    require(subjNotes.nonEmpty || byNotes.nonEmpty)
    require(subjIndexes.nonEmpty || byIndexes.nonEmpty)

    // Notes and indexes of the same side are either both empty or both non-empty.
    require(subjNotes.isEmpty == subjIndexes.isEmpty)
    require(byNotes.isEmpty == byIndexes.isEmpty)

    override def id: String = "nlpcraft:sort"

    override def toString: String = {
        val subjPart =
            if (subjNotes.nonEmpty)
                Some(s"subjNotes=[${subjNotes.mkString(",")}], subjIndexes=[${subjIndexes.mkString(",")}]")
            else
                None

        val byPart =
            if (byNotes.nonEmpty)
                Some(s"byNotes=[${byNotes.mkString(",")}], byIndexes=[${byIndexes.mkString(",")}]")
            else
                None

        val ascPart = asc.map(v => s"asc=$v")

        // The constructor checks guarantee that 'subjPart' or 'byPart' is defined,
        // so 'ascPart', when present, is always preceded by the separator.
        Seq(subjPart, byPart, ascPart).flatten.mkString(s"$text(sort)<", ", ", ">")
    }
}
/**
  * Selects which side of a sort token a single note/index pair describes.
  */
object NCTestSortTokenType extends Enumeration {
    type NCTestSortTokenType = Value

    /** The pair describes the subject side. */
    val SUBJ_ONLY: Value = Value

    /** The pair describes the 'by' side. */
    val BY_ONLY: Value = Value
}
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.NCTestSortTokenType._
/**
  * Convenience factories for sort test tokens.
  */
object NCTestSortToken {
    /** Both sides given as sequences, with explicit ascending flag. */
    def apply(
        text: String,
        subjNotes: Seq[String],
        subjIndexes: Seq[Int],
        byNotes: Seq[String],
        byIndexes: Seq[Int],
        asc: Boolean
    ): NCTestSortToken =
        new NCTestSortToken(text, subjNotes, subjIndexes, byNotes, byIndexes, Some(asc))

    /** Both sides given as single note/index pairs, without ascending flag. */
    def apply(
        text: String,
        subjNote: String,
        subjIndex: Int,
        byNote: String,
        byIndex: Int
    ): NCTestSortToken =
        new NCTestSortToken(text, Seq(subjNote), Seq(subjIndex), Seq(byNote), Seq(byIndex), None)

    /** One side only, selected by `typ`, without ascending flag. */
    def apply(
        text: String,
        typ: NCTestSortTokenType,
        note: String,
        index: Int
    ): NCTestSortToken =
        oneSided(text, typ, note, index, None)

    /** One side only, selected by `typ`, with explicit ascending flag. */
    def apply(
        text: String,
        typ: NCTestSortTokenType,
        note: String,
        index: Int,
        asc: Boolean
    ): NCTestSortToken =
        oneSided(text, typ, note, index, Some(asc))

    /** Both sides given as single note/index pairs, with explicit ascending flag. */
    def apply(
        text: String,
        subjNote: String,
        subjIndex: Int,
        byNote: String,
        byIndex: Int,
        asc: Boolean
    ): NCTestSortToken =
        new NCTestSortToken(text, Seq(subjNote), Seq(subjIndex), Seq(byNote), Seq(byIndex), Some(asc))

    // Shared implementation of both one-sided factories.
    private def oneSided(
        text: String,
        typ: NCTestSortTokenType,
        note: String,
        index: Int,
        ascOpt: Option[Boolean]
    ): NCTestSortToken =
        typ match {
            case SUBJ_ONLY => new NCTestSortToken(text, subjNotes = Seq(note), subjIndexes = Seq(index), asc = ascOpt)
            case BY_ONLY => new NCTestSortToken(text, byNotes = Seq(note), byIndexes = Seq(index), asc = ascOpt)
            case _ => throw new AssertionError(s"Unexpected type: $typ")
        }
}
/**
  * Test bean with ID `nlpcraft:relation`.
  *
  * @param text Token text, must not be `null`.
  * @param `type` Relation type, must not be `null`.
  * @param indexes Referenced indexes, must not be `null` or empty.
  * @param note Referenced note, must not be `null`.
  */
case class NCTestRelationToken(text: String, `type`: String, indexes: Seq[Int], note: String) extends NCTestToken {
    require(text ne null)
    require(`type` ne null)
    require(indexes ne null)
    require(indexes.nonEmpty)
    require(note ne null)

    override def id: String = "nlpcraft:relation"

    override def toString: String = {
        val idxs = indexes.mkString(",")

        s"$text(relation)<type=${`type`}, indexes=[$idxs], note=$note>"
    }
}
/**
  * Test bean with ID `nlpcraft:limit`.
  *
  * @param text Token text, must not be `null`.
  * @param limit Limit value.
  * @param indexes Referenced indexes, must not be `null` or empty.
  * @param note Referenced note, must not be `null`.
  * @param asc Optional ascending flag, must not be `null`.
  */
case class NCTestLimitToken(
    text: String,
    limit: Double,
    indexes: Seq[Int],
    note: String,
    asc: Option[Boolean]
) extends NCTestToken {
    require(text ne null)
    require(indexes ne null)
    require(indexes.nonEmpty)
    require(note ne null)
    require(asc ne null)

    override def id: String = "nlpcraft:limit"

    override def toString: String = {
        val idxs = indexes.mkString(",")

        // Rendered only when the ascending flag is defined.
        val ascPart = asc.map(v => s", asc=$v").getOrElse("")

        s"$text(limit)<limit=$limit, indexes=[$idxs], note=$note$ascPart>"
    }
}
/**
  * Convenience factories for limit test tokens.
  */
object NCTestLimitToken {
    /** Several indexes, with explicit ascending flag. */
    def apply(text: String, limit: Double, indexes: Seq[Int], note: String, asc: Boolean): NCTestLimitToken =
        new NCTestLimitToken(text, limit, indexes, note, Some(asc))

    /** Several indexes, without ascending flag. */
    def apply(text: String, limit: Double, indexes: Seq[Int], note: String): NCTestLimitToken =
        new NCTestLimitToken(text, limit, indexes, note, None)

    /** Single index, with explicit ascending flag. */
    def apply(text: String, limit: Double, index: Int, note: String, asc: Boolean): NCTestLimitToken =
        apply(text, limit, Seq(index), note, asc)

    /** Single index, without ascending flag. */
    def apply(text: String, limit: Double, index: Int, note: String): NCTestLimitToken =
        apply(text, limit, Seq(index), note)
}
/**
  * Test bean whose ID is supplied by the caller instead of being fixed.
  *
  * @param text Token text, must not be `null`.
  * @param id Token ID, must not be `null`.
  */
case class NCTestUserToken(text: String, id: String) extends NCTestToken {
    require(text ne null)
    require(id ne null)

    override def toString: String = s"${text}(user)<id=${id}>"
}
// Token and sentence beans and utilities.
/**
  * Factories for test token beans.
  */
object NCTestToken {
    /**
      * Builds the test bean that corresponds to the ID of the given model token.
      *
      * @param t Model token to convert.
      * @return Test bean filled from the token's original text and metadata.
      * @throws AssertionError If the ID is not one of the handled IDs and the token is not user defined.
      */
    def apply(t: NCToken): NCTestToken = {
        val txt = t.getOriginalText

        // Absent optional list metadata is converted to an empty sequence.
        def toSeq[T](opt: Optional[util.List[T]]): Seq[T] =
            opt.asScala.map(_.asScala.toSeq).getOrElse(Seq.empty)

        t.getId match {
            case "nlpcraft:nlp" => NCTestNlpToken(txt, t.isStopWord)

            case "nlpcraft:coordinate" =>
                NCTestCoordinateToken(
                    txt,
                    latitude = t.meta("nlpcraft:coordinate:latitude"),
                    longitude = t.meta("nlpcraft:coordinate:longitude")
                )

            case "nlpcraft:num" =>
                val unit: Optional[String] = t.metaOpt("nlpcraft:num:unit")

                NCTestNumericToken(
                    txt,
                    from = t.meta("nlpcraft:num:from"),
                    to = t.meta("nlpcraft:num:to"),
                    unit = unit.asScala
                )

            case "nlpcraft:date" => NCTestDateToken(txt)
            case "nlpcraft:city" => NCTestCityToken(txt, city = t.meta("nlpcraft:city:city"))
            case "nlpcraft:region" => NCTestRegionToken(txt, region = t.meta("nlpcraft:region:region"))
            case "nlpcraft:country" => NCTestCountryToken(txt, country = t.meta("nlpcraft:country:country"))
            case "nlpcraft:subcontinent" =>
                NCTestSubcontinentToken(txt, subcontinent = t.meta("nlpcraft:subcontinent:subcontinent"))
            case "nlpcraft:continent" => NCTestContinentToken(txt, continent = t.meta("nlpcraft:continent:continent"))
            case "nlpcraft:metro" => NCTestMetroToken(txt, metro = t.meta("nlpcraft:metro:metro"))

            case "nlpcraft:sort" =>
                // Explicit types drive the type parameter of 'metaOpt'.
                val subjNotes: Optional[JList[String]] = t.metaOpt("nlpcraft:sort:subjnotes")
                val subjIndexes: Optional[JList[Int]] = t.metaOpt("nlpcraft:sort:subjindexes")
                val byNotes: Optional[JList[String]] = t.metaOpt("nlpcraft:sort:bynotes")
                val byIndexes: Optional[JList[Int]] = t.metaOpt("nlpcraft:sort:byindexes")
                val asc: Optional[Boolean] = t.metaOpt("nlpcraft:sort:asc")

                NCTestSortToken(txt, toSeq(subjNotes), toSeq(subjIndexes), toSeq(byNotes), toSeq(byIndexes), asc.asScala)

            case "nlpcraft:relation" =>
                val indexes: JList[Int] = t.meta("nlpcraft:relation:indexes")

                NCTestRelationToken(
                    txt,
                    `type` = t.meta("nlpcraft:relation:type"),
                    indexes = indexes.asScala.toSeq,
                    note = t.meta("nlpcraft:relation:note")
                )

            case "nlpcraft:limit" =>
                val indexes: JList[Int] = t.meta("nlpcraft:limit:indexes")
                val asc: Optional[Boolean] = t.metaOpt("nlpcraft:limit:asc")

                NCTestLimitToken(
                    txt,
                    limit = t.meta("nlpcraft:limit:limit"),
                    indexes = indexes.asScala.toSeq,
                    note = t.meta("nlpcraft:limit:note"),
                    asc.asScala
                )

            // Any other ID is accepted only for user defined tokens.
            case userId if t.isUserDefined => NCTestUserToken(txt, userId)
            case other => throw new AssertionError(s"Unsupported token: $other")
        }
    }

    /**
      * Creates an `nlpcraft:nlp` test bean.
      *
      * @param text Token text.
      * @param isStop Stop-word flag.
      */
    def apply(text: String, isStop: Boolean): NCTestToken = NCTestNlpToken(text, isStop)
}
/**
  * Test sentence: an ordered sequence of test tokens.
  *
  * @param tokens Tokens of the sentence.
  */
case class NCTestSentence(tokens: Seq[NCTestToken]) {
    // Tokens are rendered with their own 'toString' and separated by '|'.
    override def toString: String = tokens.mkString("Sentence: ", "|", "")
}
/**
  * Conversion of test sentences to and from a Base64 string (Java serialization).
  */
object NCTestSentence {
    /**
      * Serializes the sentences with Java serialization and encodes the bytes as Base64.
      *
      * @param sens Sentences to serialize.
      * @return Base64 text of the serialized object.
      */
    def serialize(sens: Iterable[NCTestSentence]): String =
        Using.resource(new ByteArrayOutputStream()) { bytes =>
            Using.resource(new ObjectOutputStream(bytes)) { out =>
                out.writeObject(sens)
                // Flush before reading the buffer so that all object data is in 'bytes'.
                out.flush()

                Base64.getEncoder.encodeToString(bytes.toByteArray)
            }
        }

    /**
      * Restores sentences from a Base64 string of a Java-serialized object.
      *
      * @param s Base64 text to decode.
      * @return Deserialized sentences; the cast of the read object is unchecked.
      */
    def deserialize(s: String): Iterable[NCTestSentence] = {
        val raw = Base64.getDecoder.decode(s.getBytes(UTF_8))

        Using.resource(new ObjectInputStream(new ByteArrayInputStream(raw))) {
            _.readObject.asInstanceOf[Iterable[NCTestSentence]]
        }
    }
}