// blob: 106f6735e120ea99830537b4d4ae84c0b1a4d186 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nlpcraft.model.impl
import java.io.Serializable
import java.util.Collections
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.NCProbeModel
import scala.collection.JavaConverters._
import scala.collection.{Seq, mutable}
/**
  * [[NCToken]] implementation backed by the values supplied to the constructor.
  *
  * Two instances are equal when their server request ID, ID and start/end character
  * indexes are equal; the hash code is computed once from the same four values.
  *
  * @param mdl Model view returned by `getModel`. Must not be `null`.
  * @param srvReqId Server request ID. Must not be `null`.
  * @param id Token ID returned by `getId`. Must not be `null`.
  * @param grps Groups returned by `getGroups`. Must not be `null`.
  * @param parentId Parent ID returned by `getParentId`. Not null-checked here.
  * @param ancestors Ancestor IDs returned by `getAncestors`. Must not be `null`.
  * @param value Value returned by `getValue`. Not null-checked here.
  * @param startCharIndex Start character index returned by `getStartCharIndex`.
  * @param endCharIndex End character index returned by `getEndCharIndex`.
  * @param meta Initial metadata. Must not be `null`. It is copied into a mutable map
  *     the first time `getMetadata` is accessed.
  */
private[nlpcraft] class NCTokenImpl(
    mdl: NCModelView,
    srvReqId: String,
    id: String,
    grps: Seq[String],
    parentId: String,
    ancestors: Seq[String],
    value: String,
    startCharIndex: Int,
    endCharIndex: Int,
    meta: Map[String, Object]
) extends NCToken with Serializable {
    require(mdl != null)
    require(srvReqId != null)
    require(id != null)
    require(grps != null)
    require(ancestors != null)
    require(meta != null)

    // Pre-computed from the same fields that `equals` compares, keeping both in sync.
    private final val hash =
        Seq(srvReqId, id, startCharIndex, endCharIndex).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)

    // Part tokens are assigned after construction via `setParts`.
    private var parts = Seq.empty[NCToken]

    override lazy val getModel: NCModelView = mdl
    override lazy val getMetadata: java.util.Map[String, Object] = mutable.HashMap(meta.toSeq:_ *).asJava // We need mutable metadata.
    override lazy val getServerRequestId: String = srvReqId
    override lazy val getId: String = id
    override lazy val getGroups: java.util.List[String] = grps.asJava
    override lazy val getParentId: String = parentId
    override lazy val getAncestors: java.util.List[String] = ancestors.asJava
    override lazy val getValue: String = value
    override lazy val getStartCharIndex: Int = startCharIndex
    override lazy val getEndCharIndex: Int = endCharIndex
    override lazy val getAliases: java.util.List[String] = meta(TOK_META_ALIASES_KEY, Collections.emptyList())
    override def getPartTokens: java.util.List[NCToken] = parts.asJava

    /**
      * Replaces the part tokens returned by `getPartTokens`.
      *
      * @param parts New part tokens.
      */
    def setParts(parts: Seq[NCToken]): Unit = this.parts = parts

    override def equals(other: Any): Boolean = other match {
        case t: NCTokenImpl ⇒
            getServerRequestId == t.getServerRequestId &&
            getId == t.getId &&
            getStartCharIndex == t.getStartCharIndex &&
            getEndCharIndex == t.getEndCharIndex
        case _ ⇒ false
    }

    override def hashCode(): Int = hash

    // `grps` is guaranteed non-null by the constructor `require`, so no null guard is needed here.
    override def toString: String =
        s"Token [" +
            s"id=$id, " +
            s"text=${meta[String]("nlpcraft:nlp:normtext")}, " +
            s"groups=${grps.mkString(", ")}, " +
            s"parentId=$parentId, " +
            s"value=$value" +
        s"]"
}
private[nlpcraft] object NCTokenImpl {
    /**
      * Creates a token from the given NLP sentence token.
      *
      * Metadata of every note in `tok` is collected under keys of the form
      * `<lower-cased note type>:<key>`. If `tok` contains a user note, the token is built
      * from the matching model element (ID, groups, parent, ancestors, element metadata).
      * Otherwise a synthetic token is built that uses the note type as both ID and group.
      * In both cases the synthetic `nlpcraft:nlp:freeword` metadata entry is added.
      *
      * @param mdl Probe model supplying the model view and its elements.
      * @param srvReqId Server request ID.
      * @param tok NLP sentence token holding at most two notes, at most one of them a user note.
      * @return Newly created token.
      * @throws IllegalArgumentException If `tok` has more than two notes or more than one user
      *     note, or the user note's element is not found in the model.
      * @throws AssertionError If a parent ID in the element's ancestor chain is not found in the model.
      * @throws NoSuchElementException If there is no user note and the collected metadata has no
      *     `nlpcraft:nlp:stopword` entry.
      */
    def apply(mdl: NCProbeModel, srvReqId: String, tok: NCNlpSentenceToken): NCTokenImpl = {
        // nlpcraft:nlp and some optional (after collapsing).
        require(tok.size <= 2, s"Unexpected token [size=${tok.size}, token=$tok]")

        val md = mutable.HashMap.empty[String, java.io.Serializable]

        // Prefix each note's metadata keys with its lower-cased note type.
        tok.foreach(n ⇒ {
            val id = n.noteType.toLowerCase

            n.asMetadata().foreach { case (k, v) ⇒ md += s"$id:$k" → v}
        })

        val usrNotes = tok.filter(_.isUser)

        // No overlapping allowed at this point.
        require(usrNotes.size <= 1, s"Unexpected elements notes: $usrNotes")

        // Evaluated only after `md` is fully populated in the branches below.
        def convertMeta(): Map[String, AnyRef] = md.toMap.map(p ⇒ p._1 → p._2.asInstanceOf[AnyRef])

        usrNotes.headOption match {
            case Some(usrNote) ⇒
                // Single lookup; fails with the same exception type and message as a `require` would.
                val elm = mdl.elements.getOrElse(
                    usrNote.noteType,
                    throw new IllegalArgumentException(s"requirement failed: Element is not found: ${usrNote.noteType}")
                )

                // Walk up the parent chain, nearest parent first.
                val ancestors = mutable.ArrayBuffer.empty[String]
                var prntId = elm.getParentId

                while (prntId != null) {
                    ancestors += prntId

                    prntId = mdl.
                        elements.
                        getOrElse(prntId, throw new AssertionError(s"Element not found: $prntId")).
                        getParentId
                }

                // Special synthetic meta data element.
                md.put("nlpcraft:nlp:freeword", false)

                // Element metadata is added last, so it overrides note metadata on key clashes.
                elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k, v.asInstanceOf[java.io.Serializable]) }

                new NCTokenImpl(
                    mdl.model,
                    srvReqId = srvReqId,
                    id = elm.getId,
                    grps = elm.getGroups.asScala,
                    parentId = elm.getParentId,
                    ancestors = ancestors,
                    value = usrNote.dataOpt("value").orNull,
                    startCharIndex = tok.startCharIndex,
                    endCharIndex = tok.endCharIndex,
                    meta = convertMeta()
                )

            case None ⇒
                // `tok.size <= 2` was already verified at the top of this method.
                // Prefer a non-NLP note when one is present; otherwise fall back to the NLP note.
                val note = tok.toSeq.minBy(n ⇒ if (n.isNlp) 1 else 0)

                val isStop: Boolean = md("nlpcraft:nlp:stopword").asInstanceOf[Boolean]

                // Special synthetic meta data element.
                md.put("nlpcraft:nlp:freeword", !isStop && note.isNlp)

                new NCTokenImpl(
                    mdl.model,
                    srvReqId = srvReqId,
                    id = note.noteType, // Use NLP note type as synthetic element ID.
                    grps = Seq(note.noteType), // Use NLP note type as synthetic element group.
                    parentId = null,
                    ancestors = Seq.empty,
                    value = null,
                    startCharIndex = tok.startCharIndex,
                    endCharIndex = tok.endCharIndex,
                    meta = convertMeta()
                )
        }
    }
}