// blob: 53719cd01933cfd3cf0dbc9c3c7b9f31ab7a6aa4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nlpcraft.server.nlp.core.stanford
import edu.stanford.nlp.ling.CoreAnnotations.NormalizedNamedEntityTagAnnotation
import io.opencensus.trace.Span
import org.apache.nlpcraft.common.NCService
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote}
import org.apache.nlpcraft.server.ignite.NCIgniteInstance
import org.apache.nlpcraft.server.nlp.core.NCNlpNerEnricher
import scala.collection.JavaConverters._
/**
* Stanford NLP NER enricher.
*/
object NCStanfordNerEnricher extends NCService with NCNlpNerEnricher with NCIgniteInstance {
    /**
      * Starts this enricher, first starting `NCStanfordCoreManager` if it is not started yet.
      *
      * @param parent Optional parent span.
      * @return Result of `ackStart()`.
      */
    override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { span ⇒
        // Should be started even if another NLP engine configured.
        if (!NCStanfordCoreManager.isStarted)
            NCStanfordCoreManager.start(span)

        ackStart()
    }

    /**
      * Stops this enricher, first stopping `NCStanfordCoreManager` if it is currently started.
      *
      * @param parent Optional parent span.
      */
    override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { span ⇒
        if (NCStanfordCoreManager.isStarted)
            NCStanfordCoreManager.stop(span)

        ackStop()
    }

    /**
      * Annotates the sentence text with `NCStanfordCoreManager` and, for every entity mention whose
      * lower-cased entity type is contained in `ebiTokens`, adds a `stanford:<type>` note to all
      * sentence tokens that lie within the mention's character range.
      *
      * A mention is skipped when no token starts exactly at the mention's start offset or no token
      * ends exactly at the mention's end offset.
      *
      * The note carries up to two optional parameters:
      *  - `nne` - normalized named entity tag, when the mention has one;
      *  - `confidence` - entity type confidence, when exactly one confidence value is reported.
      *
      * @param ns Sentence to enrich. Its tokens are mutated by adding notes to them.
      * @param ebiTokens Set of enabled built-in token IDs.
      * @param parent Optional parent span.
      */
    override def enrich(ns: NCNlpSentence, ebiTokens: Set[String], parent: Span = null): Unit =
        startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
            NCStanfordCoreManager.annotate(ns.text).entityMentions().asScala.foreach { e ⇒
                // Locale-independent lower-casing: with the default locale (e.g. Turkish) 'I' maps to
                // dotless 'ı' and types such as `LOCATION` would never match the enabled token IDs.
                val typ = e.entityType().toLowerCase(java.util.Locale.ROOT)

                if (ebiTokens.contains(typ)) {
                    val offsets = e.charOffsets()

                    // Both boundary tokens must exist, otherwise the mention is ignored.
                    for {
                        firstTok ← ns.find(_.startCharIndex == offsets.first)
                        lastTok ← ns.find(_.endCharIndex == offsets.second)
                    } {
                        // Java API value: `null` means there is no normalized tag for this mention.
                        val nneParam: Option[(String, Any)] =
                            Option(e.coreMap().get(classOf[NormalizedNamedEntityTagAnnotation])).
                                map(v ⇒ "nne" → v)

                        // Key ignored because it can be category with higher level (`location` for type `country`).
                        // NOTE(review): wrapped in `Option` so that a `null` map returned by the Java API
                        // (presumably possible when no confidences are attached - confirm) is skipped
                        // instead of failing with NPE.
                        val confParam: Option[(String, Any)] =
                            Option(e.entityTypeConfidences()).
                                filter(_.size() == 1).
                                flatMap(_.asScala.values.headOption).
                                map(v ⇒ "confidence" → v)

                        val params: Seq[(String, Any)] = nneParam.toSeq ++ confParam.toSeq

                        val i1 = firstTok.startCharIndex
                        val i2 = lastTok.endCharIndex

                        // All tokens fully covered by the mention share the same note instance.
                        val toks = ns.filter(t ⇒ t.startCharIndex >= i1 && t.endCharIndex <= i2)
                        val note = NCNlpSentenceNote(toks.map(_.index), s"stanford:$typ", params: _*)

                        toks.foreach(_.add(note))
                    }
                }
            }
        }
}