WIP.
diff --git a/nlpcraft/src/main/resources/date/full.txt.gz b/nlpcraft/src/main/resources/date/full.txt.gz
index 37216a0..15d7718 100644
--- a/nlpcraft/src/main/resources/date/full.txt.gz
+++ b/nlpcraft/src/main/resources/date/full.txt.gz
Binary files differ
diff --git a/nlpcraft/src/main/resources/date/parts.txt.gz b/nlpcraft/src/main/resources/date/parts.txt.gz
index 4b7008a..9c90460 100644
--- a/nlpcraft/src/main/resources/date/parts.txt.gz
+++ b/nlpcraft/src/main/resources/date/parts.txt.gz
Binary files differ
diff --git a/nlpcraft/src/main/resources/date/parts_dmy.txt.gz b/nlpcraft/src/main/resources/date/parts_dmy.txt.gz
index 42d35d7..58b2792 100644
--- a/nlpcraft/src/main/resources/date/parts_dmy.txt.gz
+++ b/nlpcraft/src/main/resources/date/parts_dmy.txt.gz
Binary files differ
diff --git a/nlpcraft/src/main/resources/date/parts_mdy.txt.gz b/nlpcraft/src/main/resources/date/parts_mdy.txt.gz
index faeac93..834719e 100644
--- a/nlpcraft/src/main/resources/date/parts_mdy.txt.gz
+++ b/nlpcraft/src/main/resources/date/parts_mdy.txt.gz
Binary files differ
diff --git a/nlpcraft/src/main/resources/date/parts_ymd.txt.gz b/nlpcraft/src/main/resources/date/parts_ymd.txt.gz
index 22f38bf..da5d4ba 100644
--- a/nlpcraft/src/main/resources/date/parts_ymd.txt.gz
+++ b/nlpcraft/src/main/resources/date/parts_ymd.txt.gz
Binary files differ
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index d8c1900..d2c2b03 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -552,7 +552,7 @@
* @return
*/
private def readLcTrimFilter(in: BufferedSource): List[String] =
- in.getLines().map(_.toLowerCase.strip).filter(s => s.nonEmpty && !s.startsWith("#")).toList
+ in.getLines().map(_.toLowerCase.strip).filter(s => s.nonEmpty && s.head!= '#').toList
/**
* Reads lines from given file converting to lower case, trimming, and filtering
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
index a4e8e11..9d7a549 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
@@ -27,12 +27,9 @@
import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateFormatType._
import java.util
-import java.util.{Calendar => C}
-import java.util.{List => JList}
-
+import java.util.{Calendar => C, List => JList}
import scala.collection.immutable.Iterable
import scala.collection.mutable
-import scala.collection.mutable.{LinkedHashMap => LHM}
import scala.concurrent.ExecutionContext
import scala.jdk.CollectionConverters.ListHasAsScala
@@ -40,8 +37,6 @@
* Date enricher.
*/
object NCDateEnricher extends NCServerEnricher {
- private type LHM_SS = LHM[String, String]
-
private object Config extends NCConfigurable {
def style: NCDateFormatType = getObject("nlpcraft.server.datesFormatStyle", NCDateFormatType.withName)
}
@@ -55,8 +50,8 @@
private[date] val prepsBtwIncl = mkBetweenPrepositions(BETWEEN_INCLUSIVE)
private[date] val prepsBtwExcl = mkBetweenPrepositions(BETWEEN_EXCLUSIVE)
- @volatile private var cacheFull: LHM_SS = _
- @volatile private var cacheParts: LHM_SS = _
+ private val cacheFull = new util.HashMap[String, String]()
+ private val cacheParts = new util.HashMap[String, String]()
// Preposition data holder.
case class P(text: String) {
@@ -121,6 +116,10 @@
*/
override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ =>
ackStopping()
+
+ cacheFull.clear()
+ cacheParts.clear()
+
ackStopped()
}
@@ -132,18 +131,25 @@
override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { span =>
ackStarting()
- def read(res: String): LHM_SS = {
+ val sep = '|'.asInstanceOf[Int]
+
+ def read(dest: util.Map[String, String], res: String): Unit =
+ U.readTextGzipResource(res, "UTF-8", logger).foreach(p => {
+ val idx = p.indexOf(sep)
+
+ // Data already trimmed.
+ dest.put(p.take(idx), p.drop(idx + 1))
+ })
+
+ def readCommon(dest: util.Map[String, String], res: String): Unit = {
startScopedSpan("read", span, "res" -> res) { _ =>
- val m: LHM_SS = new LHM_SS()
-
- val map = U.readTextGzipResource(res, "UTF-8", logger).map(p => {
- val idx = p.indexOf("|")
- p.take(idx).strip -> p.drop(idx + 1).trim
- })
-
- m ++= map
-
- m
+ val m = new util.HashMap[String, String]()
+
+ read(m, res)
+
+ dest.synchronized {
+ dest.putAll(m)
+ }
}
}
@@ -155,17 +161,12 @@
case _ => throw new AssertionError(s"Unexpected format type: ${Config.style}")
}
- var p1: LHM_SS = null
- var p2: LHM_SS = null
-
U.executeParallel(
- () => cacheFull = read("date/full.txt.gz"),
- () => p1 = read("date/parts.txt.gz"),
- () => p2 = read(s"date/$file")
+ () => read(cacheFull, "date/full.txt.gz"),
+ () => readCommon(cacheParts, "date/parts.txt.gz"),
+ () => readCommon(cacheParts, s"date/$file")
)
- cacheParts = p1 ++ p2
-
ackStarted()
}
@@ -342,12 +343,13 @@
}
cacheFull.get(s) match {
- case Some(body) => add(body, isFull = true)
- case None =>
+ case null =>
cacheParts.get(s) match {
- case Some(body) => add(body, isFull = false)
- case None => // No-op.
+ case null => // No-op.
+ case body => add(body, isFull = false)
}
+
+ case body => add(body, isFull = true)
}
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/tools/NCDateGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/tools/NCDateGenerator.scala
index 646267c..6fbbff7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/tools/NCDateGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/tools/NCDateGenerator.scala
@@ -17,17 +17,16 @@
package org.apache.nlpcraft.server.nlp.enrichers.date.tools
-import java.text.{DateFormat, SimpleDateFormat}
-import java.util.{Date, Locale, Calendar => C}
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp.numeric.NCNumericGenerator
import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateConstants._
import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateFormatType._
+import org.apache.nlpcraft.server.nlp.enrichers.date.tools.NCDateGenerator._
-import scala.collection.mutable.{LinkedHashMap => LHM}
-import NCDateGenerator._
-
+import java.text.{DateFormat, SimpleDateFormat}
+import java.util.{Date, Locale, Calendar => C}
import scala.collection.mutable
+import scala.collection.mutable.{LinkedHashMap => LHM}
/**
* Pre-built date ranges generator.
@@ -106,6 +105,7 @@
private val NUM_MONTH_MAP = zipIndexes(CAL_MONTHS)
private val MMMM_MONTH_SEQ = CAL_MONTHS.map(month)
private val YEARS_SEQ = for (i <- 1900 to C.getInstance().get(C.YEAR) + 5) yield i
+ private val YEARS_SEQ_EXT = for (i <- 1500 to C.getInstance().get(C.YEAR) + 5) yield i
private val MMMM_MONTH_MAP = zipIndexes(MMMM_MONTH_SEQ)
// USA week.
@@ -403,7 +403,7 @@
}
private[date] def years(df: LHM_SS): Unit =
- for (y <- YEARS_SEQ)
+ for (y <- YEARS_SEQ_EXT)
mkYears(y).foreach(s => df += s"$s" -> s"${y}y")
private[date] def months(df: LHM_SS, fmts: Seq[SimpleDateFormat]): Unit = {
@@ -463,10 +463,10 @@
}
// Between.
- for ((from, to) <- BETWEEN_INCLUSIVE; y1 <- YEARS_SEQ; y2 <- YEARS_SEQ if y2 > y1)
+ for ((from, to) <- BETWEEN_INCLUSIVE; y1 <- YEARS_SEQ_EXT; y2 <- YEARS_SEQ_EXT if y2 > y1)
addRange(from, to, y1, y2, s"${y1}y:${y2}y")
- for ((from, to) <- BETWEEN_EXCLUSIVE; y1 <- YEARS_SEQ; y2 <- YEARS_SEQ if y2 > y1)
+ for ((from, to) <- BETWEEN_EXCLUSIVE; y1 <- YEARS_SEQ_EXT; y2 <- YEARS_SEQ_EXT if y2 > y1)
addRange(from, to, y1, y2, s"${y1}y:${y2-1}y")
def add(word: String, y: Int, templ: String): Unit = {
@@ -478,10 +478,10 @@
}
// From.
- for (f <- FROM; y <- YEARS_SEQ) add(f, y, toNow(s"${y}y"))
+ for (f <- FROM; y <- YEARS_SEQ_EXT) add(f, y, toNow(s"${y}y"))
// Till.
- for (t <- TO; y <- YEARS_SEQ) add(t, y, to(s"${y}y"))
+ for (t <- TO; y <- YEARS_SEQ_EXT) add(t, y, to(s"${y}y"))
}
private[date] def simpleQuarters(df: LHM_SS): Unit = {
@@ -856,7 +856,7 @@
object DLDateGeneratorRunner extends App {
private def mkPath(path: String): String = U.mkPath(s"nlpcraft/src/main/resources/date/$path")
- private def convert(entry: (String, String)): String = s"${entry._1} | ${entry._2}"
+ private def convert(entry: (String, String)): String = s"${entry._1.strip}|${entry._2.strip}"
private def process(): Unit = {
val fileFull = mkPath("full.txt")
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/date/NCEnricherDateSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/date/NCEnricherDateSpec.scala
index 429b24c..30f9a65 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/date/NCEnricherDateSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/date/NCEnricherDateSpec.scala
@@ -38,7 +38,8 @@
"1900 year",
"from 1900 year",
"between 1900 and 1905",
- "between 1900 and 1905 years"
+ "between 1501 and 1905 years",
+ "after 1501 year"
).map(txt => {
val f: Unit => Unit = _ => checkExists(txt, dte(text = txt))