// blob: 497f8aa1134dbceadd6721c376c2287456dbb857 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nlpcraft.common.makro
import org.apache.nlpcraft.common._
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test
import scala.compat.Platform._
import scala.util.control.Exception._
/**
 * Tests for the macro text parser.
 *
 * A single parser instance is shared by all tests. It is seeded with a few base
 * macros at construction time and then extended with a larger, nested macro set
 * through `addMacro`.
 */
class NCMacroParserSpec {
    // Base macros supplied at construction as `name -> body` pairs.
    private val parser = NCMacroParser(
        "<A>" -> "aaa",
        "<B>" -> "<A> bbb",
        "<C>" -> "<A> bbb {z|w}"
    )

    // Add macros for testing...
    parser.addMacro("<OF>", "{of|for|per}")
    parser.addMacro("<QTY>", "{number|tally|count|quantity|amount}")
    parser.addMacro("<NUM>", "{overall|total|grand total|entire|complete|full|_} <QTY>")
    parser.addMacro("<WEBSITE>", "{html|_} {site|website|web site|web-site|web property}")
    parser.addMacro("<BY>", "{segmented|grouped|combined|arranged|organized|categorized|_} {for|by|over|along|over by}")
    parser.addMacro("<RATE>", "{rate|percentage|speed|momentum|frequency}")
    parser.addMacro("<AVG>", "{avg|average} <QTY>")
    parser.addMacro("<ID>", "{unique|_} {id|guid|identifier|identification} {number|_}")
    parser.addMacro("<USER>", "{{<WEBSITE>}|web|_} {user|visitor}")
    parser.addMacro("<SES>", "{{<WEBSITE>}|web|_} {session|visit}")
    parser.addMacro("<DCM>", "{double click|double-click|doubleclick|dcm} {manager|_}")
    parser.addMacro("<PAGE>", "{{<WEBSITE>}|_} {web page|web-page|webpage|page} {path|_}")
    parser.addMacro("<CTR>", "{ctr|click-through-rate|{click through|click-through} <RATE>}")
    parser.addMacro("<URL>", "{{uniform|_} resource {locator|identifier}|{{<PAGE>}|_} {link|_} {uri|url|address}}")
    parser.addMacro("<METRICS_A>", "{analytics|statistics|measurements|analysis|report|efficiency|performance}")
    parser.addMacro("<METRICS_B>", "{metrics|data|info|information|facts}")
    parser.addMacro("<METRICS>","{<METRICS_A>|<METRICS_B>|<METRICS_A> <METRICS_B>|<METRICS_B> <METRICS_A>}")

    /**
     * Asserts that expanding the given text yields exactly the expected strings.
     * Both sides are sorted before comparison, so the order of expansions is irrelevant.
     *
     * @param txt Text to expand.
     * @param exp Expected expansion strings.
     */
    def checkEq(txt: String, exp: Seq[String]): Unit = {
        val actual = parser.expand(txt).sorted
        val expected = exp.sorted

        // Carry the mismatch in the assertion message so it shows up in the test report.
        assertTrue(actual == expected, s"Expansion of '$txt': $actual != $expected")
    }

    /**
     * Manual throughput check: expands a fixed nested-macro expression repeatedly and
     * prints the resulting expansions-per-second figure. Not run as part of the suite.
     */
    // @Test
    def testPerformance(): Unit = {
        val n = 50000
        val start = System.currentTimeMillis()

        // Exactly 'n' iterations, matching the count used in the rate calculation below.
        for (_ <- 0 until n)
            parser.expand("a {{{<C>}}} {c|d|e|f|g|h|j|k|l|n|m|p|r}")

        // Clamp to 1 ms to avoid division by zero on a very fast run.
        val duration = math.max(System.currentTimeMillis() - start, 1L)

        println(s"${n * 1000L / duration} expansions/sec.")
    }

    /**
     * Asserts that expanding the given text fails with an `NCE`.
     * The error message of the caught exception is printed for inspection.
     *
     * @param txt Text whose expansion is expected to be rejected by the parser.
     */
    private def checkError(txt: String): Unit = {
        val failed =
            try {
                parser.expand(txt)

                false
            }
            catch {
                case e: NCE =>
                    println(e.getMessage)

                    true
            }

        // JUnit assertion (rather than Scala's elidable 'assert') so the check cannot be compiled out.
        assertTrue(failed, s"Expected NCE when expanding: $txt")
    }

    /**
     * Verifies that all registered macros can be expanded, that a range of option
     * groups, escapes, quantifiers and opaque `//...//` and `^^...^^` fragments expand
     * to the expected strings, and that malformed input is rejected.
     */
    @Test
    def testExpand(): Unit = {
        // Make sure we can parse these.
        parser.expand("<OF>")
        parser.expand("<QTY>")
        parser.expand("<NUM>")
        parser.expand("<WEBSITE>")
        parser.expand("<BY>")
        parser.expand("<RATE>")
        parser.expand("<AVG>")
        parser.expand("<ID>")
        parser.expand("<USER>")
        parser.expand("<SES>")
        parser.expand("<DCM>")
        parser.expand("<PAGE>")
        parser.expand("<CTR>")
        parser.expand("<URL>")
        parser.expand("<METRICS_A>")
        parser.expand("<METRICS_B>")
        parser.expand("<METRICS>")

        checkEq("<A> {b|_} c", Seq("aaa b c", "aaa c"))
        checkEq("<B> {b|_} c", Seq("aaa bbb b c", "aaa bbb c"))
        checkEq("{tl;dr|j/k}", Seq("tl;dr", "j/k"))
        checkEq("a {b|_}. c", Seq("a b . c", "a . c"))
        checkEq("""a {/abc.*/|\{\_\}} c""", Seq("a /abc.*/ c", "a {_} c"))
        checkEq("""{`a`|\`a\`}""", Seq("`a`", """\`a\`"""))
        checkEq("""a {/abc.\{\}*/|/d/} c""", Seq("a /abc.{}*/ c", "a /d/ c"))
        checkEq("""a .{b\, |_}. c""", Seq("a . b, . c", "a . . c"))
        checkEq("a {{b|c}|_}.", Seq("a .", "a b .", "a c ."))
        checkEq("a {{{<C>}}|_} c", Seq("a aaa bbb z c", "a aaa bbb w c", "a c"))
        checkEq("a {b|_}", Seq("a b", "a"))
        checkEq("a {b|_}d", Seq("a b d", "a d"))
        checkEq("a {b|_} d", Seq("a b d", "a d"))
        checkEq("a {b|_} d", Seq("a b d", "a d"))
        checkEq("a {b}", Seq("a b"))
        checkEq("a {b} {c|_}", Seq("a b", "a b c"))
        checkEq("a {{b|c}}", Seq("a b", "a c"))
        checkEq("a {b|_|{g\\}}[1,2]}", Seq("a", "a b", "a g}", "a g} g}"))
        checkEq("a {b|_|{//[]{}//}[1,2]}", Seq("a", "a b", "a //[]{}//", "a //[]{}// //[]{}//"))
        checkEq("a {b|_|{//[]^^// ^^{_}^^}[1,2]}", Seq("a", "a b", "a //[]^^// ^^{_}^^", "a //[]^^// ^^{_}^^ //[]^^// ^^{_}^^"))
        checkEq("//[a-zA-Z0-9]+//", Seq("//[a-zA-Z0-9]+//"))
        checkEq("the ^^[internal](id == 'anyWord')^^", Seq("the ^^[internal](id == 'anyWord')^^"))
        checkEq("{A}[0,1] ^^[internal](id == 'anyWord')^^", Seq("^^[internal](id == 'anyWord')^^", "A ^^[internal](id == 'anyWord')^^"))
        checkEq("w1 ^^id == 'nlpcraft:num'^^ w2", Seq("w1 ^^id == 'nlpcraft:num'^^ w2"))
        checkEq("before limit ^^[limitAlias](id == 'nlpcraft:limit')^^", Seq("before limit ^^[limitAlias](id == 'nlpcraft:limit')^^"))
        checkEq("wrap ^^[wrapLimitAlias](id == 'wrapLimit')^^", Seq("wrap ^^[wrapLimitAlias](id == 'wrapLimit')^^"))

        // Malformed option groups must be rejected.
        checkError("a {| b")
        checkError("{a}}")
    }

    /**
     * Verifies that a long chain of heavily branching macros is rejected with an error.
     * NOTE(review): presumably this trips an expansion-size limit inside the parser -
     * confirm against the NCMacroParser implementation.
     */
    @Test
    def testLimit(): Unit = {
        checkError("<METRICS> <USER> <BY> <WEBSITE> <BY> <SES> <BY> <METRICS> <BY> <USER> <BY> <METRICS>")
    }
}