/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nlpcraft.common.makro

import org.apache.nlpcraft.common._
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test

import scala.compat.Platform._
import scala.util.control.Exception._

/**
 * Tests for macro parser.
 */
class NCMacroParserSpec {
    private val parser = NCMacroParser(
        "<A>" → "aaa",
        "<B>" → "<A> bbb",
        "<C>" → "<A> bbb {z|w}"
    )

    // Add macros for testing...
parser.addMacro("<OF>", "{of|for|per}")
parser.addMacro("<QTY>", "{number|tally|count|quantity|amount}")
parser.addMacro("<NUM>", "{overall|total|grand total|entire|complete|full|*} <QTY>")
parser.addMacro("<WEBSITE>", "{html|*} {site|website|web site|web-site|web property}")
parser.addMacro("<BY>", "{segmented|grouped|combined|arranged|organized|categorized|*} {for|by|over|along|over by}")
parser.addMacro("<RATE>", "{rate|percentage|speed|momentum|frequency}")
parser.addMacro("<AVG>", "{avg|average} <QTY>")
parser.addMacro("<ID>", "{unique|*} {id|guid|identifier|identification} {number|*}")
parser.addMacro("<USER>", "{{<WEBSITE>}|web|*} {user|visitor}")
parser.addMacro("<SES>", "{{<WEBSITE>}|web|*} {session|visit}")
parser.addMacro("<DCM>", "{double click|double-click|doubleclick|dcm} {manager|*}")
parser.addMacro("<PAGE>", "{{<WEBSITE>}|*} {web page|web-page|webpage|page} {path|*}")
parser.addMacro("<CTR>", "{ctr|click-through-rate|{click through|click-through} <RATE>}")
parser.addMacro("<URL>", "{{uniform|*} resource {locator|identifier}|{{<PAGE>}|*} {link|*} {uri|url|address}}")
parser.addMacro("<METRICS_A>", "{analytics|statistics|measurements|analysis|report|efficiency|performance}")
parser.addMacro("<METRICS_B>", "{metrics|data|info|information|facts}")
parser.addMacro("<METRICS>","{<METRICS_A>|<METRICS_B>|<METRICS_A> <METRICS_B>|<METRICS_B> <METRICS_A>}")
    private val ignoreNCE = ignoring(classOf[NCE])

    /**
     *
     * @param txt Text to find next token in.
     * @param tokHead Expected head value of the token.
     * @param tokTail Expected tail value of the token.
     */
    def testToken(txt: String, tokHead: String, tokTail: String): Unit = {
        val tok = parser.nextToken(txt)

        assertTrue(tok.get.head == tokHead)
        assertTrue(tok.get.tail == tokTail)
    }

    /**
     *
     * @param txt Text to expand.
     * @param exp Expected expansion strings.
     */
    def testParser(txt: String, exp: Seq[String]): Unit =
        assertTrue(parser.expand(txt).sorted == exp.sorted)

    /**
     *
     * @param txt Group text.
     * @param grps Sequence of group's elements.
     */
    def testGroup(txt: String, grps: Seq[String]): Unit = {
        val elms = parser.parseGroup(txt)

        assertTrue(grps == elms.map(_.head))
    }

    @Test
    def testPerformance() {
        val start = currentTime
        val N = 50000
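
        // Repeatedly expand a nested option group and report rough throughput in ops/second.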
        for (_ ← 0 to N)
            parser.expand("a {{{<C>}}|{*}} {c|d|e|f|g|h|j|k|l|n|m|p|r}")

        val duration = currentTime - start

        println(s"${N * 1000 / duration} ops/second.")
    }

    @Test
    def testExpand() {
        // Make sure we can parse these.
parser.expand("<OF>")
parser.expand("<QTY>")
parser.expand("<NUM>")
parser.expand("<WEBSITE>")
parser.expand("<BY>")
parser.expand("<RATE>")
parser.expand("<AVG>")
parser.expand("<ID>")
parser.expand("<USER>")
parser.expand("<SES>")
parser.expand("<DCM>")
parser.expand("<PAGE>")
parser.expand("<CTR>")
parser.expand("<URL>")
parser.expand("<METRICS_A>")
parser.expand("<METRICS_B>")
parser.expand("<METRICS>")
testParser("<A> {b|*} c", Seq(
"aaa b c",
"aaa c"
))
testParser("<B> {b|*} c", Seq(
"aaa bbb b c",
"aaa bbb c"
))
testParser("{tl;dr|j/k}", Seq(
"tl;dr",
"j/k"
))
testParser("a {b|*}. c", Seq(
"a b. c",
"a . c"
))
testParser("""a {/abc.\*/|\{\*\}} c""", Seq(
"a /abc.*/ c",
"a {*} c"
))
testParser("""{`a`|\`a\`}""", Seq(
"`a`",
"\\`a\\`"
))
testParser("""a {/abc.\{\}\*/|/d/} c""", Seq(
"a /abc.{}*/ c",
"a /d/ c"
))
testParser("a .{b, |*}. c", Seq(
"a .b, . c",
"a .. c"
))
testParser("a {{b|c}|*}.", Seq(
"a .",
"a b.",
"a c."
))
testParser("a {{{<C>}}|{*}} c", Seq(
"a aaa bbb z c",
"a aaa bbb w c",
"a c"
))
testParser("a {b|*}", Seq(
"a b",
"a"
))
testParser("a {b|*}d", Seq(
"a bd",
"a d"
))
testParser("a {b|*} d", Seq(
"a b d",
"a d"
))
testParser("a {b|*} d", Seq(
"a b d",
"a d"
))
testParser("{{{a}}} {b||*|{{*}}||*}", Seq(
"a b",
"a"
))
testParser("a {b}", Seq(
"a b"
))
testParser("a {b} {c|*}", Seq(
"a b",
"a b c"
))
testParser("a {{b|c}}", Seq(
"a b",
"a c"
))
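
        // Malformed inputs: expand() must throw NCE before `assertTrue(false)` is reached.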
ignoreNCE { testParser("a | b", Seq.empty); assertTrue(false) }
ignoreNCE { testParser("a *", Seq.empty); assertTrue(false) }
ignoreNCE { testParser("a}}", Seq.empty); assertTrue(false) }
ignoreNCE { testParser("a {a|b} *", Seq.empty); assertTrue(false) }
}
    @Test
    def testOptionGroup() {
        testGroup("{a {b|c} | d}", Seq("a {b|c} ", " d"))
        testGroup("{a|b}", Seq("a", "b"))
        testGroup("{a}", Seq("a"))
        testGroup("{{{a}}}", Seq("{{a}}"))
        testGroup("{{{a}}|{b}}", Seq("{{a}}", "{b}"))
        testGroup("{a {c}|b|*}", Seq("a {c}", "b", "*"))
        testGroup("""{/abc.\*/|\{\*\}}""", Seq("/abc.\\*/", "\\{\\*\\}"))
ignoreNCE { parser.parseGroup("a"); assertTrue(false) }
ignoreNCE { parser.parseGroup("{a"); assertTrue(false) }
ignoreNCE { parser.parseGroup("a}"); assertTrue(false) }
}
    @Test
    def testParseTokens() {
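        // nextToken() splits the input into the leading token (head) and the unparsed remainder (tail).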
testToken("""a \* b""", """a \* b""", "")
testToken("""a \\\* b""", """a \\\* b""", "")
testToken("""a \{\*\*\*\} b""", """a \{\*\*\*\} b""", "")
testToken("""a{b\|\*\}|c}""", "a", """{b\|\*\}|c}""")
testToken("""/\|\*\{\}/ a {bc|d}""", """/\|\*\{\}/ a """, """{bc|d}""")
testToken("{a} b", "{a}", " b")
testToken("{a|{c|d}}", "{a|{c|d}}", "")
testToken("{a {c|d} xxx {f|g}} b", "{a {c|d} xxx {f|g}}", " b")
testToken("c{a} b", "c", "{a} b")
testToken("{{{a}}}", "{{{a}}}", "")
        ignoreNCE { parser.nextToken("a } b"); assertTrue(false) }
        ignoreNCE { parser.nextToken("{c b"); assertTrue(false) }
        ignoreNCE { parser.nextToken("a | b"); assertTrue(false) }
        ignoreNCE { parser.nextToken("a |*"); assertTrue(false) }

        assertTrue(parser.nextToken("").isEmpty)
        assertTrue(parser.nextToken(" ").isDefined)
    }

    @Test
    def testLimit() {
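        // The combined expansion below produces more variants than the parser allows,
        // so expand() must throw NCE.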
        ignoreNCE {
            parser.expand("<METRICS> <USER> <BY> <WEBSITE> <BY> <SES> <BY> <METRICS> <BY> <USER> <BY> <METRICS>")
            assertTrue(false)
        }
    }
}