blob: 9d4deffe071782e4056c56f095137578e1fd58eb [file] [log] [blame]
/*
* Licensed to the Apache Software FoSWation (ASF) SWer one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you SWer the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed SWer the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* SWer the License.
*/
package org.apache.uima.ruta.rule;
import org.apache.uima.cas.CAS;
import org.apache.uima.ruta.engine.Ruta;
import org.apache.uima.ruta.engine.RutaTestUtils;
import org.junit.Test;
public class LiteralStringMatchTest {
@Test
public void test() {
CAS cas = RutaTestUtils.processTestScript(this.getClass());
RutaTestUtils.assertAnnotationsEquals(cas, 1, 4, "CW", "CW", "CW", "CW");
RutaTestUtils.assertAnnotationsEquals(cas, 2, 1, "SW");
RutaTestUtils.assertAnnotationsEquals(cas, 3, 0);
RutaTestUtils.assertAnnotationsEquals(cas, 6, 2, "CW COMMA CW COMMA", "CW COMMA");
RutaTestUtils.assertAnnotationsEquals(cas, 7, 5, "CW", "CW", "CW", "SW", "CW");
RutaTestUtils.assertAnnotationsEquals(cas, 8, 5, "CW", "CW", "CW SW CW", "SW CW", "CW");
RutaTestUtils.assertAnnotationsEquals(cas, 9, 2, "CW COMMA CW COMMA CW", "CW COMMA CW");
RutaTestUtils.assertAnnotationsEquals(cas, 10, 3, "CW COMMA CW COMMA CW SW", "CW COMMA CW SW",
"CW SW");
RutaTestUtils.assertAnnotationsEquals(cas, 11, 3, "CW COMMA CW COMMA CW SW CW PERIOD",
"CW COMMA CW SW CW PERIOD", "CW SW CW PERIOD");
RutaTestUtils.assertAnnotationsEquals(cas, 13, 6, "CW COMMA", "CW COMMA", "CW COMMA", "CW SW",
"CW SW", "CW SW");
RutaTestUtils.assertAnnotationsEquals(cas, 14, 3, "CW COMMA CW COMMA CW SW", "CW COMMA CW SW",
"CW SW");
cas.release();
}
@Test
public void testLiteralMatchingInInvisible() throws Exception {
String document = "<W a >";
String script = "";
script += "\"W\"{-> T1};";
CAS cas = RutaTestUtils.getCAS(document);
Ruta.apply(cas, script);
RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);
}
@Test
public void testExamples() throws Exception {
String document = "breast and ovarian cancer.\r\n"
+ "colorectal, endometrial, and ovarian cancers.\r\n"
+ "colorectal, endometrial and ovarian cancers.\r\n"
+ "vasculopathy of the heart and brain.\r\n"
+ "abnormalities of eyes, nervous system, and kidneys.\r\n"
+ "Prader-Willi and Angelman syndromes\r\n" + "breast or ovarian cancer.\r\n"
+ "breast and/or ovarian cancer.";
String script = "";
script += "\"and\"{-> T1};";
script += "\"or\"{-> T2};";
script += "\"endometrial, and ovarian\"{-> T3};";
script += "(\"colorectal\" # \"cancers.\"){-> T4};";
CAS cas = RutaTestUtils.getCAS(document);
Ruta.apply(cas, script);
RutaTestUtils.assertAnnotationsEquals(cas, 1, 7, "and", "and", "and", "and", "and");
RutaTestUtils.assertAnnotationsEquals(cas, 2, 2, "or", "or");
RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "endometrial, and ovarian");
RutaTestUtils.assertAnnotationsEquals(cas, 4, 2,
"colorectal, endometrial, and ovarian cancers.",
"colorectal, endometrial and ovarian cancers.");
}
@Test
public void testMatcherCombination() throws Exception {
// see UIMA-6195
String document = "PI_____I How to test it?\nP I_____I How to test it?\n";
String script = "";
script += "\"____+\" -> T1;";
script += "\"I\" T1 \"I\"? { -> MARKONCE(T2,1,2)};";
CAS cas = RutaTestUtils.getCAS(document);
Ruta.apply(cas, script);
RutaTestUtils.assertAnnotationsEquals(cas, 2, 1, "I_____");
}
}