blob: 3a80c9d346b919ec6f723ae8041dd50c56b76de6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.impl;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Iterator;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.uima.UIMAFramework;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.IntArrayFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.TypePriorities_impl;
import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
import org.apache.uima.test.junit_extension.JUnitExtension;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.XMLSerializer;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import junit.framework.TestCase;
public class XCASDeserializerTest extends TestCase {
private TypeSystemDescription typeSystem;
private FsIndexDescription[] indexes;
/**
 * Creates the test case.
 *
 * @param arg0
 *          value passed unchanged to the {@link TestCase} superclass constructor (JUnit 3 uses it
 *          as the name of the test to run)
 */
public XCASDeserializerTest(String arg0) {
  super(arg0);
}
/**
 * Parses the example type system and index descriptors and stores them in the {@code typeSystem}
 * and {@code indexes} fields, which the tests use to create their CAS instances.
 *
 * @throws Exception
 *           if a descriptor file cannot be located or parsed
 */
@Override
protected void setUp() throws Exception {
  File tsDescriptor = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
  File indexDescriptor = JUnitExtension.getFile("ExampleCas/testIndexes.xml");

  XMLInputSource tsSource = new XMLInputSource(tsDescriptor);
  typeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(tsSource);

  XMLInputSource indexSource = new XMLInputSource(indexDescriptor);
  indexes = UIMAFramework.getXMLParser().parseFsIndexCollection(indexSource).getFsIndexes();
}
/**
 * Serializes a CAS that contains only an indexed int array, deserializes the resulting XCAS into
 * a second CAS, serializes that second CAS again and verifies that both XCAS strings are equal.
 *
 * @throws Exception
 *           if CAS creation, serialization or XML parsing fails
 */
public void testNoInitialSofa() throws Exception {
  CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  // create non-annotation type so as not to create the _InitialView Sofa
  final int arrayLength = 5;
  IntArrayFS intArrayFS = cas.createIntArrayFS(arrayLength);
  for (int i = 0; i < arrayLength; i++) {
    intArrayFS.set(i, i + 1); // contents 1..5
  }
  cas.getIndexRepository().addFS(intArrayFS);

  // serialize the CAS
  StringWriter sw = new StringWriter();
  XMLSerializer xmlSer = new XMLSerializer(sw, false);
  XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem());
  xcasSer.serialize(cas, xmlSer.getContentHandler(), true);
  String xml = sw.getBuffer().toString();

  // deserialize into another CAS
  CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  XCASDeserializer deser = new XCASDeserializer(cas2.getTypeSystem());
  ContentHandler deserHandler = deser.getXCASHandler(cas2);
  SAXParserFactory fact = SAXParserFactory.newInstance();
  SAXParser parser = fact.newSAXParser();
  XMLReader xmlReader = parser.getXMLReader();
  xmlReader.setContentHandler(deserHandler);
  xmlReader.parse(new InputSource(new StringReader(xml)));

  // serialize the new CAS; the serializer is built from the type system of the CAS it serializes
  sw = new StringWriter();
  xmlSer = new XMLSerializer(sw, false);
  xcasSer = new XCASSerializer(cas2.getTypeSystem());
  xcasSer.serialize(cas2, xmlSer.getContentHandler(), true);
  String xml2 = sw.getBuffer().toString();

  // compare; assertEquals reports both strings when they differ
  assertEquals(xml, xml2);
}
/**
 * Runs the deserialize/reserialize round trip twice: first without, then with the JCas flag set.
 *
 * @throws Exception
 *           if either round trip fails
 */
public void testDeserializeAndReserialize() throws Exception {
  boolean[] jcasModes = { false, true };
  for (boolean jcasMode : jcasModes) {
    doTestDeserializeAndReserialize(jcasMode);
  }
}
/**
 * Deserializes {@code ExampleCas/cas.xml} into a CAS, checks that the {@code classes} string
 * arrays of all entries in {@code testEntityIndex} are non-null with non-null elements,
 * reserializes the CAS, deserializes the result into a second CAS and compares the sizes of the
 * two annotation indexes.
 *
 * @param useJCas
 *          when {@code true}, {@code getJCas()} is called on each CAS before it is deserialized
 *          into
 * @throws Exception
 *           if CAS creation, file access, serialization or XML parsing fails
 */
private void doTestDeserializeAndReserialize(boolean useJCas) throws Exception {
  // deserialize a complex CAS
  CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  if (useJCas) {
    cas.getJCas();
  }
  XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
  ContentHandler deserHandler = deser.getXCASHandler(cas);
  SAXParserFactory fact = SAXParserFactory.newInstance();
  SAXParser parser = fact.newSAXParser();
  XMLReader xmlReader = parser.getXMLReader();
  xmlReader.setContentHandler(deserHandler);
  // try-with-resources closes the file even when parsing throws
  try (InputStream serCasStream = new FileInputStream(
          JUnitExtension.getFile("ExampleCas/cas.xml"))) {
    xmlReader.parse(new InputSource(serCasStream));
  }

  // check that array refs are not null
  Type entityType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
  Feature classesFeat = entityType.getFeatureByBaseName("classes");
  Iterator<FeatureStructure> iter = cas.getIndexRepository().getIndex("testEntityIndex")
          .iterator();
  assertTrue(iter.hasNext());
  while (iter.hasNext()) {
    FeatureStructure fs = iter.next();
    StringArrayFS arrayFS = (StringArrayFS) fs.getFeatureValue(classesFeat);
    assertNotNull(arrayFS);
    for (int i = 0; i < arrayFS.size(); i++) {
      assertNotNull(arrayFS.get(i));
    }
  }

  // reserialize
  StringWriter sw = new StringWriter();
  XMLSerializer xmlSer = new XMLSerializer(sw, false);
  XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem());
  xcasSer.serialize(cas, xmlSer.getContentHandler(), true);
  String xml = sw.getBuffer().toString();

  CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  if (useJCas) {
    cas2.getJCas();
  }

  // deserialize into another CAS, reusing the same XMLReader with a new handler
  XCASDeserializer deser2 = new XCASDeserializer(cas2.getTypeSystem());
  ContentHandler deserHandler2 = deser2.getXCASHandler(cas2);
  xmlReader.setContentHandler(deserHandler2);
  xmlReader.parse(new InputSource(new StringReader(xml)));

  // compare: only the annotation index sizes are checked, not the full CAS contents
  assertEquals(cas.getAnnotationIndex().size(), cas2.getAnnotationIndex().size());
}
/**
 * Deserializes {@code ExampleCas/cas.xml} into a CAS created from an empty type system while
 * collecting {@link OutOfTypeSystemData}, reserializes the CAS together with that data, and
 * deserializes the result into a CAS created from the full test type system. Finally checks that
 * the {@code classes} string arrays of all entries in {@code testEntityIndex} are non-null with
 * non-null elements.
 *
 * @throws Exception
 *           if CAS creation, file access, serialization or XML parsing fails
 */
public void testOutOfTypeSystem2() throws Exception {
  // deserialize a complex CAS into one with no TypeSystem
  CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
          new TypePriorities_impl(), new FsIndexDescription[0]);
  OutOfTypeSystemData ootsd = new OutOfTypeSystemData();
  XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
  ContentHandler deserHandler = deser.getXCASHandler(cas, ootsd);
  SAXParserFactory fact = SAXParserFactory.newInstance();
  SAXParser parser = fact.newSAXParser();
  XMLReader xmlReader = parser.getXMLReader();
  xmlReader.setContentHandler(deserHandler);
  // try-with-resources closes the file even when parsing throws
  try (InputStream serCasStream = new FileInputStream(
          JUnitExtension.getFile("ExampleCas/cas.xml"))) {
    xmlReader.parse(new InputSource(serCasStream));
  }

  // now reserialize including OutOfTypeSystem data
  XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem());
  StringWriter sw = new StringWriter();
  XMLSerializer xmlSer = new XMLSerializer(sw, false);
  xcasSer.serialize(cas, xmlSer.getContentHandler(), true, ootsd);
  String xml = sw.getBuffer().toString();

  // deserialize into a CAS that accepts the full typesystem
  CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  XCASDeserializer deser2 = new XCASDeserializer(cas2.getTypeSystem());
  ContentHandler deserHandler2 = deser2.getXCASHandler(cas2);
  xmlReader = parser.getXMLReader();
  xmlReader.setContentHandler(deserHandler2);
  xmlReader.parse(new InputSource(new StringReader(xml)));

  // check that array refs are not null
  Type entityType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
  Feature classesFeat = entityType.getFeatureByBaseName("classes");
  Iterator<FeatureStructure> iter = cas2.getIndexRepository().getIndex("testEntityIndex")
          .iterator();
  assertTrue(iter.hasNext());
  while (iter.hasNext()) {
    FeatureStructure fs = iter.next();
    StringArrayFS arrayFS = (StringArrayFS) fs.getFeatureValue(classesFeat);
    assertNotNull(arrayFS);
    for (int i = 0; i < arrayFS.size(); i++) {
      assertNotNull(arrayFS.get(i));
    }
  }
}
/**
 * Deserializes a small inline XCAS, whose {@code foo.Bar} element carries its value as element
 * text, into a CAS created from an empty type system, then reserializes it together with the
 * collected {@link OutOfTypeSystemData} and checks that the text shows up as a {@code value}
 * attribute in the output.
 *
 * @throws Exception
 *           if CAS creation, serialization or XML parsing fails
 */
public void testOutOfTypeSystem3() throws Exception {
  // target CAS knows none of the types used in the document
  CAS emptyTypeSystemCas = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
          new TypePriorities_impl(), new FsIndexDescription[0]);

  StringBuilder document = new StringBuilder();
  document.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?><CAS>");
  document.append("<uima.tcas.Document _content=\"text\">Test Document</uima.tcas.Document>");
  document.append(
          "<uima.tcas.DocumentAnnotation _indexed=\"1\" _id=\"8\" sofa=\"1\" begin=\"0\" end=\"13\" language=\"en\"/>");
  document.append(
          "<foo.Bar _indexed=\"1\" _id=\"2\" sofa=\"1\" begin=\"0\" end=\"0\" baz=\"blah\">this is the value feature</foo.Bar></CAS>");
  String inputXcas = document.toString();

  // parse, routing everything outside the type system into the side container
  OutOfTypeSystemData outOfTypeSystemData = new OutOfTypeSystemData();
  XMLReader reader = XMLReaderFactory.createXMLReader();
  XCASDeserializer deserializer = new XCASDeserializer(emptyTypeSystemCas.getTypeSystem());
  reader.setContentHandler(deserializer.getXCASHandler(emptyTypeSystemCas, outOfTypeSystemData));
  reader.parse(new InputSource(new StringReader(inputXcas)));

  // write the CAS back out, including the out-of-type-system data
  XCASSerializer serializer = new XCASSerializer(emptyTypeSystemCas.getTypeSystem());
  StringWriter out = new StringWriter();
  XMLSerializer xmlOut = new XMLSerializer(out, false);
  serializer.serialize(emptyTypeSystemCas, xmlOut.getContentHandler(), true,
          outOfTypeSystemData);
  String reserialized = out.getBuffer().toString();

  // the value feature must survive (it is expected as an attribute in the output)
  assertTrue(reserialized.contains("value=\"this is the value feature\""));
}
/**
 * Builds a CAS with two views (the initial one and {@code "OtherSofa"}), indexes a single TOP
 * instance in both, serializes the CAS to XCAS and deserializes that XCAS twice into the same
 * target CAS, resetting the target after each pass. Every pass verifies both document texts and
 * that each view yields exactly two indexed feature structures.
 *
 * @throws Exception
 *           if CAS creation, serialization or XML parsing fails
 */
public void testMultipleSofas() throws Exception {
  CAS initialView = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  initialView.setDocumentText("This is a test");

  CAS otherView = initialView.createView("OtherSofa");
  otherView.setDocumentText("This is only a test");

  // A TOP instance is used rather than an annotation, because an annotation cannot be added to a
  // view other than the one it was created in: https://issues.apache.org/jira/browse/UIMA-4099
  Type top = initialView.getTypeSystem().getTopType();
  FeatureStructure sharedFs = initialView.createFS(top);
  initialView.getIndexRepository().addFS(sharedFs);
  otherView.getIndexRepository().addFS(sharedFs);

  // both views must yield exactly two feature structures
  FSIterator<FeatureStructure> initialIt = initialView.getIndexRepository().getAllIndexedFS(top);
  FSIterator<FeatureStructure> otherIt = otherView.getIndexRepository().getAllIndexedFS(top);
  initialIt.next();
  initialIt.next();
  otherIt.next();
  otherIt.next();
  assertFalse(initialIt.hasNext());
  assertFalse(otherIt.hasNext());

  // serialize
  StringWriter xcasOut = new StringWriter();
  XMLSerializer xmlOut = new XMLSerializer(xcasOut, false);
  new XCASSerializer(initialView.getTypeSystem()).serialize(initialView,
          xmlOut.getContentHandler(), true);
  String serialized = xcasOut.getBuffer().toString();

  // deserialize into one target CAS; the second pass checks it still works after a reset
  CAS target = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  for (int pass = 1; pass <= 2; pass++) {
    XCASDeserializer deserializer = new XCASDeserializer(target.getTypeSystem());
    ContentHandler handler = deserializer.getXCASHandler(target);
    XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
    reader.setContentHandler(handler);
    reader.parse(new InputSource(new StringReader(serialized)));

    // check sofas
    assertEquals("This is a test", target.getDocumentText());
    CAS targetOtherView = target.getView("OtherSofa");
    assertEquals("This is only a test", targetOtherView.getDocumentText());

    // the shared feature structure must still be indexed in both views
    initialIt = target.getIndexRepository().getAllIndexedFS(top);
    otherIt = targetOtherView.getIndexRepository().getAllIndexedFS(top);
    initialIt.next();
    initialIt.next();
    otherIt.next();
    otherIt.next();
    assertFalse(initialIt.hasNext());
    assertFalse(otherIt.hasNext());

    target.reset();
  }
}
/**
 * Checks that XCAS files written in the v1.x format can be read:
 * <ol>
 * <li>{@code ExampleCas/cas.xml} and {@code ExampleCas/v1cas.xml} are deserialized into separate
 * CASes and their annotation index sizes are compared;</li>
 * <li>{@code ExampleCas/v1MultiSofaCas.xml} is deserialized and its views are verified;</li>
 * <li>that multi-Sofa CAS is reserialized, deserialized into the other CAS, and the same
 * verification is applied to the result.</li>
 * </ol>
 *
 * @throws Exception
 *           if CAS creation, file access, serialization or XML parsing fails
 */
public void testv1FormatXcas() throws Exception {
  CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  CAS v1cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);

  // get the CAS used above that is in v2.0 format
  parseXcasFileInto("ExampleCas/cas.xml", cas);
  // get a v1.x version of the same CAS
  parseXcasFileInto("ExampleCas/v1cas.xml", v1cas);
  // compare
  assertEquals(cas.getAnnotationIndex().size(), v1cas.getAnnotationIndex().size());

  // now a v1.x version of a multiple Sofa CAS
  v1cas.reset();
  parseXcasFileInto("ExampleCas/v1MultiSofaCas.xml", v1cas);
  assertV1MultiSofaContent(v1cas);

  // reserialize
  StringWriter sw = new StringWriter();
  XMLSerializer xmlSer = new XMLSerializer(sw, false);
  XCASSerializer xcasSer = new XCASSerializer(v1cas.getTypeSystem());
  xcasSer.serialize(v1cas, xmlSer.getContentHandler(), true);
  String xml = sw.getBuffer().toString();

  // deserialize into another CAS
  cas.reset();
  XCASDeserializer deser2 = new XCASDeserializer(cas.getTypeSystem());
  XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
  xmlReader.setContentHandler(deser2.getXCASHandler(cas));
  xmlReader.parse(new InputSource(new StringReader(xml)));

  // verify the CAS that was just deserialized into (previously the default view text was
  // re-checked on v1cas, which left the round-tripped default view untested)
  assertV1MultiSofaContent(cas);
}

/** Deserializes the XCAS test resource at {@code resourcePath} into {@code target}. */
private void parseXcasFileInto(String resourcePath, CAS target) throws Exception {
  XCASDeserializer deser = new XCASDeserializer(target.getTypeSystem());
  XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
  xmlReader.setContentHandler(deser.getXCASHandler(target));
  // try-with-resources closes the file even when parsing throws
  try (InputStream serCasStream = new FileInputStream(JUnitExtension.getFile(resourcePath))) {
    xmlReader.parse(new InputSource(serCasStream));
  }
}

/** Asserts the document texts and annotation counts expected from the v1 multi-Sofa example. */
private void assertV1MultiSofaContent(CAS multiSofaCas) {
  assertEquals("some text for the default text sofa.", multiSofaCas.getDocumentText());
  CAS engView = multiSofaCas.getView("EnglishDocument");
  assertEquals("this beer is good", engView.getDocumentText());
  assertEquals(5, engView.getAnnotationIndex().size()); // 4 annots plus documentAnnotation
  CAS gerView = multiSofaCas.getView("GermanDocument");
  assertEquals("das bier ist gut", gerView.getDocumentText());
  assertEquals(5, gerView.getAnnotationIndex().size()); // 4 annots plus documentAnnotation
}
/**
 * Indexes a three-element string array whose first and last elements are {@code null},
 * serializes the CAS to XCAS, deserializes the bytes back into the same CAS and checks that the
 * first indexed {@code uima.cas.StringArray} still holds {@code null, "value", null}.
 *
 * @throws Exception
 *           if CAS creation, serialization or deserialization fails
 */
public void testStringArrayWithNullValues() throws Exception {
  CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  StringArrayFS strArray = cas.createStringArrayFS(3);
  strArray.set(1, "value");
  cas.getIndexRepository().addFS(strArray);
  assertNull(strArray.get(0));
  assertEquals("value", strArray.get(1));
  assertNull(strArray.get(2));

  // serialize to XCAS and back
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  XCASSerializer.serialize(cas, baos);
  ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  XCASDeserializer.deserialize(bais, cas);

  // check
  Type stringArrayType = cas.getTypeSystem().getType("uima.cas.StringArray");
  FSIterator<FeatureStructure> iter = cas.getIndexRepository().getAllIndexedFS(stringArrayType);
  // fail with an assertion rather than a NoSuchElementException if nothing was indexed
  assertTrue(iter.hasNext());
  StringArrayFS strArrayOut = (StringArrayFS) iter.next();
  assertNull(strArrayOut.get(0));
  assertEquals("value", strArrayOut.get(1));
  assertNull(strArrayOut.get(2));
}
}