blob: a102d8d8ee000fedf2befa0714fe6a026ae14ebe [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.impl;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Iterator;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import junit.framework.TestCase;
import org.apache.uima.UIMAFramework;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.IntArrayFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.TypePriorities_impl;
import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
import org.apache.uima.test.junit_extension.JUnitExtension;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.XMLSerializer;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * Tests that CASes survive a round trip through {@link XCASSerializer} and
 * {@link XCASDeserializer}, including CASes without an initial Sofa, CASes with several views,
 * out-of-type-system data, and XCAS files stored under the names <code>v1cas.xml</code> and
 * <code>v1MultiSofaCas.xml</code>.
 */
public class XCASDeserializerTest extends TestCase {

  /** Type system description parsed in {@link #setUp()}; used to create every "full" test CAS. */
  private TypeSystemDescription typeSystem;

  /** Index descriptions parsed in {@link #setUp()}; used to create every "full" test CAS. */
  private FsIndexDescription[] indexes;

  /**
   * Constructor for XCASDeserializerTest.
   * 
   * @param arg0
   *          the test name, passed through to {@link TestCase#TestCase(String)}
   */
  public XCASDeserializerTest(String arg0) {
    super(arg0);
  }

  /**
   * Parses the test type system and the test index collection from the <code>ExampleCas</code>
   * resources so that each test can create fresh CASes from them.
   */
  protected void setUp() throws Exception {
    File typeSystemFile = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
    File indexesFile = JUnitExtension.getFile("ExampleCas/testIndexes.xml");
    typeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
            new XMLInputSource(typeSystemFile));
    indexes = UIMAFramework.getXMLParser().parseFsIndexCollection(new XMLInputSource(indexesFile))
            .getFsIndexes();
  }

  /**
   * Serializes a CAS that only holds an indexed int array, deserializes it into a second CAS,
   * serializes that one again and expects both XML strings to be identical.
   */
  public void testNoInitialSofa() throws Exception {
    CAS cas = createTestCas();
    // create non-annotation type so as not to create the _InitialView Sofa
    IntArrayFS intArrayFS = cas.createIntArrayFS(5);
    for (int i = 0; i < 5; i++) {
      intArrayFS.set(i, i + 1);
    }
    cas.getIndexRepository().addFS(intArrayFS);

    // serialize the CAS
    String xml = serialize(cas, null);

    // deserialize into another CAS
    CAS cas2 = createTestCas();
    deserialize(newSaxReader(), new InputSource(new StringReader(xml)), cas2, null);

    // serialize the new CAS; the serializer is built from cas2's own type system
    String xml2 = serialize(cas2, null);

    // compare
    assertEquals(xml, xml2);
  }

  /**
   * Runs the deserialize/reserialize round trip twice: once on plain CASes and once after
   * <code>getJCas()</code> has been called on them.
   */
  public void testDeserializeAndReserialize() throws Exception {
    doTestDeserializeAndReserialize(false);
    doTestDeserializeAndReserialize(true);
  }

  /**
   * Deserializes <code>ExampleCas/cas.xml</code>, checks the entity string arrays, reserializes
   * the CAS, deserializes the result into a second CAS and compares annotation index sizes.
   * 
   * @param useJCas
   *          if <code>true</code>, <code>getJCas()</code> is called on each CAS before it is
   *          filled
   */
  private void doTestDeserializeAndReserialize(boolean useJCas) throws Exception {
    // deserialize a complex CAS
    CAS cas = createTestCas();
    if (useJCas) {
      cas.getJCas();
    }
    deserializeFile("ExampleCas/cas.xml", cas, null);

    // check that array refs are not null
    assertEntityClassArraysNotNull(cas);

    // reserialize
    String xml = serialize(cas, null);

    CAS cas2 = createTestCas();
    if (useJCas) {
      cas2.getJCas();
    }

    // deserialize into another CAS
    deserialize(newSaxReader(), new InputSource(new StringReader(xml)), cas2, null);

    // compare (only the annotation index sizes; no deep comparison of the two CASes is done)
    assertEquals(cas.getAnnotationIndex().size(), cas2.getAnnotationIndex().size());
  }

  /**
   * Deserializes a complex CAS into a CAS with an empty type system while collecting
   * out-of-type-system data, reserializes it together with that data, and checks that a CAS
   * with the full type system can read the result.
   */
  public void testOutOfTypeSystem2() throws Exception {
    // deserialize a complex CAS into one with no TypeSystem
    CAS cas = createEmptyTypeSystemCas();
    OutOfTypeSystemData ootsd = new OutOfTypeSystemData();
    deserializeFile("ExampleCas/cas.xml", cas, ootsd);

    // now reserialize including OutOfTypeSystem data
    String xml = serialize(cas, ootsd);

    // deserialize into a CAS that accepts the full typesystem
    CAS cas2 = createTestCas();
    deserialize(newSaxReader(), new InputSource(new StringReader(xml)), cas2, null);

    // check that array refs are not null
    assertEntityClassArraysNotNull(cas2);
  }

  /**
   * Deserializes an XCAS that uses the implicit value feature (element text content) into a CAS
   * with an empty type system and checks that the value is still present after reserializing
   * with the out-of-type-system data.
   */
  public void testOutOfTypeSystem3() throws Exception {
    // deserialize an XCAS using the implicit value feature into a CAS with no TypeSystem
    CAS cas = createEmptyTypeSystemCas();
    String xcas = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><CAS>"
            + "<uima.tcas.Document _content=\"text\">Test Document</uima.tcas.Document>"
            + "<uima.tcas.DocumentAnnotation _indexed=\"1\" _id=\"8\" sofa=\"1\" begin=\"0\" end=\"13\" language=\"en\"/>"
            + "<foo.Bar _indexed=\"1\" _id=\"2\" sofa=\"1\" begin=\"0\" end=\"0\" baz=\"blah\">this is the value feature</foo.Bar></CAS>";
    OutOfTypeSystemData ootsd = new OutOfTypeSystemData();
    // this test deliberately obtains its reader from XMLReaderFactory rather than a SAXParser
    XMLReader xmlReader = XMLReaderFactory.createXMLReader();
    deserialize(xmlReader, new InputSource(new StringReader(xcas)), cas, ootsd);

    // now reserialize including OutOfTypeSystem data
    String xml = serialize(cas, ootsd);

    // make sure the value feature was not lost (it will be serialized as an attribute however)
    assertTrue("value feature missing from reserialized XCAS",
            xml.indexOf("value=\"this is the value feature\"") != -1);
  }

  /**
   * Serializes a CAS with two views that share one indexed annotation and checks that the Sofa
   * texts and the annotation index sizes are restored in the deserialized CAS, both on a fresh
   * CAS and after a reset.
   */
  public void testMultipleSofas() throws Exception {
    CAS cas = createTestCas();
    // set document text for the initial view
    cas.setDocumentText("This is a test");
    // create a new view and set its document text
    CAS cas2 = cas.createView("OtherSofa");
    cas2.setDocumentText("This is only a test");

    // create an annotation and add to index of both views
    AnnotationFS anAnnot = cas.createAnnotation(cas.getAnnotationType(), 0, 5);
    cas.getIndexRepository().addFS(anAnnot);
    cas2.getIndexRepository().addFS(anAnnot);
    FSIndex tIndex = cas.getAnnotationIndex();
    FSIndex t2Index = cas2.getAnnotationIndex();
    assertEquals(2, tIndex.size()); // document annot and this one
    assertEquals(2, t2Index.size()); // ditto

    // serialize
    String xml = serialize(cas, null);

    // deserialize into another CAS (repeat twice to check it still works after reset)
    CAS newCas = createTestCas();
    for (int i = 0; i < 2; i++) {
      deserialize(newSaxReader(), new InputSource(new StringReader(xml)), newCas, null);

      // check sofas
      assertEquals("This is a test", newCas.getDocumentText());
      CAS newCas2 = newCas.getView("OtherSofa");
      assertEquals("This is only a test", newCas2.getDocumentText());

      // check that annotation is still indexed in both views of the DESERIALIZED CAS
      // (asserting on the source CAS indexes here would not exercise the deserializer)
      assertEquals(2, newCas.getAnnotationIndex().size()); // document annot and this one
      assertEquals(2, newCas2.getAnnotationIndex().size()); // ditto

      newCas.reset();
    }
  }

  /**
   * Reads <code>cas.xml</code> and <code>v1cas.xml</code> and compares their annotation index
   * sizes, then reads <code>v1MultiSofaCas.xml</code>, checks its views, and verifies that the
   * same content is found after a serialize/deserialize round trip into another CAS.
   */
  public void testv1FormatXcas() throws Exception {
    CAS cas = createTestCas();
    CAS v1cas = createTestCas();

    // get the CAS used above that is in v2.0 format
    deserializeFile("ExampleCas/cas.xml", cas, null);

    // get a v1.x version of the same CAS
    deserializeFile("ExampleCas/v1cas.xml", v1cas, null);

    // compare
    assertEquals(cas.getAnnotationIndex().size(), v1cas.getAnnotationIndex().size());

    // now a v1.x version of a multiple Sofa CAS
    v1cas.reset();
    deserializeFile("ExampleCas/v1MultiSofaCas.xml", v1cas, null);

    // test it
    assertMultiSofaContent(v1cas);

    // reserialize
    String xml = serialize(v1cas, null);

    // deserialize into another CAS
    cas.reset();
    deserialize(newSaxReader(), new InputSource(new StringReader(xml)), cas, null);

    // test it: every check, including the default Sofa text, is made against the target CAS
    assertMultiSofaContent(cas);
  }

  /** Creates a CAS from the test type system and indexes loaded in {@link #setUp()}. */
  private CAS createTestCas() throws Exception {
    return CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  }

  /** Creates a CAS from an empty type system description and no index descriptions. */
  private CAS createEmptyTypeSystemCas() throws Exception {
    return CasCreationUtils.createCas(new TypeSystemDescription_impl(),
            new TypePriorities_impl(), new FsIndexDescription[0]);
  }

  /** Obtains a new XMLReader from a default-configured SAXParserFactory. */
  private XMLReader newSaxReader() throws Exception {
    SAXParserFactory fact = SAXParserFactory.newInstance();
    SAXParser parser = fact.newSAXParser();
    return parser.getXMLReader();
  }

  /**
   * Serializes a CAS to an XCAS string, using a serializer built from that CAS's own type
   * system.
   * 
   * @param cas
   *          the CAS to serialize
   * @param ootsd
   *          out-of-type-system data to include, or <code>null</code> to serialize without any
   * @return the serialized XML
   */
  private String serialize(CAS cas, OutOfTypeSystemData ootsd) throws Exception {
    StringWriter sw = new StringWriter();
    XMLSerializer xmlSer = new XMLSerializer(sw, false);
    XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem());
    if (ootsd == null) {
      xcasSer.serialize(cas, xmlSer.getContentHandler(), true);
    } else {
      xcasSer.serialize(cas, xmlSer.getContentHandler(), true, ootsd);
    }
    return sw.getBuffer().toString();
  }

  /**
   * Parses XCAS from the given source into the given CAS.
   * 
   * @param xmlReader
   *          the reader that performs the parse; its content handler is replaced
   * @param source
   *          the XCAS input
   * @param cas
   *          the CAS to fill
   * @param ootsd
   *          receiver for out-of-type-system data, or <code>null</code> to use the handler
   *          obtained without one
   */
  private void deserialize(XMLReader xmlReader, InputSource source, CAS cas,
          OutOfTypeSystemData ootsd) throws Exception {
    XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
    ContentHandler deserHandler;
    if (ootsd == null) {
      deserHandler = deser.getXCASHandler(cas);
    } else {
      deserHandler = deser.getXCASHandler(cas, ootsd);
    }
    xmlReader.setContentHandler(deserHandler);
    xmlReader.parse(source);
  }

  /**
   * Parses the XCAS test resource at the given path into the given CAS. The file stream is
   * closed even when parsing fails.
   * 
   * @param resourcePath
   *          path handed to <code>JUnitExtension.getFile</code>
   * @param cas
   *          the CAS to fill
   * @param ootsd
   *          receiver for out-of-type-system data, or <code>null</code>
   */
  private void deserializeFile(String resourcePath, CAS cas, OutOfTypeSystemData ootsd)
          throws Exception {
    InputStream serCasStream = new FileInputStream(JUnitExtension.getFile(resourcePath));
    try {
      deserialize(newSaxReader(), new InputSource(serCasStream), cas, ootsd);
    } finally {
      serCasStream.close();
    }
  }

  /**
   * Asserts that "testEntityIndex" is not empty and that, for every feature structure in it, the
   * "classes" string array and each of its elements are non-null.
   */
  private void assertEntityClassArraysNotNull(CAS cas) {
    Type entityType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
    Feature classesFeat = entityType.getFeatureByBaseName("classes");
    Iterator iter = cas.getIndexRepository().getIndex("testEntityIndex").iterator();
    assertTrue(iter.hasNext());
    while (iter.hasNext()) {
      FeatureStructure fs = (FeatureStructure) iter.next();
      StringArrayFS arrayFS = (StringArrayFS) fs.getFeatureValue(classesFeat);
      assertNotNull(arrayFS);
      for (int i = 0; i < arrayFS.size(); i++) {
        assertNotNull(arrayFS.get(i));
      }
    }
  }

  /**
   * Asserts the document texts and annotation index sizes expected for the content of
   * <code>v1MultiSofaCas.xml</code>: a default text Sofa plus an "EnglishDocument" and a
   * "GermanDocument" view.
   */
  private void assertMultiSofaContent(CAS multiSofaCas) throws Exception {
    assertEquals("some text for the default text sofa.", multiSofaCas.getDocumentText());
    CAS engView = multiSofaCas.getView("EnglishDocument");
    assertEquals("this beer is good", engView.getDocumentText());
    assertEquals(5, engView.getAnnotationIndex().size()); // 4 annots plus documentAnnotation
    CAS gerView = multiSofaCas.getView("GermanDocument");
    assertEquals("das bier ist gut", gerView.getDocumentText());
    assertEquals(5, gerView.getAnnotationIndex().size()); // 4 annots plus documentAnnotation
  }
}