blob: e2efa4b3d3b2b45d928a8828d10665ca5984482f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.impl;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.uima.UIMAFramework;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CommonArrayFS;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
import org.apache.uima.cas.impl.XmiSerializationSharedData.XmiArrayElement;
import org.apache.uima.internal.util.IntStack;
import org.apache.uima.internal.util.IntVector;
import org.apache.uima.internal.util.XmlAttribute;
import org.apache.uima.internal.util.XmlElementName;
import org.apache.uima.internal.util.XmlElementNameAndContents;
import org.apache.uima.internal.util.rb_trees.IntRedBlackTree;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.XMLSerializer;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.AttributesImpl;
/**
* XMI CAS serializer. Used to write out a CAS in an XML Metadata Interchange (XMI) format. Create a
* serializer from a type system, then encode individual CASes by writing to a SAX content handler.
* This class is thread safe.
*/
public class XmiCasSerializer {
// Special "type class" codes for list types. The LowLevelCAS.ll_getTypeClass() method
// returns type classes for primitives and arrays, but not lists (which are just ordinary FS types
// as far as the CAS is concerned). The XMI serialization treats lists specially, however, and
// so needs its own type codes for these.
/** Type class code reported by classifyType for integer list types. */
public static final int TYPE_CLASS_INTLIST = 101;
/** Type class code reported by classifyType for float list types. */
public static final int TYPE_CLASS_FLOATLIST = 102;
/** Type class code reported by classifyType for string list types. */
public static final int TYPE_CLASS_STRINGLIST = 103;
/** Type class code reported by classifyType for FS list types. */
public static final int TYPE_CLASS_FSLIST = 104;
// Number of children of the current element. Written each time an element is started and
// exposed through getNumChildren().
private int numChildren;
/**
 * Returns the child count recorded for the element that was most recently started. The count is
 * stored just before the content handler's startElement is invoked, so it is guaranteed to be
 * correct at that time. Needed for streaming Vinci serialization.
 * <p>
 * NOTE: the count is kept on the serializer instance itself, so this method will not work if
 * there are simultaneously executing calls to XmiCasSerializer.serialize. Use it only with a
 * dedicated XmiCasSerializer instance that is not shared between threads.
 *
 * @return the number of children of the current element
 */
public int getNumChildren() {
  return this.numChildren;
}
/**
* Use an inner class to hold the data for serializing a CAS. Each call to serialize() creates its
* own instance.
*
*
*/
private class XmiCasDocSerializer {
// Where the output goes: receives all startElement/endElement/characters events.
private ContentHandler ch;
// Optional error handler (may be null), mainly so we can send warnings.
private ErrorHandler eh = null;
// The CAS we're serializing.
private CASImpl cas;
// Any FS reference we've touched goes in here (keyed by FS address).
private IntRedBlackTree visited;
// Addresses of the FSs that are written first: those found in an index, plus those recorded in
// the shared data's id map by a previous deserialization.
private IntVector indexedFSs;
// The current queue for the remaining (non-indexed but reachable) FSs to write out.
private IntStack queue;
// SofaFS type
// private int sofaTypeCode;
// Annotation type
// private int annotationTypeCode;
// Shared, never-populated attribute list used for child elements that carry no attributes.
private final AttributesImpl emptyAttrs = new AttributesImpl();
// Reusable attribute list; cleared and refilled for each element that is written.
private AttributesImpl workAttrs = new AttributesImpl();
// Attribute type passed to SAX for attributes added through addAttribute.
private static final String cdataType = "CDATA";
// For debug statistics: incremented once per encoded FS.
private int fsCount = 0;
// Utilities for dealing with CAS list types.
private ListUtils listUtils;
// Holds the addresses of Array and List FSs that have been serialized inline, so that a second
// reference to the same array or list can be reported with a warning.
private IntRedBlackTree arrayAndListFSs;
// Optional data shared with a previous deserialization (may be null): supplies XMI ids and
// out-of-typesystem content.
private XmiSerializationSharedData sharedData;
private XmlElementName[] xmiTypeNames; // array, indexed by type code, giving XMI names for
// each type
// Maps each namespace URI to the prefix declared for it on the root element.
private Map nsUriToPrefixMap = new HashMap();
// Prefixes already taken (populated from out-of-typesystem element names).
private Set nsPrefixesUsed = new HashSet();
/**
* Whether the serializer needs to check for filtered-out types/features. Set in the constructor
* to true when a filter type system is present and is not the same object as the type system
* of the CAS being serialized.
*/
boolean isFiltering;
/**
 * Creates the per-call serialization state for one CAS.
 *
 * @param ch
 *          content handler that receives the SAX events
 * @param eh
 *          optional error handler for warnings; may be null
 * @param cas
 *          the CAS to serialize
 * @param sharedData
 *          optional data retained from a previous deserialization; may be null
 */
private XmiCasDocSerializer(ContentHandler ch, ErrorHandler eh, CASImpl cas,
    XmiSerializationSharedData sharedData) {
  super();
  // collaborators handed in by the caller
  this.cas = cas;
  this.ch = ch;
  this.eh = eh;
  this.sharedData = sharedData;
  // fresh traversal state for this run
  this.visited = new IntRedBlackTree();
  this.arrayAndListFSs = new IntRedBlackTree();
  this.indexedFSs = new IntVector();
  this.queue = new IntStack();
  this.listUtils = new ListUtils(cas, logger, eh);
  // filtering is only required when a filter type system exists and it is not the CAS's own
  this.isFiltering = !(filterTypeSystem == null || filterTypeSystem == cas.getTypeSystemImpl());
}
/**
 * Logs a warning and, if an error handler was supplied, also forwards it as a SAX warning.
 *
 * @param message
 *          the warning text
 * @throws SAXException
 *           if the error handler throws while processing the warning
 */
// TODO: internationalize
private void reportWarning(String message) throws SAXException {
  logger.log(Level.WARNING, message);
  if (this.eh == null) {
    return;
  }
  final SAXParseException warning = new SAXParseException(message, null);
  this.eh.warning(warning);
}
/**
 * Tells whether the given FS address has already been recorded as visited.
 *
 * @param addr
 *          The address.
 * @return <code>true</code> iff the address is already in the visited set.
 */
private boolean isVisited(int addr) {
  final boolean alreadySeen = this.visited.containsKey(addr);
  return alreadySeen;
}
/**
 * Runs the serialization: sets up the name mappings, collects the FSs to write, and then emits
 * the root element with all of its children.
 *
 * @throws SAXException
 *           if writing to the content handler or reporting a warning fails
 */
private void serialize() throws SAXException {
  // fill nsUriToPrefixMap and xmiTypeNames from the CAS type system and from
  // out-of-typesystem data, if any
  initTypeAndNamespaceMappings();

  // collect what has to be written; FSs that were deserialized into this CAS come first so
  // that every one of them is echoed back
  enqueueIncoming();
  enqueueIndexed();
  enqueueFeaturesOfIndexed();

  // work out how many children the root element will have
  final int indexedCount = indexedFSs.size();
  final int queuedCount = queue.size();
  final FSIndex sofaIndex = cas.getBaseCAS().indexRepository.getIndex(CAS.SOFA_INDEX_NAME);
  final int viewCount = sofaIndex.size(); // one View element per sofa
  int ootsCount = 0;
  if (this.sharedData != null) {
    ootsCount = this.sharedData.getOutOfTypeSystemElements().size();
  }
  // the leading 1 accounts for the special NULL object
  final int rootChildCount = 1 + indexedCount + queuedCount + viewCount + ootsCount;

  // root element attributes: namespace declarations plus the XMI version
  workAttrs.clear();
  computeNamespaceDeclarationAttrs(workAttrs);
  workAttrs.addAttribute(XMI_NS_URI, XMI_VERSION_LOCAL_NAME, XMI_VERSION_QNAME, "CDATA",
      XMI_VERSION_VALUE);

  startElement(XMI_TAG, workAttrs, rootChildCount);
  writeNullObject(); // the special NULL object
  encodeIndexed(); // everything in indexedFSs
  encodeQueued(); // everything left in the queue
  serializeOutOfTypeSystemElements(); // out-of-typesystem elements held in sharedData
  writeViews(); // the View elements
  endElement(XMI_TAG);
}
/**
 * Writes one View element for every sofa number that has an index repository.
 *
 * @throws SAXException
 *           if writing to the content handler fails
 */
private void writeViews() throws SAXException {
  final int viewCount = cas.getBaseSofaCount();
  // declared outside the loop on purpose: when the sofa lookup is skipped, the previous
  // value (initially null) is the one handed to writeView
  String sofaXmiId = null;
  int sofaNum = 1;
  while (sofaNum <= viewCount) {
    final FSIndexRepositoryImpl viewRepository = (FSIndexRepositoryImpl) cas.getBaseCAS()
        .getSofaIndexRepository(sofaNum);
    // only sofa number 1 can lack a sofa, namely when the initial sofa was never created
    final boolean initialViewWithoutSofa = sofaNum == 1 && !cas.isInitialSofaCreated();
    if (!initialViewWithoutSofa) {
      final FeatureStructureImpl sofa = (FeatureStructureImpl) cas.getView(sofaNum).getSofa();
      sofaXmiId = getXmiId(sofa.getAddress());
    }
    if (viewRepository != null) {
      writeView(sofaXmiId, viewRepository.getIndexedFSs());
    }
    sofaNum++;
  }
}
/**
 * Writes a single, childless uima.cas.View element.
 *
 * @param sofaXmiId
 *          XMI id of the view's sofa; the "sofa" attribute is omitted when this is null or empty
 * @param members
 *          addresses of the FSs indexed in the view
 * @throws SAXException
 *           if writing to the content handler fails
 */
private void writeView(String sofaXmiId, int[] members) throws SAXException {
  workAttrs.clear();
  final boolean hasSofa = sofaXmiId != null && sofaXmiId.length() > 0;
  if (hasSofa) {
    addAttribute(workAttrs, "sofa", sofaXmiId);
  }

  // every id is followed by a space; the last one is trimmed off below
  final StringBuffer memberIds = new StringBuffer();
  int m = 0;
  while (m < members.length) {
    final String memberId = getXmiId(members[m]);
    if (memberId != null) { // null means the FS was filtered out
      memberIds.append(memberId);
      memberIds.append(' ');
    }
    m++;
  }

  // out-of-typesystem members recorded for this view, if any
  if (this.sharedData != null) {
    final List ootsMembers = this.sharedData.getOutOfTypeSystemViewMembers(sofaXmiId);
    if (ootsMembers != null) {
      for (Iterator iter = ootsMembers.iterator(); iter.hasNext();) {
        memberIds.append((String) iter.next()).append(' ');
      }
    }
  }

  final int length = memberIds.length();
  if (length > 0) {
    // drop the trailing space
    addAttribute(workAttrs, "members", memberIds.substring(0, length - 1));
  }

  final XmlElementName viewElement = uimaTypeName2XmiElementName("uima.cas.View");
  startElement(viewElement, workAttrs, 0);
  endElement(viewElement);
}
/**
 * Emits the special placeholder instance of the dummy type uima.cas.NULL, whose id attribute is
 * "0". It stands in for nulls inside multi-valued references, which Ecore cannot express
 * natively.
 *
 * @throws SAXException
 *           if writing to the content handler fails
 */
private void writeNullObject() throws SAXException {
  final XmlElementName nullElement;
  workAttrs.clear();
  addAttribute(workAttrs, ID_ATTR_NAME, "0");
  nullElement = uimaTypeName2XmiElementName("uima.cas.NULL");
  startElement(nullElement, workAttrs, 0);
  endElement(nullElement);
}
/**
 * Appends the namespace declarations for the root element to the given attribute list: one
 * <code>xmlns:prefix</code> attribute per entry of nsUriToPrefixMap and, when schema locations
 * were specified, the <code>xmlns:xsi</code> declaration plus an
 * <code>xsi:schemaLocation</code> attribute listing every "namespaceURI location" pair.
 *
 * @param workAttrs2
 *          the attribute list to which the declarations are added
 */
private void computeNamespaceDeclarationAttrs(AttributesImpl workAttrs2) {
  // Write to the list the caller handed in rather than to the workAttrs field, so that the
  // parameter is actually honored.
  Iterator it = nsUriToPrefixMap.entrySet().iterator();
  while (it.hasNext()) {
    Map.Entry entry = (Map.Entry) it.next();
    String nsUri = (String) entry.getKey();
    String prefix = (String) entry.getValue();
    // write attribute
    workAttrs2.addAttribute(XMLNS_NS_URI, prefix, "xmlns:" + prefix, "CDATA", nsUri);
  }
  // also add schemaLocation if specified
  if (nsUriToSchemaLocationMap != null) {
    // write xmlns:xsi attribute
    workAttrs2.addAttribute(XMLNS_NS_URI, "xsi", "xmlns:xsi", "CDATA", XSI_NS_URI);
    // write xsi:schemaLocation attribute; each pair is followed by a space, so the value
    // ends with a trailing space
    StringBuffer buf = new StringBuffer();
    it = nsUriToSchemaLocationMap.entrySet().iterator();
    while (it.hasNext()) {
      Map.Entry entry = (Map.Entry) it.next();
      buf.append(entry.getKey()).append(' ').append(entry.getValue()).append(' ');
    }
    // NOTE(review): the local name passed here is "xsi" although the qualified name is
    // xsi:schemaLocation; "schemaLocation" looks like the intended local name - confirm
    // before changing, since consumers may depend on the current events.
    workAttrs2.addAttribute(XSI_NS_URI, "xsi", "xsi:schemaLocation", "CDATA", buf.toString());
  }
}
/**
 * Registers every FS found in the id map of the XmiSerializationSharedData. That map is filled
 * during the previous deserialization, so this guarantees that all incoming FSs are echoed in
 * the next serialization. Does nothing when there is no shared data.
 */
private void enqueueIncoming() {
  if (this.sharedData != null) {
    final int[] incoming = this.sharedData.getAllFsAddressesInIdMap();
    int idx = 0;
    while (idx < incoming.length) {
      enqueueIndexedFs(incoming[idx]);
      idx++;
    }
  }
}
/**
 * Registers the FSs held in the base index repository, followed by those held in the index
 * repository of each sofa number that has one.
 */
private void enqueueIndexed() {
  final FSIndexRepositoryImpl baseRepository = (FSIndexRepositoryImpl) cas.getBaseCAS()
      .getBaseIndexRepository();
  int[] indexed = baseRepository.getIndexedFSs();
  int pos = 0;
  while (pos < indexed.length) {
    enqueueIndexedFs(indexed[pos]);
    pos++;
  }

  // now the per-view repositories; sofa numbers start at 1
  final int viewCount = cas.getBaseSofaCount();
  for (int sofaNum = 1; sofaNum <= viewCount; sofaNum++) {
    final FSIndexRepositoryImpl viewRepository = (FSIndexRepositoryImpl) cas.getBaseCAS()
        .getSofaIndexRepository(sofaNum);
    if (viewRepository == null) {
      continue;
    }
    indexed = viewRepository.getIndexedFSs();
    for (pos = 0; pos < indexed.length; pos++) {
      enqueueIndexedFs(indexed[pos]);
    }
  }
}
/**
 * Walks the registered indexed FSs and enqueues everything reachable from their features.
 *
 * @throws SAXException
 *           if reporting a warning during traversal fails
 */
private void enqueueFeaturesOfIndexed() throws SAXException {
  final int count = indexedFSs.size();
  int i = 0;
  while (i < count) {
    final int fsAddr = indexedFSs.get(i);
    // the heap value at the FS address is passed on as the FS's type code
    final int typeCode = cas.getHeapValue(fsAddr);
    enqueueFeatures(fsAddr, typeCode);
    i++;
  }
}
/**
 * Registers a single indexed FS, unless it was already visited or its type is filtered out. The
 * features of the FS are NOT followed here.
 *
 * @param addr
 *          address of the FS
 */
private void enqueueIndexedFs(int addr) {
  if (isVisited(addr)) {
    return;
  }
  boolean inTargetTypeSystem = true;
  if (isFiltering) {
    final String typeName = cas.getTypeSystemImpl().ll_getTypeForCode(cas.getHeapValue(addr))
        .getName();
    // a type unknown to the filter type system is not in the target type system
    inTargetTypeSystem = filterTypeSystem.getType(typeName) != null;
  }
  if (inTargetTypeSystem) {
    visited.put(addr, addr);
    indexedFSs.add(addr);
  }
}
/**
 * Puts an FS on the queue and then follows everything reachable from it. FSs that were already
 * visited, or whose type is filtered out, are ignored.
 *
 * @param addr
 *          The FS address.
 * @throws SAXException
 *           if reporting a warning during traversal fails
 */
private void enqueue(int addr) throws SAXException {
  if (isVisited(addr)) {
    return;
  }
  final int typeCode = cas.getHeapValue(addr);
  if (isFiltering
      && filterTypeSystem.getType(cas.getTypeSystemImpl().ll_getTypeForCode(typeCode)
          .getName()) == null) {
    // this type is not in the target type system
    return;
  }
  visited.put(addr, addr);
  queue.push(addr);
  enqueueFeatures(addr, typeCode);
  // FSArrays additionally have their elements enqueued
  // TODO: won't get parameterized arrays??
  final boolean isFsArray = cas.isFSArrayType(typeCode);
  if (isFsArray) {
    enqueueFSArrayElements(addr);
  }
}
/**
 * Enqueues all FSs reachable through the features of the given FS. Features that are filtered
 * out, and features whose value is NULL, are skipped.
 *
 * @param addr
 *          address of an FS
 * @param typeCode
 *          type of the FS
 * @throws SAXException
 *           if reporting a warning during traversal fails
 */
private void enqueueFeatures(int addr, int typeCode) throws SAXException {
  // true iff the enclosing FS (addr) is itself a list node
  final boolean insideListNode = listUtils.isListType(typeCode);
  final int[] feats = cas.getTypeSystemImpl().ll_getAppropriateFeatures(typeCode);
  for (int f = 0; f < feats.length; f++) {
    final int featCode = feats[f];
    // skip features that aren't in the target type system
    if (isFiltering
        && filterTypeSystem.getFeatureByFullName(cas.getTypeSystemImpl()
            .ll_getFeatureForCode(featCode).getName()) == null) {
      continue;
    }
    final int featVal = cas.getHeapValue(addr + cas.getFeatureOffset(featCode));
    if (featVal == CASImpl.NULL) {
      continue;
    }
    // what gets enqueued depends on the range type of the feature
    final int rangeClass = classifyType(cas.getTypeSystemImpl().range(featCode));
    switch (rangeClass) {
      case LowLevelCAS.TYPE_CLASS_FS:
        enqueue(featVal);
        break;
      case LowLevelCAS.TYPE_CLASS_INTARRAY:
      case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
      case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
      case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
      case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
      case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
      case LowLevelCAS.TYPE_CLASS_LONGARRAY:
      case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
      case LowLevelCAS.TYPE_CLASS_FSARRAY: {
        // an array is a first-class object only when the feature has
        // multipleReferencesAllowed = true
        final boolean multiRefs = cas.getTypeSystemImpl().ll_getFeatureForCode(featCode)
            .isMultipleReferencesAllowed();
        if (multiRefs) {
          enqueue(featVal);
        } else if (rangeClass == LowLevelCAS.TYPE_CLASS_FSARRAY) {
          // the FSs reachable from an inline FSArray still have to be enqueued
          enqueueFSArrayElements(featVal);
        }
        break;
      }
      case TYPE_CLASS_INTLIST:
      case TYPE_CLASS_FLOATLIST:
      case TYPE_CLASS_STRINGLIST:
      case TYPE_CLASS_FSLIST: {
        // a list is a first-class object when the feature has multipleReferencesAllowed = true
        // OR when we are already inside a list node (this covers the tail feature)
        final boolean asObject = cas.getTypeSystemImpl().ll_getFeatureForCode(featCode)
            .isMultipleReferencesAllowed()
            || insideListNode;
        if (asObject) {
          enqueue(featVal);
        } else if (rangeClass == TYPE_CLASS_FSLIST) {
          // the FSs reachable from an inline FSList still have to be enqueued
          enqueueFSListElements(featVal);
        }
        break;
      }
      default:
        // primitive-valued features reference no other FS
        break;
    }
  }
}
/**
 * Enqueues every non-NULL FS referenced from an FSArray.
 *
 * @param addr
 *          Address of an FSArray
 * @throws SAXException
 *           if reporting a warning during traversal fails
 */
private void enqueueFSArrayElements(int addr) throws SAXException {
  final int size = cas.ll_getArraySize(addr);
  final int start = cas.getArrayStartAddress(addr);
  for (int offset = 0; offset < size; offset++) {
    final int element = cas.getHeapValue(start + offset);
    if (element == CASImpl.NULL) {
      continue;
    }
    enqueue(element);
  }
}
/**
 * Enqueues every non-NULL FS referenced from an FSList. The list nodes themselves are NOT
 * enqueued.
 *
 * @param addr
 *          Address of an FSList
 * @throws SAXException
 *           if reporting a warning during traversal fails
 */
private void enqueueFSListElements(int addr) throws SAXException {
  final int[] elementAddrs = listUtils.fsListToAddressArray(addr);
  int idx = 0;
  while (idx < elementAddrs.length) {
    final int elementAddr = elementAddrs[idx];
    idx++;
    if (elementAddr == CASImpl.NULL) {
      continue;
    }
    enqueue(elementAddr);
  }
}
/**
 * Encodes the registered indexed FSs, in the order in which they were registered.
 *
 * @throws SAXException
 *           if writing to the content handler fails
 */
private void encodeIndexed() throws SAXException {
  final int count = indexedFSs.size();
  int i = 0;
  while (i < count) {
    encodeFS(indexedFSs.get(i));
    i++;
  }
}
/**
 * Encodes all remaining enqueued (non-indexed) FSs, popping them off the queue until it is
 * empty.
 *
 * @throws SAXException
 *           if writing to the content handler fails
 */
private void encodeQueued() throws SAXException {
  if (queue.empty()) {
    return;
  }
  do {
    encodeFS(queue.pop());
  } while (!queue.empty());
}
/**
 * Writes one FS as an XML element.
 *
 * @param addr
 *          The address to be encoded.
 * @throws SAXException
 *           if the type of the FS cannot be classified or writing to the content handler fails
 */
private void encodeFS(int addr) throws SAXException {
  ++fsCount;
  workAttrs.clear();
  // Every FS gets an ID attribute; finding out which FSs are actually referenced would
  // require a complete traversal of the heap.
  addAttribute(workAttrs, ID_ATTR_NAME, getXmiId(addr));

  // XMI element name for the type, taken from the precomputed per-type-code array
  final int typeCode = cas.getHeapValue(addr);
  final XmlElementName elementName = xmiTypeNames[typeCode];

  // Fill in the remaining attributes and gather the child elements; arrays and lists get
  // special treatment.
  final int typeClass = classifyType(typeCode);
  final List children;
  switch (typeClass) {
    case LowLevelCAS.TYPE_CLASS_FS:
    case TYPE_CLASS_INTLIST:
    case TYPE_CLASS_FLOATLIST:
    case TYPE_CLASS_STRINGLIST:
    case TYPE_CLASS_FSLIST:
      // populates workAttrs and returns the features that must be written as child
      // elements (currently required for string arrays)
      children = encodeFeatures(addr, workAttrs, typeClass != LowLevelCAS.TYPE_CLASS_FS);
      break;
    case LowLevelCAS.TYPE_CLASS_FSARRAY:
    case LowLevelCAS.TYPE_CLASS_INTARRAY:
    case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
    case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
    case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
    case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
    case LowLevelCAS.TYPE_CLASS_LONGARRAY:
    case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
      // the whole array goes into a single "elements" attribute; there are no children
      workAttrs.addAttribute("", "", "elements", "CDATA", arrayToString(addr, typeClass));
      children = new ArrayList();
      break;
    case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
      // string arrays are encoded as elements, in case they contain whitespace
      children = new ArrayList();
      stringArrayToElementList("elements", addr, children);
      break;
    default:
      throw new SAXException("Error classifying FS type.");
  }

  startElement(elementName, workAttrs, children.size());
  sendElementEvents(children);
  endElement(elementName);
}
/**
 * Determines the XMI ID under which an FS is written.
 *
 * @param addr
 *          address of FS
 * @return XMI ID. Returns null if addr == CASImpl.NULL, or if filtering is active and the type
 *         of the FS is not in the target type system
 */
private String getXmiId(int addr) {
  if (addr == CASImpl.NULL) {
    return null;
  }
  if (isFiltering) {
    // references to types not in the target type system are returned as null
    final String typeName = cas.getTypeSystemImpl().ll_getTypeForCode(cas.getHeapValue(addr))
        .getName();
    if (filterTypeSystem.getType(typeName) == null) {
      return null;
    }
  }
  // in the absence of outside information, the FS address itself serves as the id
  return this.sharedData == null ? Integer.toString(addr) : this.sharedData.getXmiId(addr);
}
/**
 * Emits startElement, characters, and endElement SAX events for each element in the list. The
 * characters event is left out for elements without contents.
 *
 * @param elements
 *          a list of XmlElementNameAndContents objects representing the elements to generate
 * @throws SAXException
 *           if writing to the content handler fails
 */
private void sendElementEvents(List elements) throws SAXException {
  for (Iterator childIter = elements.iterator(); childIter.hasNext();) {
    final XmlElementNameAndContents child = (XmlElementNameAndContents) childIter.next();
    final boolean hasText = child.contents != null;
    // the text, when present, counts as the element's single child
    startElement(child.name, emptyAttrs, hasText ? 1 : 0);
    if (hasText) {
      addText(child.contents);
    }
    endElement(child.name);
  }
}
/**
 * Encode features of a regular (non-array) FS.
 * <p>
 * Feature values are added to <code>attrs</code> as XML attributes wherever possible. String
 * arrays and string lists that are serialized inline are returned as child elements instead,
 * because a space-separated attribute value would break for strings containing spaces.
 * Features that are not in the target type system are skipped when filtering is active, and a
 * feature whose computed value is null produces no attribute. Out-of-typesystem attributes and
 * child elements recorded for this FS in the shared data are appended at the end.
 *
 * @param addr
 *          Address of the FS
 * @param attrs
 *          SAX Attributes object, to which we will add attributes
 * @param insideListNode
 *          true iff this FS is a List type.
 *
 * @return a List of XmlElementNameAndContents objects, each of which represents an element that
 *         should be added as a child of the FS
 * @throws SAXException
 *           if reporting a warning fails
 */
private List encodeFeatures(int addr, AttributesImpl attrs, boolean insideListNode)
    throws SAXException {
  List childElements = new ArrayList();
  int heapValue = cas.getHeapValue(addr);
  int[] feats = cas.getTypeSystemImpl().ll_getAppropriateFeatures(heapValue);
  int featAddr, featVal, fsClass;
  String featName, attrValue;
  for (int i = 0; i < feats.length; i++) {
    if (isFiltering) {
      // skip features that aren't in the target type system
      String fullFeatName = cas.getTypeSystemImpl().ll_getFeatureForCode(feats[i]).getName();
      if (filterTypeSystem.getFeatureByFullName(fullFeatName) == null) {
        continue;
      }
    }
    featAddr = addr + cas.getFeatureOffset(feats[i]);
    featVal = cas.getHeapValue(featAddr);
    featName = cas.getTypeSystemImpl().ll_getFeatureForCode(feats[i]).getShortName();
    fsClass = classifyType(cas.getTypeSystemImpl().range(feats[i]));
    switch (fsClass) {
      case LowLevelCAS.TYPE_CLASS_INT:
      case LowLevelCAS.TYPE_CLASS_FLOAT:
      case LowLevelCAS.TYPE_CLASS_BOOLEAN:
      case LowLevelCAS.TYPE_CLASS_BYTE:
      case LowLevelCAS.TYPE_CLASS_SHORT:
      case LowLevelCAS.TYPE_CLASS_LONG:
      case LowLevelCAS.TYPE_CLASS_DOUBLE: {
        attrValue = cas.getFeatureValueAsString(addr, feats[i]);
        break;
      }
      case LowLevelCAS.TYPE_CLASS_STRING: {
        if (featVal == CASImpl.NULL) {
          attrValue = null;
          break;
        }
        attrValue = cas.getStringForCode(featVal);
        break;
      }
      // Arrays
      case LowLevelCAS.TYPE_CLASS_INTARRAY:
      case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
      case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
      case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
      case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
      case LowLevelCAS.TYPE_CLASS_LONGARRAY:
      case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
      case LowLevelCAS.TYPE_CLASS_FSARRAY: {
        // If the feature has multipleReferencesAllowed = true, serialize as any other FS.
        // If false, serialize as a multi-valued property.
        if (cas.getTypeSystemImpl().ll_getFeatureForCode(feats[i]).isMultipleReferencesAllowed()) {
          attrValue = getXmiId(featVal);
        } else {
          attrValue = arrayToString(featVal, fsClass);
        }
        break;
      }
      // special case for StringArrays, which stored values as child elements rather
      // than attributes.
      case LowLevelCAS.TYPE_CLASS_STRINGARRAY: {
        // If the feature has multipleReferencesAllowed = true, serialize as any other FS.
        // If false, serialize as a multi-valued property.
        if (cas.getTypeSystemImpl().ll_getFeatureForCode(feats[i]).isMultipleReferencesAllowed()) {
          attrValue = getXmiId(featVal);
        } else {
          stringArrayToElementList(featName, featVal, childElements);
          attrValue = null;
        }
        break;
      }
      // Lists
      case TYPE_CLASS_INTLIST:
      case TYPE_CLASS_FLOATLIST:
      case TYPE_CLASS_FSLIST: {
        // If the feature has multipleReferencesAllowed = true OR if we're already
        // inside another list node (i.e. this is the "tail" feature), serialize as a normal FS.
        // Otherwise, serialize as a multi-valued property.
        if (cas.getTypeSystemImpl().ll_getFeatureForCode(feats[i]).isMultipleReferencesAllowed() || insideListNode) {
          attrValue = getXmiId(featVal);
        } else {
          attrValue = listToString(featVal, fsClass);
        }
        break;
      }
      // special case for StringLists, which stored values as child elements rather
      // than attributes.
      case TYPE_CLASS_STRINGLIST: {
        if (cas.getTypeSystemImpl().ll_getFeatureForCode(feats[i]).isMultipleReferencesAllowed() || insideListNode) {
          attrValue = getXmiId(featVal);
        } else {
          // it is not safe to use a space-separated attribute, which would
          // break for strings containing spaces. So use child elements instead.
          String[] array = listUtils.stringListToStringArray(featVal);
          if (array.length > 0 && !arrayAndListFSs.put(featVal, featVal)) {
            reportWarning("Warning: multiple references to a ListFS. Reference identity will not be preserved.");
          }
          for (int j = 0; j < array.length; j++) {
            childElements.add(new XmlElementNameAndContents(new XmlElementName(null, featName,
                featName), array[j]));
          }
          attrValue = null;
        }
        break;
      }
      default: // Anything that's not a primitive type, array, or list.
      {
        attrValue = getXmiId(featVal);
        break;
      }
    }
    if (attrValue != null && featName != null) {
      addAttribute(attrs, featName, attrValue);
    }
  }
  // add out-of-typesystem features, if any
  if (this.sharedData != null) {
    OotsElementData oed = this.sharedData.getOutOfTypeSystemFeatures(addr);
    if (oed != null) {
      // attributes: added to the caller-supplied attribute list, like every other attribute
      // produced by this method, rather than to the workAttrs field
      Iterator attrIter = oed.attributes.iterator();
      while (attrIter.hasNext()) {
        XmlAttribute attr = (XmlAttribute) attrIter.next();
        addAttribute(attrs, attr.name, attr.value);
      }
      // child elements
      childElements.addAll(oed.childElements);
    }
  }
  return childElements;
}
/**
 * Sends the given text to the content handler as a single characters event.
 *
 * @param text
 *          the character data to write
 * @throws SAXException
 *           if the content handler fails
 */
private void addText(String text) throws SAXException {
  final char[] chars = text.toCharArray();
  ch.characters(chars, 0, chars.length);
}
/**
 * Appends an attribute of type CDATA to the given attribute list. The namespace URI and the
 * local name are passed as null; only the qualified name is supplied.
 *
 * @param attrs
 *          the attribute list to extend
 * @param attrName
 *          qualified name of the attribute
 * @param attrValue
 *          value of the attribute
 */
private void addAttribute(AttributesImpl attrs, String attrName, String attrValue) {
  final String noNamespaceUri = null;
  final String noLocalName = null;
  attrs.addAttribute(noNamespaceUri, noLocalName, attrName, cdataType, attrValue);
}
/**
 * Records the child count on the enclosing serializer and then sends a startElement event.
 *
 * @param name
 *          name of the element
 * @param attrs
 *          attributes of the element
 * @param aNumChildren
 *          number of children the element will have
 * @throws SAXException
 *           if the content handler fails
 */
private void startElement(XmlElementName name, Attributes attrs, int aNumChildren)
    throws SAXException {
  // stored first, so that the count is already in place when the handler receives the event
  XmiCasSerializer.this.numChildren = aNumChildren;
  // The NS URI (name.nsUri) is deliberately not passed on. Doing so causes the XMI serializer
  // to include the xmlns attribute in every element; the declarations are instead added
  // explicitly to the root element.
  final String noNamespaceUri = "";
  ch.startElement(noNamespaceUri, name.localName, name.qName, attrs);
}
/**
 * Sends an endElement event carrying the namespace URI, local name and qualified name of the
 * given element name.
 *
 * @param name
 *          name of the element being closed
 * @throws SAXException
 *           if the content handler fails
 */
private void endElement(XmlElementName name) throws SAXException {
  final String uri = name.nsUri;
  final String localName = name.localName;
  final String qName = name.qName;
  ch.endElement(uri, localName, qName);
}
/**
 * Appends one child element per entry of a string array to the result list. Nothing is added
 * when the array address is NULL.
 *
 * @param featName
 *          name used as both local name and qualified name of each generated element
 * @param addr
 *          address of the string array
 * @param resultList
 *          list that receives the XmlElementNameAndContents objects
 * @throws SAXException
 *           if reporting a warning fails
 */
private void stringArrayToElementList(String featName, int addr, List resultList)
    throws SAXException {
  if (addr != CASImpl.NULL) {
    // A space-separated attribute is not safe here, since it would break for strings
    // containing spaces; child elements are used instead.
    final int size = cas.ll_getArraySize(addr);
    if (size > 0 && !arrayAndListFSs.put(addr, addr)) {
      reportWarning("Warning: multiple references to a String array. Reference identity will not be preserved.");
    }
    final int start = cas.getArrayStartAddress(addr);
    for (int offset = 0; offset < size; offset++) {
      final String value = cas.getStringForCode(cas.getHeapValue(start + offset));
      final XmlElementName elementName = new XmlElementName(null, featName, featName);
      resultList.add(new XmlElementNameAndContents(elementName, value));
    }
  }
}
/**
* Converts a CAS array to the string used as the value of a multi-valued XMI attribute.
* <p>
* FS arrays become a space-separated list of XMI ids, byte arrays become a run of two-digit
* upper-case hex values without separators, and all other array types become a
* space-separated list of their element values.
*
* @param addr
*          address of the array
* @param arrayType
*          type class of the array
* @return the string representation, or null if addr == CASImpl.NULL
* @throws SAXException
*           if reporting a warning fails
*/
private String arrayToString(int addr, int arrayType) throws SAXException {
if (addr == CASImpl.NULL) {
return null;
}
StringBuffer buf = new StringBuffer();
final int size = cas.ll_getArraySize(addr);
// Record this non-empty array as serialized inline and warn when the recording fails.
// NOTE(review): presumably put() returns false when the address is already present - confirm
// against IntRedBlackTree.
if (size > 0 && !arrayAndListFSs.put(addr, addr)) {
reportWarning("Warning: multiple references to an array. Reference identity will not be preserved in XMI.");
}
String elemStr = null;
if (arrayType == LowLevelCAS.TYPE_CLASS_FSARRAY) {
int pos = cas.getArrayStartAddress(addr);
List ootsArrayElementsList = this.sharedData == null ? null :
this.sharedData.getOutOfTypeSystemArrayElements(addr);
// cursor into ootsArrayElementsList; it only ever moves forward across the whole array
int ootsIndex = 0;
for (int j = 0; j < size; j++) {
int heapValue = cas.getHeapValue(pos++);
elemStr = null;
// getXmiId returns null for a NULL element and for a filtered-out type
String xmiId = getXmiId(heapValue);
if (xmiId != null) {
elemStr = xmiId;
} else {
// special NULL object with xmi:id=0 is used to represent
// a null in an FSArray
elemStr = "0";
// However, this null array element might have been a reference to an
//out-of-typesystem FS, so check the ootsArrayElementsList
if (ootsArrayElementsList != null) {
// entries are consumed as they are examined, including those that do not match index j
while (ootsIndex < ootsArrayElementsList.size()) {
XmiArrayElement arel =(XmiArrayElement)ootsArrayElementsList.get(ootsIndex++);
if (arel.index == j) {
elemStr = arel.xmiId;
break;
}
}
}
}
// separate entries with single spaces
if (buf.length() > 0) {
buf.append(' ');
}
buf.append(elemStr);
}
return buf.toString();
} else if (arrayType == LowLevelCAS.TYPE_CLASS_BYTEARRAY) {
// special case for byte arrays: serialize as hex digits
ByteArrayFS byteArrayFS = new ByteArrayFSImpl(addr, cas);
int len = byteArrayFS.size();
for (int i = 0; i < len; i++) {
byte b = byteArrayFS.get(i);
// this test is necessary to generate a leading zero where necessary
if ((b & 0xF0) == 0) {
buf.append('0').append(Integer.toHexString(b).toUpperCase());
} else {
// mask to the low 8 bits so that negative bytes yield exactly two hex digits
buf.append(Integer.toHexString(0xFF & b).toUpperCase());
}
}
return buf.toString();
} else {
CommonArrayFS fs;
String[] fsvalues;
// wrap the array in the FS implementation matching its type class
switch (arrayType) {
case LowLevelCAS.TYPE_CLASS_INTARRAY:
fs = new IntArrayFSImpl(addr, cas);
break;
case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
fs = new FloatArrayFSImpl(addr, cas);
break;
case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
fs = new BooleanArrayFSImpl(addr, cas);
break;
case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
fs = new ShortArrayFSImpl(addr, cas);
break;
case LowLevelCAS.TYPE_CLASS_LONGARRAY:
fs = new LongArrayFSImpl(addr, cas);
break;
case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
fs = new DoubleArrayFSImpl(addr, cas);
break;
default: {
// fs stays null for every other type class; only the string array case below avoids
// dereferencing it
fs = null;
}
}
if (arrayType == LowLevelCAS.TYPE_CLASS_STRINGARRAY) {
StringArrayFS strFS = new StringArrayFSImpl(addr, cas);
fsvalues = strFS.toArray();
} else {
fsvalues = fs.toStringArray();
}
// join the values with single spaces
for (int i = 0; i < fsvalues.length; i++) {
if (buf.length() > 0) {
buf.append(' ');
}
buf.append(fsvalues[i]);
}
return buf.toString();
}
}
/**
 * Builds the space-separated string representation of a CAS ListFS, for use in multi-valued
 * XMI properties.
 *
 * @param addr
 *          address of the CAS ListFS
 * @param arrayType
 *          type of the List (defined by constants on this class)
 *
 * @return String representation of the list, or null if addr == CASImpl.NULL
 * @throws SAXException
 *           if reporting a warning fails
 */
private String listToString(int addr, int arrayType) throws SAXException {
  if (addr == CASImpl.NULL) {
    return null;
  }

  // fetch the list contents as strings; an unrecognized list type yields no entries
  final String[] items;
  if (arrayType == TYPE_CLASS_INTLIST) {
    items = listUtils.intListToStringArray(addr);
  } else if (arrayType == TYPE_CLASS_FLOATLIST) {
    items = listUtils.floatListToStringArray(addr);
  } else if (arrayType == TYPE_CLASS_STRINGLIST) {
    items = listUtils.stringListToStringArray(addr);
  } else if (arrayType == TYPE_CLASS_FSLIST) {
    items = listUtils.fsListToXmiIdStringArray(addr, sharedData);
  } else {
    items = new String[0];
  }

  if (items.length > 0 && !arrayAndListFSs.put(addr, addr)) {
    reportWarning("Warning: multiple references to a ListFS. Reference identity will not be preserved.");
  }

  // join with single spaces, without a trailing separator
  final StringBuffer joined = new StringBuffer();
  for (int k = 0; k < items.length; k++) {
    if (k > 0) {
      joined.append(' ');
    }
    joined.append(items[k]);
  }
  return joined.toString();
}
/**
 * Classifies a type. The result is an integer code identifying the type as one of the primitive
 * types, one of the array types, one of the list types, or a generic FS type (anything else).
 * <p>
 * The {@link LowLevelCAS#ll_getTypeClass(int)} method classifies primitives and array types,
 * but has no special classification for list types, which XMI serialization needs. Therefore,
 * in addition to the type codes defined on {@link LowLevelCAS}, this method can return one of
 * the type codes TYPE_CLASS_INTLIST, TYPE_CLASS_FLOATLIST, TYPE_CLASS_STRINGLIST, or
 * TYPE_CLASS_FSLIST.
 *
 * @param type
 *          the type to classify
 * @return one of the TYPE_CLASS codes defined on {@link LowLevelCAS} or on this class.
 */
private final int classifyType(int type) {
  final int typeClass;
  // the list checks come first; everything else is delegated to the CAS
  if (listUtils.isIntListType(type)) {
    typeClass = TYPE_CLASS_INTLIST;
  } else if (listUtils.isFloatListType(type)) {
    typeClass = TYPE_CLASS_FLOATLIST;
  } else if (listUtils.isStringListType(type)) {
    typeClass = TYPE_CLASS_STRINGLIST;
  } else if (listUtils.isFsListType(type)) {
    typeClass = TYPE_CLASS_FSLIST;
  } else {
    typeClass = cas.ll_getTypeClass(type);
  }
  return typeClass;
}
/**
 * Fills the nsUriToPrefixMap and xmiTypeNames structures from the CAS type system, after first
 * reserving the namespace prefixes used by out-of-typesystem data.
 */
private void initTypeAndNamespaceMappings() {
  nsUriToPrefixMap.put(XMI_NS_URI, XMI_NS_PREFIX);
  xmiTypeNames = new XmlElementName[cas.getTypeSystemImpl().getLargestTypeCode() + 1];

  // Prefix mappings used by out-of-typesystem data are added before the in-typesystem
  // namespaces, so that those prefixes are reserved and won't be reused for any
  // in-typesystem namespace.
  if (this.sharedData != null) {
    for (Iterator ootsIter = this.sharedData.getOutOfTypeSystemElements().iterator(); ootsIter
        .hasNext();) {
      final OotsElementData oed = (OotsElementData) ootsIter.next();
      final String nsUri = oed.elementName.nsUri;
      final String qName = oed.elementName.qName;
      final String localName = oed.elementName.localName;
      // the prefix is whatever precedes the separator character in front of the local name
      final int prefixEnd = qName.indexOf(localName) - 1;
      final String prefix = qName.substring(0, prefixEnd);
      nsUriToPrefixMap.put(nsUri, prefix);
      nsPrefixesUsed.add(prefix);
    }
  }

  // computing each type's element name also populates the nsUriToPrefix map
  for (Iterator typeIter = cas.getTypeSystemImpl().getTypeIterator(); typeIter.hasNext();) {
    final TypeImpl type = (TypeImpl) typeIter.next();
    xmiTypeNames[type.getCode()] = uimaTypeName2XmiElementName(type.getName());
  }
}
/**
 * Converts a UIMA-style dotted type name to the element name that should be used in the XMI
 * serialization. The XMI element name consists of three parts - the Namespace URI, the Local
 * Name, and the QName (qualified name).
 * <p>
 * The namespace URI is derived from the part of the type name before the last dot; a type name
 * without a dot is placed in {@link #DEFAULT_NAMESPACE_URI}. If no prefix has been registered
 * for that URI yet, one is chosen (the last segment of the namespace, or "noNamespace") and
 * recorded in nsUriToPrefixMap and nsPrefixesUsed. A candidate that is already taken, or that
 * must not be bound to a type namespace, gets the first free numeric suffix starting at 2.
 *
 * @param uimaTypeName
 *          a UIMA-style dotted type name
 * @return a data structure holding the three components of the XML element name
 */
private XmlElementName uimaTypeName2XmiElementName(String uimaTypeName) {
  // split uima type name into namespace and short name
  String namespace, shortName, nsUri;
  int lastDotIndex = uimaTypeName.lastIndexOf('.');
  if (lastDotIndex == -1) { // no namespace
    namespace = null;
    shortName = uimaTypeName;
    nsUri = DEFAULT_NAMESPACE_URI;
  } else {
    namespace = uimaTypeName.substring(0, lastDotIndex);
    shortName = uimaTypeName.substring(lastDotIndex + 1);
    nsUri = "http:///" + namespace.replace('.', '/') + ".ecore";
  }

  // determine what namespace prefix to use
  String prefix = (String) nsUriToPrefixMap.get(nsUri);
  if (prefix == null) {
    if (namespace != null) {
      int secondLastDotIndex = namespace.lastIndexOf('.');
      prefix = namespace.substring(secondLastDotIndex + 1);
    } else {
      prefix = "noNamespace";
    }
    // "xml" and "xmlns" are reserved by the XML Namespaces specification and may not be bound
    // to any other namespace; the XMI prefix is always bound to XMI_NS_URI by this serializer.
    // A type namespace ending in one of these segments must therefore not claim it verbatim.
    boolean reservedPrefix = "xml".equals(prefix) || "xmlns".equals(prefix)
        || XMI_NS_PREFIX.equals(prefix);
    // make sure this prefix hasn't already been used for some other namespace
    if (reservedPrefix || nsPrefixesUsed.contains(prefix)) {
      String basePrefix = prefix;
      int num = 2;
      while (nsPrefixesUsed.contains(basePrefix + num)) {
        num++;
      }
      prefix = basePrefix + num;
    }
    nsUriToPrefixMap.put(nsUri, prefix);
    nsPrefixesUsed.add(prefix);
  }
  return new XmlElementName(nsUri, shortName, prefix + ':' + shortName);
}
/**
 * Writes out every out-of-typesystem element held by the XmiSerializationSharedData (these are
 * recorded there during deserialization). Does nothing when no shared data is available.
 * <p>
 * Each element is emitted with its xmi:id attribute followed by its recorded attributes.
 * Features that were encoded as child elements are emitted as children, each with its own
 * attributes and, when present, its text content.
 */
private void serializeOutOfTypeSystemElements() throws SAXException {
  if (this.sharedData == null) {
    return;
  }
  for (Iterator elemIter = this.sharedData.getOutOfTypeSystemElements().iterator(); elemIter
      .hasNext();) {
    OotsElementData element = (OotsElementData) elemIter.next();

    // Attributes of the element itself: the ID first, then everything that was recorded.
    workAttrs.clear();
    addAttribute(workAttrs, ID_ATTR_NAME, element.xmiId);
    for (Iterator attrIter = element.attributes.iterator(); attrIter.hasNext();) {
      XmlAttribute attribute = (XmlAttribute) attrIter.next();
      addAttribute(workAttrs, attribute.name, attribute.value);
    }
    startElement(element.elementName, workAttrs, element.childElements.size());

    // Features that were encoded as child elements.
    for (Iterator childIter = element.childElements.iterator(); childIter.hasNext();) {
      XmlElementNameAndContents child = (XmlElementNameAndContents) childIter.next();
      workAttrs.clear();
      for (Iterator childAttrIter = child.attributes.iterator(); childAttrIter.hasNext();) {
        XmlAttribute childAttribute = (XmlAttribute) childAttrIter.next();
        addAttribute(workAttrs, childAttribute.name, childAttribute.value);
      }
      // A child with text announces one piece of content, a child without text announces none.
      boolean hasText = child.contents != null;
      startElement(child.name, workAttrs, hasText ? 1 : 0);
      if (hasText) {
        addText(child.contents);
      }
      endElement(child.name);
    }

    endElement(element.elementName);
  }
}
}
/** Namespace URI that the XML Namespaces specification assigns to <code>xmlns</code> declarations. */
public static final String XMLNS_NS_URI = "http://www.w3.org/2000/xmlns/";
/** Namespace URI of XMI; mapped to {@link #XMI_NS_PREFIX} when namespace prefixes are set up. */
public static final String XMI_NS_URI = "http://www.omg.org/XMI";
/** Namespace URI of XML Schema instance attributes. */
public static final String XSI_NS_URI = "http://www.w3.org/2001/XMLSchema-instance";
/** Prefix that is registered for {@link #XMI_NS_URI}. */
public static final String XMI_NS_PREFIX = "xmi";
/** Local name of the XMI tag. */
public static final String XMI_TAG_LOCAL_NAME = "XMI";
/** Qualified name of the XMI tag, i.e. {@link #XMI_NS_PREFIX} plus {@link #XMI_TAG_LOCAL_NAME}. */
public static final String XMI_TAG_QNAME = "xmi:XMI";
/**
 * Complete element name (namespace URI, local name, qualified name) of the XMI tag.
 * NOTE(review): presumably the root element of the serialized document - its use lies outside
 * this section, confirm there.
 */
public static final XmlElementName XMI_TAG = new XmlElementName(XMI_NS_URI, XMI_TAG_LOCAL_NAME,
XMI_TAG_QNAME);
/**
 * Attribute name "_indexed". NOTE(review): presumably marks FSs that are in an index - its use
 * lies outside this section, confirm there.
 */
public static final String INDEXED_ATTR_NAME = "_indexed";
/** Qualified name of the ID attribute that is written on serialized elements. */
public static final String ID_ATTR_NAME = "xmi:id";
/** Local name of the XMI version attribute. */
public static final String XMI_VERSION_LOCAL_NAME = "version";
/** Qualified name of the XMI version attribute. */
public static final String XMI_VERSION_QNAME = "xmi:version";
/** Value of the XMI version attribute. */
public static final String XMI_VERSION_VALUE = "2.0";
/** Namespace URI to use for UIMA types that have no namespace (the "default package" in Java) */
public static final String DEFAULT_NAMESPACE_URI = "http:///uima/noNamespace.ecore";
// Type system given to the constructor; per the constructor contract it restricts what is
// serialized, and may be null.
private TypeSystemImpl filterTypeSystem;
// UIMA logger, to which we may write warnings
private Logger logger;
// Optional map from namespace URI to schema location, as passed to the constructor (may be null).
private Map nsUriToSchemaLocationMap = null;
/**
 * Creates a new XmiCasSerializer that filters by type system and can emit schema locations.
 *
 * @param ts
 *          the TypeSystem of CASes that will be serialized. If any CAS that is later passed to
 *          the <code>serialize</code> method contains types and features that are not in this
 *          typesystem, the serialization will not contain instances of those types or values
 *          for those features. So this can be used to filter the results of serialization.
 * @param nsUriToSchemaLocationMap
 *          if supplied, this map is used to generate a "schemaLocation" attribute in the XMI
 *          output. This argument must be a map from namespace URIs to the schema location for
 *          that namespace URI.
 */
public XmiCasSerializer(TypeSystem ts, Map nsUriToSchemaLocationMap) {
  // The superclass constructor is invoked implicitly.
  this.nsUriToSchemaLocationMap = nsUriToSchemaLocationMap;
  this.filterTypeSystem = (TypeSystemImpl) ts;
  this.logger = UIMAFramework.getLogger(XmiCasSerializer.class);
}
/**
 * Creates a new XmiCasSerializer without a schema location map. Delegates to
 * {@link #XmiCasSerializer(TypeSystem, Map)} with a <code>null</code> map.
 *
 * @param ts
 *          the TypeSystem of CASes that will be serialized. If any CAS that is later passed to
 *          the <code>serialize</code> method contains types and features that are not in this
 *          typesystem, the serialization will not contain instances of those types or values
 *          for those features. So this can be used to filter the results of serialization.
 *          A null value indicates that all types and features will be serialized.
 */
public XmiCasSerializer(TypeSystem ts) {
// The cast selects the (TypeSystem, Map) overload; a bare null would also match the
// (TypeSystem, UimaContext) overload.
this(ts, (Map) null);
}
/**
 * Creates a new XmiCasSerializer. The UimaContext argument is ignored; the call is forwarded
 * to {@link #XmiCasSerializer(TypeSystem, Map)}.
 *
 * @param ts
 *          the TypeSystem of CASes that will be serialized. If any CAS that is later passed to
 *          the <code>serialize</code> method contains types and features that are not in this
 *          typesystem, the serialization will not contain instances of those types or values
 *          for those features. So this can be used to filter the results of serialization.
 * @param uimaContext
 *          not used
 * @param nsUriToSchemaLocationMap
 *          if supplied, this map is used to generate a "schemaLocation" attribute in the XMI
 *          output. This argument must be a map from namespace URIs to the schema location for
 *          that namespace URI.
 *
 * @deprecated Use {@link #XmiCasSerializer(TypeSystem, Map)} instead. The UimaContext reference
 *             is never used by this implementation.
 */
public XmiCasSerializer(TypeSystem ts, UimaContext uimaContext, Map nsUriToSchemaLocationMap) {
this(ts, nsUriToSchemaLocationMap);
}
/**
 * Creates a new XmiCasSerializer. The UimaContext argument is ignored; the call is forwarded
 * to {@link #XmiCasSerializer(TypeSystem)}.
 *
 * @param ts
 *          the TypeSystem of CASes that will be serialized. If any CAS that is later passed to
 *          the <code>serialize</code> method contains types and features that are not in this
 *          typesystem, the serialization will not contain instances of those types or values
 *          for those features. So this can be used to filter the results of serialization.
 * @param uimaContext
 *          not used
 *
 * @deprecated Use {@link #XmiCasSerializer(TypeSystem)} instead. The UimaContext reference is
 *             never used by this implementation.
 */
public XmiCasSerializer(TypeSystem ts, UimaContext uimaContext) {
this(ts);
}
/**
 * Write the CAS data to a SAX content handler, without an error handler. Equivalent to calling
 * {@link #serialize(CAS, ContentHandler, ErrorHandler)} with a <code>null</code> error handler.
 *
 * @param cas
 *          The CAS to be serialized.
 * @param contentHandler
 *          The SAX content handler the data is written to.
 *
 * @throws SAXException
 *           propagated from the serialization
 */
public void serialize(CAS cas, ContentHandler contentHandler) throws SAXException {
this.serialize(cas, contentHandler, null);
}
/**
 * Write the CAS data to a SAX content handler, without shared serialization data.
 * <p>
 * This is the same operation as
 * {@link #serialize(CAS, ContentHandler, ErrorHandler, XmiSerializationSharedData)} with
 * <code>null</code> shared data, and is implemented by delegating to it so that the
 * start-document / serialize / end-document sequence exists in one place only.
 *
 * @param cas
 *          The CAS to be serialized.
 * @param contentHandler
 *          The SAX content handler the data is written to.
 * @param errorHandler
 *          handler handed on to the document serializer; may be <code>null</code> (the
 *          two-argument overload passes <code>null</code>)
 *
 * @throws SAXException
 *           propagated from the content handler or from the serialization
 */
public void serialize(CAS cas, ContentHandler contentHandler, ErrorHandler errorHandler)
    throws SAXException {
  this.serialize(cas, contentHandler, errorHandler, null);
}
/**
 * Write the CAS data to a SAX content handler.
 * <p>
 * The document is bracketed by <code>startDocument()</code> and <code>endDocument()</code> on
 * the content handler; the content in between is produced by an XmiCasDocSerializer that works
 * on the base CAS of the given CAS.
 *
 * @param cas
 *          The CAS to be serialized.
 * @param contentHandler
 *          The SAX content handler the data is written to.
 * @param errorHandler
 *          handler handed on to the document serializer; may be <code>null</code>
 * @param sharedData
 *          data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
 *          information; may be <code>null</code>
 * @throws SAXException
 *           propagated from the content handler or from the serialization
 */
public void serialize(CAS cas, ContentHandler contentHandler, ErrorHandler errorHandler,
    XmiSerializationSharedData sharedData) throws SAXException {
  contentHandler.startDocument();
  // Build the per-document serializer on the base CAS and run it right away.
  new XmiCasDocSerializer(contentHandler, errorHandler, ((CASImpl) cas).getBaseCAS(),
      sharedData).serialize();
  contentHandler.endDocument();
}
/**
 * Serializes a CAS to an XMI stream. Forwards to
 * {@link #serialize(CAS, TypeSystem, OutputStream, boolean, XmiSerializationSharedData)} with
 * no target type system, no pretty printing and no shared data.
 *
 * @param aCAS
 *          CAS to serialize.
 * @param aStream
 *          output stream to which to write the XMI document
 *
 * @throws SAXException
 *           if a problem occurs during XMI serialization
 */
public static void serialize(CAS aCAS, OutputStream aStream) throws SAXException {
serialize(aCAS, null, aStream, false, null);
}
/**
 * Serializes a CAS to an XMI stream. Allows a TypeSystem to be specified, to which the produced
 * XMI will conform. Any types or features not in the target type system will not be serialized.
 * Forwards to
 * {@link #serialize(CAS, TypeSystem, OutputStream, boolean, XmiSerializationSharedData)} with
 * no pretty printing and no shared data.
 *
 * @param aCAS
 *          CAS to serialize.
 * @param aTargetTypeSystem
 *          type system to which the produced XMI will conform. Any types or features not in the
 *          target type system will not be serialized. A null value indicates that all types and
 *          features will be serialized.
 * @param aStream
 *          output stream to which to write the XMI document
 *
 * @throws SAXException
 *           if a problem occurs during XMI serialization
 */
public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream)
throws SAXException {
serialize(aCAS, aTargetTypeSystem, aStream, false, null);
}
/**
 * Serializes a CAS to an XMI stream. This is the most configurable of the stream-based
 * variants: it creates an XmiCasSerializer for the target type system and an XMLSerializer for
 * the stream, and serializes the CAS into the XMLSerializer's content handler without an error
 * handler.
 *
 * @param aCAS
 *          CAS to serialize.
 * @param aTargetTypeSystem
 *          type system to which the produced XMI will conform. Any types or features not in the
 *          target type system will not be serialized. A null value indicates that all types and
 *          features will be serialized.
 * @param aStream
 *          output stream to which to write the XMI document
 * @param aPrettyPrint
 *          if true the XML output will be formatted with newlines and indenting. If false it
 *          will be unformatted.
 * @param aSharedData
 *          an optional container for data that is shared between the {@link XmiCasSerializer}
 *          and the {@link XmiCasDeserializer}. See the JavaDocs for
 *          {@link XmiSerializationSharedData} for details.
 *
 * @throws SAXException
 *           if a problem occurs during XMI serialization
 */
public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream, boolean aPrettyPrint,
    XmiSerializationSharedData aSharedData)
    throws SAXException {
  XmiCasSerializer serializer = new XmiCasSerializer(aTargetTypeSystem);
  // SAX events produced by the serializer are turned into XML text on the stream.
  ContentHandler xmlWriter = new XMLSerializer(aStream, aPrettyPrint).getContentHandler();
  serializer.serialize(aCAS, xmlWriter, null, aSharedData);
}
}