blob: 7adfa075ad6fc92d7861de01e6f2b3a918c0d4b2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.impl;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.TreeMap;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.FSIndexRepository;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.SofaFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
import org.apache.uima.internal.util.I18nUtil;
import org.apache.uima.internal.util.IntVector;
import org.apache.uima.internal.util.XmlAttribute;
import org.apache.uima.internal.util.XmlElementName;
import org.apache.uima.internal.util.XmlElementNameAndContents;
import org.apache.uima.internal.util.rb_trees.RedBlackTree;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* XMI CAS deserializer. Used to read in a CAS from XML Metadata Interchange (XMI) format.
*/
public class XmiCasDeserializer {
private class XmiCasDeserializerHandler extends DefaultHandler {
// ///////////////////////////////////////////////////////////////////////
// Internal states for the parser.
// Expect the start of the XML document.
private static final int DOC_STATE = 0;
// At the top level. Expect a FS, or document text element, or the end of the
// XML input.
private static final int FS_STATE = 1;
// Inside a FS. Expect features, or the end of the FS.
private static final int FEAT_STATE = 2;
// Inside a feature element. We expect the feature value.
private static final int FEAT_CONTENT_STATE = 3;
// Inside an element with the XMI namespace - indicating content that's
// not part of the typesystem and should be ignored.
private static final int IGNORING_XMI_ELEMENTS_STATE = 4;
// Inside a reference feature element (e.g. <feat href="#1").
// We expect no content, just the end of the element.
private static final int REF_FEAT_STATE = 5;
// End parser states.
// ///////////////////////////////////////////////////////////////////////
// For error message printing, if the Locator object can't provide source
// of XML input.
private static final String unknownXMLSource = "<unknown>";
private String ID_ATTR_NAME = "xmi:id";
// SAX locator. Used for error message generation.
private Locator locator;
// The CAS we're filling.
private CASImpl casBeingFilled;
// Store address of every FS we've deserialized, since we need to go back
// and apply fix-ups afterwards.
private IntVector deserializedFsAddrs;
// Store a separate vector of FSList nodes that were deserialized from multivalued properties.
// These are special because their "head" feature needs remapping but their "tail" feature
// doesn't.
private IntVector fsListNodesFromMultivaluedProperties;
// What we expect next.
private int state;
// StringBuffer to accumulate text.
private StringBuffer buffer;
// The address of the most recently created FS. Needed for embedded
// feature values.
private int currentAddr;
// The type of the most recently created FS. Needed for arrays, also
// useful for embedded feature values.
private TypeImpl currentType;
// the ID and values of arrays are stored on startElement, then used on
// endElement to actually create the array. This is because in the case of
// String arrays serialized with the values as child elements, we can't create
// the array until we've seen all of the child elements.
private int currentArrayId;
private List<String> currentArrayElements;
// Used for keeping track of multi-valued features read from subelements.
// Keys are feature names, values are ArrayLists of strings,
// where each String is one of the values to be assigned to the feature.
private Map<String, List<String>> multiValuedFeatures = new TreeMap<String, List<String>>();
// SofaFS type
private int sofaTypeCode;
// Sofa number feature code
private int sofaNumFeatCode;
// Annotation:sofa feature code
private int sofaFeatCode;
// Store IndexRepositories in a vector;
private List<FSIndexRepository> indexRepositories;
// and views too
private List<CAS> views;
// utilities for handling CAS list types
private ListUtils listUtils;
// type of each feature, according to constants below
private int[] featureType;
// true if unknown types should be ignored; false if they should cause an error
boolean lenient;
// number of outstanding startElement events that we are ignoring
// we add 1 when an ignored element starts and subtract 1 when an ignored
// element ends
private int ignoreDepth = 0;
// map from namespace prefixes to URIs. Allows namespace resolution even
// with a non-namespace-enabled SAX parser.
private Map<String, String> nsPrefixToUriMap = new HashMap<String, String>();
// container for data shared between the XmiCasSerializer and
// XmiCasDeserializer, to support things such as consistency of IDs across
// multiple serializations. This is also where the map from xmi:id to
// FS address is stored.
private XmiSerializationSharedData sharedData;
// number of Sofas found so far
private int nextSofaNum;
//used for merging multiple XMI CASes into one CAS object.
private int mergePoint;
//Current out-of-typesystem element, if any
private OotsElementData outOfTypeSystemElement = null;
//local map from xmi:id to FS address, used when merging multiple XMI CASes
//into one CAS object.
private RedBlackTree<Integer> localXmiIdToFsAddrMap = new RedBlackTree<Integer>();
//if mergepoint is set, are preexisting FS allowed, disallowed or ignored.
AllowPreexistingFS allowPreexistingFS;
//When deserializing delta CAS preexisting FS, keep track of features that
//have been deserialized. This is then compared to the all features for the
//type and features that are not in the xmi are set to null.
IntVector featsSeen = null;
//set this flag if a preexisting FS is encountered when deserializing
//a delta CAS View referencing a disallowed preexisting FS member.
//The preexisting members are ignored and deserialization allowed
//to complete so that the CAS being filled is not corrupted.
//An exception is thrown at the end.
//NOTE: Since preexisting FSs are serialized first, when deserializing
//of delta CAS with a disallowed preexisting FS, the error will be
//caught and reported before any updates are made to the CAS being filled.
boolean disallowedViewMemberEncountered;
/**
 * Creates a SAX handler used for deserializing an XMI CAS.
 *
 * @param aCAS CAS to deserialize into (its base CAS is the one that gets filled)
 * @param lenient if true, unknown types/features are ignored. If false,
 *        unknown types/features result in an exception.
 * @param sharedData data structure used to allow the XmiCasSerializer and
 *        XmiCasDeserializer to share information. If null, a new, empty
 *        instance is created and used.
 * @param mergePoint used to support merging multiple XMI CASes. If the
 *        mergePoint is negative, "normal" deserialization will be done,
 *        meaning the target CAS will be reset and the entire XMI content will
 *        be deserialized. If the mergePoint is nonnegative (including 0), the
 *        target CAS will not be reset, and only Feature Structures whose
 *        xmi:id is strictly greater than the mergePoint value will be
 *        deserialized.
 * @param allowPreexistingFS if a mergePoint is set, states whether preexisting
 *        Feature Structures found in the XMI are allowed, disallowed or ignored
 */
private XmiCasDeserializerHandler(CASImpl aCAS, boolean lenient,
    XmiSerializationSharedData sharedData, int mergePoint, AllowPreexistingFS allowPreexistingFS) {
  super();
  this.casBeingFilled = aCAS.getBaseCAS();
  this.lenient = lenient;
  this.sharedData =
      sharedData != null ? sharedData : new XmiSerializationSharedData();
  this.mergePoint = mergePoint;
  this.allowPreexistingFS = allowPreexistingFS;
  this.featsSeen = null;
  this.disallowedViewMemberEncountered = false;
  if (mergePoint < 0) {
    // If not merging, reset the CAS.
    // Necessary to get Sofas to work properly.
    casBeingFilled.resetNoQuestions();
    // clear ID mappings stored in the SharedData (from previous deserializations)
    this.sharedData.clearIdMap();
    // new Sofas start at 2
    this.nextSofaNum = 2;
  } else {
    // merging: continue numbering after the Sofas that already exist
    this.nextSofaNum = this.casBeingFilled.getBaseSofaCount() + 1;
  }
  this.deserializedFsAddrs = new IntVector();
  this.fsListNodesFromMultivaluedProperties = new IntVector();
  this.buffer = new StringBuffer();
  this.indexRepositories = new ArrayList<FSIndexRepository>();
  this.views = new ArrayList<CAS>();
  // position 0: base CAS index repository
  indexRepositories.add(this.casBeingFilled.getBaseIndexRepository());
  // position 1: there should always be another index for the Initial View
  indexRepositories.add(this.casBeingFilled.getView(CAS.NAME_DEFAULT_SOFA).getIndexRepository());
  // add an entry to indexRepositories for each Sofa in the CAS (which can only happen if
  // a mergePoint was specified)
  FSIterator<SofaFS> sofaIter = this.casBeingFilled.getSofaIterator();
  while (sofaIter.hasNext()) {
    SofaFS sofa = sofaIter.next();
    if (sofa.getSofaRef() == 1) {
      casBeingFilled.registerInitialSofa();
    } else {
      // add indexRepo for views other than the initial view
      indexRepositories.add(casBeingFilled.getSofaIndexRepository(sofa));
    }
  }
  final TypeSystemImpl tsOfReceivingCas = casBeingFilled.getTypeSystemImpl();
  this.sofaTypeCode = tsOfReceivingCas.ll_getCodeForTypeName(CAS.TYPE_NAME_SOFA);
  this.sofaNumFeatCode = tsOfReceivingCas.ll_getCodeForFeatureName(CAS.FEATURE_FULL_NAME_SOFANUM);
  this.sofaFeatCode = tsOfReceivingCas.ll_getCodeForFeatureName(CAS.FEATURE_FULL_NAME_SOFA);
  this.listUtils = new ListUtils(casBeingFilled, UIMAFramework.getLogger(XmiCasDeserializer.class), null);
  // populate feature type table, indexed by feature code
  this.featureType = new int[tsOfReceivingCas.getNumberOfFeatures() + 1];
  Iterator<Feature> it = tsOfReceivingCas.getFeatures();
  while (it.hasNext()) {
    FeatureImpl feat = (FeatureImpl) it.next();
    featureType[feat.getCode()] = classifyType(tsOfReceivingCas.range(feat.getCode()));
  }
}
/** Drops any accumulated character data by installing a fresh, empty text buffer. */
private final void resetBuffer() {
  buffer = new StringBuffer();
}
/*
 * (non-Javadoc)
 *
 * Puts the parser state machine into its initial state (DOC_STATE).
 *
 * @see org.xml.sax.ContentHandler#startDocument()
 */
public void startDocument() throws SAXException {
  // All other setup work has already been done in the constructor.
  state = DOC_STATE;
}
/*
 * (non-Javadoc)
 *
 * Drives the parser state machine for every opening tag:
 * - DOC_STATE: root element; namespace prefix declarations are collected.
 * - FS_STATE: a top-level element; either ignored (xmi-prefixed elements, or
 *   preexisting FSs when those are to be ignored) or read as a feature structure.
 * - FEAT_STATE: a child element of an FS, i.e. a feature encoded as an element.
 * - IGNORING_XMI_ELEMENTS_STATE: nested element inside ignored content.
 * Any other state means character content was expected, which is an error.
 *
 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
 * java.lang.String, org.xml.sax.Attributes)
 */
public void startElement(String nameSpaceURI, String localName, String qualifiedName,
    Attributes attrs) throws SAXException {
  resetBuffer();
  switch (state) {
    case DOC_STATE: {
      // allow any root element name
      // extract xmlns:prefix=uri attributes into a map, which we can use to
      // resolve the prefixes even with a non-namespace-aware parser
      if (attrs != null) {
        for (int i = 0; i < attrs.getLength(); i++) {
          String attrName = attrs.getQName(i);
          if (attrName.startsWith("xmlns:")) {
            String prefix = attrName.substring(6);
            String uri = attrs.getValue(i);
            nsPrefixToUriMap.put(prefix, uri);
          }
        }
      }
      this.state = FS_STATE;
      break;
    }
    case FS_STATE: {
      // ignore elements with XMI prefix (such as XMI annotations)
      if (qualifiedName.startsWith("xmi")) {
        this.state = IGNORING_XMI_ELEMENTS_STATE;
        this.ignoreDepth++;
        return;
      }
      // if deserializing a delta CAS, check whether this is a preexisting FS
      // and, if so, whether preexisting FSs are allowed
      if (this.mergePoint >= 0) {
        String id = attrs.getValue(ID_ATTR_NAME);
        if (id != null) {
          int idInt;
          try {
            idInt = Integer.parseInt(id);
          } catch (NumberFormatException e) {
            // report a malformed xmi:id as a parse error rather than letting
            // the raw NumberFormatException escape from the SAX callback
            throw createException(XCASParsingException.ILLEGAL_ID, id);
          }
          if (idInt > 0 && !this.isNewFS(idInt)) { // preexisting FS
            if (this.allowPreexistingFS == AllowPreexistingFS.ignore) {
              // skip elements whose ID is <= mergePoint
              this.state = IGNORING_XMI_ELEMENTS_STATE;
              this.ignoreDepth++;
              return;
            } else if (this.allowPreexistingFS == AllowPreexistingFS.disallow) { // fail
              CASRuntimeException e = new CASRuntimeException(
                  CASRuntimeException.DELTA_CAS_PREEXISTING_FS_DISALLOWED,
                  new String[] {ID_ATTR_NAME + "=" + id,
                      nameSpaceURI,
                      localName,
                      qualifiedName});
              throw e;
            }
          }
        }
      }
      if (nameSpaceURI == null || nameSpaceURI.length() == 0) {
        // parser may not be namespace-enabled, so try to resolve NS ourselves
        int colonIndex = qualifiedName.indexOf(':');
        if (colonIndex != -1) {
          String prefix = qualifiedName.substring(0, colonIndex);
          nameSpaceURI = nsPrefixToUriMap.get(prefix);
          if (nameSpaceURI == null) {
            // unbound namespace. Rather than failing, just assume a reasonable default.
            nameSpaceURI = "http:///" + prefix + ".ecore";
          }
          localName = qualifiedName.substring(colonIndex + 1);
        } else {
          // no prefix. Use default URI
          nameSpaceURI = XmiCasSerializer.DEFAULT_NAMESPACE_URI;
        }
      }
      readFS(nameSpaceURI, localName, qualifiedName, attrs);
      multiValuedFeatures.clear();
      state = FEAT_STATE;
      break;
    }
    case FEAT_STATE: {
      // parsing a feature recorded as a child element
      // check for an "href" feature, used for references
      String href = attrs.getValue("href");
      if (href != null && href.startsWith("#")) {
        // for out-of-typesystem objects, there's special handling here
        // to keep track of the fact this was an href so we re-serialize
        // correctly.
        if (this.outOfTypeSystemElement != null) {
          XmlElementName elemName = new XmlElementName(nameSpaceURI, localName, qualifiedName);
          List<XmlAttribute> ootsAttrs = new ArrayList<XmlAttribute>();
          ootsAttrs.add(new XmlAttribute("href", href));
          XmlElementNameAndContents elemWithContents =
              new XmlElementNameAndContents(elemName, null, ootsAttrs);
          this.outOfTypeSystemElement.childElements.add(elemWithContents);
        } else {
          // In-typesystem FS, so we can forget this was an href and just add
          // the integer value, which will be interpreted as a reference later.
          // NOTE: this will end up causing it to be reserialized as an attribute
          // rather than an element, but that is not in violation of the XMI spec.
          List<String> valueList = this.multiValuedFeatures.get(qualifiedName);
          if (valueList == null) {
            valueList = new ArrayList<String>();
            this.multiValuedFeatures.put(qualifiedName, valueList);
          }
          // strip the leading '#'
          valueList.add(href.substring(1));
        }
        state = REF_FEAT_STATE;
      } else {
        // non-reference feature, expecting feature value as character content
        state = FEAT_CONTENT_STATE;
      }
      break;
    }
    case IGNORING_XMI_ELEMENTS_STATE: {
      // one more nested element to wait out before leaving the ignored content
      ignoreDepth++;
      break;
    }
    default: {
      // If we're not in an element expecting state, raise an error.
      throw createException(XCASParsingException.TEXT_EXPECTED, qualifiedName);
    }
  }
}
/**
 * Handles a top-level XMI element: resolves its UIMA type and then either
 * records out-of-typesystem data, processes a cas:View, remembers array
 * contents for creation in endElement, or creates/looks up the feature
 * structure and reads its attributes.
 *
 * @param nameSpaceURI namespace URI of the element
 * @param localName local name of the element
 * @param qualifiedName qualified name of the element
 * @param attrs attributes of the element
 * @throws SAXException if the type is unknown (and not lenient) or an xmi:id is not an integer
 */
private void readFS(String nameSpaceURI, String localName, String qualifiedName,
    Attributes attrs) throws SAXException {
  String typeName = xmiElementName2uimaTypeName(nameSpaceURI, localName);
  currentType = (TypeImpl) ts.getType(typeName);
  if (currentType == null) {
    // ignore NULL type
    if ("uima.cas.NULL".equals(typeName)) {
      return;
    }
    // special processing for uima.cas.View (encodes indexed FSs)
    if ("uima.cas.View".equals(typeName)) {
      processView(attrs.getValue("sofa"), attrs.getValue("members"));
      String added = attrs.getValue("added_members");
      String deleted = attrs.getValue("deleted_members");
      String reindexed = attrs.getValue("reindexed_members");
      processView(attrs.getValue("sofa"), added, deleted, reindexed);
      return;
    }
    // type is not in our type system
    if (!lenient) {
      throw createException(XCASParsingException.UNKNOWN_TYPE, typeName);
    } else {
      addToOutOfTypeSystemData(
          new XmlElementName(nameSpaceURI, localName, qualifiedName), attrs);
    }
    return;
  } else if (casBeingFilled.isArrayType(currentType)) {
    // store ID and array values (if specified as attribute).
    // we will actually create the array later, in endElement.
    currentArrayId = parseOptionalXmiId(attrs.getValue(ID_ATTR_NAME));
    String elements = attrs.getValue("elements");
    // special parsing for byte arrays (they are serialized as a hex
    // string). And we create them here instead of parsing to a string
    // array, for efficiency.
    if (casBeingFilled.isByteArrayType(currentType)) {
      createByteArray(elements, currentArrayId, 0);
    } else {
      if (elements != null) {
        String[] parsedElements = parseArray(elements);
        currentArrayElements = Arrays.asList(parsedElements);
      } else {
        currentArrayElements = null;
      }
    }
  } else {
    String idStr = attrs.getValue(ID_ATTR_NAME);
    int xmiId = parseOptionalXmiId(idStr);
    if (isNewFS(xmiId)) { // new FS so create it.
      final int addr = casBeingFilled.ll_createFS(currentType.getCode());
      readFS(addr, attrs);
    } else { // preexisting
      if (this.allowPreexistingFS == AllowPreexistingFS.disallow) {
        CASRuntimeException e = new CASRuntimeException(
            CASRuntimeException.DELTA_CAS_PREEXISTING_FS_DISALLOWED,
            new String[] {ID_ATTR_NAME + "=" + idStr,
                nameSpaceURI,
                localName,
                qualifiedName});
        throw e;
      } else if (this.allowPreexistingFS == AllowPreexistingFS.allow) { // get the FS
        final int addr = getFsAddrForXmiId(xmiId);
        readFS(addr, attrs);
      } // otherwise ignore
    }
  }
}

/**
 * Parses an optional xmi:id attribute value.
 *
 * @param idStr the attribute value, may be null
 * @return the parsed id, or -1 if the attribute was absent
 * @throws SAXException (ILLEGAL_ID) if the value is present but not an integer
 */
private int parseOptionalXmiId(String idStr) throws SAXException {
  if (idStr == null) {
    return -1;
  }
  try {
    return Integer.parseInt(idStr);
  } catch (NumberFormatException e) {
    throw createException(XCASParsingException.ILLEGAL_ID, idStr);
  }
}
/**
 * Processes the "members" of a cas:View element, i.e. the feature structures
 * that are indexed in that view, and adds each new one to the view's index
 * repository. Does nothing when there is no members string.
 *
 * @param sofa
 *          xmi:id of the sofa for this view; when null, index repository number 1
 *          (the _InitialView one) is used
 * @param membersString
 *          whitespace-separated string of xmi:ids. Each referenced FS is added to the
 *          selected index repository
 * @throws SAXParseException
 *           if the sofa id is unknown, or a member id is unknown and parsing is not lenient
 */
private void processView(String sofa, String membersString) throws SAXParseException {
  // TODO: this requires View to come AFTER all of its members
  if (membersString == null) {
    return;
  }
  // a view with no Sofa will be added to the 1st, _InitialView, index
  int sofaNum = 1;
  if (sofa != null) {
    // translate sofa's xmi:id into its sofanum
    final int sofaXmiId = Integer.parseInt(sofa);
    final int sofaAddr;
    try {
      sofaAddr = getFsAddrForXmiId(sofaXmiId);
    } catch (NoSuchElementException e) {
      throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(sofaXmiId));
    }
    sofaNum = casBeingFilled.getFeatureValue(sofaAddr, sofaNumFeatCode);
  }
  final FSIndexRepositoryImpl targetRepo = (FSIndexRepositoryImpl) indexRepositories.get(sofaNum);
  // TODO: optimize by going straight to int[] without going through
  // intermediate String[]?
  for (String member : parseArray(membersString)) {
    final int memberId = Integer.parseInt(member);
    // if merging, don't try to index anything below the merge point
    if (!isNewFS(memberId)) {
      if (this.allowPreexistingFS == AllowPreexistingFS.disallow) {
        // remember the violation; it is reported later
        this.disallowedViewMemberEncountered = true;
      }
      continue;
    }
    // have to map each ID to its "real" address (TODO: optimize?)
    try {
      targetRepo.addFS(getFsAddrForXmiId(memberId));
    } catch (NoSuchElementException e) {
      if (lenient) {
        // unknown view member may be an OutOfTypeSystem FS
        this.sharedData.addOutOfTypeSystemViewMember(sofa, member);
      } else {
        throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(memberId));
      }
    }
  }
}
/**
 * Handles the delta-CAS attributes of a cas:View element in the XMI: members
 * added to, deleted from, and reindexed in the view's index repository.
 *
 * @param sofa
 *          xmi:id of the sofa for this view; when null, index repository number 1
 *          (the _InitialView one) is used
 * @param addmembersString
 *          whitespace-separated xmi:ids of FSs to add to the index, may be null
 * @param delmemberString
 *          whitespace-separated xmi:ids of FSs to remove from the index, may be null
 * @param reindexmemberString
 *          whitespace-separated xmi:ids of FSs to remove from and re-add to the index,
 *          may be null
 * @throws SAXParseException
 *           if the sofa id is unknown, or a member id is unknown and parsing is not lenient
 */
private void processView(String sofa, String addmembersString,
    String delmemberString, String reindexmemberString) throws SAXParseException {
  // TODO: this requires View to come AFTER all of its members
  if (addmembersString != null) {
    processView(sofa, addmembersString);
  }
  if (delmemberString == null && reindexmemberString == null) {
    return;
  }
  // a view with no Sofa uses the 1st, _InitialView, index
  int sofaNum = 1;
  if (sofa != null) {
    // translate sofa's xmi:id into its sofanum
    int sofaXmiId = Integer.parseInt(sofa);
    int sofaAddr;
    try {
      sofaAddr = getFsAddrForXmiId(sofaXmiId);
    } catch (NoSuchElementException e) {
      // same reporting as the two-argument processView: unknown sofa id is a parse error
      throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(sofaXmiId));
    }
    sofaNum = casBeingFilled.getFeatureValue(sofaAddr, sofaNumFeatCode);
  }
  FSIndexRepositoryImpl indexRep = (FSIndexRepositoryImpl) indexRepositories.get(sofaNum);
  if (delmemberString != null) {
    removeOrReindexViewMembers(sofa, indexRep, delmemberString, false);
  }
  if (reindexmemberString != null) {
    removeOrReindexViewMembers(sofa, indexRep, reindexmemberString, true);
  }
}

/**
 * Removes each listed view member from the index repository and, when
 * reindexing, adds it back again. Preexisting FSs are skipped when they are
 * disallowed (and flagged) or ignored.
 *
 * @param sofa xmi:id of the view's sofa (only used to record out-of-typesystem members)
 * @param indexRep index repository of the view
 * @param membersString whitespace-separated xmi:ids of the members to process
 * @param reindex true to re-add each member after removing it
 * @throws SAXParseException if a member id is unknown and parsing is not lenient
 */
private void removeOrReindexViewMembers(String sofa, FSIndexRepositoryImpl indexRep,
    String membersString, boolean reindex) throws SAXParseException {
  // TODO: optimize by going straight to int[] without going through
  // intermediate String[]?
  String[] members = parseArray(membersString);
  for (int i = 0; i < members.length; i++) {
    int id = Integer.parseInt(members[i]);
    if (!isNewFS(id)) { // preexisting FS
      if (this.allowPreexistingFS == AllowPreexistingFS.disallow) {
        this.disallowedViewMemberEncountered = true; // ignore but flag it.
        continue;
      } else if (this.allowPreexistingFS == AllowPreexistingFS.ignore) {
        continue; // ignore
      }
    }
    // have to map each ID to its "real" address (TODO: optimize?)
    try {
      int addr = getFsAddrForXmiId(id);
      indexRep.removeFS(addr);
      if (reindex) {
        indexRep.addFS(addr);
      }
    } catch (NoSuchElementException e) {
      if (!lenient) {
        throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(id));
      } else {
        // unknown view member may be an OutOfTypeSystem FS
        this.sharedData.addOutOfTypeSystemViewMember(sofa, members[i]);
      }
    }
  }
}
/**
 * Fills the feature structure at the given address from the attributes of its
 * XMI element, performs the extra registration needed for new Sofas, and
 * records the mapping between the FS address and its xmi:id.
 *
 * @param addr
 *          address of the (new or preexisting) FS to fill
 * @param attrs
 *          attributes of the XMI element; must include a valid integer xmi:id
 * @throws SAXException
 *           if the xmi:id is missing or not an integer, or a feature cannot be set
 */
private void readFS(final int addr, Attributes attrs) throws SAXException {
  // Hang on to address for handle features encoded as child elements
  this.currentAddr = addr;
  final int typeCode = casBeingFilled.getHeapValue(addr);
  final Type type = casBeingFilled.getTypeSystemImpl().ll_getTypeForCode(typeCode);
  final boolean isSofa = sofaTypeCode == typeCode;

  final String idStr = attrs.getValue(ID_ATTR_NAME);
  int id;
  try {
    id = Integer.parseInt(idStr);
  } catch (NumberFormatException e) {
    throw createException(XCASParsingException.ILLEGAL_ID, idStr);
  }
  // is it a new FS
  final boolean newFS = this.isNewFS(id);

  int thisSofaNum = 0;
  if (isSofa) {
    String sofaID = attrs.getValue(CAS.FEATURE_BASE_NAME_SOFAID);
    if (sofaID.equals(CAS.NAME_DEFAULT_SOFA) || sofaID.equals("_DefaultTextSofaName")) {
      // initial view Sofa always has sofaNum = 1
      thisSofaNum = 1;
    } else if (newFS) {
      thisSofaNum = this.nextSofaNum++;
    } else {
      thisSofaNum = Integer.parseInt(attrs.getValue(CAS.FEATURE_BASE_NAME_SOFANUM));
    }
  }

  // When deserializing a preexisting (delta CAS) non-Sofa FS, keep track of the
  // features that were deserialized. This is set up before the attribute loop
  // because the xmi:id attribute is not guaranteed to be the first attribute;
  // features listed ahead of it must be recorded as seen too.
  this.featsSeen = (!isSofa && !newFS) ? new IntVector(attrs.getLength()) : null;

  for (int i = 0; i < attrs.getLength(); i++) {
    String attrName = attrs.getQName(i);
    String attrValue = attrs.getValue(i);
    if (attrName.equals(ID_ATTR_NAME)) {
      // already parsed above
      continue;
    }
    if (isSofa) {
      if (attrName.equals(CAS.FEATURE_BASE_NAME_SOFAID)) {
        if (attrValue.equals("_DefaultTextSofaName")) {
          // First change old default Sofa name into the new one
          attrValue = CAS.NAME_DEFAULT_SOFA;
        }
      } else if (attrName.equals(CAS.FEATURE_BASE_NAME_SOFANUM)) {
        // use the sofa number determined above, not the serialized one
        attrValue = Integer.toString(thisSofaNum);
      }
    }
    int featCode = handleFeature(type, addr, attrName, attrValue, newFS);
    // -1 means the feature is not in the type system
    if (this.featsSeen != null && featCode != -1) {
      this.featsSeen.add(featCode);
    }
  }

  if (isSofa && newFS) {
    // If a Sofa, create CAS view to get new indexRepository
    SofaFS sofa = (SofaFS) casBeingFilled.createFS(addr);
    // also add to indexes so we can retrieve the Sofa later
    casBeingFilled.getBaseIndexRepository().addFS(sofa);
    CAS view = casBeingFilled.getView(sofa);
    if (sofa.getSofaRef() == 1) {
      casBeingFilled.registerInitialSofa();
    } else {
      // add indexRepo for views other than the initial view
      indexRepositories.add(casBeingFilled.getSofaIndexRepository(sofa));
    }
    ((CASImpl) view).registerView(sofa);
    views.add(view);
  }
  deserializedFsAddrs.add(addr);
  addFsAddrXmiIdMapping(addr, id);
}
// A value counts as "null" when it is absent or has zero length. Any other
// value must be in the expected format.
private final boolean emptyVal(String val) {
  if (val == null) {
    return true;
  }
  return val.length() == 0;
}
/**
 * Sets a feature, identified by its base name, from an XMI attribute value.
 * Features that are not in the type system are recorded as out-of-typesystem
 * attributes when lenient, and cause an exception otherwise.
 *
 * @param type
 *          type of the FS being filled
 * @param addr
 *          address of the FS being filled
 * @param featName
 *          base name of the feature
 * @param featVal
 *          string representation of the feature value
 * @param newFS
 *          true if the FS is new, false if it is a preexisting FS (delta CAS)
 * @return the code of the feature, or -1 if the feature is not in the type system
 * @throws SAXException
 *           if the feature is unknown and parsing is not lenient, or the value is malformed
 */
private int handleFeature(final Type type, int addr, String featName, String featVal, boolean newFS) throws SAXException {
  final FeatureImpl feat = (FeatureImpl) type.getFeatureByBaseName(featName);
  if (feat == null) {
    if (!this.lenient) {
      throw createException(XCASParsingException.UNKNOWN_FEATURE, featName);
    } else {
      sharedData.addOutOfTypeSystemAttribute(addr, featName, featVal);
    }
    return -1;
  }
  // Preexisting Sofa FS: the sofaID and sofaNum features are never modified, and
  // the Sofa data features (string, URI, array) are skipped if already set.
  // The caller-supplied newFS flag is used here; addr is a heap address, not an
  // xmi:id, so it must not be passed to isNewFS.
  if (sofaTypeCode == casBeingFilled.getHeapValue(addr) && !newFS) {
    if (featName.equals(CAS.FEATURE_BASE_NAME_SOFAID)
        || featName.equals(CAS.FEATURE_BASE_NAME_SOFANUM)) {
      return feat.getCode();
    } else if (featName.equals(CAS.FEATURE_BASE_NAME_SOFASTRING)
        || featName.equals(CAS.FEATURE_BASE_NAME_SOFAURI)
        || featName.equals(CAS.FEATURE_BASE_NAME_SOFAARRAY)) {
      int currVal = casBeingFilled.getFeatureValue(addr, feat.getCode());
      if (currVal != 0) {
        return feat.getCode();
      }
    }
  }
  handleFeature(addr, feat.getCode(), featVal);
  return feat.getCode();
}
/**
 * Sets a feature, identified by its base name, from a list of values that were
 * read from child elements. Unknown features are stored as out-of-typesystem
 * child elements when lenient, and cause an exception otherwise.
 *
 * @param type
 *          type of the FS being filled
 * @param addr
 *          address of the FS being filled
 * @param featName
 *          base name of the feature
 * @param featVals
 *          the values for the feature, one String each
 * @return the code of the feature, or -1 if the feature is not in the type system
 * @throws SAXException
 *           if the feature is unknown and parsing is not lenient, or a value is malformed
 */
private int handleFeature(final Type type, int addr, String featName, List<String> featVals) throws SAXException {
  final FeatureImpl feature = (FeatureImpl) type.getFeatureByBaseName(featName);
  if (feature != null) {
    handleFeature(addr, feature.getCode(), featVals);
    return feature.getCode();
  }
  if (this.lenient) {
    sharedData.addOutOfTypeSystemChildElements(addr, featName, featVals);
    return -1;
  }
  throw createException(XCASParsingException.UNKNOWN_FEATURE, featName);
}
/**
 * Assigns a value to a CAS feature, given the value's textual form as found in
 * an XMI attribute. How the text is interpreted depends on the class of the
 * feature's range type, as recorded in the featureType table.
 *
 * @param addr
 *          address of FS containing the feature
 * @param featCode
 *          code of feature to set
 * @param featVal
 *          string representation of the feature value
 * @throws SAXException
 *           if the value does not have the expected format
 */
private void handleFeature(int addr, int featCode, String featVal) throws SAXException {
  final int featClass = featureType[featCode];
  switch (featClass) {
    case LowLevelCAS.TYPE_CLASS_INT: {
      if (featCode != sofaFeatCode) {
        setIntValuedFeature(addr, featCode, featVal);
        break;
      }
      // special handling for "sofa" feature of annotation. Need to change
      // it from a sofa reference into a sofa number
      try {
        if (!emptyVal(featVal)) {
          final int sofaXmiId = Integer.parseInt(featVal);
          final int sofaAddr = getFsAddrForXmiId(sofaXmiId);
          final int sofaNum = casBeingFilled.getFeatureValue(sofaAddr, sofaNumFeatCode);
          casBeingFilled.setFeatureValue(addr, featCode, sofaNum);
        }
      } catch (NumberFormatException e) {
        throw createException(XCASParsingException.INTEGER_EXPECTED, featVal);
      }
      break;
    }
    case LowLevelCAS.TYPE_CLASS_FLOAT:
    case LowLevelCAS.TYPE_CLASS_BOOLEAN:
    case LowLevelCAS.TYPE_CLASS_BYTE:
    case LowLevelCAS.TYPE_CLASS_SHORT:
    case LowLevelCAS.TYPE_CLASS_LONG:
    case LowLevelCAS.TYPE_CLASS_DOUBLE: {
      try {
        if (!emptyVal(featVal)) {
          casBeingFilled.setFeatureValueFromString(addr, featCode, featVal);
        }
      } catch (NumberFormatException e) {
        throw createException(XCASParsingException.FLOAT_EXPECTED, featVal);
      }
      break;
    }
    case LowLevelCAS.TYPE_CLASS_STRING: {
      // test against null only: emptyVal would also filter out ""
      if (featVal != null) {
        // compare with the value currently in the CAS and set only if different
        final String existing = casBeingFilled.getStringValue(addr, featCode);
        if (!featVal.equals(existing)) {
          casBeingFilled.setStringValue(addr, featCode, featVal);
        }
      }
      break;
    }
    case LowLevelCAS.TYPE_CLASS_FS: {
      setIntValuedFeature(addr, featCode, featVal);
      break;
    }
    // For array types and list features, there are two kinds of serializations.
    // If the feature has multipleReferencesAllowed = true, then it should have been
    // serialized as a normal FS. If it has multipleReferencesAllowed = false, then
    // it should have been serialized as a multi-valued property.
    case LowLevelCAS.TYPE_CLASS_INTARRAY:
    case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
    case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
    case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
    case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
    case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
    case LowLevelCAS.TYPE_CLASS_LONGARRAY:
    case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
    case LowLevelCAS.TYPE_CLASS_FSARRAY: {
      if (ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
        // do the usual FS deserialization
        setIntValuedFeature(addr, featCode, featVal);
      } else if (featClass == LowLevelCAS.TYPE_CLASS_BYTEARRAY) {
        // multivalued property, but byte arrays have a special
        // serialization (as hex digits)
        final int currFeatVal = casBeingFilled.getFeatureValue(addr, featCode);
        final int casArray = createByteArray(featVal, -1, currFeatVal);
        if (casArray != currFeatVal) {
          casBeingFilled.setFeatureValue(addr, featCode, casArray);
        }
      } else {
        // Do the multivalued property deserialization.
        handleFeature(addr, featCode, Arrays.asList(parseArray(featVal)));
      }
      break;
    }
    // For list types, we do the same as for array types UNLESS we're dealing with
    // the tail feature of another list node. In that case we do the usual FS deserialization.
    case XmiCasSerializer.TYPE_CLASS_INTLIST:
    case XmiCasSerializer.TYPE_CLASS_FLOATLIST:
    case XmiCasSerializer.TYPE_CLASS_STRINGLIST:
    case XmiCasSerializer.TYPE_CLASS_FSLIST: {
      if (ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
        // do the usual FS deserialization
        setIntValuedFeature(addr, featCode, featVal);
      } else {
        // do the multivalued property deserialization, like arrays
        handleFeature(addr, featCode, Arrays.asList(parseArray(featVal)));
      }
      break;
    }
    default: {
      assert false; // this should be an exhaustive case block
    }
  }
}

/**
 * Parses the value as an integer and stores it in the feature. Empty or null
 * values leave the feature untouched.
 *
 * @param addr
 *          address of FS containing the feature
 * @param featCode
 *          code of feature to set
 * @param featVal
 *          string representation of the integer value
 * @throws SAXException
 *           (INTEGER_EXPECTED) if the value is not an integer
 */
private void setIntValuedFeature(int addr, int featCode, String featVal) throws SAXException {
  try {
    if (!emptyVal(featVal)) {
      casBeingFilled.setFeatureValue(addr, featCode, Integer.parseInt(featVal));
    }
  } catch (NumberFormatException e) {
    throw createException(XCASParsingException.INTEGER_EXPECTED, featVal);
  }
}
/**
 * Splits an XMI multi-valued attribute into its individual values. Leading and trailing
 * whitespace is removed first; the remaining text is split on runs of whitespace.
 *
 * @param val
 *          XMI attribute value
 * @return one element per value, or a zero-length array when <code>emptyVal</code> reports the
 *         trimmed value as empty
 */
private String[] parseArray(String val) {
  final String trimmed = val.trim();
  return emptyVal(trimmed) ? new String[0] : trimmed.split("\\s+");
}
/**
* Set a CAS feature from a list of Strings. This supports the XMI syntax where each value is
* listed as a separate subelement. The handling is chosen from the type class recorded in
* <code>featureType[featCode]</code>:
* <ul>
* <li>primitive and FS-reference features must have exactly one value, which is passed to the
* single-String <code>handleFeature</code> overload;</li>
* <li>array features are created, or updated in place, through <code>createArray</code>;</li>
* <li>list features are created when the feature currently holds 0, and updated in place
* otherwise.</li>
* </ul>
* For array and list features that do not allow multiple references, the array / list (or, for
* FS lists, each list node) is additionally registered against the encompassing FS.
*
* @param addr
* address of FS containing the feature
* @param featCode
* code of feature to set
* @param featVals
* List of Strings, each String representing one value for the feature
* @throws SAXException
* if the feature is a primitive or FS-reference feature and <code>featVals</code> does not
* contain exactly one value
*/
private void handleFeature(int addr, int featCode, List<String> featVals) throws SAXException {
switch (featureType[featCode]) {
case LowLevelCAS.TYPE_CLASS_INT:
case LowLevelCAS.TYPE_CLASS_FLOAT:
case LowLevelCAS.TYPE_CLASS_STRING:
case LowLevelCAS.TYPE_CLASS_BOOLEAN:
case LowLevelCAS.TYPE_CLASS_BYTE:
case LowLevelCAS.TYPE_CLASS_SHORT:
case LowLevelCAS.TYPE_CLASS_LONG:
case LowLevelCAS.TYPE_CLASS_DOUBLE:
case LowLevelCAS.TYPE_CLASS_FS:
// Single-valued feature: anything other than exactly one value is an error.
if (featVals.size() != 1) {
throw new SAXParseException(I18nUtil.localizeMessage(
UIMAException.STANDARD_MESSAGE_CATALOG, Locale.getDefault(),
"multiple_values_unexpected",
new Object[] { ts.ll_getFeatureForCode(featCode).getName() }), locator);
} else {
handleFeature(addr, featCode, featVals.get(0));
}
break;
case LowLevelCAS.TYPE_CLASS_INTARRAY:
case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
case LowLevelCAS.TYPE_CLASS_LONGARRAY:
case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
case LowLevelCAS.TYPE_CLASS_FSARRAY: {
int casArray = 0;
// The current feature value is handed to createArray as the candidate for in-place update;
// -1 is passed as the xmi:id.
int currVal = casBeingFilled.getFeatureValue(addr, featCode);
casArray = createArray(casBeingFilled.getTypeSystemImpl().range(featCode), featVals, -1, currVal);
// Only rewrite the feature when createArray returned a different array.
if (currVal != casArray) {
casBeingFilled.setFeatureValue(addr, featCode, casArray);
}
//add to nonshared fs to encompassing FS map
if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
addNonsharedFSToEncompassingFSMapping(casArray, addr);
}
break;
}
case XmiCasSerializer.TYPE_CLASS_INTLIST: {
// A current value of 0 means no list exists yet: create one; otherwise update it in place.
int listFS = casBeingFilled.getFeatureValue(addr, featCode);
if (listFS == 0) {
listFS = listUtils.createIntList(featVals);
casBeingFilled.setFeatureValue(addr, featCode, listFS);
} else {
listUtils.updateIntList(listFS, featVals);
}
//add to nonshared fs to encompassing FS map
if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
addNonsharedFSToEncompassingFSMapping(listFS, addr);
}
break;
}
case XmiCasSerializer.TYPE_CLASS_FLOATLIST: {
// Same create-or-update handling as for int lists.
int listFS = casBeingFilled.getFeatureValue(addr, featCode);
if (listFS == 0) {
listFS = listUtils.createFloatList(featVals);
casBeingFilled.setFeatureValue(addr, featCode, listFS);
} else {
listUtils.updateFloatList(listFS, featVals);
}
//add to nonshared fs to encompassing FS map
if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
addNonsharedFSToEncompassingFSMapping(listFS, addr);
}
break;
}
case XmiCasSerializer.TYPE_CLASS_STRINGLIST: {
// Same create-or-update handling as for int lists.
int listFS = casBeingFilled.getFeatureValue(addr, featCode);
if (listFS == 0) {
listFS = listUtils.createStringList(featVals);
casBeingFilled.setFeatureValue(addr, featCode, listFS);
} else {
listUtils.updateStringList(listFS, featVals);
}
//add to nonshared fs to encompassing FS map
if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
addNonsharedFSToEncompassingFSMapping(listFS, addr);
}
break;
}
case XmiCasSerializer.TYPE_CLASS_FSLIST: {
// this call, in addition to creating the list in the CAS, also
// adds each list node ID to the fsListNodesFromMultivaluedProperties list.
// We need this so we can go back through later and reset the addresses of the
// "head" features of these lists nodes (but not reset the tail features).
// It also adds a mapping between the nodes and the encompassing FS in order
// to properly serialize in delta xmi format.
int listFS = casBeingFilled.getFeatureValue(addr, featCode);
// Receives the list nodes reported by createFsList / updateFsList.
IntVector fslistnodes = new IntVector();
if (listFS == 0) {
listFS = listUtils.createFsList(featVals, fslistnodes);
casBeingFilled.setFeatureValue(addr, featCode, listFS);
} else {
listUtils.updateFsList(listFS, featVals, fslistnodes);
}
//add to multivaluedproperties fs list.
for (int i=0; i < fslistnodes.size(); i++) {
fsListNodesFromMultivaluedProperties.add(fslistnodes.get(i));
}
//add to nonshared fs to encompassing FS map.
if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
for (int i=0; i < fslistnodes.size(); i++) {
addNonsharedFSToEncompassingFSMapping(fslistnodes.get(i), addr);
}
}
break;
}
default: {
assert false; // this should be an exhaustive case block
}
}
}
/**
 * Creates an array in the CAS from a list of element strings, or updates an existing array in
 * place.
 * <p>
 * A preexisting array is looked up through <code>addr</code> when that is positive, otherwise
 * through <code>xmiId</code> when that is neither -1 nor a new xmi:id. The preexisting array is
 * reused only when its size equals the number of supplied values; in every other case a new
 * array is allocated. The resulting address is appended to the deserialized-FS list and passed
 * to <code>addFsAddrXmiIdMapping</code> together with <code>xmiId</code>.
 *
 * @param arrayType
 *          CAS type code for the array
 * @param values
 *          List of strings, each representing an element in the array
 * @param xmiId
 *          xmi:id assigned to the array object; callers pass -1 when the array has none
 * @param addr
 *          address of preexisting non-shared array; a non-positive value means there is none
 * @return address of the array that holds the values
 */
private int createArray(int arrayType, List<String> values, int xmiId, int addr) {
  // Step 1: find the preexisting array (if any) that may be updated in place.
  final boolean hasCandidate;
  final int candidate;
  if (addr > 0) {
    // non-shared preexisting
    hasCandidate = true;
    candidate = addr;
  } else if (xmiId == -1 || isNewFS(xmiId)) {
    // non-shared new, or shared new
    hasCandidate = false;
    candidate = -1;
  } else {
    // shared preexisting
    hasCandidate = true;
    candidate = getFsAddrForXmiId(xmiId);
  }

  // Step 2: reuse the candidate only if the element count is unchanged.
  final int resultAddr;
  if (hasCandidate
      && values.size() == casBeingFilled.getLowLevelCAS().ll_getArraySize(candidate)) {
    updateExistingArray(arrayType, values, candidate);
    resultAddr = candidate;
  } else {
    resultAddr = createNewArray(arrayType, values);
  }

  deserializedFsAddrs.add(resultAddr);
  addFsAddrXmiIdMapping(resultAddr, xmiId);
  return resultAddr;
}
/**
 * Allocates a new array FS of the requested array type, sized to hold all supplied values, and
 * stores each value into it from its string form.
 *
 * @param arrayType
 *          CAS type code for the array; any type that is not one of the primitive array types
 *          is created through <code>createArrayFS</code>
 * @param values
 *          List of strings, each containing the value of an element of the array.
 * @return address of the newly created array
 */
private int createNewArray(int arrayType, List<String> values) {
  final int length = values.size();
  final FeatureStructureImpl created;
  if (casBeingFilled.isIntArrayType(arrayType)) {
    created = (FeatureStructureImpl) casBeingFilled.createIntArrayFS(length);
  } else if (casBeingFilled.isFloatArrayType(arrayType)) {
    created = (FeatureStructureImpl) casBeingFilled.createFloatArrayFS(length);
  } else if (casBeingFilled.isStringArrayType(arrayType)) {
    created = (FeatureStructureImpl) casBeingFilled.createStringArrayFS(length);
  } else if (casBeingFilled.isBooleanArrayType(arrayType)) {
    created = (FeatureStructureImpl) casBeingFilled.createBooleanArrayFS(length);
  } else if (casBeingFilled.isByteArrayType(arrayType)) {
    created = (FeatureStructureImpl) casBeingFilled.createByteArrayFS(length);
  } else if (casBeingFilled.isShortArrayType(arrayType)) {
    created = (FeatureStructureImpl) casBeingFilled.createShortArrayFS(length);
  } else if (casBeingFilled.isLongArrayType(arrayType)) {
    created = (FeatureStructureImpl) casBeingFilled.createLongArrayFS(length);
  } else if (casBeingFilled.isDoubleArrayType(arrayType)) {
    created = (FeatureStructureImpl) casBeingFilled.createDoubleArrayFS(length);
  } else {
    created = (FeatureStructureImpl) casBeingFilled.createArrayFS(length);
  }

  final int arrayAddr = created.getAddress();
  int position = 0;
  for (String element : values) {
    casBeingFilled.setArrayValueFromString(arrayAddr, position, element);
    position++;
  }
  return arrayAddr;
}
/**
 * Overwrites the elements of an existing CAS array with the supplied values. For string arrays
 * an element is left untouched when its current value is non-null and equal to the new value;
 * every other element is set from its string form.
 *
 * @param arrayType
 *          CAS type code for the array
 * @param values
 *          List of strings, each representing an element in the array
 * @param casArray
 *          address of the array to update
 */
private void updateExistingArray(int arrayType, List<String> values, int casArray) {
  int position = 0;
  for (String newValue : values) {
    final int i = position++;
    boolean unchanged = false;
    if (casBeingFilled.isStringArrayType(arrayType)) {
      final String current = casBeingFilled.getLowLevelCAS().ll_getStringArrayValue(casArray, i);
      unchanged = current != null && current.equals(newValue);
    }
    if (!unchanged) {
      casBeingFilled.setArrayValueFromString(casArray, i, newValue);
    }
  }
}
/**
 * Creates a byte array in the CAS from a hex string, or refills an existing one.
 * <p>
 * An existing ByteArrayFS is looked up through <code>addr</code> when that is positive,
 * otherwise through <code>xmiId</code> when that is neither -1 nor a new xmi:id. It is reused
 * only if its size equals the decoded length; otherwise a new ByteArrayFS is created. The
 * decoded length is <code>hexString.length() / 2</code>, so an unpaired trailing character is
 * not decoded.
 *
 * @param hexString
 *          value of the byte array as a hex string
 * @param xmiId
 *          xmiId - this will be -1 if this is a non-shared byte array FS.
 * @param addr
 *          the current address of the non-shared ByteArrayFS used when processing a Delta CAS.
 * @return address of the ByteArrayFS that holds the decoded bytes
 */
private int createByteArray(String hexString, int xmiId, int addr) {
  final int byteCount = hexString.length() / 2;

  final ByteArrayFS target;
  final boolean preexisting = addr > 0 || (xmiId != -1 && !isNewFS(xmiId));
  if (preexisting) {
    // non-shared existing FS is addressed directly; shared existing FS via its xmi:id
    final int existingAddr = (addr > 0) ? addr : getFsAddrForXmiId(xmiId);
    final ByteArrayFS existing = (ByteArrayFS) casBeingFilled.createFS(existingAddr);
    if (existing.size() == byteCount) {
      target = existing;
    } else {
      // length changed: a fresh FS is needed
      target = casBeingFilled.createByteArrayFS(byteCount);
    }
  } else {
    target = casBeingFilled.createByteArrayFS(byteCount);
  }

  for (int i = 0, pos = 0; i < byteCount; i++, pos += 2) {
    final int highNibble = hexCharToByte(hexString.charAt(pos));
    final int lowNibble = hexCharToByte(hexString.charAt(pos + 1));
    target.set(i, (byte) ((highNibble << 4) | lowNibble));
  }

  final int arrayAddr = ((FeatureStructureImpl) target).getAddress();
  deserializedFsAddrs.add(arrayAddr);
  addFsAddrXmiIdMapping(arrayAddr, xmiId);
  return arrayAddr;
}
/**
 * Converts a single hexadecimal digit to its numeric value.
 *
 * @param c
 *          a hex digit: '0'-'9', 'A'-'F' or 'a'-'f'
 * @return the value of the digit, between 0 and 15
 * @throws NumberFormatException
 *           if <code>c</code> is not a hexadecimal digit
 */
private byte hexCharToByte(char c) {
  if ('0' <= c && c <= '9') {
    return (byte) (c - '0');
  }
  if ('A' <= c && c <= 'F') {
    return (byte) (c - 'A' + 10);
  }
  // Lower-case digits: the range must start at 'a' (not '1'), otherwise non-hex characters
  // between '1' and 'f' are accepted and 'a'-'f' decode to the wrong value.
  if ('a' <= c && c <= 'f') {
    return (byte) (c - 'a' + 10);
  }
  throw new NumberFormatException("Invalid hex char: " + c);
}
/**
 * Appends the received character data to the text buffer while the handler is in
 * FEAT_CONTENT_STATE. Character data received in any other state is ignored.
 *
 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
 */
public void characters(char[] chars, int start, int length) throws SAXException {
  if (this.state == FEAT_CONTENT_STATE) {
    buffer.append(chars, start, length);
  }
}
/**
 * Tests whether a buffer contains nothing but whitespace, as judged by
 * {@link Character#isWhitespace(char)}. An empty buffer is considered all whitespace.
 *
 * @param b
 *          the buffer to inspect
 * @return <code>false</code> as soon as a non-whitespace character is found, <code>true</code>
 *         otherwise
 */
boolean isAllWhitespace(StringBuffer b) {
  int remaining = b.length();
  while (remaining > 0) {
    remaining--;
    if (!Character.isWhitespace(b.charAt(remaining))) {
      return false;
    }
  }
  return true;
}
/**
* Handles the end of an XML element. What happens depends on the current parser state:
* <ul>
* <li>DOC_STATE: nothing.</li>
* <li>FS_STATE: returns to DOC_STATE.</li>
* <li>FEAT_CONTENT_STATE: the buffered text is appended to the value list stored in
* <code>multiValuedFeatures</code> under the element's qualified name; returns to
* FEAT_STATE.</li>
* <li>REF_FEAT_STATE: returns to FEAT_STATE.</li>
* <li>FEAT_STATE: the end of a feature structure. Features collected from child elements are
* applied (to the out-of-typesystem element, to a new array, or to the current FS), features
* not seen are reset when <code>featsSeen</code> is being tracked, and the state returns to
* FS_STATE.</li>
* <li>IGNORING_XMI_ELEMENTS_STATE: decrements <code>ignoreDepth</code> and returns to FS_STATE
* once it reaches 0.</li>
* </ul>
*
* @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String,
* java.lang.String)
*/
public void endElement(String nsURI, String localName, String qualifiedName)
throws SAXException {
switch (this.state) {
case DOC_STATE: {
// Do nothing.
break;
}
case FS_STATE: {
this.state = DOC_STATE;
break;
}
case FEAT_CONTENT_STATE: {
// We have just processed one of possibly many values for a feature.
// Store this value in the multiValuedFeatures map for later use.
List<String> valueList = this.multiValuedFeatures.get(qualifiedName);
if (valueList == null) {
// first value seen for this element name
valueList = new ArrayList<String>();
this.multiValuedFeatures.put(qualifiedName, valueList);
}
valueList.add(buffer.toString());
// go back to the state where we're expecting a feature
this.state = FEAT_STATE;
break;
}
case REF_FEAT_STATE: {
this.state = FEAT_STATE;
break;
}
case FEAT_STATE: {
// end of FS. Process multi-valued features or array elements that were
// encoded as subelements
if (this.outOfTypeSystemElement != null) {
// The element is being kept as out-of-typesystem data: attach the collected child
// values to it instead of touching the CAS.
if (!this.multiValuedFeatures.isEmpty()) {
for (Map.Entry<String, List<String>> entry : this.multiValuedFeatures.entrySet()) {
String featName = entry.getKey();
List<String> featVals = entry.getValue();
addOutOfTypeSystemFeature(outOfTypeSystemElement, featName, featVals);
}
}
this.outOfTypeSystemElement = null;
}
else if (currentType != null) {
if (casBeingFilled.isArrayType(currentType) && !casBeingFilled.isByteArrayType(currentType)) {
// create the array now. elements may have been provided either as
// attributes or child elements, but not both.
// BUT - not byte arrays! They are created immediately, to avoid
// the overhead of parsing into a String array first
if (currentArrayElements == null) // were not specified as attributes
{
// fall back to the values collected from child elements named "elements"
currentArrayElements = this.multiValuedFeatures.get("elements");
if (currentArrayElements == null) {
currentArrayElements = Collections.emptyList();
}
}
createArray(currentType.getCode(), currentArrayElements, currentArrayId, 0);
} else if (!this.multiValuedFeatures.isEmpty()) {
for (Map.Entry<String, List<String>> entry : this.multiValuedFeatures.entrySet()) {
String featName = entry.getKey();
List<String> featVals = entry.getValue();
int featcode = handleFeature(currentType, currentAddr, featName, featVals);
// Record the feature as seen, but only when a feature code was returned (not -1) and
// seen-feature tracking is active for this FS.
if (featcode != -1 && this.featsSeen != null ) {
this.featsSeen.add(featcode);
}
}
}
//if this is a preexisting FS which is not a Sofa FS,
//set the features that were not deserialized to null.
if (sofaTypeCode != currentType.getCode() && this.featsSeen != null) {
int[] feats = casBeingFilled.getTypeSystemImpl().ll_getAppropriateFeatures(currentType.getCode());
for (int i=0; i< feats.length; i++) {
if ( !this.featsSeen.contains(feats[i]) ) {
casBeingFilled.setFeatureValue(currentAddr, feats[i], CASImpl.NULL);
}
}
// tracking for this FS is finished
this.featsSeen = null;
}
}
this.state = FS_STATE;
break;
}
case IGNORING_XMI_ELEMENTS_STATE: {
// Leaving one level of an ignored subtree; resume normal processing at depth 0.
ignoreDepth--;
if (ignoreDepth == 0) {
this.state = FS_STATE;
}
break;
}
}
}
/**
 * Completes deserialization once the whole document has been read: every deserialized FS is
 * finalized, the heads of FS list nodes created from multi-valued properties are remapped, and
 * the document annotation of each view is updated. Finally, if a disallowed preexisting view
 * member was encountered during parsing, a {@link CASRuntimeException} is thrown.
 *
 * @see org.xml.sax.ContentHandler#endDocument()
 */
public void endDocument() throws SAXException {
  // Resolve ID references held by the deserialized feature structures.
  for (int n = 0; n < deserializedFsAddrs.size(); n++) {
    finalizeFS(deserializedFsAddrs.get(n));
  }

  // List nodes built from multi-valued properties only need their head remapped.
  for (int n = 0; n < fsListNodesFromMultivaluedProperties.size(); n++) {
    remapFSListHeads(fsListNodesFromMultivaluedProperties.get(n));
  }

  for (int n = 0; n < views.size(); n++) {
    final CASImpl view = (CASImpl) views.get(n);
    view.updateDocumentAnnotation();
  }

  // Report a disallowed preexisting FS only after everything else has been processed.
  if (!this.disallowedViewMemberEncountered) {
    return;
  }
  throw new CASRuntimeException(
      CASRuntimeException.DELTA_CAS_PREEXISTING_FS_DISALLOWED,
      new String[] {"Preexisting FS view member encountered." });
}
/**
 * Applies xmi:id remappings to a deserialized FS. Arrays are delegated to
 * <code>finalizeArray</code>. For every other FS, each feature whose range is a reference type
 * and which is either a plain FS reference or allows multiple references has its value (an
 * xmi:id at this point) replaced by the heap address of the target FS.
 * <p>
 * When the xmi:id cannot be resolved, a parsing exception is thrown unless the handler is
 * lenient; in lenient mode the id is recorded as an out-of-typesystem attribute and the feature
 * is set to null.
 *
 * @param addr
 *          address of the FS to finalize
 * @throws SAXParseException
 *           if an xmi:id cannot be resolved and the handler is not lenient
 */
private void finalizeFS(int addr) throws SAXParseException {
  final int fsType = casBeingFilled.getHeapValue(addr);
  if (casBeingFilled.isArrayType(fsType)) {
    finalizeArray(fsType, addr);
    return;
  }

  for (int featCode : casBeingFilled.getTypeSystemImpl().ll_getAppropriateFeatures(fsType)) {
    final Feature feature = ts.ll_getFeatureForCode(featCode);
    final int rangeType = ts.ll_getRangeType(featCode);
    final boolean holdsXmiId = casBeingFilled.ll_isRefType(rangeType)
        && (featureType[featCode] == LowLevelCAS.TYPE_CLASS_FS
            || feature.isMultipleReferencesAllowed());
    if (!holdsXmiId) {
      continue;
    }

    final int xmiId = casBeingFilled.getFeatureValue(addr, featCode);
    if (xmiId == CASImpl.NULL) {
      continue;
    }

    int targetAddr = CASImpl.NULL;
    try {
      targetAddr = getFsAddrForXmiId(xmiId);
    } catch (NoSuchElementException e) {
      if (!lenient) {
        throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(xmiId));
      }
      // we may not have deserialized the value of this feature because it
      // was of unknown type. We set it to null, and record in the
      // out-of-typesystem data.
      this.sharedData.addOutOfTypeSystemAttribute(
          addr, feature.getShortName(), Integer.toString(xmiId));
    }
    casBeingFilled.setFeatureValue(addr, featCode, targetAddr);
  }
}
/**
 * Remaps the xmi:id stored in the "head" feature of an FS list node to the heap address of the
 * target FS, leaving the "tail" feature alone. Used for FSList nodes deserialized from
 * multi-valued properties, which already have their tail set correctly. The head is taken to be
 * the first appropriate feature of the node's type; nodes that are not of an FS list type, or
 * whose type has no features, are left unchanged.
 *
 * @param addr
 *          address of the list node
 * @throws SAXParseException
 *           if the xmi:id cannot be resolved and the handler is not lenient
 */
private void remapFSListHeads(int addr) throws SAXParseException {
  final int nodeType = casBeingFilled.getHeapValue(addr);
  if (!listUtils.isFsListType(nodeType)) {
    return;
  }
  final int[] nodeFeats = casBeingFilled.getTypeSystemImpl().ll_getAppropriateFeatures(nodeType);
  if (nodeFeats.length == 0) {
    return;
  }

  final int headFeat = nodeFeats[0];
  final int headXmiId = casBeingFilled.getFeatureValue(addr, headFeat);
  if (headXmiId == CASImpl.NULL) {
    return;
  }

  int headAddr = CASImpl.NULL;
  try {
    headAddr = getFsAddrForXmiId(headXmiId);
  } catch (NoSuchElementException e) {
    if (!lenient) {
      throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(headXmiId));
    }
    //this may be a reference to an out-of-typesystem FS
    this.sharedData.addOutOfTypeSystemAttribute(
        addr, CAS.FEATURE_BASE_NAME_HEAD, Integer.toString(headXmiId));
  }
  casBeingFilled.setFeatureValue(addr, headFeat, headAddr);
}
/**
 * Walks an FS array and replaces each non-null element (an xmi:id at this point) with the heap
 * address of the target FS. If called on any other kind of array, this method does nothing.
 * <p>
 * When an xmi:id cannot be resolved, a parsing exception is thrown unless the handler is
 * lenient; in lenient mode the element is set to null and its id is recorded as an
 * out-of-typesystem array element.
 *
 * @param type
 *          CAS type code for the array
 * @param addr
 *          address of the array
 * @throws SAXParseException
 *           if an xmi:id cannot be resolved and the handler is not lenient
 */
private void finalizeArray(int type, int addr) throws SAXParseException {
  if (!casBeingFilled.isFSArrayType(type)) {
    // Nothing to do.
    return;
  }

  final int length = casBeingFilled.ll_getArraySize(addr);
  for (int index = 0; index < length; index++) {
    final int elementXmiId = casBeingFilled.getArrayValue(addr, index);
    if (elementXmiId == CASImpl.NULL) {
      continue;
    }

    int elementAddr = CASImpl.NULL;
    try {
      elementAddr = getFsAddrForXmiId(elementXmiId);
    } catch (NoSuchElementException e) {
      if (!lenient) {
        throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(elementXmiId));
      }
      // the array element may be out of typesystem. In that case set it
      // to null, but record the id so we can add it back on next serialization.
      this.sharedData.addOutOfTypeSystemArrayElement(addr, index, elementXmiId);
    }
    casBeingFilled.setArrayValue(addr, index, elementAddr);
  }
}
/**
 * Builds a parsing exception for the given message code and adds three arguments describing
 * the current position: source, line and column. The source is the locator's system id, or its
 * public id when the system id is null. Any part that is unavailable (no locator, or neither
 * id set) is replaced by <code>unknownXMLSource</code>.
 *
 * @param code
 *          message code passed to the {@link XCASParsingException} constructor
 * @return the exception, ready to be thrown or given further arguments
 */
private XCASParsingException createException(int code) {
  final XCASParsingException exception = new XCASParsingException(code);
  if (locator == null) {
    // no position information at all
    exception.addArgument(unknownXMLSource);
    exception.addArgument(unknownXMLSource);
    exception.addArgument(unknownXMLSource);
    return exception;
  }

  String source = locator.getSystemId();
  if (source == null) {
    source = locator.getPublicId();
  }
  if (source == null) {
    source = unknownXMLSource;
  }
  exception.addArgument(source);
  exception.addArgument(Integer.toString(locator.getLineNumber()));
  exception.addArgument(Integer.toString(locator.getColumnNumber()));
  return exception;
}
/**
 * Builds a parsing exception as {@link #createException(int)} does and appends one further
 * message argument after the source, line and column arguments.
 *
 * @param code
 *          message code passed to the {@link XCASParsingException} constructor
 * @param arg
 *          additional message argument
 * @return the exception
 */
private XCASParsingException createException(int code, String arg) {
  final XCASParsingException exception = createException(code);
  exception.addArgument(arg);
  return exception;
}
/**
* Rethrows a recoverable parser error, so that it propagates to the caller of the parser
* instead of being ignored.
*
* @param e
* the error reported by the parser
* @throws SAXException
* always; the exception thrown is <code>e</code> itself
* @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
*/
public void error(SAXParseException e) throws SAXException {
throw e;
}
/**
* Rethrows a fatal parser error.
*
* @param e
* the error reported by the parser
* @throws SAXException
* always; the exception thrown is <code>e</code> itself
* @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
*/
public void fatalError(SAXParseException e) throws SAXException {
throw e;
}
/**
* Ignores ignorable whitespace; this callback intentionally does nothing.
*
* @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
*/
public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException {
// Since we're not validating, we don't need to do anything; this won't
// be called.
}
/**
* Stores the locator supplied by the parser. It is consulted later when building exceptions,
* to report the source, line and column of a problem.
*
* @param loc
* the document locator
* @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
*/
public void setDocumentLocator(Locator loc) {
this.locator = loc;
}
/**
* Rethrows a parser warning; warnings are handled exactly like errors by this handler.
*
* @param e
* the warning reported by the parser
* @throws SAXException
* always; the exception thrown is <code>e</code> itself
* @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
*/
public void warning(SAXParseException e) throws SAXException {
throw e;
}
/**
 * Classifies a type. This returns an integer code identifying the type as one of the primitive
 * types, one of the array types, one of the list types, or a generic FS type (anything else).
 * <p>
 * The {@link LowLevelCAS#ll_getTypeClass(int)} method classifies primitives and array types,
 * but does not have a special classification for list types, which we need for XMI
 * serialization. Therefore the four list kinds are tested first and reported with the codes
 * TYPE_CLASS_INTLIST, TYPE_CLASS_FLOATLIST, TYPE_CLASS_STRINGLIST, or TYPE_CLASS_FSLIST defined
 * on the {@link XmiCasSerializer} interface; every other type is classified by the CAS.
 *
 * @param type
 *          the type to classify
 * @return one of the TYPE_CLASS codes defined on {@link LowLevelCAS} or on the
 *         {@link XmiCasSerializer} interface.
 */
private final int classifyType(int type) {
  final int typeClass;
  if (listUtils.isIntListType(type)) {
    typeClass = XmiCasSerializer.TYPE_CLASS_INTLIST;
  } else if (listUtils.isFloatListType(type)) {
    typeClass = XmiCasSerializer.TYPE_CLASS_FLOATLIST;
  } else if (listUtils.isStringListType(type)) {
    typeClass = XmiCasSerializer.TYPE_CLASS_STRINGLIST;
  } else if (listUtils.isFsListType(type)) {
    typeClass = XmiCasSerializer.TYPE_CLASS_FSLIST;
  } else {
    // not a list type: use the CAS's own classification
    typeClass = casBeingFilled.ll_getTypeClass(type);
  }
  return typeClass;
}
/**
 * Records which FS address an xmi:id was deserialized into. Non-positive xmi:ids are not
 * recorded. Without a merge (negative mergePoint) the mapping goes into the
 * XmiSerializationSharedData; during a merge it goes into a map local to this deserialization.
 *
 * @param fsAddr
 *          address of the FS in the CAS
 * @param xmiId
 *          xmi:id of the corresponding XMI element
 */
private void addFsAddrXmiIdMapping(int fsAddr, int xmiId) {
  if (xmiId <= 0) {
    return;
  }
  final boolean merging = mergePoint >= 0;
  if (merging) {
    //if we're doing a merge, we can't update the shared map because we could
    //have duplicate xmi:id values in the different parts of the merge.
    //instead we keep a local mapping used only within this deserialization.
    localXmiIdToFsAddrMap.put(xmiId, Integer.valueOf(fsAddr));
  } else {
    //if we are not doing a merge, update the map in the XmiSerializationSharedData
    sharedData.addIdMapping(fsAddr, xmiId);
  }
}
/**
 * Gets the FS address into which the XMI element with the given ID
 * was deserialized. This method supports merging multiple XMI documents
 * into a single CAS: ids at or below the mergePoint (and all ids when no
 * merge is in progress) are looked up in the XmiSerializationSharedData,
 * while ids above the mergePoint are looked up in the map local to this
 * deserialization.
 *
 * @param xmiId
 *          the xmi:id to resolve
 * @return the FS address recorded for the id
 * @throws java.util.NoSuchElementException
 *           if the shared data has no positive address for the id, or the local map has no
 *           entry for it
 */
private int getFsAddrForXmiId(int xmiId) {
  final boolean useLocalMap = mergePoint >= 0 && isNewFS(xmiId);
  if (useLocalMap) {
    //if we're merging, then we use a local id map for FSs above the
    //merge point, since each of the different XMI CASes being merged
    //can use these same ids for different FSs.
    final Integer localAddr = (Integer) localXmiIdToFsAddrMap.get(xmiId);
    if (localAddr == null) {
      throw new java.util.NoSuchElementException();
    }
    return localAddr.intValue();
  }

  //not merging, or an xmi:id at or below the merge point: consult the shared data
  final int sharedAddr = sharedData.getFsAddrForXmiId(xmiId);
  if (sharedAddr <= 0) {
    throw new java.util.NoSuchElementException();
  }
  return sharedAddr;
}
/**
 * Adds a feature structure to the out-of-typesystem data. Also sets the
 * this.outOfTypeSystemElement field, which is referred to later if we have to
 * handle features recorded as child elements. The attribute whose qualified name equals
 * ID_ATTR_NAME is stored as the element's xmi:id; every other attribute is kept as a
 * name/value pair.
 *
 * @param xmlElementName
 *          name of the XML element being recorded
 * @param attrs
 *          attributes of that element
 */
private void addToOutOfTypeSystemData(XmlElementName xmlElementName, Attributes attrs)
    throws XCASParsingException {
  final OotsElementData element = new OotsElementData();
  this.outOfTypeSystemElement = element;
  element.elementName = xmlElementName;

  for (int index = 0; index < attrs.getLength(); index++) {
    final String qName = attrs.getQName(index);
    final String value = attrs.getValue(index);
    if (qName.equals(ID_ATTR_NAME)) {
      element.xmiId = value;
    } else {
      element.attributes.add(new XmlAttribute(qName, value));
    }
  }
  this.sharedData.addOutOfTypeSystemElement(element);
}
/**
 * Adds a feature to the out-of-typesystem features list. One child element is appended per
 * value; all of them share a single element name built from <code>featName</code>.
 *
 * @param ootsElem object to which to add the feature
 * @param featName name of feature
 * @param featVals feature values, as a list of strings
 */
private void addOutOfTypeSystemFeature(OotsElementData ootsElem, String featName, List<String> featVals) {
  final XmlElementName childName = new XmlElementName(null, featName, featName);
  for (String featVal : featVals) {
    ootsElem.childElements.add(new XmlElementNameAndContents(childName, featVal));
  }
}
/**
 * Tells whether an xmi:id lies strictly above the merge point.
 *
 * @param id
 *          the xmi:id to test
 * @return <code>true</code> if <code>id</code> is greater than the merge point
 */
private boolean isNewFS(int id) {
  return this.mergePoint < id;
}
/**
* Records, in the shared serialization data, that a non-shared FS (an array, list or list node
* held by a feature that does not allow multiple references) belongs to the given FS.
*
* @param nonsharedFS
* address of the non-shared FS
* @param encompassingFS
* address of the FS whose feature holds it
*/
private void addNonsharedFSToEncompassingFSMapping(int nonsharedFS, int encompassingFS ) {
this.sharedData.addNonsharedRefToFSMapping(nonsharedFS, encompassingFS);
}
}
// Type system of the CASes to be deserialized; set by the constructor.
private TypeSystemImpl ts;
// NOTE(review): presumably maps XMI namespace URIs to UIMA namespace names; it is filled
// outside this part of the file - confirm against the code that uses it.
private Map<String, String> xmiNamespaceToUimaNamespaceMap = new HashMap<String, String>();
/**
* Create a new deserializer from a type system. Note: all CAS arguments later supplied to
* <code>getXmiCasHandler()</code> must have this type system as their type system.
*
* @param ts
* The type system of the CASes to be deserialized. It is cast to
* <code>TypeSystemImpl</code>.
* @param uimaContext
* not used by this constructor
*/
public XmiCasDeserializer(TypeSystem ts, UimaContext uimaContext) {
super();
this.ts = (TypeSystemImpl) ts;
}
/**
* Create a new deserializer from a type system, without a UIMA context. Equivalent to
* <code>XmiCasDeserializer(ts, null)</code>.
*
* @param ts
* The type system of the CASes to be deserialized.
*/
public XmiCasDeserializer(TypeSystem ts) {
this(ts, null);
}
/**
* Create a default handler for deserializing a CAS from XMI. The handler is not lenient; this
* is equivalent to <code>getXmiCasHandler(cas, false)</code>.
*
* @param cas
* This CAS will be used to hold the data deserialized from the XMI
*
* @return The <code>DefaultHandler</code> to pass to the SAX parser.
*/
public DefaultHandler getXmiCasHandler(CAS cas) {
return getXmiCasHandler(cas, false);
}
/**
* Create a handler for deserializing a CAS from XMI, choosing whether it is lenient. In
* non-lenient mode, XMI that references Types that are not in the Type System causes an
* Exception to be thrown. The handler is created without shared data, with a merge point of -1
* and with <code>AllowPreexistingFS.ignore</code>.
*
* @param cas
* This CAS will be used to hold the data deserialized from the XMI
* @param lenient
* if true, unknown Types will be ignored. If false, unknown Types will cause an
* exception. The default is false.
*
* @return The <code>DefaultHandler</code> to pass to the SAX parser.
*/
public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient) {
return new XmiCasDeserializerHandler((CASImpl) cas, lenient, null, -1, AllowPreexistingFS.ignore);
}
/**
* Create a handler for deserializing a CAS from XMI, choosing whether it is lenient and
* supplying shared serialization data. The handler is created with a merge point of -1 and
* with <code>AllowPreexistingFS.ignore</code>.
*
* @param cas
* This CAS will be used to hold the data deserialized from the XMI
* @param lenient
* if true, unknown Types will be ignored. If false, unknown Types will cause an
* exception. The default is false.
* @param sharedData
* data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
* information.
*
* @return The <code>DefaultHandler</code> to pass to the SAX parser.
*/
public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
XmiSerializationSharedData sharedData) {
return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, -1, AllowPreexistingFS.ignore);
}
/**
* Create a handler for deserializing a CAS from XMI, with support for merging multiple XMI
* CASes into one CAS. The handler is created with <code>AllowPreexistingFS.ignore</code>.
*
* @param cas
* This CAS will be used to hold the data deserialized from the XMI
* @param lenient
* if true, unknown Types will be ignored. If false, unknown Types will cause an
* exception. The default is false.
* @param sharedData
* data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
* information.
* @param mergePoint
* used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
* deserialization will be done, meaning the target CAS will be reset and the entire XMI
* content will be deserialized. If the mergePoint is nonnegative (including 0), the
* target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
* greater than the mergePoint value will be deserialized.
* @return The <code>DefaultHandler</code> to pass to the SAX parser.
*/
public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
XmiSerializationSharedData sharedData, int mergePoint) {
return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, mergePoint, AllowPreexistingFS.ignore);
}
/**
* Create a handler for deserializing a CAS from XMI, with every option supplied by the caller.
* All arguments are passed unchanged to the handler.
*
* @param cas
* This CAS will be used to hold the data deserialized from the XMI
* @param lenient
* if true, unknown Types will be ignored. If false, unknown Types will cause an
* exception. The default is false.
* @param sharedData
* data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
* information.
* @param mergePoint
* used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
* deserialization will be done, meaning the target CAS will be reset and the entire XMI
* content will be deserialized. If the mergePoint is nonnegative (including 0), the
* target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
* greater than the mergePoint value will be deserialized.
* @param allow
* NOTE(review): presumably the policy for preexisting Feature Structures found in the
* XMI; the other overloads pass <code>AllowPreexistingFS.ignore</code> - confirm the
* meaning of each value against the AllowPreexistingFS declaration.
* @return The <code>DefaultHandler</code> to pass to the SAX parser.
*/
public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
XmiSerializationSharedData sharedData, int mergePoint, AllowPreexistingFS allow) {
return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, mergePoint, allow);
}
/**
* Deserializes a CAS from XMI. The deserialization is not lenient, uses no shared data and a
* merge point of -1.
*
* @param aStream
* input stream from which to read the XMI document
* @param aCAS
* CAS into which to deserialize. This CAS must be set up with a type system that is
* compatible with that in the XMI
*
* @throws SAXException
* if an XML Parsing error occurs
* @throws IOException
* if an I/O failure occurs
*/
public static void deserialize(InputStream aStream, CAS aCAS) throws SAXException, IOException {
XmiCasDeserializer.deserialize(aStream, aCAS, false, null, -1);
}
/**
* Deserializes a CAS from XMI, choosing whether unknown Types are tolerated. No shared data is
* used and the merge point is -1.
*
* @param aStream
* input stream from which to read the XMI document
* @param aCAS
* CAS into which to deserialize. This CAS must be set up with a type system that is
* compatible with that in the XMI
* @param aLenient
* if true, unknown Types will be ignored. If false, unknown Types will cause an
* exception. The default is false.
*
* @throws SAXException
* if an XML Parsing error occurs
* @throws IOException
* if an I/O failure occurs
*/
public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient)
throws SAXException, IOException {
deserialize(aStream, aCAS, aLenient, null, -1);
}
/**
* Deserializes a CAS from XMI, optionally sharing data with the serializer. The merge point
* is -1.
*
* @param aStream
* input stream from which to read the XMI document
* @param aCAS
* CAS into which to deserialize. This CAS must be set up with a type system that is
* compatible with that in the XMI
* @param aLenient
* if true, unknown Types will be ignored. If false, unknown Types will cause an
* exception. The default is false.
* @param aSharedData
* an optional container for data that is shared between the {@link XmiCasSerializer} and the
* {@link XmiCasDeserializer}. See the JavaDocs for {@link XmiSerializationSharedData} for details.
*
* @throws SAXException
* if an XML Parsing error occurs
* @throws IOException
* if an I/O failure occurs
*/
public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
XmiSerializationSharedData aSharedData)
throws SAXException, IOException {
deserialize(aStream, aCAS, aLenient, aSharedData, -1);
}
/**
 * Deserializes a CAS from XMI. This version of this method supports merging multiple XMI
 * documents into a single CAS.
 * <p>
 * The XML reader used for parsing has external entity and external DTD resolution switched off
 * (where the parser supports it), so that an XMI document cannot pull in external resources.
 *
 * @param aStream
 *          input stream from which to read the XMI document
 * @param aCAS
 *          CAS into which to deserialize. This CAS must be set up with a type system that is
 *          compatible with that in the XMI
 * @param aLenient
 *          if true, unknown Types will be ignored. If false, unknown Types will cause an
 *          exception. The default is false.
 * @param aSharedData
 *          a container for data that is shared between the {@link XmiCasSerializer} and the
 *          {@link XmiCasDeserializer}. See the JavaDocs for {@link XmiSerializationSharedData}
 *          for details.
 * @param aMergePoint
 *          used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
 *          deserialization will be done, meaning the target CAS will be reset and the entire XMI
 *          content will be deserialized. If the mergePoint is nonnegative (including 0), the
 *          target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
 *          greater than the mergePoint value will be deserialized.
 * @throws SAXException
 *           if an XML Parsing error occurs
 * @throws IOException
 *           if an I/O failure occurs
 */
public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
        XmiSerializationSharedData aSharedData, int aMergePoint)
        throws SAXException, IOException {
  XMLReader xmlReader = createHardenedXmlReader();
  XmiCasDeserializer deser = new XmiCasDeserializer(aCAS.getTypeSystem());
  ContentHandler handler = deser.getXmiCasHandler(aCAS, aLenient, aSharedData, aMergePoint);
  xmlReader.setContentHandler(handler);
  xmlReader.parse(new InputSource(aStream));
}

/**
 * Deserializes a CAS from XMI. This version of this method supports deserializing an XMI
 * document containing only deltas. The Delta CAS XMI is in the same form as a complete CAS XMI
 * but only consists of new and modified FSs and updates to Views.
 * <p>
 * This API is for reducing the overhead associated with serialization when calling a remote
 * service. The service can send back only the deltas which are deserialized into the original
 * outgoing CAS.
 * <p>
 * The XML reader used for parsing has external entity and external DTD resolution switched off
 * (where the parser supports it), so that an XMI document cannot pull in external resources.
 * <p>
 * NOTES:
 * <p>
 * It is expected that Delta CAS serialization will serialize modified preexisting FSs first so
 * that disallowed preexisting FSs are detected at the start and the CAS being filled is left
 * untouched. If a disallowed preexisting FS is encountered in the View element, the FS is
 * ignored and the deserialization completes but throws an exception at the end.
 * <p>
 * Possible performance issue with StringListFS. When processing String, StringArrayFS and
 * StringListFS features of a preexisting FS, the string value in the CAS is updated only if it
 * is not equal to the incoming string value. When processing a StringListFS where a new string
 * value has been inserted, all subsequent strings in the list will be updated with new strings.
 *
 * @param aStream
 *          input stream from which to read the XMI document
 * @param aCAS
 *          CAS into which to deserialize. This CAS must be set up with a type system that is
 *          compatible with that in the XMI
 * @param aLenient
 *          if true, unknown Types will be ignored. If false, unknown Types will cause an
 *          exception. The default is false.
 * @param aSharedData
 *          a container for data that is shared between the {@link XmiCasSerializer} and the
 *          {@link XmiCasDeserializer}. See the JavaDocs for {@link XmiSerializationSharedData}
 *          for details.
 * @param aMergePoint
 *          used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
 *          deserialization will be done, meaning the target CAS will be reset and the entire XMI
 *          content will be deserialized. If the mergePoint is nonnegative (including 0), the
 *          target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
 *          greater than the mergePoint value will be deserialized.
 * @param allowPreexistngFS
 *          used when deserializing delta CAS whether to allow, disallow or ignore elements
 *          representing preexisting FSs or preexisting FSs updates in View element.
 *          If IGNORE, FSs below the mergePoint are ignored and only new FSs are processed.
 *          If ALLOW, FSs below the mergePoint are processed as well as new FSs.
 *          If DISALLOW, FSs below mergePoint will cause serialization to fail. FSs below the
 *          mergePoint referenced in View element will be flagged as an error condition and will
 *          not modify the CAS being filled and an exception reporting this will be thrown at
 *          the end of deserialization.
 *
 * @throws SAXException
 *           if an XML Parsing error occurs
 * @throws IOException
 *           if an I/O failure occurs
 */
public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
        XmiSerializationSharedData aSharedData, int aMergePoint, AllowPreexistingFS allowPreexistngFS)
        throws SAXException, IOException {
  XMLReader xmlReader = createHardenedXmlReader();
  XmiCasDeserializer deser = new XmiCasDeserializer(aCAS.getTypeSystem());
  ContentHandler handler = deser.getXmiCasHandler(aCAS, aLenient, aSharedData, aMergePoint,
          allowPreexistngFS);
  xmlReader.setContentHandler(handler);
  xmlReader.parse(new InputSource(aStream));
}

/**
 * Creates the SAX reader used for XMI parsing, with resolution of external general entities,
 * external parameter entities and external DTDs disabled to guard against XML External Entity
 * (XXE) attacks.
 *
 * @return a new XML reader with the external-resource features switched off where supported
 * @throws SAXException
 *           if no XML reader can be created
 */
private static XMLReader createHardenedXmlReader() throws SAXException {
  XMLReader reader = XMLReaderFactory.createXMLReader();
  String[] featuresToDisable = {
      "http://xml.org/sax/features/external-general-entities",
      "http://xml.org/sax/features/external-parameter-entities",
      "http://apache.org/xml/features/nonvalidating/load-external-dtd" };
  for (int i = 0; i < featuresToDisable.length; i++) {
    try {
      reader.setFeature(featuresToDisable[i], false);
    } catch (SAXException ignored) {
      // Best effort: a parser is not required to recognize these feature names, and failing
      // here would break deserialization on parsers that worked before the hardening.
    }
  }
  return reader;
}
/**
 * Converts an XMI element name to a UIMA-style dotted type name.
 * <p>
 * The namespace part is derived from the path of the namespace URI: leading slashes are
 * removed, a trailing ".ecore" is removed, internal slashes become dots, and a trailing dot is
 * appended. The special no-namespace URI maps to an empty namespace. Computed namespaces are
 * cached in {@code xmiNamespaceToUimaNamespaceMap}, keyed by namespace URI.
 *
 * @param nsUri
 *          the namespace URI of the XMI element
 * @param localName
 *          the local name of the XMI element
 *
 * @return the UIMA type name corresponding to the XMI element name
 * @throws SAXException
 *           if the namespace URI is not syntactically valid, or is an opaque URI (such as a
 *           {@code urn:}) that has no path component to derive a namespace from
 */
private String xmiElementName2uimaTypeName(String nsUri, String localName) throws SAXException {
  // check map first to see if we've already computed the namespace mapping
  String uimaNamespace = (String) xmiNamespaceToUimaNamespaceMap.get(nsUri);
  if (uimaNamespace == null) {
    // check for the special "no-namespace" URI, which is used for UIMA types with no namespace
    if (XmiCasSerializer.DEFAULT_NAMESPACE_URI.equals(nsUri)) {
      uimaNamespace = "";
    } else {
      // Our convention is that the UIMA namespace is the URI path, with leading slashes
      // removed, trailing ".ecore" removed, and internal slashes converted to dots
      final String ecoreSuffix = ".ecore";
      URI uri;
      try {
        uri = new URI(nsUri);
      } catch (URISyntaxException e) {
        throw new SAXException(e);
      }
      String path = uri.getPath();
      if (path == null) {
        // URI.getPath() is null for opaque URIs; report it as a parse problem instead of
        // letting a NullPointerException escape from the SAX callback.
        throw new SAXException("Cannot derive a UIMA namespace from XMI namespace URI \""
                + nsUri + "\": the URI has no path component");
      }
      while (path.startsWith("/")) {
        path = path.substring(1);
      }
      if (path.endsWith(ecoreSuffix)) {
        path = path.substring(0, path.length() - ecoreSuffix.length());
      }
      uimaNamespace = path.replace('/', '.') + '.'; // include trailing dot for convenience
    }
    xmiNamespaceToUimaNamespaceMap.put(nsUri, uimaNamespace);
  }
  return uimaNamespace + localName;
}
}