| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.cas.impl; |
| |
| import java.util.ArrayDeque; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.BitSet; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.Deque; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.IdentityHashMap; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.NoSuchElementException; |
| import java.util.Set; |
| import java.util.concurrent.atomic.AtomicInteger; |
| |
| import org.apache.uima.UimaSerializable; |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.cas.CASRuntimeException; |
| import org.apache.uima.cas.CommonArrayFS; |
| import org.apache.uima.cas.FSIndex; |
| import org.apache.uima.internal.util.Misc; |
| import org.apache.uima.internal.util.XmlElementName; |
| import org.apache.uima.jcas.cas.CommonList; |
| import org.apache.uima.jcas.cas.CommonPrimitiveArray; |
| import org.apache.uima.jcas.cas.FSArray; |
| import org.apache.uima.jcas.cas.FSList; |
| import org.apache.uima.jcas.cas.NonEmptyFSList; |
| import org.apache.uima.jcas.cas.NonEmptyList; |
| import org.apache.uima.jcas.cas.Sofa; |
| import org.apache.uima.jcas.cas.TOP; |
| import org.apache.uima.jcas.tcas.Annotation; |
| import org.apache.uima.util.Logger; |
| import org.xml.sax.ContentHandler; |
| import org.xml.sax.ErrorHandler; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.SAXParseException; |
| |
| // @formatter:off |
| /** |
| * CAS serializer support for XMI and JSON formats. |
| * |
| * There are multiple use cases. |
| * 1) normal - the consumer is independent of UIMA |
| * - (maybe) support for delta serialization |
| * 2) service calls: |
| * - support deserialization with out-of-type-system set-aside, and subsequent serialization with re-merging |
| * - guarantee of using same xmi:id's as were deserialized when serializing |
| * - support for delta serialization |
| * |
| * There is an outer class (one instance per "configuration" - reusable after configuration), and |
| * an inner class (one instance per serialize call). |
| * |
| * These classes are the common parts of serialization between XMI and JSON, mainly having to do with |
| * 1) enqueueing the FS to be serialized |
| * 2) serializing according to their types and features |
| * |
| * |
| * Methods marked public are not for public use but are that way to permit |
| * other users of this class in other packages to "see" these methods. |
| * |
| * XmiCasSerializer JsonCasSerializer |
| * Instance Instance |
| * css ref -------> CasSerializerSupport <------ css ref |
| * |
| * |
| * XmiDocSerializer JsonDocSerializer |
| * Instance Instance |
| * (1 per serialize action) (1 per serialize action) |
| * cds ref -------> CasDocSerializer <------- cds ref |
| * csss points back |
| * |
| * |
| * Construction: |
| * new Xmi/JsonCasSerializer |
| * initializes css with new CasSerializerSupport |
| * |
| * serialize method creates a new Xmi/JsonDocSerializer inner class |
| * constructor creates a new CasDocSerializer, |
| * |
| * Use Cases and Algorithms |
| * Support set-aside for out-of-type-system FS on deserialization (record in shareData) |
| * implies can't determine sharing status of things ref'd by features; need to depend on |
| * multiple-refs-allowed flag. |
| * If multiple-refs found during serialization for feat marked non-shared, unshare these (make |
| * 2 serializations, one or more inplace, for example. |
| * Perhaps not considered an error. |
| * implies need (for non-delta case) to send all FSs that were deserialized - some may be ref'd by oots elements |
| * ** Could ** not do this if no oots elements, but could break some assumptions |
| * and this only would apply to non-delta - not worth doing |
| * |
| * Enqueuing: |
| * There are two styles |
| * - enqueueCommon: does **NOT** recursively enqueue features |
| * - enqueue: calls enqueueCommon and then recursively enqueues features |
| * |
| * enqueueCommon is called (bypassing enqueue) to defer scanning references |
| * |
| * Order and target of enqueuing: |
| * - things in the index |
| * -- put on "queue" |
| * -- first, the sofa's (which are the only things indexed in base view) |
| * -- next, for each view, for each item, the FSs, but **NOT** following any feature/array refs |
| * - things not in the index, but deserialized (incoming) |
| * -- put on previouslySerializedFSs, no recursive descent for features |
| * - (delta) enqueueNonsharedMultivaluedFS (lists and arrays) |
| * -- put on modifiedEmbeddedValueFSs, no recursive descent for features |
| * |
| * - recursive descent for |
| * -- things in previouslySerializedFSs, |
| * -- things in modifiedEmbeddedValueFSs |
| * -- things in the index |
| * |
| * The descent is recursive, and an arbitrarily long chain of references can cause a stack overflow error. |
| * TODO Probably should fix this someday. See https://issues.apache.org/jira/browse/UIMA-106 |
| */ |
| // @formatter:on |
| public class CasSerializerSupport { |
| |
| // Special "type class" codes for list types. The LowLevelCAS.ll_getTypeClass() method |
| // returns type classes for primitives and arrays, but not lists (which are just ordinary FS types |
| // as far as the CAS is concerned). The serialization treats lists specially, however, and |
| // so needs its own type codes for these. |
| public static final int TYPE_CLASS_INTLIST = 101; |
| |
| public static final int TYPE_CLASS_FLOATLIST = 102; |
| |
| public static final int TYPE_CLASS_STRINGLIST = 103; |
| |
| public static final int TYPE_CLASS_FSLIST = 104; |
| |
| public static int PP_LINE_LENGTH = 120; |
| public static int PP_ELEMENTS = 30; // number of elements to do before nl |
| |
| public static AtomicInteger errorCount = new AtomicInteger(0); |
| |
| /** |
| * Comparator that just uses short name Public for access by JsonCasSerializer where it's needed |
| * for a binary search https://issues.apache.org/jira/browse/UIMA-5171 |
| */ |
| public final static Comparator<TypeImpl> COMPARATOR_SHORT_TYPENAME = new Comparator<TypeImpl>() { |
| @Override |
| public int compare(TypeImpl object1, TypeImpl object2) { |
| return object1.getShortName().compareTo(object2.getShortName()); |
| } |
| }; |
| |
| TypeSystemImpl filterTypeSystem; |
| |
| ErrorHandler errorHandler = null; |
| |
| // UIMA logger, to which we may write warnings |
| Logger logger; |
| |
| public boolean isFormattedOutput; // true for pretty printing |
| |
| /*********************************************** |
| * C O N S T R U C T O R S * |
| ***********************************************/ |
| |
/**
 * Creates an instance with the default configuration: no filter type system, no error handler,
 * and pretty printing turned off. Use the setters of this class to change these.
 */
public CasSerializerSupport() {
  super();
}
| |
| /******************************************************** |
| * Routines to set/reset configuration * |
| ********************************************************/ |
| /** |
| * set or reset the pretty print flag (default is false) |
| * |
| * @param pp |
| * true to do pretty printing of output |
| * @return the original instance, possibly updated |
| */ |
| public CasSerializerSupport setPrettyPrint(boolean pp) { |
| this.isFormattedOutput = pp; |
| return this; |
| } |
| |
| /** |
| * pass in a type system to use for filtering what gets serialized; only those types and features |
| * which are defined this type system are included. |
| * |
| * @param ts |
| * the filter |
| * @return the original instance, possibly updated |
| */ |
| public CasSerializerSupport setFilterTypes(TypeSystemImpl ts) { |
| this.filterTypeSystem = ts; |
| return this; |
| } |
| |
| // for testing |
| public TypeSystemImpl getFilterTypes() { |
| return filterTypeSystem; |
| } |
| |
| // not done here, done on serialize call, different (typically) for each call |
| // /** |
| // * set the Marker to specify delta cas serialization |
| // * @param m - the marker |
| // * @return the original instance, possibly updated |
| // */ |
| // public CasSerializerSupport setDeltaCas(Marker m, XmiSerializationSharedData sharedData) { |
| // this.marker = (MarkerImpl) m; |
| // this.sharedData = sharedData; |
| // return this; |
| // } |
| |
| /** |
| * set an error handler to receive information about errors |
| * |
| * @param eh |
| * the error handler |
| * @return the original instance, possibly updated |
| */ |
| public CasSerializerSupport setErrorHandler(ErrorHandler eh) { |
| this.errorHandler = eh; |
| return this; |
| } |
| |
| // ********************************************** |
| // Methods used to serialize items |
| // Separate implementations for JSON and Xmi |
| // **********************************************/ |
| public static abstract class CasSerializerSupportSerialize { |
| |
| abstract protected void initializeNamespaces(); |
| |
| abstract protected void checkForNameCollision(XmlElementName xmlElementName); |
| |
| abstract protected void addNameSpace(XmlElementName xmlElementName); |
| |
| abstract protected XmlElementName uimaTypeName2XmiElementName(String typeName); |
| |
| abstract protected void writeFeatureStructures(int elementCount) throws Exception; |
| |
| abstract protected void writeViews() throws Exception; |
| |
| abstract protected void writeView(Sofa sofa, Collection<TOP> members) throws Exception; |
| |
| abstract protected void writeView(Sofa sofa, Collection<TOP> added, Collection<TOP> deleted, |
| Collection<TOP> reindexed) throws Exception; |
| |
| /** |
| * |
| * @param fs |
| * - |
| * @param typeCode |
| * - |
| * @return true if writing out referenced items (JSON) |
| * @throws Exception |
| * - |
| */ |
| abstract protected boolean writeFsStart(TOP fs, int typeCode) throws Exception; |
| |
| abstract protected void writeFs(TOP fs, int typeCode) throws Exception; |
| |
| abstract protected void writeListsAsIndividualFSs(TOP fs, int typeCode) throws Exception; |
| |
| abstract protected void writeArrays(TOP fsarray, int typeCode, int typeClass) throws Exception; |
| |
| abstract protected void writeEndOfIndividualFs() throws Exception; |
| |
| abstract protected void writeEndOfSerialization() throws Exception; |
| |
| abstract protected void writeFsRef(TOP fs) throws Exception; |
| } |
| |
| /** |
| * Use an inner class to hold the data for serializing a CAS. Each call to serialize() creates its |
| * own instance. |
| * |
| * package private to allow a test case to access not static to share the logger and the |
| * initializing values (could be changed) |
| */ |
| public class CasDocSerializer { |
| |
| // The CAS we're serializing. |
| public final CASImpl cas; |
| |
| public final TypeSystemImpl tsi; |
| |
| // @formatter:off |
| /** |
| * set of FSs that have been visited and enqueued to be serialized |
| * - exception: arrays and lists which are "inline" are put into this set, |
| * but are not enqueued to be serialized. |
| * |
| * - FSs added to this, during "enqueue" phase, prior to encoding |
| * |
| * uses: |
| * - for Arrays and Lists, used to detect multi-refs |
| * - for Lists, used to detect loops |
| * - during enqueuing phase, prevent multiple enqueuings |
| * - during encoding phase, to prevent multiple encodings |
| * |
| * Public for use by JsonCasSerializer |
| */ |
| // @formatter:on |
| public final Set<TOP> visited_not_yet_written = Collections |
| .newSetFromMap(new IdentityHashMap<>()); |
| |
| // @formatter:off |
| /** |
| * Set of array or list FSs referenced from features marked as multipleReferencesAllowed, |
| * - which have previously been serialized "inline" |
| * - which now need to be serialized as separate items |
| * |
| * Set during enqueue scanning, to handle the case where the |
| * "visited_not_yet_written" set may have already recorded that this FS is |
| * already processed for enqueueing, but it is an array or list item which was being |
| * put "in-line" and no element is being written. |
| * |
| * It has array or list elements where the item needs to be enqueued onto the "queue" list. |
| * |
| * Use: limit the put-onto-queue list to one time |
| */ |
| // @formatter:on |
| private final Set<TOP> enqueued_multiRef_arrays_or_lists = Collections |
| .newSetFromMap(new IdentityHashMap<>()); |
| |
| // @formatter:off |
| /** |
| * Set of FSs that have multiple references |
| * Has an entry for each FS (not just array or list FSs) which is (from some point on) being serialized as a multi-ref, |
| * that is, is **not** being serialized (any more) using the special notation for arrays and lists |
| * or, for JSON, **not** being serialized using the embedded notation |
| * This is for JSON which is computing the multi-refs, not depending on the setting in a feature. |
| * This is also for xmi, to enable adding to "queue" (once) for each FSs of this kind. |
| * |
| * Used: |
| * - limit the number of times this is put onto the queue to 1. |
| * - skip encoding of items on "queue" if not in this Set (maybe not needed? 8/2017 mis) |
| * - serialize if not in indexed set, dynamic ref == true, and in this set (otherwise serialize only from ref) |
| */ |
| // @formatter:on |
| public final Set<TOP> multiRefFSs; |
| |
| /** |
| * Set to true for JSON configuration of using dynamic multi-ref detection for arrays and lists |
| */ |
| public final boolean isDynamicMultiRef; |
| |
| // ********************************************* |
| // FSs that need to be serialized because they're |
| // a) in an index |
| // b) in the set of previously serialized FS which have ids (that is, they weren't previously |
| // embedded) |
| // c) (delta only) have a feature which has an embedded value some part of which changed (no id) |
| // |
| // d) the set of FSs that are reachable via FSrefs from the above 3 sets |
| public List<TOP> previouslySerializedFSs = null; |
| |
| public List<TOP> modifiedEmbeddedValueFSs = null; |
| |
| /** |
| * Array of Lists of all FS that are indexed in some view (other than sofas). Array indexed by |
| * view. |
| */ |
| public final List<TOP>[] indexedFSs; |
| |
| /** |
| * FSs not in an index, but only being serialized because they're referenced. Exception: the |
| * sofas are here. |
| */ |
| private final Deque<TOP> queue; |
| |
| // utilities for dealing with CAS list types |
| // public final ListUtils listUtils; |
| |
| public XmlElementName[] typeCode2namespaceNames; // array, indexed by type code, giving XMI |
| // names for each type |
| |
| private final BitSet typeUsed; // identifies types being serialized, a subset of all possible |
| // types |
| |
| public boolean needNameSpaces = true; // may be false; currently for JSON only |
| |
| /** |
| * map from a namespace expanded form to the namespace prefix, to identify potential collisions |
| * when generating a namespace string |
| */ |
| public final Map<String, String> nsUriToPrefixMap = new HashMap<>(); |
| |
| /** |
| * the set of all namespace prefixes used, to disallow some if they are in use already in |
| * set-aside data (xmi serialization) being merged back in |
| */ |
| public final Set<String> nsPrefixesUsed = new HashSet<>(); |
| |
| /** |
| * Used to tell if a FS was created before or after mark. |
| */ |
| public final MarkerImpl marker; |
| |
| /** |
| * for Delta serialization, holds the info gathered from deserialization needed for delta |
| * serialization and for handling out-of-type-system data for both plain and delta serialization |
| */ |
| public final XmiSerializationSharedData sharedData; |
| |
| /** |
| * Whether the serializer needs to serialize only the deltas, that is, new FSs created after |
| * mark represented by Marker object and preexisting FSs and Views that have been modified. Set |
| * to true if the Marker object passed to the constructor is not null; the constructor rejects |
| * a Marker whose isValid() check fails. |
| */ |
| public final boolean isDelta; |
| |
| /** |
| * Whether the serializer needs to check for filtered-out types/features. Set to true if type |
| * system of CAS does not match type system that was passed to constructor of serializer. |
| */ |
| public final boolean isFiltering; |
| |
| private TypeImpl[] sortedUsedTypes; |
| |
| private final ErrorHandler errorHandler2; |
| |
| public TypeSystemImpl filterTypeSystem_inner; |
| |
| // map to reduce string usage by reusing equal string representations; lives just for one |
| // serialize call |
| private final Map<String, String> uniqueStrings = new HashMap<>(); |
| |
| public final boolean isFormattedOutput_inner; |
| |
| private final CasSerializerSupportSerialize csss; |
| |
| /*********************************************** |
| * C O N S T R U C T O R S * |
| ***********************************************/ |
/**
 * Creates a serializer for one serialize call without dynamic multi-ref tracking; the same as
 * calling the six-argument constructor with {@code trackMultiRefs} set to false.
 *
 * @param ch
 *          passed through to the six-argument constructor
 * @param cas
 *          the CAS to serialize
 * @param sharedData
 *          data gathered by a previous deserialization; may be null
 * @param marker
 *          the marker for delta serialization; null for a non-delta serialization
 * @param csss
 *          the format-specific serialization methods
 */
public CasDocSerializer(ContentHandler ch, CASImpl cas, XmiSerializationSharedData sharedData,
    MarkerImpl marker, CasSerializerSupportSerialize csss) {
  this(ch, cas, sharedData, marker, csss, false);
}
| |
| public CasDocSerializer(ContentHandler ch, CASImpl cas, XmiSerializationSharedData sharedData, |
| MarkerImpl marker, CasSerializerSupportSerialize csss, boolean trackMultiRefs) { |
| this.cas = cas; |
| this.csss = csss; |
| this.sharedData = sharedData; |
| |
| // copy outer class values into final inner ones, to keep the outer thread-safe |
| filterTypeSystem_inner = CasSerializerSupport.this.filterTypeSystem; |
| isFormattedOutput_inner = CasSerializerSupport.this.isFormattedOutput; |
| this.marker = marker; |
| errorHandler2 = CasSerializerSupport.this.errorHandler; |
| |
| tsi = cas.getTypeSystemImpl(); |
| queue = new ArrayDeque<>(); |
| indexedFSs = (List<TOP>[]) new List<?>[cas.getViewCount()]; // number of views |
| // listUtils = new ListUtils(cas, logger, errorHandler); |
| typeUsed = new BitSet(); |
| |
| isFiltering = filterTypeSystem_inner != null && filterTypeSystem_inner != tsi; |
| if (marker != null && !marker.isValid()) { |
| throw new CASRuntimeException(CASRuntimeException.INVALID_MARKER, "Invalid Marker."); |
| } |
| isDelta = marker != null; |
| multiRefFSs = Collections.newSetFromMap(new IdentityHashMap<>()); |
| isDynamicMultiRef = trackMultiRefs; |
| } |
| |
| // TODO: internationalize |
| private void reportMultiRefWarning(FeatureImpl fi) throws SAXException { |
| String message = String.format( |
| "Feature %s is marked multipleReferencesAllowed=false, but it has" |
| + " multiple references. These will be serialized in duplicate.", |
| fi.getName()); |
| Misc.decreasingWithTrace(errorCount, message, logger); |
| if (this.errorHandler2 != null) { |
| this.errorHandler2.warning(new SAXParseException(message, null)); |
| } |
| } |
| |
| /** |
| * Starts serialization |
| * |
| * @throws Exception |
| * - |
| */ |
| public void serialize() throws Exception { |
| typeCode2namespaceNames = new XmlElementName[tsi.getLargestTypeCode() + 1]; |
| |
| // reset caches in case some things modified between calls to serialize for same instance of |
| // serializer |
| sortedUsedTypes = null; |
| typeUsed.clear(); |
| Arrays.fill(indexedFSs, null); |
| queue.clear(); |
| |
| csss.initializeNamespaces(); |
| |
| int iElementCount = 1; // start at 1 to account for special NULL object |
| |
| enqueueIndexed(); // done first - to insure this has priority |
| |
| enqueueIncoming(); // make sure we enqueue every FS that was deserialized into this CAS |
| // needed to support Out Of Typesystem data |
| enqueueNonsharedMultivaluedFS(); // needed for delta serialization of modified embedded |
| // lists/arrays |
| enqueueFeaturesOfIndexed(); // and incoming and modified embedded refs |
| |
| iElementCount += (previouslySerializedFSs == null) ? 0 : previouslySerializedFSs.size(); |
| iElementCount += (modifiedEmbeddedValueFSs == null) ? 0 : modifiedEmbeddedValueFSs.size(); |
| for (List<TOP> fss : indexedFSs) { |
| iElementCount += (fss == null) ? 0 : fss.size(); |
| } |
| iElementCount += queue.size(); |
| |
| FSIndex<TOP> sofaIndex = cas.getBaseCAS().indexRepository.getIndex(CAS.SOFA_INDEX_NAME); |
| if (!isDelta) { |
| iElementCount += (sofaIndex.size()); // one View element per sofa |
| iElementCount += getElementCountForSharedData(); |
| } else { |
| int numViews = cas.getViewCount(); |
| for (int sofaNum = 1; sofaNum <= numViews; sofaNum++) { |
| FSIndexRepositoryImpl loopIR = (FSIndexRepositoryImpl) cas.getBaseCAS() |
| .getSofaIndexRepository(sofaNum); |
| if (loopIR != null && loopIR.isModified()) { |
| iElementCount++; |
| } |
| } |
| } |
| |
| csss.writeFeatureStructures(iElementCount); |
| |
| csss.writeViews(); |
| |
| csss.writeEndOfSerialization(); |
| } |
| |
| /** |
| * |
| * @param sofaNum |
| * - starts at 1 |
| * @return the sofa FS, or null |
| */ |
| public Sofa getSofa(int sofaNum) { |
| if (sofaNum != 1 || cas.isInitialSofaCreated()) { // skip if initial view && no Sofa yet |
| // all non-initial-views must have a sofa |
| return ((CASImpl) cas.getView(sofaNum)).getSofaRef(); |
| } |
| return null; |
| } |
| |
| public void writeViewsCommons() throws Exception { |
| // Get indexes for each SofaFS in the CAS |
| int numViews = cas.getViewCount(); |
| |
| for (int sofaNum = 1; sofaNum <= numViews; sofaNum++) { |
| FSIndexRepositoryImpl loopIR = (FSIndexRepositoryImpl) cas.getBaseCAS() |
| .getSofaIndexRepository(sofaNum); |
| final Sofa sofa = getSofa(sofaNum); |
| if (loopIR != null) { |
| if (!isDelta) { |
| Collection<TOP> fss = loopIR.getIndexedFSs(); |
| csss.writeView(sofa, fss); |
| } else { // is Delta Cas |
| if (sofaNum != 1 && this.marker.isNew(sofa)) { |
| // for views created after mark (initial view never is - it is always created with the |
| // CAS) |
| // write out the view as new |
| Collection<TOP> fss = loopIR.getIndexedFSs(); |
| csss.writeView(sofa, fss); |
| } else if (loopIR.isModified()) { |
| csss.writeView(sofa, loopIR.getAddedFSs(), loopIR.getDeletedFSs(), |
| loopIR.getReindexedFSs()); |
| } |
| } |
| } |
| } |
| } |
| |
| // sort is by shortname of type |
| public TypeImpl[] getSortedUsedTypes() { |
| if (null == sortedUsedTypes) { |
| sortedUsedTypes = new TypeImpl[typeUsed.cardinality()]; |
| int i = 0; |
| for (TypeImpl ti : getUsedTypesIterable()) { |
| sortedUsedTypes[i++] = ti; |
| } |
| Arrays.sort(sortedUsedTypes, COMPARATOR_SHORT_TYPENAME); |
| } |
| return sortedUsedTypes; |
| } |
| |
| private Iterable<TypeImpl> getUsedTypesIterable() { |
| return new Iterable<TypeImpl>() { |
| @Override |
| public Iterator<TypeImpl> iterator() { |
| return new Iterator<TypeImpl>() { |
| private int i = 0; |
| |
| @Override |
| public boolean hasNext() { |
| return typeUsed.nextSetBit(i) >= 0; |
| } |
| |
| @Override |
| public TypeImpl next() { |
| final int next_i = typeUsed.nextSetBit(i); |
| if (next_i < 0) { |
| throw new NoSuchElementException(); |
| } |
| i = next_i + 1; |
| return (TypeImpl) tsi.ll_getTypeForCode(next_i); |
| } |
| |
| @Override |
| public void remove() { |
| throw new UnsupportedOperationException(); |
| } |
| }; |
| } |
| }; |
| } |
| |
| // private StringPair[] getSortedPrefixUri() { |
| // StringPair[] r = new StringPair[nsUriToPrefixMap.size()]; |
| // int i = 0; |
| // for (Map.Entry<String,String> e : nsUriToPrefixMap.entrySet()) { |
| // r[i++] = new StringPair(e.getValue(), e.getKey()); |
| // } |
| // Arrays.sort(r); |
| // return r; |
| // } |
| |
| /** |
| * Enqueues all FS that are stored in the sharedData's id map. This map is populated during the |
| * previous deserialization. This method is used to make sure that all incoming FS are echoed in |
| * the next serialization. It is required if there are out-of-type FSs that are being merged |
| * back into the serialized form; those might reference some of these. |
| */ |
| private void enqueueIncoming() { |
| if (sharedData == null) { |
| return; |
| } |
| TOP[] fss = this.sharedData.getAndSortByIdAllFSsInIdMap(); |
| previouslySerializedFSs = new ArrayList<>(); |
| |
| for (TOP fs : fss) { |
| // skip enque for null and for unmodified fss if delta |
| if (fs == null || (isDelta && !marker.isModified(fs))) { |
| continue; |
| } |
| |
| // is the first instance, but skip if delta and not modified or above the line or filtered |
| // skip enqueuing incoming FS if already enqueued |
| int typeCode = enqueueCommon(fs); |
| if (typeCode == -1) { |
| continue; |
| } |
| previouslySerializedFSs.add(fs); |
| } |
| } |
| |
| /** |
| * add the indexed FSs onto the indexedFSs by view. add the SofaFSs onto the by-ref queue |
| */ |
| private void enqueueIndexed() { |
| FSIndexRepositoryImpl ir = (FSIndexRepositoryImpl) cas.getBaseCAS().getBaseIndexRepository(); |
| Collection<TOP> fss = ir.getIndexedFSs(); // only sofas |
| try { |
| for (TOP fs : fss) { |
| enqueueFsAndMaybeFeatures(fs); // put Sofa on by-ref queue |
| } |
| } catch (SAXException e) { |
| throw new RuntimeException("Internal error - should never happen", e); |
| } |
| |
| // FSIndex sofaIndex = cas.getBaseCAS().indexRepository.getIndex(CAS.SOFA_INDEX_NAME); |
| // FSIterator iterator = sofaIndex.iterator(); |
| // // Get indexes for each SofaFS in the CAS |
| // while (iterator.isValid()) |
| int numViews = cas.getViewCount(); |
| for (int sofaNum = 1; sofaNum <= numViews; sofaNum++) { |
| // SofaFS sofa = (SofaFS) iterator.get(); |
| // int sofaNum = sofa.getSofaRef(); |
| // iterator.moveToNext(); |
| FSIndexRepositoryImpl loopIR = (FSIndexRepositoryImpl) cas.getBaseCAS() |
| .getSofaIndexRepository(sofaNum); |
| if (loopIR != null) { |
| Collection<TOP> items = loopIR.getIndexedFSs(); |
| for (TOP item : items) { |
| enqueueIndexedFs_only_not_features(sofaNum, item); |
| } |
| } |
| } |
| } |
| |
| /** |
| * When serializing Delta CAS, enqueue encompassing FS of nonshared multivalued FS that have |
| * been modified. The embedded nonshared-multivalued item could be a list or an array |
| */ |
| private void enqueueNonsharedMultivaluedFS() { |
| if (sharedData == null || !isDelta) { |
| return; |
| } |
| TOP[] fss = sharedData.getNonsharedMulitValuedFSs(); |
| modifiedEmbeddedValueFSs = new ArrayList<>(); |
| |
| for (TOP fs : fss) { |
| if (marker.isModified(fs)) { |
| TOP encompassingFs = sharedData.getEncompassingFS(fs); |
| assert null != encompassingFs; |
| if (-1 != enqueueCommonWithoutDeltaAndFilteringCheck(encompassingFs)) { // only to set |
| // type used info |
| // and check if |
| // already |
| // enqueued |
| modifiedEmbeddedValueFSs.add(encompassingFs); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Enqueue everything reachable from features of indexed FSs. |
| */ |
| private void enqueueFeaturesOfIndexed() throws SAXException { |
| if (null != previouslySerializedFSs) { |
| enqueueFeaturesOfFSs(previouslySerializedFSs); |
| } |
| if (null != modifiedEmbeddedValueFSs) { |
| enqueueFeaturesOfFSs(modifiedEmbeddedValueFSs); |
| } |
| for (List<TOP> fss : indexedFSs) { |
| if (fss != null) { |
| enqueueFeaturesOfFSs(fss); |
| } |
| } |
| } |
| |
| private void enqueueFeaturesOfFSs(final List<TOP> fss) throws SAXException { |
| for (TOP fs : fss) { |
| enqueueFeatures(fs); |
| } |
| } |
| |
| int enqueueCommon(TOP fs) { |
| return enqueueCommon(fs, true); |
| } |
| |
| int enqueueCommonWithoutDeltaAndFilteringCheck(TOP fs) { |
| return enqueueCommon(fs, false); |
| } |
| |
| /** |
| * @param fs |
| * - |
| * @param doDeltaAndFilteringCheck |
| * - |
| * @return true to have enqueue put onto "queue" and enqueue features |
| */ |
| private int enqueueCommon(TOP fs, boolean doDeltaAndFilteringCheck) { |
| if (doDeltaAndFilteringCheck) { |
| if (isDelta) { |
| if (!marker.isNew(fs) && !marker.isModified(fs)) { |
| return -1; |
| } |
| } |
| |
| if (isFiltering) { |
| String typeName = fs._getTypeImpl().getName(); |
| if (filterTypeSystem_inner.getType(typeName) == null) { |
| return -1; // this type is not in the target type system |
| } |
| } |
| } |
| |
| // We set visited only if we're going to enqueue this. |
| // (In other words, please don't move this up in this method) |
| // This handles the use case: |
| // delta cas; element is not modified, but at some later point, we determine |
| // an embedded feature value (array or list) is modified, which requires we serialize out this |
| // fs as if it was modified. |
| |
| if (!visited_not_yet_written.add(fs)) { |
| // was already visited; means this FS has multiple references, either from FS feature(s) or |
| // indexes or both |
| // https://issues.apache.org/jira/browse/UIMA-5532 |
| if (isDynamicMultiRef || isArrayOrList(fs)) { |
| boolean wasAdded = multiRefFSs.add(fs); |
| if (wasAdded) { |
| queue.add(fs); // if was in indexed set before, isn't in the queue set, but needs to be |
| } |
| } |
| return -1; |
| } |
| |
| final int typeCode = fs._getTypeCode(); |
| boolean alreadySet = typeUsed.get(typeCode); |
| if (!alreadySet) { |
| typeUsed.set(typeCode); |
| |
| String typeName = fs._getTypeImpl().getName(); |
| XmlElementName newXel = csss.uimaTypeName2XmiElementName(typeName); |
| |
| if (!needNameSpaces) { // means if name spaces are not not always needed, then we have to |
| // check for collision |
| csss.checkForNameCollision(newXel); // executed for JSON code |
| } |
| typeCode2namespaceNames[typeCode] = newXel; |
| } |
| return typeCode; |
| } |
| |
| /* |
| * Enqueues an indexed FS. Does NOT enqueue features at this point. Doesn't enqueue non-modified |
| * FS when delta |
| */ |
| void enqueueIndexedFs_only_not_features(int viewNumber, TOP fs) { |
| if (enqueueCommon(fs) != -1) { |
| List<TOP> fss = indexedFSs[viewNumber - 1]; |
| if (null == fss) { |
| indexedFSs[viewNumber - 1] = fss = new ArrayList<>(); |
| } |
| fss.add(fs); |
| } |
| } |
| |
| /** |
| * Enqueue an FS, and everything reachable from it. |
| * |
| * This call is recursive with enqueueFeatures, \ and an arbitrary long chain can get stack |
| * overflow error. Probably should fix this someday. See |
| * https://issues.apache.org/jira/browse/UIMA-106 |
| * |
| * @param addr |
| * The FS address. |
| * @throws SAXException |
| */ |
| private void enqueueFsAndMaybeFeatures(TOP fs) throws SAXException { |
| if (null == fs) { |
| return; |
| } |
| |
| int typeCode = enqueueCommon(fs); |
| if (typeCode == -1) { |
| return; |
| } |
| queue.add(fs); |
| enqueueFeatures(fs); |
| // Also, for FSArrays enqueue the elements -- not here, done by enqueueFeatures, 1 line above |
| // if (fs instanceof FSArray) { //TODO: won't get parameterized arrays? no, there are no |
| // parameterized arrays in the impl |
| // enqueueFSArrayElements((FSArray) fs); |
| // } |
| } |
| |
| // @formatter:off |
| /** |
| * For lists, |
| * see if this is a plain list |
| * - no loops |
| * - no other refs to list elements from outside the list |
| * -- if so, return false; |
| * |
| * add all the elements of the list to visited_not_yet_written, |
| * noting if they've already been added |
| * -- this indicates either a loop or another ref from outside, |
| * -- in either case, return true - t |
| * @param curNode - |
| * @param featCode - |
| * @return false if no list element is multiply-referenced, |
| * true if there is a loop or another ref from outside the list, for |
| * one or more list element nodes |
| */ |
| // @formatter:on |
| private boolean isListElementsMultiplyReferenced(TOP listNode) { |
| boolean foundCycle = false; |
| CommonList curNode = (CommonList) listNode; |
| while (curNode instanceof NonEmptyList) { // stop on end or 0 |
| if (!visited_not_yet_written.add((TOP) curNode)) { |
| foundCycle = true; |
| break; |
| } |
| curNode = curNode.getCommonTail(); |
| } |
| return foundCycle; |
| } |
| |
| // @formatter:off |
| /** |
| * ordinary FSs referenced as features are not checked by this routine; |
| * this is only called for FSlists of various kinds, and fs arrays of various kinds |
| * |
| * Not all featValues should be enqueued; |
| * list or array features which are marked **NOT** multiple-refs-allowed |
| * are serialized in-line |
| * for JSON, when using dynamicMultiRef (the default), list / array FSs |
| * are serialized by ref (not in-line) if there are multiple refs to them |
| * |
| * for XMI and JSON, any FS ref marked as multiple-refs-allowed forces |
| * the item onto the ref "queue". |
| * |
| * (not handled here: ordinary FSs are serialized in-line in JSON with isDynamicMultiRef) |
| * |
| * @param fi |
| * - the feature, to look up the multiRefAllowed flag |
| * @param featVal |
| * - the List or array element |
| * @param alreadyVisited |
| * true if visited_not_yet_written contains the featVal |
| * @param isListNode |
| * - |
| * @param isListFeat |
| * - |
| * @return false if should skip enqueue because this array or list is being serialized inline |
| * @throws SAXException |
| * - |
| */ |
| // @formatter:on |
| private boolean isMultiRef_enqueue(FeatureImpl fi, TOP featVal, boolean alreadyVisited, |
| boolean isListNode, boolean isListFeat) throws SAXException { |
| if (!isDynamicMultiRef) { |
| |
| // not JSON dynamic embedding, or dynamic embedding is turned off - compute static embedding |
| // just for lists and arrays. |
| boolean multiRefAllowed = fi.isMultipleReferencesAllowed() || isListNode; |
| if (!multiRefAllowed) { |
| // Arrays cannot be resized, so it is ok if an empty array has multiple references to it |
| // even if multiRefAllowed is false because it is effectively immutable. |
| if ((featVal instanceof CommonArrayFS && ((CommonArrayFS<?>) featVal).isEmpty())) { |
| return false; // immutable empty array, no need to enqueue |
| } |
| |
| // two cases: a list or non-list |
| // if a list, check/mark all the nodes in the list for any being multiply referenced |
| // say: multi-ref not allowed, but discovered a multi-ref, will be serialized as separate |
| // item |
| if ((isListFeat && isListElementsMultiplyReferenced(featVal)) |
| || (!isListFeat && alreadyVisited)) { |
| reportMultiRefWarning(fi); |
| } else { |
| // multi-ref not allowed, and this item is not multiply referenced (so far) |
| // expecting to serialize as embedded (if array or list, or JSON) |
| if (!isListFeat) { // already added visited for list nodes |
| visited_not_yet_written.add(featVal); |
| } |
| } |
| return false; // because static, multi-ref not allowed, no need to enqueue |
| } else { // is multiRefAllowed or in list node |
| return true; // static, multi-ref allowed or in list node, enqueue |
| } |
| } |
| |
| // doing JSON dynamic determination of multi-refs |
| if (alreadyVisited) { |
| return !multiRefFSs.contains(featVal); // enqueue in the "queue" section, first time this |
| // happens |
| } |
| return true; // enqueue this item. May or may not be eventually written embedded |
| // but we enqueue to track multi-use |
| } |
| |
| /** |
| * Enqueue all FSs reachable from features of the given FS. |
| * |
| * @param addr |
| * address of an FS |
| * @param typeCode |
| * type of the FS |
| * @param insideListNode |
| * true iff the enclosing FS (addr) is a list type |
| */ |
| private void enqueueFeatures(TOP fs) throws SAXException { |
| |
| /** |
| * Handle FSArrays |
| */ |
| if (fs instanceof FSArray) { |
| |
| TOP[] theArray = ((FSArray) fs)._getTheArray(); |
| |
| for (TOP elem : theArray) { |
| if (isFiltering |
| && (null == filterTypeSystem_inner.getType(elem._getTypeImpl().getName()))) { |
| continue; // skip because not in filter type system |
| } |
| if (elem != null) { |
| enqueueFsAndMaybeFeatures(elem); |
| } |
| } |
| return; |
| } |
| |
| boolean insideListNode = fs instanceof CommonList; |
| |
| if (fs instanceof UimaSerializable) { |
| ((UimaSerializable) fs)._save_to_cas_data(); |
| } |
| for (FeatureImpl fi : fs._getTypeImpl().getFeatureImpls()) { |
| if (isFiltering && filterTypeSystem_inner.getFeatureByFullName(fi.getName()) == null) { |
| // skip features that aren't in the target type system |
| continue; |
| } |
| // final int featAddr = addr + cas.getFeatureOffset(feat); |
| // final int featVal = cas.getHeapValue(featAddr); |
| // if (featVal == CASImpl.NULL) { // null feature values do not refer to any other FS |
| // continue; |
| // } |
| |
| // enqueue behavior depends on range type of feature |
| final int fsClass = fi.rangeTypeClass; |
| switch (fsClass) { |
| case LowLevelCAS.TYPE_CLASS_FS: { |
| enqueueFsAndMaybeFeatures(fs.getFeatureValue(fi)); |
| break; |
| } |
| case LowLevelCAS.TYPE_CLASS_INTARRAY: |
| case LowLevelCAS.TYPE_CLASS_FLOATARRAY: |
| case LowLevelCAS.TYPE_CLASS_STRINGARRAY: |
| case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY: |
| case LowLevelCAS.TYPE_CLASS_BYTEARRAY: |
| case LowLevelCAS.TYPE_CLASS_SHORTARRAY: |
| case LowLevelCAS.TYPE_CLASS_LONGARRAY: |
| case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY: |
| case LowLevelCAS.TYPE_CLASS_FSARRAY: { |
| TOP array = fs.getFeatureValue(fi); // can be null |
| if (null == array) { |
| continue; |
| } |
| // we enqueue arrays if: |
| // when statically using multipleReferencesAllowed flag: |
| // when that says it's multiply referenced; |
| // otherwise, we skip enqueueing it because it will |
| // be picked up when serializing the feature |
| // when dynamically computing multiple-refs: we enqueue it |
| // unless already enqueued, in order to pick up any multiple refs |
| final boolean alreadyVisited = visited_not_yet_written.contains(array); |
| if (isMultiRef_enqueue(fi, array, alreadyVisited, false, false)) { |
| if (enqueued_multiRef_arrays_or_lists.add(array)) { // only do this once per item |
| enqueueFsAndMaybeFeatures(array); // will add to queue list 1st time multi-ref |
| // detected |
| // or JSON isDynamicEmbedding is on (whether or not multi-ref) |
| } else { |
| // for isDynamicMultiRef, this is the first time we detect multiple refs |
| // do this here, because the enqueued_multiRef_arrays_or_lists.add above makes |
| // the 2nd and subsequent multi-ref things bypass the enqueue call. |
| // - only needed for isDynamicMultiRef, because only that returns true for |
| // isMultiRef_enqueue |
| // for the "first" instance, when it isn't yet known. |
| if (isDynamicMultiRef) { |
| multiRefFSs.add(array); |
| } |
| } |
| // otherwise, it is singly referenced (so far) and will be embedded |
| // (or has already been enqueued, in dynamic embedding mode), so don't enqueue |
| } else if (array instanceof FSArray && !alreadyVisited) { |
| // enqueue any FSs reachable from an FSArray |
| enqueueFSArrayElements((FSArray) array); |
| } |
| break; |
| } |
| case TYPE_CLASS_INTLIST: |
| case TYPE_CLASS_FLOATLIST: |
| case TYPE_CLASS_STRINGLIST: |
| case TYPE_CLASS_FSLIST: { |
| // we enqueue lists if: |
| // when statically using multipleReferencesAllowed flag: |
| // when that says it's multiply referenced or |
| // we're inside a list which was earlier multiply referenced |
| // otherwise, we skip enqueueing it because it will |
| // be picked up when serializing the feature |
| // when dynamically computing multiple-refs: we enqueue it |
| // unless already enqueued, in order to pick up any multiple refs |
| TOP startOfList_node = fs.getFeatureValue(fi); |
| if (null == startOfList_node) { |
| // the feature, whose type is one of the lists, has a null value, so there's nothing |
| // to enqueue |
| continue; |
| } |
| final boolean alreadyVisited = visited_not_yet_written.contains(startOfList_node); |
| if (isMultiRef_enqueue(fi, startOfList_node, alreadyVisited, insideListNode, true)) { |
| if (enqueued_multiRef_arrays_or_lists.add(startOfList_node)) { // only do this once |
| // per item |
| enqueueFsAndMaybeFeatures(startOfList_node); |
| } else { |
| // for isDynamicMultiRef, this is the first time we detect multiple refs |
| // do this here, because the enqueued_multiRef_arrays_or_lists.add above makes |
| // the 2nd and subsequent multi-ref things bypass the enqueue call. |
| // - only needed for isDynamicMultiRef, because only that returns true for |
| // isMultiRef_enqueue |
| // for the "first" instance, when it isn't yet known. |
| if (isDynamicMultiRef) { |
| multiRefFSs.add(startOfList_node); |
| } |
| } |
| } else if (startOfList_node instanceof FSList && !alreadyVisited) { |
| // also, we need to enqueue any FSs reachable from an FSList |
| enqueueFSListElements((FSList) startOfList_node); |
| } |
| break; |
| } |
| } |
| } // end of loop over all features |
| } |
| |
| /** |
| * Enqueues all FS reachable from an FSArray. |
| * |
| * @param addr |
| * Address of an FSArray |
| */ |
| private void enqueueFSArrayElements(FSArray fsArray) throws SAXException { |
| for (TOP elem : fsArray._getTheArray()) { |
| if (elem != null) { |
| enqueueFsAndMaybeFeatures(elem); |
| } |
| } |
| } |
| |
| /** |
| * Enqueues all Head values of FSList reachable from an FSList. This does NOT include the list |
| * nodes themselves. |
| * |
| * @param addr |
| * Address of an FSList |
| */ |
| private void enqueueFSListElements(FSList<TOP> node) throws SAXException { |
| node.walkList_saxException( |
| n -> enqueueFsAndMaybeFeatures(((NonEmptyFSList<TOP>) n).getHead()), null); |
| } |
| |
| /* |
| * Encode the indexed FS in the queue. |
| */ |
| public void encodeIndexed() throws Exception { |
| if (null != previouslySerializedFSs) { |
| encodeFSs(previouslySerializedFSs); |
| } |
| if (null != modifiedEmbeddedValueFSs) { |
| encodeFSs(modifiedEmbeddedValueFSs); |
| } |
| for (List<TOP> fss : indexedFSs) { |
| if (fss != null) { |
| encodeFSs(fss); |
| } |
| } |
| } |
| |
| private void encodeFSs(final List<TOP> fss) throws Exception { |
| for (TOP fs : fss) { |
| encodeFS(fs); |
| } |
| } |
| |
| /* |
| * Encode all other enqueued (non-indexed) FSs. The queue is read out in FiFo order. This |
| * insures that FsLists which are only referenced via a single FS ref, get encoded as [ x x x ] |
| * format rather than as individual FSs (because the individual items are also in the queue as |
| * items, but later). The isWritten test prevents dupl writes |
| */ |
| public void encodeQueued() throws Exception { |
| |
| for (TOP fs : queue) { |
| // for some serializers, things could be enqueued multiple times in the ref queue |
| // so check if already written, and if so, skip |
| // Case where this happens: JSON serialization with dynamically determined single ref |
| // embedding |
| // - have to enqueue to check if multiple refs, even if embedding eventually |
| if (visited_not_yet_written.contains(fs)) { |
| // skip if JSON dynamically computing whether or not to embed things and there's only one |
| // item - it will be embedded instead |
| if (isDynamicMultiRef && !multiRefFSs.contains(fs)) { |
| continue; // skip writing embeddable item (for JSON dynamic embedding) from Q; will be |
| // written from reference |
| } |
| encodeFS(fs); |
| } |
| } |
| } |
| |
| // public Integer[] collectAllFeatureStructures() { |
| // final int indexedSize = indexedFSs.size(); |
| // final int qSize = queue.size(); |
| // final int rLen = indexedSize + queue.size(); |
| // Integer[] r = new Integer[rLen]; |
| // int i = 0; |
| // for (; i < indexedSize; i++) { |
| // r[i] = indexedFSs.get(i); |
| // } |
| // for (int j = 0; j < qSize; j++) { |
| // r[i++] = queue.get(j); |
| // } |
| // return r; |
| // } |
| |
| /** |
| * Called for JSon Serialization Sort a view, by type and then by begin/end asc/des for subtypes |
| * of Annotation, then by id |
| */ |
| public final Comparator<TOP> sortFssByType = new Comparator<TOP>() { |
| @Override |
| public int compare(TOP fs1, TOP fs2) { |
| int c = Integer.compare(fs1._getTypeImpl().getCode(), fs2._getTypeImpl().getCode()); |
| if (c != 0) { |
| return c; |
| } |
| // final boolean hasSofa = tsi.subsumes(tsi.annotBaseTypeCode, typeCode1); |
| // if (hasSofa) { |
| // c = compareFeat(o1, o2, tsi.annotSofaFeatCode); |
| // if (c != 0) { |
| // return c; |
| // } |
| |
| if (fs1 instanceof Annotation) { |
| Annotation fs1a = (Annotation) fs1; |
| Annotation fs2a = (Annotation) fs2; |
| |
| c = Integer.compare(fs1a.getBegin(), fs2a.getBegin()); |
| if (c != 0) { |
| return c; |
| } |
| |
| c = Integer.compare(fs2a.getEnd(), fs1a.getEnd()); // reverse order |
| if (c != 0) { |
| return c; |
| } |
| |
| // fall thru to do id compare |
| } |
| // not annotation, or equal begin/end/type |
| return Integer.compare(fs1._id, fs2._id); // return in @id order |
| } |
| }; |
| |
| // @formatter:off |
| /** |
| * Encode an individual FS. |
| * |
| * Json has 2 encodings |
| * For type: |
| * "typeName" : [ { "@id" : 123, feat : value .... }, |
| * { "@id" : 456, feat : value .... }, |
| * ... |
| * ], |
| * ... |
| * |
| * For id: |
| * "nnnn" : {"@type" : typeName ; feat : value ...} |
| * |
| * For cases where the top level type is an array or list, there is |
| * a generated feature name, "@collection" whose value is |
| * the list or array of values associated with that type. |
| * |
| * @param fs |
| * the FS to be encoded. |
| * @throws SAXException |
| * passthru |
| */ |
| // @formatter:on |
| public void encodeFS(TOP fs) throws Exception { |
| final int typeCode = fs._getTypeImpl().getCode(); |
| |
| final int typeClass = classifyType(fs._getTypeImpl()); |
| // for JSON, the items reachable via indexes are written first, |
| // and isIndexId = false |
| // The items reachable via refs are written next, and |
| // isIndexId = true; |
| boolean isIndexId = csss.writeFsStart(fs, typeCode); |
| |
| // write the id if needed for reference |
| // - if it is not ref'd via index, and JSON is computing dynamic refs, and it is multiply |
| // ref'd |
| // - skip if not JSON dynamic ref, or in index, or not multiply ref'd |
| if (!isIndexId && isDynamicMultiRef && multiRefFSs.contains(fs)) { |
| csss.writeFsRef(fs); |
| } else { |
| visited_not_yet_written.remove(fs); // mark as written |
| switch (typeClass) { |
| case LowLevelCAS.TYPE_CLASS_FS: |
| csss.writeFs(fs, typeCode); |
| break; |
| |
| case TYPE_CLASS_INTLIST: |
| case TYPE_CLASS_FLOATLIST: |
| case TYPE_CLASS_STRINGLIST: |
| case TYPE_CLASS_FSLIST: |
| csss.writeListsAsIndividualFSs(fs, typeCode); |
| break; |
| |
| case LowLevelCAS.TYPE_CLASS_FSARRAY: |
| case LowLevelCAS.TYPE_CLASS_INTARRAY: |
| case LowLevelCAS.TYPE_CLASS_FLOATARRAY: |
| case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY: |
| case LowLevelCAS.TYPE_CLASS_BYTEARRAY: |
| case LowLevelCAS.TYPE_CLASS_SHORTARRAY: |
| case LowLevelCAS.TYPE_CLASS_LONGARRAY: |
| case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY: |
| case LowLevelCAS.TYPE_CLASS_STRINGARRAY: |
| csss.writeArrays(fs, typeCode, typeClass); |
| break; |
| |
| default: |
| throw new RuntimeException("Error classifying FS type."); |
| } |
| |
| csss.writeEndOfIndividualFs(); |
| } |
| } |
| |
| int getElementCountForSharedData() { |
| return (sharedData == null) ? 0 : sharedData.getOutOfTypeSystemElements().size(); |
| } |
| |
| /** |
| * Get the XMI ID to use for an FS. |
| * |
| * @param fs |
| * the FS |
| * @return XMI ID or null |
| */ |
| public String getXmiId(TOP fs) { |
| int v = getXmiIdAsInt(fs); |
| return (v == 0) ? null : Integer.toString(v); |
| } |
| |
| public int getXmiIdAsInt(TOP fs) { |
| if (fs == null) { |
| return 0; |
| } |
| if (isFiltering && null == filterTypeSystem_inner.getType(fs._getTypeImpl().getName())) { // return |
| // as |
| // null |
| // any |
| // references |
| // to |
| // types |
| // not |
| // in |
| // target |
| // TS |
| return 0; |
| } |
| |
| if (sharedData == null) { |
| // in the absence of outside information, just use the FS address |
| return fs._id; |
| } else { |
| return sharedData.getXmiIdAsInt(fs); |
| } |
| |
| } |
| |
| public String getNameSpacePrefix(String uimaTypeName, String nsUri, int lastDotIndex) { |
| // determine what namespace prefix to use |
| String prefix = nsUriToPrefixMap.get(nsUri); |
| if (prefix == null) { |
| if (lastDotIndex != -1) { // have namespace |
| int secondLastDotIndex = uimaTypeName.lastIndexOf('.', lastDotIndex - 1); |
| prefix = uimaTypeName.substring(secondLastDotIndex + 1, lastDotIndex); |
| } else { |
| prefix = "noNamespace"; // is correct for older XMI standard too |
| } |
| // make sure this prefix hasn't already been used for some other namespace |
| // including out-of-type-system types (for XmiCasSerializer) |
| if (nsPrefixesUsed.contains(prefix)) { |
| String basePrefix = prefix; |
| int num = 2; |
| while (nsPrefixesUsed.contains(basePrefix + num)) { |
| num++; |
| } |
| prefix = basePrefix + num; |
| } |
| nsUriToPrefixMap.put(nsUri, prefix); |
| nsPrefixesUsed.add(prefix); |
| } |
| return prefix; |
| } |
| |
| /* |
| * convert to shared string, without interning, reduce GCs |
| */ |
| public String getUniqueString(String s) { |
| String u = uniqueStrings.get(s); |
| if (null == u) { |
| u = s; |
| uniqueStrings.put(s, s); |
| } |
| return u; |
| } |
| |
| public String getTypeNameFromXmlElementName(XmlElementName xe) { |
| final String nsUri = xe.nsUri; |
| if (nsUri == null || nsUri.length() == 0) { |
| throw new UnsupportedOperationException(); |
| } |
| |
| final int pfx = XmiCasSerializer.URIPFX.length; |
| final int sfx = XmiCasSerializer.URISFX.length; |
| |
| String r = (nsUri.startsWith(XmiCasSerializer.DEFAULT_NAMESPACE_URI)) ? "" |
| : nsUri.substring(pfx, nsUri.length() - sfx); |
| r = r.replace('/', '.'); |
| |
| return r + xe.localName; |
| } |
| |
| public boolean isStaticMultiRef(FeatureImpl fi) { |
| return fi.isMultipleReferencesAllowed(); |
| } |
| |
| } |
| |
| /** |
| * Classifies a type. This returns an integer code identifying the type as one of the primitive |
| * types, one of the array types, one of the list types, or a generic FS type (anything else). |
| * <p> |
| * The {@link LowLevelCAS#ll_getTypeClass(int)} method classifies primitives and array types, but |
| * does not have a special classification for list types, which we need for XMI serialization. |
| * Therefore, in addition to the type codes defined on {@link LowLevelCAS}, this method can return |
| * one of the type codes TYPE_CLASS_INTLIST, TYPE_CLASS_FLOATLIST, TYPE_CLASS_STRINGLIST, or |
| * TYPE_CLASS_FSLIST. |
| * |
| * @param ti |
| * the type to classify |
| * @return one of the TYPE_CLASS codes defined on {@link LowLevelCAS} or on this interface. |
| */ |
| public static final int classifyType(TypeImpl ti) { |
| switch (ti.getCode()) { |
| case TypeSystemConstants.intListTypeCode: |
| return TYPE_CLASS_INTLIST; |
| case TypeSystemConstants.floatListTypeCode: |
| return TYPE_CLASS_FLOATLIST; |
| case TypeSystemConstants.stringListTypeCode: |
| return TYPE_CLASS_STRINGLIST; |
| case TypeSystemConstants.fsListTypeCode: |
| return TYPE_CLASS_FSLIST; |
| default: |
| return TypeSystemImpl.getTypeClass(ti); |
| } |
| } |
| |
| private static boolean isArrayOrList(TOP fs) { |
| return fs instanceof CommonPrimitiveArray || fs instanceof FSArray || fs instanceof CommonList; |
| } |
| |
| } |