| /** \file xmideserializer_handler.hpp . |
| |
| |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| |
| |
| \brief SAX2 handler for reading XMI into a CAS. |
| |
| -------------------------------------------------------------------------- */ |
| #ifndef __UIMA_XMIDESER_HANDLER_HPP |
| #define __UIMA_XMIDESER_HANDLER_HPP |
| |
| |
| // --------------------------------------------------------------------------- |
| // Includes |
| // --------------------------------------------------------------------------- |
| |
| #include "uima/pragmas.hpp" //must be first to suppress warnings |
| #include <map> |
| #include <stack> |
| #include <utility> |
| #include "xercesc/sax2/DefaultHandler.hpp" |
| #include "uima/internal_casimpl.hpp" |
| #include "xmishareddata.hpp" |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Forward declarations. |
| Of the names declared here, this header itself refers only to CAS, |
| lowlevel::IndexRepository, lowlevel::TypeSystem and internal::CASImpl. |
| Other project types used further down (for example Type, XmlElementName, |
| OotsElementData and XmiSerializationSharedData) are not declared here; |
| NOTE(review): presumably they come from the headers included above. */ |
| /* ----------------------------------------------------------------------- */ |
| namespace uima { |
| class FeatureStructure; |
| class FSIndexRepository; |
| class SofaFS; |
| class CAS; |
| class AnnotationFS; |
| class AnnotatorContext; |
| namespace lowlevel { |
| class IndexRepository; |
| class FSHeap; |
| class TypeSystem; |
| } |
| namespace internal { |
| class CASImpl; |
| } |
| } |
| /* |
| Per the Xerces-C++ documentation this macro expands to a using-directive for the |
| Xerces-C++ namespace, which is why DefaultHandler, Attributes, Locator and |
| SAXParseException are written unqualified in this header. |
| NOTE(review): because it sits in a header, the directive also applies to every |
| file that includes this header. |
| */ |
| XERCES_CPP_NAMESPACE_USE |
| |
| |
| namespace uima { |
| |
| /* |
| Parser state codes. |
| NOTE(review): presumably these are the values stored in |
| XmiDeserializerHandler::iv_state; the code that sets and tests them is not in |
| this header, so confirm against the implementation file. |
| They are preprocessor macros, so the enclosing uima namespace does not scope them. |
| */ |
| |
| #define DOC_STATE 0 |
| #define FS_STATE 1 |
| #define FEAT_STATE 2 |
| #define FEAT_CONTENT_STATE 3 |
| #define IGNORING_XMI_ELEMENTS_STATE 4 |
| /** |
| The class <TT>XmiDeserializerHandler</TT> implements a SAX2 handler for XMI format. |
| |
| It derives from <TT>DefaultHandler</TT> and declares the document, element, |
| character-data and error callbacks listed in its public section. |
| The constructor takes the CAS, a pointer to an XmiSerializationSharedData |
| and a <TT>lenient</TT> flag that defaults to true. |
| NOTE(review): what lenient relaxes is decided in the implementation file, which is |
| not visible from this header. |
| */ |
| class XmiDeserializerHandler : public DefaultHandler { |
| public: |
| // ----------------------------------------------------------------------- |
| // Constructors and Destructor |
| // ----------------------------------------------------------------------- |
| XmiDeserializerHandler(CAS & cas, XmiSerializationSharedData * xmiSharedData, bool lenient=true); |
| ~XmiDeserializerHandler(); |
| /* |
| Document-content callbacks. The names match the content callbacks of the |
| Xerces-C++ SAX2 DefaultHandler base class. |
| NOTE(review): characters() takes its length as XMLSize_t but ignorableWhitespace() |
| takes unsigned int. Xerces-C++ 3.x declares both base-class callbacks with |
| XMLSize_t, so wherever XMLSize_t is not unsigned int the ignorableWhitespace() |
| declared here would be a separate overload rather than an override -- confirm |
| against the Xerces version this project builds with. |
| */ |
| void startDocument(); |
| void startElement(const XMLCh* const uri, |
| const XMLCh* const localname, |
| const XMLCh* const qname, |
| const Attributes& attrs); |
| void characters(const XMLCh* const chars, |
| const XMLSize_t length); |
| void endDocument(); |
| void endElement(const XMLCh* const uri, |
| const XMLCh* const localname, |
| const XMLCh* const qname |
| ); |
| void ignorableWhitespace(const XMLCh* const chars, |
| const unsigned int length); |
| void setDocumentLocator(const Locator* const locator); |
| |
| /* |
| Parser diagnostics callbacks; each one receives a SAXParseException. |
| */ |
| void warning(const SAXParseException& exception); |
| void error(const SAXParseException& exception); |
| void fatalError(const SAXParseException& exception); |
| |
| |
| private: |
| /* |
| Internal helpers; their definitions are outside this header. |
| readFS() and handleFeature() are overloaded: some forms take XMI names or values as |
| icu::UnicodeString, others take low-level addresses and feature/type codes. |
| */ |
| //void readFS(icu:UnicodeString & qualifiedName, const Attributes & attrs); |
| void readFS(icu::UnicodeString & nsUri, |
| icu::UnicodeString & localName, |
| icu::UnicodeString & qualifiedName, |
| const Attributes & attrs); |
| void readFS(lowlevel::TyFS addr, const Attributes & attrs, bool toIndex); |
| void handleFeature(lowlevel::TyFS addr, |
| icu::UnicodeString & featName, |
| icu::UnicodeString & featVal, |
| bool lenient); |
| void handleFeature(Type & type, lowlevel::TyFS addr, |
| lowlevel::TyFSFeature featCode, |
| icu::UnicodeString & featVal, |
| bool lenient); |
| |
| void finalizeFS(int addr); |
| void finalizeArray(Type & type, lowlevel::TyFS addr); |
| |
| const Locator * iv_locator; /* NOTE(review): presumably the locator passed to setDocumentLocator() -- confirm */ |
| CAS * iv_cas; |
| internal::CASImpl & iv_casimpl; /* reference member, so it must be bound in the constructor's initializer list */ |
| const lowlevel::TypeSystem * iv_typesystem; |
| int iv_state; /* NOTE(review): presumably one of the DOC_STATE .. IGNORING_XMI_ELEMENTS_STATE macros */ |
| icu::UnicodeString buffer; |
| |
| // The address of the most recently created FS. Needed for array elements |
| // and embedded feature values. |
| lowlevel::TyFS currentAddr; |
| |
| // The name of the content feature, if we've seen one. |
| icu::UnicodeString currentContentFeat; |
| |
| // The current position when parsing array elements. |
| size_t arrayPos; |
| |
| // The type of the array we're currently reading. Needed for proper |
| // treatment of array element values. |
| lowlevel::TyFS arrayType; /* NOTE(review): declared as TyFS although addArrayElement() takes its arrayType as TyFSType -- confirm the intended type */ |
| |
| // SofaFS type |
| int sofaTypeCode; |
| |
| // Store IndexRepositories in a vector; |
| std::vector<uima::lowlevel::IndexRepository *> indexRepositories; |
| |
| // Store CAS Views in a vector |
| std::vector<CAS*> tcasInstances; |
| |
| int nextIndex; |
| |
| icu::UnicodeString xmiElementName2uimaTypeName(icu::UnicodeString& nameSpaceURI, icu::UnicodeString& localName); |
| int createByteArray(icu::UnicodeString& currentArrayElements, int currentArrayId); |
| void remapFSListHeads(int addr); |
| |
| void tokenize(icu::UnicodeString&, std::vector<std::string>&); |
| int createIntList( std::vector<std::string>& featVal); |
| int createFloatList( std::vector<std::string>& featVal); |
| int createStringList( std::vector<std::string>& featVal); |
| int createFSList( std::vector<std::string>& featVal); |
| |
| void addArrayElement(lowlevel::TyFS addr,lowlevel::TyFSType arrayType, |
| int arrayPos, std::string & buffer); |
| |
| void handleFeature(lowlevel::TyFS addr, icu::UnicodeString & featName, |
| std::vector<std::string> & featVal); |
| |
| void handleFeature(lowlevel::TyFS addr, lowlevel::TyFSFeature featCode, |
| lowlevel::TyFSType rangeTypeCode,std::vector<std::string> & featVal); |
| |
| int createArray( lowlevel::TyFSType typeCode, |
| std::vector<std::string>& featVal, int xmiID); |
| |
| void processView(int sofaXmiId, icu::UnicodeString & membersString) ; |
| int getFsAddrForXmiId(int xmiId); |
| void addToOutOfTypeSystemData(XmlElementName * xmlElementName, const Attributes & attrs); |
| void addOutOfTypeSystemFeature(OotsElementData * ootsElem, |
| icu::UnicodeString & featName, std::vector<icu::UnicodeString> & featVals); |
| |
| // container for data shared between the XmiCasSerializer and |
| // XmiDeserializer, to support things such as consistency of IDs across |
| // multiple serializations. This is also where the map from xmi:id to |
| // FS address is stored. |
| XmiSerializationSharedData * sharedData; |
| bool ownsSharedData; /* NOTE(review): presumably true when the handler created sharedData itself and must release it -- confirm in the constructor/destructor */ |
| |
| //Current out-of-typesystem element, if any |
| OotsElementData * outOfTypeSystemElement; |
| |
| // Store address of every FS we've deserialized, since we need to go back |
| // and apply fix-ups afterwards. |
| std::vector<int> deserializedFsAddrs; |
| |
| // map from namespace prefixes to URIs. |
| std::map<icu::UnicodeString, icu::UnicodeString> nsPrefixToUriMap; |
| // map from xmi namespace to uima namespace |
| std::map<icu::UnicodeString, icu::UnicodeString> xmiNamespaceToUimaNamespaceMap; |
| |
| //typename - values (NOTE(review): the values are raw vector pointers; who allocates and frees them is not visible in this header) |
| std::map<icu::UnicodeString, std::vector<icu::UnicodeString>* > multiValuedFeatures; |
| int ignoreDepth; /* NOTE(review): presumably the element nesting depth while in IGNORING_XMI_ELEMENTS_STATE -- confirm */ |
| |
| // The type of the most recently created FS. Needed for arrays, also |
| // useful for embedded feature values. |
| Type currentType; |
| |
| // the ID and values of arrays are stored on startElement, then used on |
| // endElement to actually create the array. This is because in the case of |
| // String arrays serialized with the values as child elements, we can't create |
| // the array until we've seen all of the child elements. |
| int currentArrayId; |
| icu::UnicodeString currentArrayElements; |
| |
| int nextSofaNum; //number of sofas found so far |
| |
| // Store a separate vector of FSList nodes that were deserialized |
| // from multivalued properties. |
| // These are special because their "head" feature needs remapping but their "tail" feature |
| // doesn't. |
| std::vector<int> fsListNodesFromMultivaluedProperties; |
| bool lenient; |
| |
| static char const * XMI_ID_ATTR_NAME; |
| static char const * TRUE_VALUE; |
| static char const * DEFAULT_CONTENT_FEATURE; |
| static char const * DEFAULT_NAMESPACE_URI; |
| |
| }; |
| |
| } // namespace uima |
| |
| #endif //__UIMA_XMIDESER_HANDLER_HPP |
| |