| /** \file internal_casserializer.cpp . |
| ----------------------------------------------------------------------------- |
| |
| |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| |
| ----------------------------------------------------------------------------- |
| |
| Description: |
| |
| ----------------------------------------------------------------------------- |
| |
| |
| -------------------------------------------------------------------------- */ |
| |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Include dependencies */ |
| /* ----------------------------------------------------------------------- */ |
| |
| //#define DEBUG_VERBOSE |
| #include "uima/pragmas.hpp" |
| |
| #include "uima/macros.h" |
| #include "uima/internal_casserializer.hpp" |
| #include "uima/internal_casimpl.hpp" |
| #include "uima/lowlevel_fsheap.hpp" |
| #include "uima/lowlevel_indexiterator.hpp" |
| #include "uima/result_specification.hpp" |
| #include "uima/casdefinition.hpp" |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Constants */ |
| /* ----------------------------------------------------------------------- */ |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Forward declarations */ |
| /* ----------------------------------------------------------------------- */ |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Types / Classes */ |
| /* ----------------------------------------------------------------------- */ |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Implementation */ |
| /* ----------------------------------------------------------------------- */ |
| |
| using namespace std; |
| namespace uima { |
| namespace internal { |
| |
| |
| void CASSerializer::serializeResultSpec(ResultSpecification const & resultSpec, |
| vector<SerializedCAS::TyNum>& resultSpecTypes, |
| vector<SerializedCAS::TyNum>& resultSpecFeatures) { |
| ResultSpecification::TyTypeOrFeatureSTLSet const & tofSet = resultSpec.getTypeOrFeatureSTLSet(); |
| ResultSpecification::TyTypeOrFeatureSTLSet::const_iterator cit; |
| for (cit = tofSet.begin(); cit != tofSet.end(); ++cit) { |
| TypeOrFeature const & tof = *cit; |
| if (tof.isType()) { |
| Type t = tof.getType(); |
| assert( t.isValid() ); |
| resultSpecTypes.push_back( uima::internal::FSPromoter::demoteType(t) ); |
| } else { |
| Feature f = tof.getFeature(); |
| assert( f.isValid() ); |
| resultSpecFeatures.push_back( uima::internal::FSPromoter::demoteFeature(f) ); |
| } |
| } |
| } |
| |
| |
| bool isInterval(uima::lowlevel::TyFSType first, uima::lowlevel::TyFSType last, vector<uima::lowlevel::TyFSType> const & vec) { |
| size_t i; |
| for (i=0; i<vec.size(); ++i) { |
| if (vec[i] != first + i) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| CASSerializer::CASSerializer(bool bCopyStrings) |
| : iv_bCopyStrings(bCopyStrings) {} |
| |
| CASSerializer::~CASSerializer() {} |
| |
| UnicodeStringRef CASSerializer::createString(UChar const * cpBuf, size_t uiLen, uima::internal::SerializedCAS & rSerializedCAS) { |
| UnicodeStringRef ref(cpBuf, uiLen); |
| if (iv_bCopyStrings) { |
| return rSerializedCAS.addString(ref); |
| } |
| return ref; |
| } |
| |
| |
| void CASSerializer::serializeTypeSystem(uima::internal::CASDefinition const & casDef, uima::internal::SerializedCAS & rSerializedCAS) { |
| uima::lowlevel::TypeSystem const & crTypeSystem = casDef.getTypeSystem(); |
| UnicodeStringRef invalidUSP(rSerializedCAS.iv_emptyString.getBuffer(), rSerializedCAS.iv_emptyString.length()); |
| assert( invalidUSP.getBuffer() != NULL ); |
| assert( invalidUSP.length() == 0); |
| |
| // 1. inheritance vector |
| rSerializedCAS.iv_vecTypeInheritanceTable.clear(); |
| size_t uiTypeNum = crTypeSystem.getNumberOfTypes() + 1; |
| rSerializedCAS.iv_vecTypeInheritanceTable.resize(uiTypeNum, 0); |
| |
| #ifndef NDEBUG |
| vector<uima::lowlevel::TyFSType> vecTypes; |
| crTypeSystem.getAllTypes(vecTypes); |
| assert( isInterval(1, uiTypeNum, vecTypes) ); |
| assert( uiTypeNum == vecTypes.size() + 1); |
| #endif |
| |
| size_t i; |
| assert( 0 == uima::lowlevel::TypeSystem::INVALID_TYPE ); |
| assert( 1 == crTypeSystem.getTopType() ); |
| for (i=2; i<uiTypeNum; ++i) { |
| uima::lowlevel::TyFSType tyChild = (uima::lowlevel::TyFSType) i; |
| uima::lowlevel::TyFSType tyParent = crTypeSystem.getParentType( tyChild ); |
| assert( tyParent <rSerializedCAS.iv_vecTypeInheritanceTable.size() ); |
| rSerializedCAS.iv_vecTypeInheritanceTable[tyChild] = tyParent; |
| } |
| |
| // 2. feature intro vector |
| rSerializedCAS.iv_vecFeatureDefinitionTable.clear(); |
| size_t uiFeatureNum = crTypeSystem.getNumberOfFeatures() + 1; |
| |
| // leave the first three cells empty |
| rSerializedCAS.iv_vecFeatureDefinitionTable.resize(3,0); |
| |
| #ifndef NDEBUG |
| vector<uima::lowlevel::TyFSFeature> vecFeatures; |
| crTypeSystem.getAllFeatures(vecFeatures); |
| assert( isInterval(1, uiFeatureNum, vecFeatures) ); |
| assert( uiFeatureNum == vecFeatures.size() + 1); |
| #endif |
| |
| assert( 0 == uima::lowlevel::TypeSystem::INVALID_FEATURE ); |
| for (i=1; i<uiFeatureNum; ++i) { |
| uima::lowlevel::TyFSFeature tyFeat = (uima::lowlevel::TyFSFeature) i; |
| UIMA_TPRINT("Adding feature with ID: " << tyFeat ); |
| UIMA_TPRINT("Adding feature: " << crTypeSystem.getFeatureName(tyFeat) ); |
| uima::lowlevel::TyFSType tyIntroType = crTypeSystem.getIntroType(tyFeat); |
| uima::lowlevel::TyFSType tyRangeType = crTypeSystem.getRangeType(tyFeat); |
| int tyMultiRefs = crTypeSystem.isMultipleReferencesAllowed(tyFeat) ? 1 : 0; |
| rSerializedCAS.iv_vecFeatureDefinitionTable.push_back( tyIntroType ); |
| rSerializedCAS.iv_vecFeatureDefinitionTable.push_back( tyRangeType ); |
| rSerializedCAS.iv_vecFeatureDefinitionTable.push_back( tyMultiRefs ); |
| } |
| |
| #ifndef NDEBUG |
| for (i=1; i<vecFeatures.size(); ++i) { |
| uima::lowlevel::TyFSFeature tyFeat = vecFeatures[i]; |
| uima::lowlevel::TyFSType tyIntroType = crTypeSystem.getIntroType(tyFeat); |
| uima::lowlevel::TyFSType tyRangeType = crTypeSystem.getRangeType(tyFeat); |
| int tyMultiRefs = crTypeSystem.isMultipleReferencesAllowed(tyFeat) ? 1 : 0; |
| assert( (tyFeat*2) <rSerializedCAS.iv_vecFeatureDefinitionTable.size() ); |
| assert( (tyFeat*2+1) <rSerializedCAS.iv_vecFeatureDefinitionTable.size() ); |
| assert( rSerializedCAS.iv_vecFeatureDefinitionTable[tyFeat*3] == tyIntroType ); |
| assert( rSerializedCAS.iv_vecFeatureDefinitionTable[tyFeat*3+1] == tyRangeType ); |
| assert( rSerializedCAS.iv_vecFeatureDefinitionTable[tyFeat*3+2] == tyMultiRefs ); |
| } |
| #endif |
| |
| |
| // 3. type string table |
| rSerializedCAS.iv_vecTypeSymbolTable.resize(uiTypeNum); |
| assert( rSerializedCAS.iv_vecTypeSymbolTable.size() == uiTypeNum ); |
| rSerializedCAS.iv_vecTypeSymbolTable[0] = invalidUSP; |
| for (i=1; i<uiTypeNum; ++i) { |
| icu::UnicodeString const & crTypeName = crTypeSystem.getTypeName(i); |
| UnicodeStringRef pus = createString( crTypeName.getBuffer(), crTypeName.length(), rSerializedCAS); |
| rSerializedCAS.iv_vecTypeSymbolTable[i] = pus; |
| } |
| |
| // 4. feature string and feature offset table |
| rSerializedCAS.iv_vecFeatureSymbolTable.resize(uiFeatureNum); |
| assert( rSerializedCAS.iv_vecFeatureSymbolTable.size() == uiFeatureNum ); |
| rSerializedCAS.iv_vecFeatureOffsetTable.resize(uiFeatureNum); |
| assert( rSerializedCAS.iv_vecFeatureOffsetTable.size() == uiFeatureNum ); |
| rSerializedCAS.iv_vecFeatureSymbolTable[0] = invalidUSP; |
| rSerializedCAS.iv_vecFeatureOffsetTable[0] = 0; |
| for (i=1; i<uiFeatureNum; ++i) { |
| uima::lowlevel::TyFSFeature tyFeat = i; |
| // string |
| icu::UnicodeString const & crFeatureName = crTypeSystem.getFeatureBaseName(tyFeat); |
| UnicodeStringRef pus = createString( crFeatureName.getBuffer(), crFeatureName.length(), rSerializedCAS); |
| rSerializedCAS.iv_vecFeatureSymbolTable[i] = pus; |
| |
| // offset |
| rSerializedCAS.iv_vecFeatureOffsetTable[i] = crTypeSystem.getFeatureOffset(tyFeat); |
| } |
| |
| // 5. type priorities |
| rSerializedCAS.iv_vecTypePriorityTable.resize(uiTypeNum-1); |
| for (i=1; i<uiTypeNum; ++i) { |
| size_t num = crTypeSystem.getTypePriorityNumber((uima::lowlevel::TyFSType) i); |
| rSerializedCAS.iv_vecTypePriorityTable[num] = i; |
| } |
| |
| // 6. string sub types |
| vector<uima::lowlevel::TyFSType> stringSubTypes; |
| crTypeSystem.getDirectSubTypes( uima::internal::gs_tyStringType, |
| stringSubTypes ); |
| |
| rSerializedCAS.iv_stringSubTypes.clear(); |
| for (i=0; i<stringSubTypes.size(); ++i) { |
| rSerializedCAS.iv_stringSubTypes.push_back(stringSubTypes[i]); |
| } |
| rSerializedCAS.iv_stringSubTypeValues.clear(); |
| rSerializedCAS.iv_stringSubTypeValuePos.clear(); |
| for (i=0; i<rSerializedCAS.iv_stringSubTypes.size(); ++i) { |
| size_t n = rSerializedCAS.iv_stringSubTypeValues.size(); |
| rSerializedCAS.iv_stringSubTypeValuePos.push_back(n); |
| |
| vector<icu::UnicodeString> const & stringValues = crTypeSystem.getStringsForStringSubtype(rSerializedCAS.iv_stringSubTypes[i]); |
| size_t j; |
| for (j=0; j<stringValues.size(); ++j) { |
| UnicodeStringRef ref(stringValues[j]); |
| rSerializedCAS.iv_stringSubTypeValues.push_back( ref ); |
| } |
| } |
| assert( rSerializedCAS.iv_stringSubTypes.size() == rSerializedCAS.iv_stringSubTypeValuePos.size() ); |
| } |
| |
| |
| #if defined( _MSC_VER ) |
| // locally disable warning about conversion from 'uima::internal::SerializedCAS::TyNum' to 'const int', possible loss of data |
| # pragma warning( disable: 4244 ) |
| #endif |
| void CASSerializer::serializeIndexDefinition(uima::internal::CASDefinition const & casdef, uima::internal::SerializedCAS & rSerializedCAS) { |
| uima::lowlevel::IndexDefinition const & indexDef = casdef.getIndexDefinition(); |
| |
| vector<uima::lowlevel::IndexDefinition::TyIndexID> vecIndexIDs; |
| |
| indexDef.getAllIndexIDs(vecIndexIDs); |
| size_t uiIndexNum = vecIndexIDs.size(); |
| rSerializedCAS.iv_vecIndexIDTable.resize(uiIndexNum); |
| rSerializedCAS.iv_vecComparatorStartTable.resize(uiIndexNum); |
| rSerializedCAS.iv_vecIndexKindTable.resize(uiIndexNum); |
| rSerializedCAS.iv_vecComparatorDefinitionTable.clear(); |
| |
| size_t i; |
| for (i=0; i<uiIndexNum; ++i) { |
| uima::lowlevel::IndexDefinition::TyIndexID const & crIndexID = vecIndexIDs[i]; |
| rSerializedCAS.iv_vecIndexIDTable[i] = createString( crIndexID.getBuffer(), |
| crIndexID.length(), rSerializedCAS); |
| rSerializedCAS.iv_vecIndexKindTable[i] = indexDef.getIndexKind( crIndexID ); |
| UIMA_TPRINT("Index ID: " << crIndexID ); |
| // start of the next comparator definition |
| // is at the end of rSerializedCAS.iv_vecComparatorDefinitionTable |
| rSerializedCAS.iv_vecComparatorStartTable[i] = rSerializedCAS.iv_vecComparatorDefinitionTable.size(); |
| |
| // add type of the index |
| uima::lowlevel::TyFSType indexType = indexDef.getTypeForIndex(crIndexID); |
| // add type of the comparator (even if the index has none) |
| rSerializedCAS.iv_vecComparatorDefinitionTable.push_back( indexType ); |
| |
| uima::lowlevel::IndexComparator const * pComparator = indexDef.getComparator( crIndexID ); |
| if ( pComparator != NULL ) { |
| UIMA_TPRINT(" Index has comparator!"); |
| assert( pComparator->getType() == indexType ); |
| // serialize comparator |
| vector<uima::lowlevel::TyFSFeature> const & crKeyFeatures = pComparator->getKeyFeatures(); |
| vector<uima::lowlevel::IndexComparator::EnKeyFeatureComp> const & crCompOps = pComparator->getComparisonOps(); |
| assert( crKeyFeatures.size() == crCompOps.size() ); |
| // add all key features |
| size_t j; |
| for (j=0; j<crKeyFeatures.size(); ++j) { |
| rSerializedCAS.iv_vecComparatorDefinitionTable.push_back( (SerializedCAS::TyNum) crKeyFeatures[j] ); |
| rSerializedCAS.iv_vecComparatorDefinitionTable.push_back( (SerializedCAS::TyNum) crCompOps[j] ); |
| } |
| } |
| } |
| } |
| |
| #ifdef BYEBYEPTRS |
| SerializedCAS::TyNum CASSerializer::adjustString(uima::lowlevel::TyHeapCell tyFeatureCell, |
| TyStringMap & stringMap, |
| uima::internal::SerializedCAS & rSerializedCAS) { |
| UIMA_TPRINT("adjustString() entered"); |
| UChar* * pPointerStringRefHeap = (UChar * *) tyFeatureCell; |
| UChar * puc = *pPointerStringRefHeap; |
| if (puc == NULL) { |
| return 0; |
| } |
| assert( puc != NULL); |
| assert( EXISTS(puc) ); |
| assert( EXISTS(pPointerStringRefHeap+1) ); |
| |
| SerializedCAS::TyNum iStrLen = (SerializedCAS::TyNum) * (pPointerStringRefHeap+1); |
| |
| ptrdiff_t iStringIndex = 0; |
| |
| // try to find the string |
| TyStringMap::iterator it = stringMap.lower_bound( puc ); |
| // if not found |
| if ( (it == stringMap.end()) || ( (*it).first != puc ) ) { |
| iStringIndex = stringMap.size() + 1; |
| // insert new one |
| TyStringMap::value_type vt(puc, iStringIndex); |
| stringMap.insert(it, vt); |
| UIMA_TPRINT(" iStringIndex: " << iStringIndex << ", StringSymblTableSize: " << rSerializedCAS.iv_vecStringSymbolTable.size()); |
| assert( iStringIndex == rSerializedCAS.iv_vecStringSymbolTable.size() ); |
| UnicodeStringRef ustrp = createString(puc, iStrLen, rSerializedCAS); |
| rSerializedCAS.iv_vecStringSymbolTable.push_back(ustrp); |
| } else { |
| iStringIndex = (*it).second; |
| } |
| |
| return iStringIndex; |
| } |
| #endif |
| |
| void CASSerializer::serializeFSHeapAndStringHeap(uima::CAS const & crCAS, uima::internal::SerializedCAS & rSerializedCAS) { |
| uima::internal::CASImpl const & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); |
| uima::lowlevel::FSHeap const & crHeap = crCASImpl.getHeap(); |
| uima::lowlevel::FSHeap::TyFSHeap const & tyTempHeap = crHeap.iv_clTemporaryHeap; |
| |
| // copy the FSHeap as is (all offsets and values) |
| size_t uiSegmentLength = tyTempHeap.getTopOfHeap(); |
| uima::lowlevel::TyHeapCell* daHeap = tyTempHeap.getHeapStart(); |
| rSerializedCAS.iv_vecFSHeapArray.resize(uiSegmentLength); |
| // copy the heap (better way to do this?) |
| for (size_t i=0; i<uiSegmentLength; i++) { |
| rSerializedCAS.iv_vecFSHeapArray[i] = daHeap[i]; |
| } |
| |
| // fill the vector of strings from the StringRefHeap |
| uima::lowlevel::FSHeap::TyStringHeap const & tyStringHeap = crHeap.iv_clTemporaryStringHeap; |
| uima::lowlevel::FSHeap::TyStringRefHeap const & tyStringRefHeap = crHeap.iv_clTemporaryStringRefHeap; |
| int uiStringRefLength = tyStringRefHeap.getTopOfHeap(); |
| int j = 1; // point at the first entry |
| rSerializedCAS.iv_vecStringSymbolTable.resize(1); |
| while (j < uiStringRefLength) { |
| UnicodeStringRef ustrp = UnicodeStringRef( tyStringHeap.getHeapStart()+ |
| tyStringRefHeap.getHeapValue(j), |
| (size_t) tyStringRefHeap.getHeapValue(j+1)); |
| rSerializedCAS.iv_vecStringSymbolTable.push_back(ustrp); |
| j += 2; |
| } |
| } |
| |
| |
| |
| void CASSerializer::serializeHeaps(uima::CAS const & crCAS, uima::internal::SerializedCAS & rSerializedCAS) { |
| |
| //serialize the fs heap and string heap |
| serializeFSHeapAndStringHeap(crCAS, rSerializedCAS); |
| |
| uima::internal::CASImpl const & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); |
| uima::lowlevel::FSHeap const & crHeap = crCASImpl.getHeap(); |
| |
| //8 bit heap |
| uima::lowlevel::FSHeap::Ty8BitHeap const & ty8BitHeap = crHeap.iv_clTemporary8BitHeap; |
| size_t uiSegmentLength = ty8BitHeap.getTopOfHeap(); |
| char* byteHeap = ty8BitHeap.getHeapStart(); |
| rSerializedCAS.iv_vecByteHeapArray.resize(uiSegmentLength); |
| for (size_t i=0; i<uiSegmentLength; i++) { |
| rSerializedCAS.iv_vecByteHeapArray[i] = byteHeap[i]; |
| } |
| |
| //16 bit heap |
| uima::lowlevel::FSHeap::Ty16BitHeap const & ty16BitHeap = crHeap.iv_clTemporary16BitHeap; |
| uiSegmentLength = ty16BitHeap.getTopOfHeap(); |
| short* shortHeap = ty16BitHeap.getHeapStart(); |
| rSerializedCAS.iv_vecShortHeapArray.resize(uiSegmentLength); |
| for (size_t i=0; i<uiSegmentLength; i++) { |
| rSerializedCAS.iv_vecShortHeapArray[i] = shortHeap[i]; |
| } |
| |
| //64 bit heap |
| uima::lowlevel::FSHeap::Ty64BitHeap const & ty64BitHeap = crHeap.iv_clTemporary64BitHeap; |
| uiSegmentLength = ty64BitHeap.getTopOfHeap(); |
| INT64* longHeap = ty64BitHeap.getHeapStart(); |
| rSerializedCAS.iv_vecLongHeapArray.resize(uiSegmentLength); |
| for (size_t i=0; i<uiSegmentLength; i++) { |
| rSerializedCAS.iv_vecLongHeapArray[i] = longHeap[i]; |
| } |
| |
| |
| |
| } |
| |
| |
| |
| |
| |
| |
| //--------------------------------------------------------------------- |
| // Indexed FS Format |
| // |
| // Element Size Number of Description |
| // (bytes) Elements |
| // ------------ --------- -------------------------------- |
| // 4 1 Number of Views in this CAS |
| // 4 1 Number of Sofas in base Index Repository = nBase |
| // 4 nBase TyFS array |
| // |
| // For each View: |
| // 4 1 Number of FS in sofa Index Repository = nFS |
| // 4 nFS TyFS array |
| // |
| //--------------------------------------------------------------------- |
| |
| |
| void CASSerializer::serializeIndexedFSs(uima::CAS & crCAS, |
| vector<uima::internal::SerializedCAS::TyNum> & iv_vecIndexedFSs) { |
| |
| uima::internal::CASImpl & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); |
| |
| int numViews = crCAS.getBaseCas()->iv_sofaCount; |
| iv_vecIndexedFSs.clear(); |
| iv_vecIndexedFSs.push_back(numViews); |
| |
| uima::lowlevel::IndexRepository * crIndexRep = |
| (uima::lowlevel::IndexRepository*)&crCASImpl.getBaseIndexRepository(); |
| |
| for (int view=0; view<=numViews; view++) { |
| vector<SerializedCAS::TyNum> perLoopIndexedFSs; |
| vector<uima::lowlevel::TyFSType> vecAllTypes; |
| perLoopIndexedFSs.clear(); |
| if (view==0) { |
| // First time through is for base CAS index |
| // FS returned should only be for SofaFS! |
| crIndexRep->getUsedIndexes(vecAllTypes); |
| } else { |
| // for all views found in the CAS, get new IndexRepository |
| crIndexRep = crCASImpl.iv_baseCas->iv_sofa2indexMap[view]; |
| if (crIndexRep == 0) { |
| // no indexed FS for this View, move on |
| iv_vecIndexedFSs.push_back(0); |
| continue; |
| } |
| crIndexRep->getUsedIndexes(vecAllTypes); |
| |
| //serialize the undefined index FSs |
| for (size_t i=0;i < crIndexRep->iv_undefinedindex.size(); i++ ) { |
| SerializedCAS::TyNum tyFSHeapIndex = (SerializedCAS::TyNum) crIndexRep->iv_undefinedindex[i]; |
| perLoopIndexedFSs.push_back(tyFSHeapIndex); |
| } |
| } |
| |
| // serialize index per type |
| if ( 0 == vecAllTypes.size() && 0 == perLoopIndexedFSs.size() ) { |
| // no indexed FS for this View, move on |
| iv_vecIndexedFSs.push_back(0); |
| continue; |
| } |
| for (size_t i=0; i<vecAllTypes.size(); ++i) { |
| vector<uima::lowlevel::internal::SingleIndex*> const & crSingleIndexes = |
| crIndexRep->getAllSingleIndexesForType(vecAllTypes[i]); |
| for (size_t j=0; j<crSingleIndexes.size(); ++j) { |
| unique_ptr<uima::lowlevel::IndexIterator> apIt(crSingleIndexes[j]->createIterator()); |
| for (apIt->moveToFirst(); apIt->isValid(); apIt->moveToNext()) { |
| uima::lowlevel::TyHeapCell pHeapCell = (uima::lowlevel::TyHeapCell) apIt->get(); |
| SerializedCAS::TyNum tyFSHeapIndex = (SerializedCAS::TyNum) pHeapCell; |
| perLoopIndexedFSs.push_back( tyFSHeapIndex ); |
| } |
| } |
| } |
| |
| // eliminate duplicates |
| sort(perLoopIndexedFSs.begin(), perLoopIndexedFSs.end()); |
| vector<SerializedCAS::TyNum>::iterator end = unique(perLoopIndexedFSs.begin(), perLoopIndexedFSs.end()); |
| // append indexedFSs from this loop |
| iv_vecIndexedFSs.push_back(end - perLoopIndexedFSs.begin()); |
| iv_vecIndexedFSs.insert(iv_vecIndexedFSs.end(), |
| perLoopIndexedFSs.begin(), |
| end); |
| } |
| } |
| |
| |
/* no more document in the CAS
| void CASSerializer::serializeDocument(uima::TCAS const & crCAS, uima::internal::SerializedCAS & rSerializedCAS) { |
| uima::internal::TCASImpl const & crTCASImpl = uima::internal::TCASImpl::promoteCAS(crCAS); |
| rSerializedCAS.iv_ulstrDocument = crTCASImpl.getDocumentText(); |
| } |
| */ |
| |
| void CASSerializer::serializeDefinitions(uima::internal::CASDefinition const & casDef, uima::internal::SerializedCAS & rSerializedCAS) { |
| serializeTypeSystem(casDef, rSerializedCAS); |
| serializeIndexDefinition(casDef, rSerializedCAS); |
| } |
| |
| |
| #ifdef UIMA_ENABLE_SERIALIZATION_TIMING |
| #define UIMA_SERIALIZATION_TIMING(x) x |
| #else |
| #define UIMA_SERIALIZATION_TIMING(x) |
| #endif |
| |
| void CASSerializer::serializeData(uima::CAS & crCAS, uima::internal::SerializedCAS & rSerializedCAS) { |
| // serializeDocument(crCAS, rSerializedCAS); |
| // serialize indexed FSs first so that the docAnnot can be created if necessary |
| UIMA_TPRINT("Serializing indexed FSs"); |
| UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.reset() ); |
| UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.start() ); |
| serializeIndexedFSs(*crCAS.getBaseCas(), rSerializedCAS.iv_vecIndexedFSs); |
| UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.stop() ); |
| UIMA_TPRINT("indexed FSs serialized"); |
| |
| UIMA_TPRINT("serializing all heaps"); |
| UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.reset() ); |
| UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.start() ); |
| //serializeFSHeapAndStringHeap(*crCAS.getBaseCas(), rSerializedCAS); |
| serializeHeaps(*crCAS.getBaseCas(), rSerializedCAS); |
| UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.stop() ); |
| UIMA_TPRINT("FS heap serialized"); |
| } |
| |
| //--------------------------------------------------------------------- |
| // Blob Format |
| // |
| // Element Size Number of Description |
| // (bytes) Elements |
| // ------------ --------- -------------------------------- |
| // 4 1 Blob key = "UIMA" in utf-8 |
| // 4 1 Version (currently = 1) |
| // 4 1 size of 32-bit FS Heap array = s32H |
| // 4 s32H 32-bit FS heap array |
| // 4 1 size of 16-bit string Heap array = sSH |
| // 2 sSH 16-bit string heap array |
| // 4 1 size of string Ref Heap array = sSRH |
| // 4 2*sSRH string ref offsets and lengths |
| // 4 1 size of FS index array = sFSI |
| // 4 sFSI FS index array |
| // 4 1 size of 8-bit Heap array = s8H |
| // 1 s8H 8-bit Heap array |
| // 4 1 size of 16-bit Heap array = s16H |
| // 2 s16H 16-bit Heap array |
| // 4 1 size of 64-bit Heap array = s64H |
| // 8 s64H 64-bit Heap array |
| //--------------------------------------------------------------------- |
| |
| |
| // estimate total size of serialized CAS data |
| size_t CASSerializer::getBlobSize(uima::CAS & crCAS) { |
| |
| // create STL vector of indexed FS so that we can size the output |
| UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.reset() ); |
| UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.start() ); |
| serializeIndexedFSs(*crCAS.getBaseCas(), iv_vecIndexedFSs); |
| UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.stop() ); |
| |
| // get a heap of references |
| uima::internal::CASImpl const & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); |
| uima::lowlevel::FSHeap const & crHeap = crCASImpl.getHeap(); |
| uima::lowlevel::FSHeap::TyFSHeap const & tyTempHeap = crHeap.iv_clTemporaryHeap; |
| uima::lowlevel::FSHeap::TyStringHeap const & tyStringHeap = crHeap.iv_clTemporaryStringHeap; |
| uima::lowlevel::FSHeap::TyStringRefHeap const & tyStringRefHeap = crHeap.iv_clTemporaryStringRefHeap; |
| uima::lowlevel::FSHeap::Ty8BitHeap const & ty8BitHeap = crHeap.iv_clTemporary8BitHeap; |
| uima::lowlevel::FSHeap::Ty16BitHeap const & ty16BitHeap = crHeap.iv_clTemporary16BitHeap; |
| uima::lowlevel::FSHeap::Ty64BitHeap const & ty64BitHeap = crHeap.iv_clTemporary64BitHeap; |
| |
| size_t uiFSHeapLength = tyTempHeap.getTopOfHeap(); |
| size_t uiStringHeapLength = tyStringHeap.getTopOfHeap(); |
| size_t uialignedStrLen = 2 * ((uiStringHeapLength + 1)/2); |
| size_t uiRefHeapLength = tyStringRefHeap.getTopOfHeap(); |
| size_t uiIndexedFSLength = iv_vecIndexedFSs.size(); |
| size_t ui8BitHeapLength = ty8BitHeap.getTopOfHeap(); |
| size_t uialigned8BitHeapLength = 4 * ((ui8BitHeapLength+3)/4); |
| size_t ui16BitHeapLength = ty16BitHeap.getTopOfHeap(); |
| size_t uialigned16BitHeapLength = 2 * ((ui16BitHeapLength+1)/2); |
| size_t ui64BitHeapLength = ty64BitHeap.getTopOfHeap(); |
| |
| |
| |
| size_t blobSize = 2*4 // key and version |
| + (1 + uiFSHeapLength) * 4 // FSHeap length and data |
| + 1*4 + (uialignedStrLen * 2) // StringHeap length and data |
| + (1 + uiRefHeapLength) * 4 // StringRefheap length and data |
| + (1 + uiIndexedFSLength) * 4 // Indexed FS length and data |
| + (1*4 + uialigned8BitHeapLength) // 8 Bit Heap length and data |
| + (1*4 + uialigned16BitHeapLength*2 ) //16 Bit Heap length and data |
| + (1*4 + ui64BitHeapLength*8 ); //64 Bit Heap length and data |
| return blobSize; |
| } |
| |
| // serialize CAS data into single blob format |
| size_t CASSerializer::getBlob(uima::CAS & crCAS, void * buffer, size_t maxSize) { |
| |
| UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.reset() ); |
| UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.start() ); |
| |
| // get a heap of references |
| uima::internal::CASImpl const & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); |
| uima::lowlevel::FSHeap const & crHeap = crCASImpl.getHeap(); |
| uima::lowlevel::FSHeap::TyFSHeap const & tyTempHeap = crHeap.iv_clTemporaryHeap; |
| uima::lowlevel::FSHeap::TyStringHeap const & tyStringHeap = crHeap.iv_clTemporaryStringHeap; |
| uima::lowlevel::FSHeap::TyStringRefHeap const & tyStringRefHeap = crHeap.iv_clTemporaryStringRefHeap; |
| uima::lowlevel::FSHeap::Ty8BitHeap const & ty8BitHeap = crHeap.iv_clTemporary8BitHeap; |
| uima::lowlevel::FSHeap::Ty16BitHeap const & ty16BitHeap = crHeap.iv_clTemporary16BitHeap; |
| uima::lowlevel::FSHeap::Ty64BitHeap const & ty64BitHeap = crHeap.iv_clTemporary64BitHeap; |
| |
| size_t uiFSHeapLength = tyTempHeap.getTopOfHeap(); |
| size_t uiStringHeapLength = tyStringHeap.getTopOfHeap(); |
| size_t uialignedStrLen = 2 * ((uiStringHeapLength + 1)/2); |
| size_t uiRefHeapLength = tyStringRefHeap.getTopOfHeap(); |
| size_t uiIndexedFSLength = iv_vecIndexedFSs.size(); |
| |
| size_t ui8BitHeapLength = ty8BitHeap.getTopOfHeap(); |
| size_t uialigned8BitHeapLength = 4 * ((ui8BitHeapLength+3)/4); |
| size_t ui16BitHeapLength = ty16BitHeap.getTopOfHeap(); |
| size_t uialigned16BitHeapLength = 2 * ((ui16BitHeapLength+1)/2); |
| size_t ui64BitHeapLength = ty64BitHeap.getTopOfHeap(); |
| |
| size_t blobSize = 2*4 // key and version |
| + (1 + uiFSHeapLength) * 4 // FSHeap length and data |
| + 1*4 + (uialignedStrLen * 2) // StringHeap length and data |
| + (1 + uiRefHeapLength) * 4 // StringRefheap length and data |
| + (1 + uiIndexedFSLength) * 4 // Indexed FS length and data |
| + (1*4 + uialigned8BitHeapLength) // 8 Bit Heap length and data |
| + (1*4 + uialigned16BitHeapLength*2 ) //16 Bit Heap length and data |
| + (1*4 + ui64BitHeapLength*8 ); //64 Bit Heap length and data |
| |
| |
| if (blobSize > maxSize) { |
| return 0; // can't serialize into given buffer |
| } |
| |
| // copy all data into the blob buffer |
| int* intPtr = (int*) buffer; |
| |
| #if defined(WORDS_BIGENDIAN) |
| char key[] = "UIMA"; |
| #else |
| char key[] = "AMIU"; |
| #endif |
| int version = 1; |
| intPtr[0] = ((int*)key)[0]; |
| intPtr[1] = version; |
| intPtr[2] = uiFSHeapLength; |
| assert (blobSize > (size_t)((intPtr + 3 + uiFSHeapLength) - (int*)buffer)); |
| memcpy(intPtr+3, tyTempHeap.getHeapStart(), 4*uiFSHeapLength); |
| intPtr += 3 + uiFSHeapLength; |
| |
| intPtr[0] = uialignedStrLen; |
| assert (blobSize > (size_t)((intPtr + 1 + uiStringHeapLength/2) - (int*)buffer)); |
| memcpy(intPtr+1, tyStringHeap.getHeapStart(), 2*uiStringHeapLength); |
| intPtr += 1 + uialignedStrLen/2; |
| |
| intPtr[0] = uiRefHeapLength; |
| assert (blobSize > (size_t)((intPtr + 1 + uiRefHeapLength) - (int*)buffer)); |
| memcpy(intPtr+1, tyStringRefHeap.getHeapStart(), 4*uiRefHeapLength); |
| intPtr += 1 + uiRefHeapLength; |
| |
| intPtr[0] = uiIndexedFSLength; |
| assert (blobSize >= (size_t)((intPtr + 1 + uiIndexedFSLength) - (int*)buffer)); |
| memcpy(intPtr+1, &iv_vecIndexedFSs[0], 4*uiIndexedFSLength); |
| intPtr += 1 + uiIndexedFSLength; |
| |
| intPtr[0] = uialigned8BitHeapLength; |
| assert (blobSize > (size_t)((intPtr + 1 + uialigned8BitHeapLength/4) - (int*)buffer)); |
| memcpy(intPtr+1, ty8BitHeap.getHeapStart(), ui8BitHeapLength); |
| intPtr += 1 + uialigned8BitHeapLength/4; |
| |
| intPtr[0] = uialigned16BitHeapLength; |
| assert (blobSize > (size_t)((intPtr + 1 + ui16BitHeapLength/2) - (int*)buffer)); |
| memcpy(intPtr+1, ty16BitHeap.getHeapStart(), 2*ui16BitHeapLength); |
| intPtr += 1 + uialigned16BitHeapLength/2; |
| |
| intPtr[0] = ui64BitHeapLength; |
| assert (blobSize > (size_t)((intPtr + 1 + ui64BitHeapLength*2) - (int*)buffer)); |
| memcpy(intPtr+1, ty64BitHeap.getHeapStart(), 8*ui64BitHeapLength); |
| |
| UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.stop() ); |
| return blobSize; |
| } |
| |
| |
| |
| |
| } |
| |
| } |
| |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| |
| |