blob: 989bd3c59af3b2e32e8cdea393848688191cc349 [file] [log] [blame]
#ifndef UIMA_INTERNAL_SERIALIZEDCAS_HPP
#define UIMA_INTERNAL_SERIALIZEDCAS_HPP
/** \file internal_serializedcas.hpp .
-----------------------------------------------------------------------------
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-----------------------------------------------------------------------------
Description:
-----------------------------------------------------------------------------
-------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Include dependencies */
/* ----------------------------------------------------------------------- */
#include "uima/pragmas.hpp" // must be first file to be included to get pragmas
#include <cassert>
#include <cstring>
#include <iosfwd>
#include <vector>
#include "uima/types.h"
#include "uima/unistrref.hpp"
#include "uima/lowlevel_internal_heap.hpp"
/* ----------------------------------------------------------------------- */
/* Constants */
/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Forward declarations */
/* ----------------------------------------------------------------------- */
namespace uima {
namespace internal {
// Forward declaration only: SerializedCAS names CASSerializer as a friend,
// which requires nothing more than the class name to be known here.
class CASSerializer;
}
}
/* ----------------------------------------------------------------------- */
/* Types / Classes */
/* ----------------------------------------------------------------------- */
namespace uima {
namespace internal {
/**
* SerializedCAS is the container for all data structures which are the
* result of serializing a CAS/CASDefinition.
*
* The data format is now described in detail:
* 1. Type system: In general, every type and feature is basically an integer > 0.
* - type inheritance table: an integer array which for each type contains its parent,
* e.g. iv_vecTypeInheritanceTable[ t ] is t's parent.
* (length: number of types)
* - feature definition table: an integer array containing for each feature its introduction
* and range type, i.e.,
* iv_vecFeatureDefinitionTable[ 2*f ] is the intro type of f and
* iv_vecFeatureDefinitionTable[ 2*f+1 ] is the range type of f
* (length: 2 * (number of features+1) )
* - feature offset table: an integer array indicating for each feature its offset
* on the heap, i.e.,
* iv_vecFeatureOffsetTable[ f ]
* (length: number of features)
* - type priority table: integer array indicating the internal type priority number:
* Type t1 has priority over t2 iff
* iv_vecTypePriorityTable[ t1 ] < iv_vecTypePriorityTable[ t2 ]
* (length: number of types)
* - type symbol table: string array indicating the name of each type:
* iv_vecTypeSymbolTable[ t ] is the fully qualified name of t
* (length: number of types)
* - feature symbol table: string array indicating the name of each feature:
* iv_vecFeatureSymbolTable[ f ] is the fully qualified name of f
* (length: number of features)
* - string sub types: integer array indicating which types are string sub types
* iv_stringSubTypes[ i ] is the ith string sub type
* (length: number of string sub types)
* - string subtype values: string array containing the string values for each string subtype:
* iv_stringSubTypeValues[ i ] is some string sub type value
* (length: number of string subtype values)
* - string subtype value positions: integer array indicating where the string values in
* the array above start:
* iv_stringSubTypeValuePos[ t ] is the index where the string sub types in
* iv_stringSubTypeValues start.
*
* 2. Index definitions:
* - index ID table: string array indicating the names of the indexes:
* iv_vecIndexIDTable[ i ] is the name of the ith index
* (length: number of indexes)
* - comparator definition table: integer array of pairs of a feature and a comp ID:
* iv_vecComparatorDefinitionTable[ i ] is a feature and
* iv_vecComparatorDefinitionTable[ i+1 ] is an integer indicating if the comparison
* is standard or reverse
* for some adequate i (see next bullet)
* (length: number of overall features for all comparators)
* - comparator start table: integer array indicating for each index ID where its
* comparator description starts in the comparator definition table:
* iv_vecComparatorStartTable[ i ] is an index into iv_vecComparatorDefinitionTable for
* the index with the name iv_vecIndexIDTable[ i ]
* (length: number of indexes)
*
* 3. The Feature Structure Heap:
* - heap array: the heap as one contiguous integer array:
* iv_vecFSHeapArray[ i ] is the ith heap cell
* (length: overall heap size)
* - string symbol table: the string symbol table for the heap as a string array
* iv_vecStringSymbolTable[ i ] is the ith string referenced on the heap.
* (length: number of strings referenced on the heap)
*
* 4. Indexes:
* - indexed FSs: integer array containing all indexed FSs
* iv_vecIndexedFSs[ i ] is the ith feature structure in any index
* (length: number of feature structures in any index)
*
*/
class UIMA_LINK_IMPORTSPEC SerializedCAS {
// Friend: CASSerializer can reach the private members below
// (the string heap and addString()).
friend class uima::internal::CASSerializer;
public:
// Numeric cell type (WORD32) used for every integer table and for the FS heap.
typedef WORD32 TyNum;
private:
// Copying is not part of the public interface: both copy operations are
// declared private and have no definition in this header
// (presumably left undefined on purpose - confirm in the implementation file).
SerializedCAS & operator=(SerializedCAS const &);
SerializedCAS(SerializedCAS const &);
// Heap of UChar cells into which addString() copies string contents.
typedef uima::lowlevel::internal::Heap<UChar> TyStringHeap;
TyStringHeap iv_stringHeap;
// Copies the characters referenced by crRef into iv_stringHeap and returns a
// reference to that copy. Defined inline at the end of this header.
UnicodeStringRef addString(UnicodeStringRef const & crRef);
// NOTE(review): not used anywhere in this header; presumably backs empty
// string references handed out by the implementation - confirm.
icu::UnicodeString iv_emptyString;
public:
// The document, as returned by getDocument().
UnicodeStringRef iv_ulstrDocument;
// --- 1. type system (layout of each table is given in the class comment) ---
// parent of each type
std::vector<TyNum> iv_vecTypeInheritanceTable;
// intro type / range type pairs, two entries per feature
std::vector<TyNum> iv_vecFeatureDefinitionTable;
// fully qualified name of each type
std::vector<UnicodeStringRef> iv_vecTypeSymbolTable;
// fully qualified name of each feature
std::vector<UnicodeStringRef> iv_vecFeatureSymbolTable;
// heap offset of each feature
std::vector<TyNum> iv_vecFeatureOffsetTable;
// internal priority number of each type (smaller number = higher priority)
std::vector<TyNum> iv_vecTypePriorityTable;
// the types which are string sub types
std::vector<TyNum> iv_stringSubTypes;
// all string sub type values
std::vector<UnicodeStringRef> iv_stringSubTypeValues;
// start positions into iv_stringSubTypeValues
std::vector<TyNum> iv_stringSubTypeValuePos;
// --- 2. index definitions ---
// name of each index
std::vector<UnicodeStringRef> iv_vecIndexIDTable;
// (feature, comparison kind) pairs for all comparators
std::vector<TyNum> iv_vecComparatorDefinitionTable;
// per index: where its comparator starts in iv_vecComparatorDefinitionTable
std::vector<TyNum> iv_vecComparatorStartTable;
// NOTE(review): not described in the class comment; presumably one
// "kind of index" number per entry of iv_vecIndexIDTable - confirm.
std::vector<TyNum> iv_vecIndexKindTable;
// --- 3. feature structure heap ---
// the FS heap as one contiguous array of cells
std::vector<TyNum> iv_vecFSHeapArray;
// strings referenced on the heap
std::vector<UnicodeStringRef> iv_vecStringSymbolTable;
// --- 4. indexes ---
// all feature structures which are in any index
std::vector<TyNum> iv_vecIndexedFSs;
// --- additional heaps, one per cell width ---
// 8bit heap
std::vector<char> iv_vecByteHeapArray;
// 16bit heap
std::vector<short> iv_vecShortHeapArray;
// 64bit heap (INT64 cells)
std::vector<INT64> iv_vecLongHeapArray;
public:
// Construction and destruction are defined out of line.
SerializedCAS();
~SerializedCAS();
// The accessors below each return a const reference to the member of the
// matching name; the referenced vector is owned by this object.
// type system
std::vector<TyNum> const & getTypeInheritanceTable() const {
return iv_vecTypeInheritanceTable;
}
std::vector<TyNum> const & getFeatureDefinitionTable() const {
return iv_vecFeatureDefinitionTable;
}
std::vector<UnicodeStringRef> const & getTypeSymbolTable() const {
return iv_vecTypeSymbolTable;
}
std::vector<UnicodeStringRef> const & getFeatureSymbolTable() const {
return iv_vecFeatureSymbolTable;
}
std::vector<TyNum> const & getFeatureOffsetTable() const {
return iv_vecFeatureOffsetTable;
}
std::vector<TyNum> const & getTypePriorityTable() const {
return iv_vecTypePriorityTable;
}
std::vector<TyNum> const & getStringSubTypes() const {
return iv_stringSubTypes;
}
std::vector<UnicodeStringRef> const & getStringSubTypeValues() const {
return iv_stringSubTypeValues;
}
std::vector<TyNum> const & getStringSubTypeValuePos() const {
return iv_stringSubTypeValuePos;
}
// index definition
std::vector<UnicodeStringRef> const & getIndexIDTable() const {
return iv_vecIndexIDTable;
}
std::vector<TyNum> const & getComparatorDefinitionTable() const {
return iv_vecComparatorDefinitionTable;
}
std::vector<TyNum> const & getComparatorStartTable() const {
return iv_vecComparatorStartTable;
}
std::vector<TyNum> const & getIndexKindTable() const {
return iv_vecIndexKindTable;
}
// document (returned by value: a copy of the string reference, not of the text)
UnicodeStringRef getDocument() const {
return iv_ulstrDocument;
}
// fsheap
std::vector<TyNum> const & getFSHeapArray() const {
return iv_vecFSHeapArray;
}
// 8bit heap
std::vector<char> const & getByteHeapArray() const {
return iv_vecByteHeapArray;
}
// 16bit heap
std::vector<short> const & getShortHeapArray() const {
return iv_vecShortHeapArray;
}
// 64bit heap (INT64 cells)
std::vector<INT64> const & getLongHeapArray() const {
return iv_vecLongHeapArray;
}
// strings referenced on the FS heap
std::vector<UnicodeStringRef> const & getStringSymbolTable() const {
return iv_vecStringSymbolTable;
}
// indexes
std::vector<TyNum> const & getIndexedFSs() const {
return iv_vecIndexedFSs;
}
// Defined out of line; presumably empties all tables and heaps - confirm
// against the implementation.
void reset();
// returns how many real Java objects (arrays and string) must be created
// in order to create a serialized representations of this CAS
// Definitions part: one object per type name, feature name, index ID and
// string sub type value, plus a fixed 64 (presumably headroom for the
// arrays themselves - TODO confirm the origin of this constant).
size_t getNumberOfJavaObjectsToBeCreatedDefinitions() const {
return iv_vecTypeSymbolTable.size()
+ iv_vecFeatureSymbolTable.size()
+ iv_vecIndexIDTable.size()
+ iv_stringSubTypeValues.size()
+ 64;
}
// Data part: one object per string referenced on the heap, plus the same
// fixed 64 as above.
size_t getNumberOfJavaObjectsToBeCreatedData() const {
return iv_vecStringSymbolTable.size() + 64;
}
// Defined out of line; takes the stream to print to.
void print(std::ostream &) const;
};
}
}
/* ----------------------------------------------------------------------- */
/* Implementation */
/* ----------------------------------------------------------------------- */
namespace uima {
namespace internal {
/**
 * Copies the characters referenced by crRef into iv_stringHeap and returns a
 * UnicodeStringRef pointing at the copy.
 *
 * length()+1 cells are reserved, so one cell follows the copied characters.
 * That cell is set to 0 explicitly so the copy is terminated regardless of
 * how the heap initialises freshly reserved cells.
 *
 * NOTE(review): whether references returned by earlier calls stay valid when
 * the heap grows depends on Heap::increaseHeap, which is not visible here -
 * confirm before keeping such references across calls.
 *
 * @param crRef  the string to copy; may be empty
 * @return       reference to the copy inside iv_stringHeap, with the same
 *               length as crRef
 */
inline UnicodeStringRef SerializedCAS::addString(UnicodeStringRef const & crRef) {
  size_t const uiLength = crRef.length();
  size_t const uiBytes = crRef.getSizeInBytes();
  // a UChar cell is expected to occupy exactly two bytes
  assert( (2*uiLength) == uiBytes );
  int const iOffset = iv_stringHeap.increaseHeap(uiLength + 1);
  UChar * const pCopy = iv_stringHeap.getHeapStart() + iOffset;
  // memcpy must not be handed a null source even for a zero byte count,
  // and an empty reference may carry a null buffer.
  if (uiBytes > 0) {
    memcpy(pCopy, crRef.getBuffer(), uiBytes);
  }
  // terminate the copy in the extra cell reserved above
  pCopy[uiLength] = 0;
  return UnicodeStringRef(pCopy, uiLength);
}
}
}
/* ----------------------------------------------------------------------- */
#endif