blob: 44f0600103a7efe78aa7280ce38c1f3da1571c7b [file] [log] [blame]
#ifndef UIMA_CAS_HPP
#define UIMA_CAS_HPP
/** \file cas.hpp .
-----------------------------------------------------------------------------
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-----------------------------------------------------------------------------
\brief Contains class uima::CAS
Description: The CAS object provides access to the type system, to indexes,
iterators and filters (constraints).
It also lets you create new annotations, Sofas and other data
structures.
Use uima::AnalysisEngine::newCAS() to instantiate a CAS
-----------------------------------------------------------------------------
-------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Include dependencies */
/* ----------------------------------------------------------------------- */
#include "uima/pragmas.hpp"
#include "uima/lowlevel_typedefs.hpp"
#include "uima/featurestructure.hpp"
#include "uima/language.hpp"
#include "uima/sofaid.hpp"
#include "uima/sofastream.hpp"
#include "uima/fsindex.hpp"
#include <map>
#include <vector>
/* ----------------------------------------------------------------------- */
/* Constants */
/* ----------------------------------------------------------------------- */
/* option names for engine */
// Names of the engine configuration options, wrapped in _TEXT().
#define UIMA_ENGINE_CONFIG_OPTION_PREDEF_TYPES _TEXT("PredefinedTypes")
#define UIMA_ENGINE_CONFIG_OPTION_FSHEAP_PAGESIZE _TEXT("FSHeapPageSize")
#define UIMA_ENGINE_CONFIG_OPTION_STRINGHEAP_PAGESIZE _TEXT("StringHeapPageSize")
#define UIMA_ENGINE_CONFIG_OPTION_STRINGREFHEAP_PAGESIZE _TEXT("StringRefHeapPageSize")
// Default page sizes. Presumably these are the fallbacks used when the
// corresponding *_PAGESIZE option above is not configured — TODO confirm at the use sites.
const size_t UIMA_ENGINE_CONFIG_DEFAULT_FSHEAP_PAGESIZE = 10000; // this value seems good for search engine-style tokenization, old value = 128 * 1024
// The string heap default is defined as the FS heap default.
const size_t UIMA_ENGINE_CONFIG_DEFAULT_STRINGHEAP_PAGESIZE = UIMA_ENGINE_CONFIG_DEFAULT_FSHEAP_PAGESIZE;
// The string-ref heap default is defined as the string heap default.
const size_t UIMA_ENGINE_CONFIG_DEFAULT_STRINGREFHEAP_PAGESIZE = UIMA_ENGINE_CONFIG_DEFAULT_STRINGHEAP_PAGESIZE;
/* ----------------------------------------------------------------------- */
/* Forward declarations */
/* ----------------------------------------------------------------------- */
namespace uima {
  // Forward declarations of the classes this header refers to by name only.
  // CAS itself is declared first because fromHeapCellTempl() below takes a
  // uima::CAS reference before the class definition appears.
  class CAS;
  class FeatureStructure;
  class FSIndexRepository;
  class FSFilterBuilder;
  class ListFS;
  class SofaFS;
  class AnnotationFS;
  class DocumentFS;
  class FSIterator;
  class ANIterator;
  class SofaDataStream;
  class LocalSofaDataStream;
  class XCASDeserializerHandler;
  class XmiDeserializerHandler;
  class XCASWriter;
  class XmiWriter;
  class ComponentInfo;
  class AnnotatorContext;
  class Framework;
  class CASPool;
  class ANIndex;

  // Low-level classes referenced by CAS members and friend declarations.
  namespace lowlevel {
    class IndexRepository;
    class IndexRepositoryDefinition;
    class FSHeap;
    class TypeSystem;
    class DefaultFSIterator;
  }

  // Internal classes and the free function that class CAS declares as friends.
  namespace internal {
    class CASDefinition;
    class EngineBase;
    class CASSerializer;
    class CASDeserializer;
    class CASImpl;
    void fromHeapCellTempl(lowlevel::TyHeapCell, uima::CAS &, FeatureStructure &);
  }
}
/* ----------------------------------------------------------------------- */
/* Types / Classes */
/* ----------------------------------------------------------------------- */
namespace uima {
// Exception types declared for the CAS API. Each invocation passes
// CASException as the second macro argument (presumably the base class —
// confirm against the definition of UIMA_EXC_CLASSDECLARE).
// createFS() documents that it throws CouldNotCreateFSOfFinalTypeException.
UIMA_EXC_CLASSDECLARE(CouldNotCreateFSOfFinalTypeException, CASException);
UIMA_EXC_CLASSDECLARE(DuplicateSofaNameException, CASException);
UIMA_EXC_CLASSDECLARE(InvalidBaseCasMethod, CASException);
/**
* The CAS (Common Analysis System)
* is the container where all feature structures are stored and
* maintained.
* The CAS object provides access to the type system, to indexes,
* iterators and filters (constraints).
* It also lets you create new annotations, Sofas and other data
* structures.
* <br>
* Use uima::AnalysisEngine::newCAS() to instantiate a CAS.
* <br>
* <br>
* The CAS APIs ( uima::CAS ) provide access to the following:
* <ol>
* <li>The creation of <strong>feature structures</strong> ( uima::FeatureStructure ).<br>
* These have one or more feature values which are
* like public data members of a feature structure.<br>
* The feature structures are organized in
* <strong>indexes</strong> ( uima::FSIndex ).
* <strong>Iterators</strong> ( uima::FSIterator ) over indexes are used to
* access individual feature structures.
* <li>The <strong>Type System</strong> ( uima::TypeSystem )
* where all possible feature structures are defined in terms of
* <strong>@link uima::Type types@endlink</strong>
* and <strong>@link uima::Feature features@endlink</strong>.
* <li>The creation of and access to Sofas ( uima::SofaFS ) and CAS views ( uima::CAS ) .
* <li>A set of @link PreDefTypes predefined types@endlink,
* @link PreDefFeatures features@endlink,
* @link PreDefIndexes indexes@endlink and
* @link UtilFuncts Utility Functions@endlink
* for text analysis.
* </ol>
*/
class UIMA_LINK_IMPORTSPEC CAS {
friend class uima::internal::FSPromoter;
friend class uima::lowlevel::FSHeap;
friend class ListFS;
friend class FeatureStructure;
friend class SofaFS;
friend class uima::internal::EngineBase;
friend class uima::internal::CASSerializer;
friend class uima::internal::CASDeserializer;
friend class uima::XCASWriter;
friend class uima::XmiWriter;
friend class uima::XCASDeserializerHandler;
friend class uima::XmiDeserializerHandler;
friend class uima::Framework;
friend class uima::CASPool;
friend class AnnotationFS;
friend class DocumentFS;
friend void uima::internal::fromHeapCellTempl(lowlevel::TyHeapCell, uima::CAS &, FeatureStructure &);
private:
// Default construction, copy construction and copy assignment are private,
// so only CAS itself and its friends can name them.
// NOTE(review): presumably declared without a definition to forbid copying — confirm in the implementation file.
CAS(void);
CAS(CAS const &);
CAS & operator=(CAS const &);
// Remove a View from sofaMap
// (presumably the sofaNum-to-view map iv_sofa2tcasMap — TODO confirm)
void dropView(int sofaNum);
// private calls for internal use
// Initial view accessor (presumably returns iv_initialView — TODO confirm).
CAS* getInitialView();
// Sofa creation helpers; name and mime type are passed as C strings or as UnicodeStringRefs.
SofaFS createLocalSofa(const char* sofaName, const char* mimeType);
SofaFS createSofa(UnicodeStringRef const sofaName, UnicodeStringRef const mimeType);
// Sofa lookup by Sofa number or by Sofa name.
SofaFS getSofa(int sofaNum);
SofaFS getSofa(char* sofaName);
SofaFS getSofa(UnicodeStringRef sofaName);
// Low-level index repository for the given Sofa
// (presumably looked up in iv_sofa2indexMap — TODO confirm).
uima::lowlevel::IndexRepository * getIndexRepositoryForSofa(SofaFS sofa);
// Presumably increments the absolute Sofa counter iv_sofaCount — TODO confirm.
void bumpSofaCount();
// Presumably raises InvalidBaseCasMethod when a view-only operation is
// invoked on the base CAS — TODO confirm.
void invalidBaseCasMethod();
void registerView(SofaFS);
// Document-annotation and document-text maintenance helpers; their exact
// behavior is defined in the implementation file.
void updateDocumentAnnotation( );
void copyDocumentString(UnicodeStringRef);
void refreshCachedTypes();
void createDocumentAnnotation( void );
void pickupDocumentAnnotation( );
// NOTE(review): "Deserializtion" is a misspelling of "Deserialization"; the
// name is kept because its definition and callers live outside this header.
void setDocTextFromDeserializtion(UnicodeStringRef);
// Reports the number of views as the base CAS's absolute Sofa counter
// (iv_baseCas->iv_sofaCount). Defined in-class, hence implicitly inline.
int getNumViews() {
  const int sofaTotal = iv_baseCas->iv_sofaCount;
  return sofaTotal;
}
// Maps a low-level heap cell to a CAS reference (presumably the view that
// owns the feature structure stored in that cell — TODO confirm in the implementation).
CAS & getCasForTyFS(lowlevel::TyHeapCell);
// void updateAndIndexDocumentAnnotation();
protected:
// Sofa number of this CAS object; returned by getSofaNum().
int iv_sofaNum;
// CAS definition passed to the constructors.
uima::internal::CASDefinition * iv_casDefinition;
// Ownership flag for iv_casDefinition (see the constructor that takes
// bOwnsCASDefinition); presumably controls deletion on destruction — TODO confirm.
bool bOwnsCASDefinition;
TypeSystem * iv_typeSystem;
uima::lowlevel::FSHeap * iv_heap;
uima::lowlevel::IndexRepository * iv_indexRepository;
uima::FSFilterBuilder * iv_filterBuilder;
// absolute Sofa counter
int iv_sofaCount;
// maps sofaNum to Cas views
std::map<int, CAS*> iv_sofa2tcasMap;
// maps sofaNum to index repository
std::vector<uima::lowlevel::IndexRepository *> iv_sofa2indexMap;
// reference to one-and-only base CAS
CAS* iv_baseCas;
// reference to one-and-only initial View
CAS* iv_initialView;
// True for the base CAS; isView() returns the negation of this flag.
bool isbaseCas;
bool initialSofaCreated;
bool isDeletingViews; //set this flag to true when destroying CAS
// Presumably set by setCurrentComponentInfo() — TODO confirm.
AnnotatorContext *iv_componentInfo;
// Low-level handles for the document annotation type, its language features
// and the document annotation FS (presumably cached lookups, cf. refreshCachedTypes() — TODO confirm).
uima::lowlevel::TyFSType iv_utDocumentType;
uima::lowlevel::TyFSFeature iv_utDocumentLangAsIntFeat;
uima::lowlevel::TyFSFeature iv_utDocumentLangAsStrFeat;
uima::lowlevel::TyFS iv_tyDocumentAnnotation;
// Document text pointer and its length.
// NOTE(review): units of the length are presumably UChar code units — confirm.
UChar const * iv_cpDocument;
size_t iv_uiDocumentLength;
// Presumably the CAS-owned copy made when the text is copied into the CAS
// (cf. bCopyToCAS in setDocumentText) — TODO confirm.
UChar * iv_copyOfDocument;
/**
* Add a copy of <code>crString</code> to the string heap.
* @param crString the string to add
* @return an int identifying the new copy
*         (NOTE(review): earlier docs said "an LString"; presumably this is a
*         string-ref heap offset usable with getString() — confirm)
*/
int addString(icu::UnicodeString const & crString);
/**
* Add a copy of <code>cpString</code> to the string heap.
* @param cpString a pointer to the string to copy (may not be zero terminated)
* @param uiLen number of Unicode code units (not bytes!) to copy
* @return an int identifying the new copy
*         (NOTE(review): earlier docs said "an LString"; presumably this is a
*         string-ref heap offset usable with getString() — confirm)
*/
int addString(UChar const * cpString, size_t uiLen);
/**
* Add a copy of <code>uls</code> to the string heap.
* @param uls a reference to the string to copy (may not be zero terminated)
* @return an int identifying the new copy
*         (NOTE(review): earlier docs said "an LString"; presumably this is a
*         string-ref heap offset usable with getString() — confirm)
*/
int addString(const UnicodeStringRef & uls );
/**
* Look up the string referenced by <code>strRef</code> in the string heap.
* @param strRef the offset in the strRefHeap
* @return a UnicodeStringRef to the data in the stringHeap
*/
UnicodeStringRef getString(int strRef);
/**
* Construct a CAS.
* @param uiFSHeapPageSize the number of heap cells a heap page should contain.
* @param uiStringHeapPageSize the number of bytes a string heap page should contain.
* @param uiStringRefHeapPageSize the number of bytes a string-ref heap page should contain.
*/
CAS(uima::internal::CASDefinition &,
size_t uiFSHeapPageSize,
size_t uiStringHeapPageSize,
size_t uiStringRefHeapPageSize);
/**
* Construct a CAS and specify ownership of CASDefinition object.
*/
CAS(uima::internal::CASDefinition &,
bool bOwnsCASDefinition,
size_t uiFSHeapPageSize,
size_t uiStringHeapPageSize,
size_t uiStringRefHeapPageSize);
CAS(CAS* baseCas, SofaFS aSofa);
/**
* Only for internal use. Return a View for given Sofa on heap
*/
CAS* getViewBySofaNum(int sofaNum);
/**
* Only for internal use.
*/
void registerInitialSofa();
/**
* Only for internal use.
*/
bool isInitialSofaCreated();
/**
* Only for internal use.
*/
SofaFS createInitialSofa(UnicodeStringRef const mimeType);
public:
virtual ~CAS();
/**
 * Accessor for the Sofa number stored in this CAS object (iv_sofaNum).
 * @return the Sofa number
 */
int getSofaNum() {
  const int sofaNumber = iv_sofaNum;
  return sofaNumber;
}
/**
* True if a CAS view
*
* NOTE(review): this description is the same as the one on isView(). The
* name suggests the function reports a backward-compatibility mode rather
* than view-ness — confirm against the implementation and reword.
*/
bool isBackwardCompatibleCas();
/**
 * Access the base CAS pointer held by this object (iv_baseCas).
 * Called on a View this yields the base CAS; called on the base CAS
 * it yields the base CAS itself.
 * @return pointer to the base CAS
 */
CAS* getBaseCas() {
  CAS* const baseCas = iv_baseCas;
  return baseCas;
}
/**
 * Tells whether this object is a CAS view, i.e. not the base CAS.
 * @return false for the base CAS, true otherwise
 * @deprecated
 */
bool isView() {
  if (isbaseCas) {
    return false;
  }
  return true;
}
/**
* Resets the CAS. In particular, all data structures meant only to
* live throughout a document are deleted.
*/
TyErrorId reset(void);
/**
* get the FSFilterBuilder associated with this CAS.
* @see FSFilterBuilder
*/
FSFilterBuilder const & getFSFilterBuilder() const;
/**
* Get the TypeSystem (const version).
*/
TypeSystem const & getTypeSystem(void) const;
/**
* Get the FSHeap pointer
*/
uima::lowlevel::FSHeap * getHeap();
/**
* create an FS of type <code>crType</code>
* @param crType the type of FS to create
* @throws CouldNotCreateFSOfFinalTypeException
* @return the created feature structure
*/
FeatureStructure createFS(Type const & crType);
/**
* create a feature structure of type FS Array.
* Each of the uiSize elements in the array is a FeatureStructure.
* @param uiSize the size of the array
*/
ArrayFS createArrayFS(size_t uiSize);
/**
* create a feature structure of type float array.
* Each of the uiSize elements in the array is a float.
* @param uiSize the size of the array
* @return the created FloatArrayFS
*/
FloatArrayFS createFloatArrayFS(size_t uiSize );
/**
* create a feature structure of type integer array.
* Each of the uiSize elements in the array is an int.
* @param uiSize the size of the array
* @return the created IntArrayFS
*/
IntArrayFS createIntArrayFS(size_t uiSize );
/**
* create a feature structure of type string array.
* Each of the uiSize elements in the array is a string.
* @param uiSize the size of the array
* @return the created StringArrayFS
*/
StringArrayFS createStringArrayFS(size_t uiSize );
/**
* create a feature structure of type empty FS list (list length is zero)
* Each element in the list is a FeatureStructure.
*/
ListFS createListFS();
/**
* create a feature structure of type empty float list (list length is zero).
* Each element in the list is a float.
*/
FloatListFS createFloatListFS();
/**
* create a feature structure of type empty int list (list length is zero).
* Each element in the list is an int.
*/
IntListFS createIntListFS();
/**
* create a feature structure of type empty string list (list length is zero).
* Each element in the list is a string.
*/
StringListFS createStringListFS();
/**
* create a feature structure of type boolean array.
* Each of the uiSize elements in the array is a boolean.
* @param uiSize the size of the array
* @return the created BooleanArrayFS
*/
BooleanArrayFS createBooleanArrayFS(size_t uiSize );
/**
* create a feature structure of type byte array.
* Each of the uiSize elements in the array is a byte.
* @param uiSize the size of the array
* @return the created ByteArrayFS
*/
ByteArrayFS createByteArrayFS(size_t uiSize );
/**
* create a feature structure of type short array.
* Each of the uiSize elements in the array is a short.
* @param uiSize the size of the array
* @return the created ShortArrayFS
*/
ShortArrayFS createShortArrayFS(size_t uiSize );
/**
* create a feature structure of type long array.
* Each of the uiSize elements in the array is a long.
* @param uiSize the size of the array
* @return the created LongArrayFS
*/
LongArrayFS createLongArrayFS(size_t uiSize );
/**
* create a feature structure of type double array.
* Each of the uiSize elements in the array is a double.
* @param uiSize the size of the array
* @return the created DoubleArrayFS
*/
DoubleArrayFS createDoubleArrayFS(size_t uiSize );
/**
* Return the index repository for index use
* @throws CASException
* if View is the base CAS
*/
FSIndexRepository & getIndexRepository( void );
/**
* Return the index repository for index use (const-version)
*/
FSIndexRepository const & getIndexRepository( void ) const;
/**
* Get the lowlevel IndexRepository
*/
uima::lowlevel::IndexRepository & getLowlevelIndexRepository(void) const;
/**
* Return the base index repository: for internal use only
*/
FSIndexRepository &
getBaseIndexRepository( void );
/**
* Create a SofaFS
* @deprecated As of v2.0, use createView() instead.
*/
SofaFS createSofa(const SofaID & sofaName, const char* mimeType);
/**
* Retrieve the SofaFS as identified by the SofaID
* @deprecated As of v2.0, use getView() instead. From the view you can access the Sofa data, or
* call getSofa() if you truly need to access the SofaFS object.
*/
SofaFS getSofa(const SofaID & sofaName);
/**
* Get the Sofa feature structure associated with this CAS view.
*
* @return The SofaFS associated with this View.
*/
SofaFS getSofa();
/**
* Get the view for a Sofa (subject of analysis). The view provides access to the Sofa data and
* the index repository that contains metadata (annotations and other feature structures)
* pertaining to that Sofa.
*
* @param localViewName
* the local name, before any sofa name mapping is done, for this view (note: this is the
* same as the associated Sofa name).
*
* @return The view corresponding to this local name.
* @throws CASException
* if no View with this name exists in this CAS
*/
CAS* getView(const icu::UnicodeString & localViewName);
/**
* Get the view for a Sofa (subject of analysis). The view provides access to the Sofa data and
* the index repository that contains metadata (annotations and other feature structures)
* pertaining to that Sofa.
*
* @param aSofa
* a Sofa feature structure in the CAS
*
* @return The view for the given Sofa
*/
CAS* getView(SofaFS aSofa);
/**
* Create a view and its underlying Sofa (subject of analysis). The view provides access to the
* Sofa data and the index repository that contains metadata (annotations and other feature
* structures) pertaining to that Sofa.
*
* @param localViewName
* the local name, before any sofa name mapping is done, for this view (note: this is the
* same as the associated Sofa name).
*
* @return The view corresponding to this local name.
* @throws CASException
* if a View with this name already exists in this CAS
*/
CAS * createView(icu::UnicodeString const & localViewName);
/**
* Get the view name. The view name is the same as the name of the view's Sofa, retrieved by
* getSofa().getSofaID(), except for the initial View before its Sofa has been created.
*
* @return The name of the view
*/
UnicodeStringRef getViewName();
/**
* Set the document text. Once set, Sofa data is immutable, and cannot be set again until the CAS
* has been reset.
*
* @param text
* The text to be analyzed.
* @exception CASException
* If the Sofa data has already been set, or View is base CAS.
*/
void setDocumentText(UnicodeStringRef const text);
/**
* Set the document text, old style.
* @deprecated Use the new style
*/
void setDocumentText(UChar const * cpBuffer, size_t uiLength, bool bCopyToCAS = false);
/**
* Set the document text. Once set, Sofa data is immutable, and cannot be set again until the CAS
* has been reset.
*
* @param text
* The text to be analyzed.
* @param mimetype
* The mime type of the data
* @exception CASException
* If the Sofa data has already been set, or View is base CAS.
*/
virtual void setSofaDataString(UnicodeStringRef const text, icu::UnicodeString const & mimetype);
/**
* Get the document text.
*
* @return The text being analyzed.
*/
virtual UnicodeStringRef getDocumentText() const;
/**
* Returns the FeatureStructure of type document annotation representing
* the current document.
* Provided as a const overload and a non-const overload.
*/
virtual DocumentFS const getDocumentAnnotation() const;
virtual DocumentFS getDocumentAnnotation();
/**
* Set the Sofa data as an ArrayFS. Once set, Sofa data is immutable, and cannot be set again
* until the CAS has been reset.
*
* @param array
* The ArrayFS to be analyzed.
* @param mime
* The mime type of the data
* @exception CASException
* If the Sofa data has already been set, or View is base CAS.
*/
virtual void setSofaDataArray(FeatureStructure array, icu::UnicodeString const & mime);
/**
* Get the Sofa data array.
*
* @return The Sofa Data being analyzed.
*/
virtual FeatureStructure getSofaDataArray();
/**
* Set the Sofa data as a URI. Once set, Sofa data is immutable, and cannot be set again until the
* CAS has been reset.
*
* @param uri
* The URI of the data to be analyzed.
* @param mime
* The mime type of the data
* @exception CASException
* If the Sofa data has already been set, or View is base CAS.
*/
virtual void setSofaDataURI(icu::UnicodeString const & uri, icu::UnicodeString const & mime);
/**
* Get the Sofa data URI.
*
* @return The URI of the Sofa data being analyzed.
*/
virtual UnicodeStringRef getSofaDataURI();
/**
* Get the Sofa data as a byte stream.
*
* @returns a SofaDataStream.
*/
virtual SofaDataStream * getSofaDataStream();
/**
* Informs the CAS of relevant information about the component that is currently processing it.
* This is called by the framework automatically; users do not need to call it.
*
* @param info the context of the component currently processing this CAS
*/
void setCurrentComponentInfo(AnnotatorContext* info);
/**
* Get an iterator over all SofaFS
*
* @returns a FSIterator
*/
FSIterator getSofaIterator();
/**
* create an iterator over all the feature structures in this CAS.
* @throws InvalidIndexObjectException
*/
FSIterator iterator() ;
/** @defgroup UtilFuncts Utility Functions
* CAS Utility functions for Annotations
* @{ */
/**
* convenience function for creating an annotation.
* <code>type</code> must be a subtype of type Annotation.
* Note that this will not commit the new feature structure to the
* appropriate indexes. To do this CAS::commitFS() has to be called.
* @throws CASException
* if specified type is not an annotation, or View is base CAS.
*/
virtual AnnotationFS createAnnotation( Type const & type, size_t uiBeginPos, size_t uiEndPos );
/**
* CAS Utility function for Annotation Iterators:
* sets the index iterator to the begin position of <code>crFromAnn</code>.
*
* @param itOfType The iterator to move
* @param crFromAnn The annotation defining the begin position
*
* @return true if the begin position of <code>crFromAnn</code> is also a valid
* begin position of the Annotation the iterator points to and
* the movement was successful, false otherwise.
* Note that both arguments, i.e. the index iterator and the
* FeatureStructure must be annotations.
*/
bool moveToBeginPosition (ANIterator & itOfType, AnnotationFS const & crFromAnn);
/** @} defgroup */
/** @defgroup PreDefIndexes Predefined Indexes
* Only very few applications and annotators will need to create specific
* indexes that are optimized for special iteration needs.
*
* Most applications and annotators will just use some
* "natural" way to iterate over the data in the feature structures
* in the FSStore.
*
* For annotation feature structures this is the linear text order
* with some special sorting for annotations that start at the same
* position in the text.
*
* For vocabulary feature structures a sort order based on lemma string
* and part of speech is the most accepted one.
*
* Those "natural" indexes are predefined in the CAS and can be
* accessed using the functions described here.
*
* For example to access tokens using the index over token annotations
* write:
* <pre>
* Type tokType = cas.getTypeSystem().getType(TT::TYPE_NAME_TOKEN_ANNOTATION);
* ANIndex tokIdx = cas.getAnnotationIndex(tokType);
* ANIterator tokIter = tokIdx.iterator();
* while(tokIter.isValid()) {
* AnnotationFS tokFS = tokIter.get();
* // ... do some processing with tokFS ...
* tokIter.moveToNext();
* }
* </pre>
* @{
*/
/**
* @name Access to predefined indexes
* @{ */
/**
* get the index over all annotations.
* The index is ordered where an annotation a1 is considered "less-than"
* another annotation a2 if the begin position of a1 is less than the begin
* position of a2, or, if they are equal, a1 is shorter than a2.
*
* @throws InvalidFSTypeObjectException
*/
virtual ANIndex getAnnotationIndex();
/**
* get the index over annotations of type <code>crType</code>.
* The index is ordered where an annotation a1 is considered "less-than"
* another annotation a2 if the begin position of a1 is less than the begin
* position of a2, or, if they are equal, a1 is shorter than a2.
*
* @param crType The returned index will return only annotations of type
* <code>crType</code>.
* <code>crType</code> must be derived from our annotation type.
*
* @throws InvalidFSTypeObjectException
*/
virtual ANIndex getAnnotationIndex(Type const & crType);
virtual ANIndex const getAnnotationIndex(Type const & crType) const;
/** @} */
/** @} */
/**
* @name Access to predefined index IDs.
* This should only be needed to get access to low level indexes.
* @{ */
/**
 * Get the ID of the predefined annotation index.
 * @return CAS::INDEXID_ANNOTATION converted to an icu::UnicodeString
 */
icu::UnicodeString getAnnotationIndexID() const {
  // Construct explicitly instead of relying on the implicit
  // char const* -> UnicodeString conversion, so that this header also
  // compiles when ICU is built with UNISTR_FROM_STRING_EXPLICIT=explicit.
  return icu::UnicodeString(CAS::INDEXID_ANNOTATION);
}
/** @} */
/** @defgroup PreDefTypes Predefined Types
*
* For each predefined type the system provides a string constant
* for the names of the type.
* Programmers are strongly encouraged to use them instead of string literals.
* E.g. write <code>uima::TT::TYPE_NAME_TOKEN_ANNOTATION</code> instead of
* <code>"uima.tt.TokenAnnotation"</code>
*
* @see @link PreDefFeatures Predefined Features@endlink
* @{
*/
/** @name CAS string constants for the names of predefined basic types.
* @{
*/
/// Use this instead of a string literal
static char const * NAME_SPACE_UIMA_CAS;
/// Use this instead of a string literal
static char const * TYPE_NAME_TOP;
/// Use this instead of a string literal
static char const * TYPE_NAME_INTEGER;
/// Use this instead of a string literal
static char const * TYPE_NAME_STRING;
/// Use this instead of a string literal
static char const * TYPE_NAME_FLOAT;
/// Use this instead of a string literal
static char const * TYPE_NAME_ARRAY_BASE;
/// Use this instead of a string literal
static char const * TYPE_NAME_FS_ARRAY;
/// Use this instead of a string literal
static char const * TYPE_NAME_FLOAT_ARRAY;
/// Use this instead of a string literal
static char const * TYPE_NAME_INTEGER_ARRAY;
/// Use this instead of a string literal
static char const * TYPE_NAME_STRING_ARRAY;
/// Use this instead of a string literal
static char const * TYPE_NAME_LIST_BASE;
/// Use this instead of a string literal
static char const * TYPE_NAME_FS_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_EMPTY_FS_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_NON_EMPTY_FS_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_FLOAT_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_NON_EMPTY_FLOAT_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_EMPTY_FLOAT_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_INTEGER_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_NON_EMPTY_INTEGER_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_EMPTY_INTEGER_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_STRING_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_NON_EMPTY_STRING_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_EMPTY_STRING_LIST;
/// Use this instead of a string literal
static char const * TYPE_NAME_SOFA;
/// Use this instead of a string literal
static char const * TYPE_NAME_LOCALSOFA;
/// Use this instead of a string literal
static char const * TYPE_NAME_REMOTESOFA;
/// Use this instead of a string literal
static char const * NAME_DEFAULT_TEXT_SOFA;
/// Use this instead of a string literal
static char const * NAME_DEFAULT_SOFA;
/// Use this instead of a string literal
static char const * TYPE_NAME_ANNOTATION_BASE;
/// Use this instead of a string literal
static char const * TYPE_NAME_ANNOTATION;
/// Use this instead of a string literal
static char const * TYPE_NAME_DOCUMENT_ANNOTATION;
/// Use this instead of a string literal
static char const * INDEXID_ANNOTATION;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_SOFA;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_SOFA;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_BEGIN;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_BEGIN;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_END;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_END;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_LANGUAGE;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_LANGUAGE;
static char const * TYPE_NAME_BOOLEAN;
static char const * TYPE_NAME_BYTE;
static char const * TYPE_NAME_SHORT;
static char const * TYPE_NAME_LONG;
static char const * TYPE_NAME_DOUBLE;
static char const * TYPE_NAME_BOOLEAN_ARRAY;
static char const * TYPE_NAME_BYTE_ARRAY;
static char const * TYPE_NAME_SHORT_ARRAY;
static char const * TYPE_NAME_LONG_ARRAY;
static char const * TYPE_NAME_DOUBLE_ARRAY;
/** @} */
/** @} defgroup*/
/** @defgroup PreDefFeatures Predefined Features
* For each predefined feature the system provides a string constant
* for the names of the feature.
* Programmers are strongly encouraged to use them instead of string literals.
* E.g. write <code>CAS::FEATURENAME_LEMMAENTRIES</code> instead of
* <code>"UIMA_Feature_LemmaEntries"</code>
* @see @link PreDefTypes Predefined Types@endlink
* @{
*/
/** @name CAS string constants for the names of predefined basic features.
* @{
*/
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_HEAD;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_TAIL;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_FS_LIST_HEAD;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_FS_LIST_TAIL;
/// Use this instead of a string literal
////static char const * FEATURE_FULL_NAME_FLOAT_LIST_HEAD;
/// Use this instead of a string literal
////static char const * FEATURE_FULL_NAME_FLOAT_LIST_TAIL;
/// Use this instead of a string literal
////static char const * FEATURE_FULL_NAME_INTEGER_LIST_HEAD;
/// Use this instead of a string literal
////static char const * FEATURE_FULL_NAME_INTEGER_LIST_TAIL;
/// Use this instead of a string literal
////static char const * FEATURE_FULL_NAME_STRING_LIST_HEAD;
/// Use this instead of a string literal
////static char const * FEATURE_FULL_NAME_STRING_LIST_TAIL;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_SOFANUM;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_SOFAID;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_SOFAMIME;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_SOFAURI;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_SOFASTRING;
/// Use this instead of a string literal
static char const * FEATURE_BASE_NAME_SOFAARRAY;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_SOFANUM;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_SOFAID;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_SOFAMIME;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_SOFAURI;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_SOFASTRING;
/// Use this instead of a string literal
static char const * FEATURE_FULL_NAME_SOFAARRAY;
/** @} */
/** @} defgroup*/
/// @if internal
/**
* @name Access to predefined index IDs.
* This should only be needed to get access to low level indexes.
* Use functions like getLemmaIndex() to access high level indexes.
* @{ */
/// Use this instead of a string literal
static char const * INDEXID_SOFA;
/** @} */
/// @endif internal
/**
* @defgroup CreatorIDs Creator IDs
* Creator IDs are used with
* uima::Type::getCreatorID() and uima::Feature::getCreatorID()
* The ID shows which component has created a type or feature.
* They are intended for information/debug/display purposes.
* Currently one creator ID is predefined in this class, CAS::ustrCREATOR_ID_CAS. It is used for:
* - the core types/features created by the CAS (e.g. Integer)
* - the basic linguistic types/features created by the CAS (e.g. Annotation)
*
* For types/features not created by CAS the creator id
* is the name of the component/annotator
* @{ */
/**
* The @link CreatorIDs creator id@endlink constant for types/features created by the CAS (e.g. Integer).
* See @link CreatorIDs separate section @endlink for details
*/
static icu::UnicodeString const ustrCREATOR_ID_CAS;
/** @} defgroup */
};
/**
* The Sofa is implemented as a feature structure of type uima.cas.Sofa.
* Class SofaFS derives from a standard FeatureStructure and extends
* it with convenience functions specific to feature structures of type
* uima.cas.Sofa.
* <br>
* The generic CAS APIs must never be used to set/get features
* of a Sofa. Instead SofaFS methods must be used to set/get features
* of a Sofa.
* <br>
* The features of the Sofa type include:
* <ul>
* <li><strong> SofaID </strong> : Every Sofa in a CAS must have a unique SofaID.
* SofaIDs are the primary handle for access.
* <li><strong> Mime type </strong> : This string feature can be used to describe the type of the data represented by the Sofa.
* <li><strong>Sofa Data</strong> : The data itself. This data can be resident in the CAS or it can be a reference to data outside the CAS.
* </ul>
* To create a Sofa FS, use CAS.createSofa().
* <br>
* <br>
* Sofa data can be contained locally in the CAS itself or it can be
* remote from CAS. To set the local Sofa data in the Sofa FS use:
* uima::SofaFS::setLocalSofaData(). If the data is remote from the CAS use:
* uima::SofaFS::setRemoteSofaURI().
*
*
*/
class UIMA_LINK_IMPORTSPEC SofaFS : public FeatureStructure {
friend class LocalSofaDataStream;
friend class CAS;
protected:
/**
* Set the Sofa mime type.
*/
void setSofaMime(icu::UnicodeString const & aString);
public:
/**
* Default constructor. Creates an invalid SofaFS.
*/
SofaFS();
/**
* Promote a generic FeatureStructure object to an SofaFS.
* <code>fs</code> must be of type sofa.
* If <code>fs</code> is not of type sofa the operation will not
* immediately fail but access to member functions of SofaFS
* will result in exceptions beeing thrown.
* This is done by value like all FS API but SofaFS and FS are
* shallow objects.
*/
explicit SofaFS(FeatureStructure const & fs);
/**
* Set the Local Subject of Analysis to be a predefined ArrayFS.
*/
void setLocalSofaData(FeatureStructure aFS);
/**
* Set the Local Subject of Analysis to be a String.
*/
void setLocalSofaData(UnicodeStringRef const aString);
/**
* Set the Remote Subject of Analysis URI
*/
void setRemoteSofaURI(const char* aURI);
void setRemoteSofaURI(icu::UnicodeString const & aString);
/**
* Get the Sofa name.
* @returns String
*/
UnicodeStringRef getSofaID();
/**
* Get the Sofa mime type.
* @returns String
*/
UnicodeStringRef getSofaMime();
/**
* Get the Sofa URI value.
* @returns String or null if not valid
*/
UnicodeStringRef getSofaURI();
/**
* Get the Sofa Ref value.
* @returns the integer identifier for this Sofa
*/
int getSofaRef();
/**
* Get the Sofa FSArray value.
* @returns FeatureStructure (invalid FS if not valid)
*/
FeatureStructure getLocalFSData();
/**
* Get the Sofa FSArray value.
* @returns String or null if not valid
*/
UnicodeStringRef getLocalStringData();
/**
* Get the Sofa Data Stream for this SofA fs.
* Values for one of these features -- sofaString, SofaArray,
* sofaURI -- must have been set.
* If the sofaURI has a valid value, a valid SofaDataStream
* is returned only if there is a stream handler registered
* for the URI scheme.
* @returns a SofaDataStream or null if not valid
*/
SofaDataStream * getSofaDataStream();
/**
* Get the Sofa Data Stream for the give Sofa FeatureStructure.
* @param fs a valid SofA feature structure with values for one of
* these feature -- sofaString, sofaArray, or sofaURI -- set.
* If the sofaURI feature has a valid value, a valid SofaDataStream
* is returned only if there is a stream handler registered
* for the URI scheme.
* @returns a SofaDataStream or null if not valid
*/
static SofaDataStream * getSofaDataStream(FeatureStructure & fs);
protected:
}
; // class SofaFS
/* ----------------------------------------------------------------------- */
/* Implementation */
/* ----------------------------------------------------------------------- */
/// Default constructor: only default-constructs the FeatureStructure base.
inline SofaFS::SofaFS()
    : FeatureStructure() {
}
/**
 * This enum is used by annotation iterators to determine their iteration
 * behaviour when several annotations are available for one position.
 * It is the type of the <code>enIsAmbiguous</code> parameter of
 * ANIndex::subIterator() and AnnotationFS::subIterator().
 * @see ANIndex
 * @see ANIterator
 * @see AnnotationFS
 */
enum EnIteratorAmbiguity_ {
  /** Default behaviour: return all annotations, even if several are available for a position. */
  enAmbiguous,
  /** Return only one annotation for a given position. */
  enUnambiguous,
  /** Number of real enumerators above; must be last in enum. */
  enNumberOfIteratorAmbiguityElems
};
typedef enum EnIteratorAmbiguity_ EnIteratorAmbiguity;
/**
 * Class AnnotationFS derives from a standard FeatureStructure and extends it
 * with convenience functions specific to feature structures of type
 * annotation.
 * The most important functions deal with access to the features begin
 * position and end position as well as access to the text covered by the
 * annotation.
 */
class UIMA_LINK_IMPORTSPEC AnnotationFS : public FeatureStructure {
public:
  /**
   * Default constructor. Creates an invalid AnnotationFS.
   */
  AnnotationFS();

  /**
   * Promote a generic FeatureStructure object to an AnnotationFS.
   * <code>fs</code> must be of type annotation.
   * If <code>fs</code> is not of type annotation the operation will not
   * fail immediately, but access to member functions of AnnotationFS
   * will result in exceptions being thrown.
   * The argument is taken over by value like in the rest of the FS API;
   * AnnotationFS and FS are shallow objects.
   */
  explicit AnnotationFS(const FeatureStructure & fs);

  /**
   * Returns the CAS object in which this feature structure lives.
   * This just saves a cast compared to the inherited getCAS() method.
   */
  CAS & getCAS();

  /** Const overload of getCAS(). */
  const CAS & getCAS() const;

  /**
   * Gets the CAS view associated with the Sofa that this annotation is over.
   *
   * @return the CAS view associated with the annotation's Sofa
   */
  CAS * getView();

  /**
   * Get the start position of this annotation feature structure.
   */
  size_t getBeginPosition() const;

  /**
   * Get the end position of this annotation feature structure.
   */
  size_t getEndPosition() const;

  /**
   * Get the number of characters covered by this annotation feature
   * structure.
   * This is just a shortcut for getEndPosition()-getBeginPosition().
   */
  size_t getLength() const;

  /**
   * Get a reference to the text spanned by this annotation feature
   * structure.
   * @throws CASException
   *         if FS is not a valid annotation.
   */
  UnicodeStringRef getCoveredText() const;

  /**
   * Get the first annotation A that covers this annotation, i.e.
   * that has a begin position(A) <= begin position(this) and an
   * end position(A) >= end position(this).
   *
   * Note that the covering relation here is reflexive:
   * getFirstCoveringAnnotation(x, t) == x if t is the type of x.
   *
   * If several annotations fulfil these conditions, the one whose
   * starting position is nearest to the begin position of this
   * annotation is returned.
   * Of covering annotations with the same starting position, the one
   * with the biggest end position relative to this annotation is chosen.
   *
   * @return The next covering feature structure as defined above
   *         (!isValid() if none found)
   */
  AnnotationFS getFirstCoveringAnnotation(Type ofType) const;

  /**
   * Create an iterator over an index of annotations of type crType
   * only returning annotations with begin position >= this.getBeginPosition()
   * and begin position < this.getEndPosition().
   *
   * @param crType         The type of annotation over which to iterate.
   *                       crType must be subsumed by type annotation.
   * @param enIsAmbiguous  If set to enUnambiguous, calling
   *                       moveToNext/moveToPrevious will always move the
   *                       resulting iterator to an annotation that is no
   *                       longer covered by the current annotation.
   *                       This means that:
   *                       moveToNext will always return an annotation with
   *                       a begin position greater than the current end
   *                       position.
   *                       moveToPrevious will always return an annotation
   *                       with an end position less than the current begin
   *                       position.
   *                       In a situation like this:<br>
   *   <tt>|--------- Name1 ------||-------- Name2 -------|</tt><br>
   *   <tt>|-- Tok1 --||-- Tok2 --||-- Tok3 --||-- Tok4 --|</tt><br>
   *                       A normal (ambiguous) iterator starting out with
   *                       Name1 would return:
   *                         Name1, Tok1, Tok2, Name2, Tok3, Tok4
   *                       An unambiguous iterator starting out with Name1
   *                       would return:
   *                         Name1, Name2
   *                       (This assumes that the types Name and Tok are
   *                       subsumed by crType and no other subsumed
   *                       annotations cover the area.)
   *                       Defaults to enAmbiguous.
   *
   * @throws InvalidIndexObjectException
   */
  ANIterator subIterator(const Type & crType,
                         EnIteratorAmbiguity enIsAmbiguous = enAmbiguous) const;
};  // class AnnotationFS
/* ----------------------------------------------------------------------- */
/* Implementation */
/* ----------------------------------------------------------------------- */
/// Default constructor: only default-constructs the FeatureStructure base.
inline AnnotationFS::AnnotationFS()
    : FeatureStructure() {
}
/// Length of the annotation: end position minus begin position.
inline size_t AnnotationFS::getLength() const {
  size_t const uiEnd = getEndPosition();
  size_t const uiBegin = getBeginPosition();
  return uiEnd - uiBegin;
}
/* ----------------------------------------------------------------------- */
/* AnnotationIterator */
/* ----------------------------------------------------------------------- */
/**
 * Iterator over AnnotationFS objects in a CAS annotation index (ANIndex).
 * Iterators are created by calling ANIndex.iterator().
 * @see AnnotationFS
 * @see ANIndex
 */
class UIMA_LINK_IMPORTSPEC ANIterator : public FSIterator {
  friend class ANIndex;

protected:
  /// Construct from a low-level index iterator and a CAS pointer;
  /// both are handed on to the FSIterator base.
  ANIterator(uima::lowlevel::IndexIterator *, uima::CAS *);

public:
  /// Default constructor.
  ANIterator();

  /// Upgrade/conversion constructor from a standard FSIterator.
  explicit ANIterator(const FSIterator &);

  /// Retrieve the current element in the index for this iterator.
  AnnotationFS get() const;
};  // class ANIterator
/* ----------------------------------------------------------------------- */
/* ANIterator Implementation */
/* ----------------------------------------------------------------------- */
/// Construct from a low-level index iterator and a CAS pointer by forwarding
/// both to FSIterator.
inline ANIterator::ANIterator(uima::lowlevel::IndexIterator * pIterator, uima::CAS * pCas)
    : FSIterator(pIterator, pCas) {
  // ANIterator must not add data members on top of FSIterator
  assert(sizeof(FSIterator) == sizeof(ANIterator));
}
/// Default constructor: default-constructs the FSIterator base.
inline ANIterator::ANIterator()
    : FSIterator() {
  // ANIterator must not add data members on top of FSIterator
  assert(sizeof(FSIterator) == sizeof(ANIterator));
}
/// Upgrade/conversion constructor: copy-constructs the FSIterator base from
/// the given iterator.
inline ANIterator::ANIterator(const FSIterator & crSource)
    : FSIterator(crSource) {
  // ANIterator must not add data members on top of FSIterator
  assert(sizeof(FSIterator) == sizeof(ANIterator));
}
/// Return the current element of the base iterator, converted to an
/// AnnotationFS through its explicit conversion constructor.
inline AnnotationFS ANIterator::get() const {
  return static_cast<AnnotationFS>(FSIterator::get());
}
/* ----------------------------------------------------------------------- */
/* AnnotationIndex */
/* ----------------------------------------------------------------------- */
/**
 * This class represents a single index over feature structures of
 * type annotation.
 * @see AnnotationFS
 * @see ANIterator
 */
class UIMA_LINK_IMPORTSPEC ANIndex : public FSIndex {
public:
  /**
   * Default constructor. Creates an invalid ANIndex object.
   */
  ANIndex();

  /**
   * Upgrade/conversion constructor from a standard index.
   */
  explicit ANIndex(const FSIndex &);

  /**
   * Create an iterator over all the feature structures in this index.
   * @throws InvalidIndexObjectException
   */
  ANIterator iterator() const;

  /**
   * Create an iterator over this index with the filter
   * <code>cpFilter</code>.
   * @see FSFilter
   * @throws InvalidIndexObjectException
   */
  ANIterator filteredIterator(const FSFilter * cpFilter) const;

  /**
   * Create an iterator over this index only returning annotations with
   * begin position >= an.BeginPos and begin position < an.EndPos.
   *
   * @param an             The annotation "under" which the subiterator
   *                       iterates.
   * @param enIsAmbiguous  If set to enUnambiguous, calling
   *                       moveToNext/moveToPrevious will always move the
   *                       resulting iterator to an annotation that is no
   *                       longer covered by the current annotation.
   *                       This means that:
   *                       moveToNext will always return an annotation with
   *                       a begin position greater than the current end
   *                       position.
   *                       moveToPrevious will always return an annotation
   *                       with an end position less than the current begin
   *                       position.
   *                       In a situation like this:<br>
   *   <tt>|--------- Name1 ------||-------- Name2 -------|</tt><br>
   *   <tt>|-- Tok1 --||-- Tok2 --||-- Tok3 --||-- Tok4 --|</tt><br>
   *                       A normal (ambiguous) iterator starting out with
   *                       Name1 would return:
   *                         Name1, Tok1, Tok2, Name2, Tok3, Tok4
   *                       An unambiguous iterator starting out with Name1
   *                       would return:
   *                         Name1, Name2
   *                       (This assumes that the types Name and Tok are
   *                       subsumed by the type for this index and no other
   *                       subsumed annotations cover the area.)
   *                       Defaults to enAmbiguous.
   * @throws InvalidIndexObjectException
   */
  ANIterator subIterator(const AnnotationFS & an,
                         EnIteratorAmbiguity enIsAmbiguous = enAmbiguous) const;

  /**
   * Create an iterator over this index such that calling
   * moveToNext/moveToPrevious will always move the resulting iterator to
   * an annotation that is no longer covered by the current annotation.
   * This means that:
   * moveToNext will always return an annotation with a
   * begin position greater than the current end position.
   * moveToPrevious will always return an annotation with an
   * end position less than the current begin position.
   * In a situation like this:<br>
   * <tt>|--------- Name1 ------||-------- Name2 -------|</tt><br>
   * <tt>|-- Tok1 --||-- Tok2 --||-- Tok3 --||-- Tok4 --|</tt><br>
   * A normal (ambiguous) iterator starting out with Name1 would return:
   *   Name1, Tok1, Tok2, Name2, Tok3, Tok4
   * An unambiguous iterator starting out with Name1 would return:
   *   Name1, Name2
   * (This assumes that the types Name and Tok are subsumed
   * by the type for this index and no other subsumed annotations cover
   * the area.)
   * @throws InvalidIndexObjectException
   */
  ANIterator unambiguousIterator() const;
};  // class ANIndex
/* ----------------------------------------------------------------------- */
/* Implementation */
/* ----------------------------------------------------------------------- */
/// Default constructor: only default-constructs the FSIndex base.
inline ANIndex::ANIndex()
    : FSIndex() {
}
/// Upgrade/conversion constructor: copy-constructs the FSIndex base from the
/// given index.
inline ANIndex::ANIndex(const FSIndex & crSource)
    : FSIndex(crSource) {
  // ANIndex must not add data members on top of FSIndex
  assert(sizeof(FSIndex) == sizeof(ANIndex));
}
/// Return the base-class iterator over this index, converted to an
/// ANIterator through its explicit conversion constructor.
inline ANIterator ANIndex::iterator() const {
  return static_cast<ANIterator>(FSIndex::iterator());
}
/// Return the base-class filtered iterator for <code>cpFilter</code>,
/// converted to an ANIterator through its explicit conversion constructor.
inline ANIterator ANIndex::filteredIterator(const FSFilter * cpFilter) const {
  return static_cast<ANIterator>(FSIndex::filteredIterator(cpFilter));
}
/**
 * Class DocumentFS derives from AnnotationFS and extends it
 * with convenience functions specific to the single feature structure
 * of type Document-Annotation that is present for each input document.
 * The most important additional functions deal with access to the
 * features document language and document text.
 * To get easy access to this annotation just call the function
 * CAS::getDocumentAnnotation().
 */
class UIMA_LINK_IMPORTSPEC DocumentFS : public AnnotationFS {
  friend class CAS;

public:
  /**
   * Default constructor. Creates an invalid DocumentFS.
   */
  DocumentFS();

  /**
   * Promote a generic FeatureStructure object to a DocumentFS.
   * <code>fs</code> must be of type document annotation.
   * If <code>fs</code> is not of type document annotation the operation
   * will not fail immediately, but access to member functions of
   * AnnotationFS will result in exceptions being thrown.
   */
  explicit DocumentFS(const FeatureStructure & fs);

  /**
   * Returns the language of the current document.
   * The return value may be invalid (<TT>!Language.isValid()</TT>) if
   * this function is called when no valid document is present.
   * The return value may be unspecified
   * (<TT>== CosEnLanguage_Unspecified</TT>) if no language was specified
   * for the document and no language detection annotator has run.
   */
  Language getLanguage() const;

  /**
   * Set the language of the current document.
   */
  void setLanguage(const Language &);
};  // class DocumentFS
/* ----------------------------------------------------------------------- */
/* Implementation */
/* ----------------------------------------------------------------------- */
/// Default constructor: only default-constructs the AnnotationFS base.
inline DocumentFS::DocumentFS()
    : AnnotationFS() {
}
}
#endif