// src/test/src/annotator_dump.cpp - uima-uimacpp - Git at Google

 /** \file annotator_dump.cpp

  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.

 -------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------- */
 /*       Include dependencies                                              */
 /* ----------------------------------------------------------------------- */

 // this is included ONCE for the main source file of each binary
 #include "uima/pragmas.hpp"
 #include "uima/annotator_dump.hpp"
 #include "uima/xmlwriter.hpp"

 #include <iostream>
 #include <algorithm>
 #ifdef _MSC_VER
 #include <minmax.h> // for min
 #endif
 using namespace std;

 #include "uima/assertmsg.h"
 #include "uima/macros.h"
 #include "uima/trace.hpp"

 #include "uima/strconvert.hpp"
 //#include "uima/fixed_vector.hpp"

 //config parameter names
 /* ----------------------------------------------------------------------- */
 /*           Constants                                                     */
 /* ----------------------------------------------------------------------- */

 // Keys looked up in the annotator configuration by AnnotatorDump::initialize().
 // They stay macros because one of them is spliced into a string literal there.
 #define ANNOTATOR_DUMP_PARAM_OUTFILE          _TEXT("OutputFile")
 #define ANNOTATOR_DUMP_PARAM_APPEND           _TEXT("AppendFile")
 #define ANNOTATOR_DUMP_PARAM_DUMP_DOCBUFFER   _TEXT("DumpDocBuffer")
 #define ANNOTATOR_DUMP_PARAM_SAVE_DOCBUFFER   _TEXT("SaveDocBuffer")
 #define ANNOTATOR_DUMP_PARAM_STYLE            _TEXT("OutputStyle")
 #define ANNOTATOR_DUMP_OUTPUT_TYPES           _TEXT("OutputTypes")

 // Numeric codes handed to the logger together with each message.
 // Three ranges are used: *_ERROR_* from 100, *_WARN_* from 200, *_MSG_* from 300.

 const int ANNOTATOR_DUMP_ERROR_OFFSET           = 100;
 const int ANNOTATOR_DUMP_ERROR_OPEN             = ANNOTATOR_DUMP_ERROR_OFFSET + 0;
 const int ANNOTATOR_DUMP_ERROR_AT               = ANNOTATOR_DUMP_ERROR_OFFSET + 1;

 const int ANNOTATOR_DUMP_WARN_OFFSET            = 200;
 const int ANNOTATOR_DUMP_WARN_TET               = ANNOTATOR_DUMP_WARN_OFFSET + 0;

 const int ANNOTATOR_DUMP_MSG_OFFSET             = 300;
 const int ANNOTATOR_DUMP_MSG_STYLE              = ANNOTATOR_DUMP_MSG_OFFSET + 0;
 const int ANNOTATOR_DUMP_MSG_TYPES              = ANNOTATOR_DUMP_MSG_OFFSET + 1;

 /* ----------------------------------------------------------------------- */
 /*       Implementation                                                    */
 /* ----------------------------------------------------------------------- */

 // Default Constructor

 /// Default-constructs the output stream member; the remaining members are
 /// assigned their defaults in initialize().
 AnnotatorDump::AnnotatorDump()
   : iv_clOutputStream() {
 }

 /// Destructor; contains no statements of its own.
 AnnotatorDump::~AnnotatorDump() {}

 /**
  * Reads the annotator configuration and prepares the dump output.
  *
  * Options read from the context:
  *  - OutputFile    (required) name of the dump file
  *  - DumpDocBuffer (optional, default false)
  *  - SaveDocBuffer (optional, default false)
  *  - AppendFile    (optional, default false); when true the output file is
  *                  opened here and stays open until destroy()
  *  - OutputStyle   (optional) 0 = Xml (default), 1 = XCas; any other value is
  *                  reported with a warning and the default is kept
  *  - OutputTypes   (optional, multi-valued) only counted and reported on stdout
  *
  * @param rclAnnotatorContext context the configuration values are read from
  * @return UIMA_ERR_NONE on success,
  *         UIMA_ERR_USER_ANNOTATOR_CONFIG_INVALID_PARAM if OutputFile cannot be read,
  *         or the error returned by openOutputFile() in append mode
  */
 TyErrorId
 AnnotatorDump::initialize(
   AnnotatorContext & rclAnnotatorContext) {
   // Default Values

   // in append mode all data in a session/collection is dumped into one file
   // otherwise the same dump file is deleted and rewritten for each document
   // in the session/collection
   // default is false
   iv_bAppendFile = false;

   // we don't dump the Document Buffer
   iv_bDumpDocBuffer = false;
   iv_bSaveDocBuffer = false;

   // the representation will be in Xml-Format
   iv_enOutputStyle = Xml;

   // Reading the Values from the Config-Section

   // Filename for the Output-Stream (required)
   icu::UnicodeString us;
   TyErrorId tyErrId = rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_OUTFILE, us);
   if (tyErrId != UIMA_ERR_NONE) {
     getAnnotatorContext().getLogger().logError(
       _TEXT("Required option '" ANNOTATOR_DUMP_PARAM_OUTFILE "' not found"),
       (long)ANNOTATOR_DUMP_ERROR_OPEN);
     return(UIMA_ERR_USER_ANNOTATOR_CONFIG_INVALID_PARAM);
   }

   // Convert filename to default encoding, copy it into the member and give
   // the string buffer back right away (the library may use a different heap).
   // Releasing here, rather than at the end of the function, guarantees that
   // none of the early returns below can skip the release.
   {
     string strFileName;
     UnicodeStringRef usr(us);
     usr.extract(strFileName);
     iv_clOutputFilename = strFileName.c_str();
     usr.release(strFileName);
   }

   (void) rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_DUMP_DOCBUFFER, iv_bDumpDocBuffer);
   (void) rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_SAVE_DOCBUFFER, iv_bSaveDocBuffer);
   (void) rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_APPEND, iv_bAppendFile);

   // in append mode all data in a session/collection is dumped into one file
   // otherwise the same dump file is deleted and rewritten for each document
   // in the session/collection
   if (iv_bAppendFile) {
     tyErrId = openOutputFile();
     if (tyErrId != UIMA_ERR_NONE) {
       return tyErrId;
     }
   }

   // Output Style
   int iOutputStyle = 0;
   tyErrId = rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_STYLE, iOutputStyle);
   if (tyErrId != UIMA_ERR_CONFIG_OPTION_NOT_FOUND) {
     switch (iOutputStyle) {
     case 0:
       iv_enOutputStyle = Xml;
       break;
     case 1:
       iv_enOutputStyle = XCas;
       break;
     default:
       getAnnotatorContext().getLogger().logWarning(
         "Invalid Output Style. Use Default",
         (long) ANNOTATOR_DUMP_MSG_STYLE);
       break;
     }
   }

   /*
      int iDummy;
      if (   rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_OUTPUT_TYPES, iDummy)){
        getAnnotatorContext().getLogger().logWarning((long) ANNOTATOR_DUMP_MSG_TYPES,
          _TEXT("Specification of output types currently not supported. All types will be dumped."));
      }
   */

   // Test getting a multi-valued parameter

   vector<string*> vecOutputTypes;
   tyErrId = rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_OUTPUT_TYPES,  vecOutputTypes);
   if (tyErrId != UIMA_ERR_CONFIG_OPTION_NOT_FOUND) {
     size_t len = 0;
     for (size_t i = 0; i < vecOutputTypes.size(); ++i) {
       len += vecOutputTypes[i]->length();
     }
     cout << "  parameter OutputTypes has "<<vecOutputTypes.size()<<" values with a total of "<<len<<" characters." << endl;
     // Release contents of vector allocated by extractValue in case library uses a different heap.
     rclAnnotatorContext.release(vecOutputTypes);
   }

   return(TyErrorId)UIMA_ERR_NONE;
 }


 /**
  * Retrieves all types and all features of the type system into vectors and
  * prints their number, plus the name of the first and last entry, to stdout.
  *
  * The first/last names are only printed when the respective vector is not
  * empty, so an empty type system no longer indexes into an empty vector.
  *
  * @param typeSystem type system to inspect
  * @return always UIMA_ERR_NONE
  */
 TyErrorId AnnotatorDump::typeSystemInit(uima::TypeSystem const & typeSystem) {

   // Test that can get all types in a vector
   std::vector<Type> allTypes;
   typeSystem.getAllTypes(allTypes);
   cout << "  typeSystem has " << allTypes.size() << " types -" << endl;
   if (!allTypes.empty()) {
     cout << "      from '"
     << allTypes.front().getName() << "' to '" << allTypes.back().getName() << "'" << endl;
   }
   typeSystem.release(allTypes);                // Release storage allocated from library's heap

   // Same check for the features
   std::vector<Feature> allFeats;
   typeSystem.getAllFeatures(allFeats);
   cout << "  typeSystem has " << allFeats.size() << " features -" << endl;
   if (!allFeats.empty()) {
     cout << "      from '"
     << allFeats.front().getName() << "' to '" << allFeats.back().getName() << "'" << endl;
   }
   typeSystem.release(allFeats);                // Release storage allocated from library's heap

   return UIMA_ERR_NONE;
 }


 /**
  * Opens iv_clOutputStream on the configured output file name, using the
  * stream's default open mode.
  *
  * @return UIMA_ERR_NONE if the stream is good after opening; otherwise the
  *         failure is logged and UIMA_ERR_USER_ANNOTATOR_IO_WRITE_PROBLEM
  *         is returned.
  */
 TyErrorId
 AnnotatorDump::openOutputFile() {
   iv_clOutputStream.open(iv_clOutputFilename.getAsCString());     //overwrite

   if (!iv_clOutputStream.good()) {
     // Report which file could not be opened before signalling the failure.
     getAnnotatorContext().getLogger().logError(
       string("Failed to open Output File ")+ iv_clOutputFilename.getAsCString(),
       (long) ANNOTATOR_DUMP_ERROR_OPEN);
     return UIMA_ERR_USER_ANNOTATOR_IO_WRITE_PROBLEM;
   }

   return UIMA_ERR_NONE;
 }

 /// Closes the output stream that openOutputFile() opened.
 void
 AnnotatorDump::closeOutputFile() {
   iv_clOutputStream.close();
 }

 /**
  * Shuts the annotator down.
  *
  * In append mode the output file was opened in initialize() and is closed
  * here; otherwise process() opens and closes it itself for every document.
  *
  * @return always UIMA_ERR_NONE
  */
 TyErrorId
 AnnotatorDump::destroy() {
   if (iv_bAppendFile) {
     closeOutputFile();
   }
   return UIMA_ERR_NONE;
 }

 /**
  * Reconfiguration hook; this annotator does nothing here.
  *
  * @return always UIMA_ERR_NONE
  */
 TyErrorId
 AnnotatorDump::reconfigure() {
   return UIMA_ERR_NONE;
 }

 /**
  * Dumps the given CAS to the output file in the configured output style.
  *
  * When append mode is off the output file is opened at the start and closed
  * at the end of every call; in append mode the stream opened by initialize()
  * is reused. If DumpDocBuffer is set, the document text is additionally
  * written via outputDocBuffer().
  *
  * @param tcas CAS to be written
  * @return UIMA_ERR_NONE on success,
  *         the error from openOutputFile() if the file cannot be opened,
  *         UIMA_ERR_USER_ANNOTATOR_CONFIG_INVALID_PARAM if the stored output
  *         style is not one of the known values
  */
 TyErrorId
 AnnotatorDump::process(CAS & tcas,
                        const ResultSpecification &
                       ) {
   // in append mode all data in a session/collection is dumped into one file
   // otherwise the same dump file is deleted and rewritten for each document
   // in the session/collection
   if (!iv_bAppendFile) {
     const TyErrorId tyErrId = openOutputFile();
     if (tyErrId != UIMA_ERR_NONE) {
       return tyErrId;
     }
   }

   assert(iv_clOutputStream.good());

   if (iv_bDumpDocBuffer) {
     //    Dumping the Document Buffer
     UnicodeStringRef doc = tcas.getDocumentText();

     outputDocBuffer(doc);
   }

   // The writer is owned by a smart pointer from the moment it is created.
   unique_ptr<CASWriterABase> apWriter;
   switch (iv_enOutputStyle) {
   case Xml:
     apWriter.reset(new uima::XMLDumpWriter(tcas, iv_bDumpDocBuffer));
     break;
   case XCas:
     apWriter.reset(new uima::XCASWriter(tcas, iv_bDumpDocBuffer));
     break;
   default:
     assert(false);
     break;
   }

   // With assertions compiled out an unknown style used to fall through with
   // a null writer and crash on the write below; report an error instead.
   if (!apWriter) {
     getAnnotatorContext().getLogger().logError(
       "Unsupported Output Style",
       (long) ANNOTATOR_DUMP_MSG_STYLE);
     if (!iv_bAppendFile) {
       closeOutputFile();
     }
     return UIMA_ERR_USER_ANNOTATOR_CONFIG_INVALID_PARAM;
   }

   apWriter->write(iv_clOutputStream);

   // see above: without append mode the file is closed after every document
   if (!iv_bAppendFile) {
     closeOutputFile();
   }

   return(TyErrorId)UIMA_ERR_NONE;
 }

 /**
  * Writes the document buffer next to the main output file.
  *
  * A status summary and hex dump go to a file with the output file's name and
  * the extension ".asc" (appended to in append mode, overwritten otherwise).
  * If SaveDocBuffer is set, the raw UChar buffer is additionally written in
  * binary mode to a file with the extension ".ucs".
  *
  * @param crclDoc document text to dump; asserted to be non-empty
  */
 void AnnotatorDump::outputDocBuffer(UnicodeStringRef const & crclDoc) {
   assert(crclDoc.length() > 0);
   uima::util::Filename        clFilename(iv_clOutputFilename);

   // ---- textual status + hex dump (.asc) ----
   ofstream                   clDumpStream;
   clFilename.setNewExtension(_TEXT(".asc"));
   if (iv_bAppendFile) {
     clDumpStream.open(clFilename.getAsCString(), ios::out | ios::app);   //append (create, resp.)
   } else {
     clDumpStream.open(clFilename.getAsCString());    //overwrite
   }

   // Check the stream that was just opened. The previous code tested
   // iv_clOutputStream here, which says nothing about the .asc file.
   if (clDumpStream.good()) {
     TyDocIndex               tyIndexFirst = 0;
     TyDocIndex               tyIndexLast = crclDoc.length() - 1;

     clDumpStream << "Document Buffer Status:" << endl
     << "=======================" << endl;

     clDumpStream << endl
     << "Document buffer length...........: " << crclDoc.length() << endl
     << "Document buffer size in memory...: " << (crclDoc.length() * sizeof(UChar)) << endl
     << "Document buffer index first......: " << tyIndexFirst << endl
     << "Document buffer index last.......: " << tyIndexLast << endl;

     clDumpStream << endl
     << "Document Buffer Dump:" << endl
     << "=====================" << endl;
     assertWithMsg(sizeof(WORD16) == sizeof(UChar), "Port required");
     DUMPHEX(clDumpStream, (WORD16 const *)crclDoc.getBuffer(), crclDoc.length());
     clDumpStream << endl;
   }

   // ---- raw buffer (.ucs) ----
   if (iv_bSaveDocBuffer) {
     // Distinct name: this stream used to shadow the .asc stream above.
     ofstream                clRawStream;

     clFilename.setNewExtension(_TEXT(".ucs"));

     clRawStream.open(clFilename.getAsCString(), ios::binary);   //overwrite
     /** include byte-order-mark ??
        clRawStream << CosClConverterABase::getUCS2HostEndianId();
     **/
     clRawStream.write((const char *) crclDoc.getBuffer(), (crclDoc.length() * sizeof(UChar)));
   }
 }


 /* ----------------------------------------------------------------------- */
 /*   Mapping for generic C API wrapper                                     */
 /* ----------------------------------------------------------------------- */

 // Alias under which the generic wrapper code refers to this annotator class.
 using UserDefinedAnnotator = AnnotatorDump;
 // Annotator name used for error/exception info in annotator_generic.inl
 #define UIMA_ANNOTATOR_NAME "annotator_dump"

 /* ----------------------------------------------------------------------- */
 /*   Include generic C API wrapper                                         */
 /* ----------------------------------------------------------------------- */

 // The generic wrapper include is disabled; MAKE_AE is used instead
 // (presumably it generates the annotator entry points - see the uima headers).
 ///#include "uima/annotator_generic.inl"
 MAKE_AE(AnnotatorDump);
 /* <EOF> */
	/** \file annotator_dump.cpp

	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.

	-------------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------- */
	/* Include dependencies */
	/* ----------------------------------------------------------------------- */

	// this is included ONCE for the main source file of each binary
	#include "uima/pragmas.hpp"
	#include "uima/annotator_dump.hpp"
	#include "uima/xmlwriter.hpp"

	#include <iostream>
	#include <algorithm>
	#ifdef _MSC_VER
	#include <minmax.h> // for min
	#endif
	using namespace std;

	#include "uima/assertmsg.h"
	#include "uima/macros.h"
	#include "uima/trace.hpp"

	#include "uima/strconvert.hpp"
	//#include "uima/fixed_vector.hpp"

	//config parameter names
	/* ----------------------------------------------------------------------- */
	/* Constants */
	/* ----------------------------------------------------------------------- */

	// Keys looked up in the annotator configuration by AnnotatorDump::initialize().
	// They stay macros because one of them is spliced into a string literal there.
	#define ANNOTATOR_DUMP_PARAM_OUTFILE _TEXT("OutputFile")
	#define ANNOTATOR_DUMP_PARAM_APPEND _TEXT("AppendFile")
	#define ANNOTATOR_DUMP_PARAM_DUMP_DOCBUFFER _TEXT("DumpDocBuffer")
	#define ANNOTATOR_DUMP_PARAM_SAVE_DOCBUFFER _TEXT("SaveDocBuffer")
	#define ANNOTATOR_DUMP_PARAM_STYLE _TEXT("OutputStyle")
	#define ANNOTATOR_DUMP_OUTPUT_TYPES _TEXT("OutputTypes")

	// Numeric codes handed to the logger together with each message.
	// Three ranges are used: *_ERROR_* from 100, *_WARN_* from 200, *_MSG_* from 300.

	const int ANNOTATOR_DUMP_ERROR_OFFSET = 100;
	const int ANNOTATOR_DUMP_ERROR_OPEN = ANNOTATOR_DUMP_ERROR_OFFSET + 0;
	const int ANNOTATOR_DUMP_ERROR_AT = ANNOTATOR_DUMP_ERROR_OFFSET + 1;

	const int ANNOTATOR_DUMP_WARN_OFFSET = 200;
	const int ANNOTATOR_DUMP_WARN_TET = ANNOTATOR_DUMP_WARN_OFFSET + 0;

	const int ANNOTATOR_DUMP_MSG_OFFSET = 300;
	const int ANNOTATOR_DUMP_MSG_STYLE = ANNOTATOR_DUMP_MSG_OFFSET + 0;
	const int ANNOTATOR_DUMP_MSG_TYPES = ANNOTATOR_DUMP_MSG_OFFSET + 1;

	/* ----------------------------------------------------------------------- */
	/* Implementation */
	/* ----------------------------------------------------------------------- */

	// Default Constructor

	/// Default-constructs the output stream member; the remaining members are
	/// assigned their defaults in initialize().
	AnnotatorDump::AnnotatorDump()
	  : iv_clOutputStream() {
	}

	/// Destructor; contains no statements of its own.
	AnnotatorDump::~AnnotatorDump() {}

	/**
	 * Reads the annotator configuration and prepares the dump output.
	 *
	 * Options read from the context:
	 *  - OutputFile    (required) name of the dump file
	 *  - DumpDocBuffer (optional, default false)
	 *  - SaveDocBuffer (optional, default false)
	 *  - AppendFile    (optional, default false); when true the output file is
	 *                  opened here and stays open until destroy()
	 *  - OutputStyle   (optional) 0 = Xml (default), 1 = XCas; any other value is
	 *                  reported with a warning and the default is kept
	 *  - OutputTypes   (optional, multi-valued) only counted and reported on stdout
	 *
	 * @param rclAnnotatorContext context the configuration values are read from
	 * @return UIMA_ERR_NONE on success,
	 *         UIMA_ERR_USER_ANNOTATOR_CONFIG_INVALID_PARAM if OutputFile cannot be read,
	 *         or the error returned by openOutputFile() in append mode
	 */
	TyErrorId
	AnnotatorDump::initialize(
	  AnnotatorContext & rclAnnotatorContext) {
	  // Default Values

	  // in append mode all data in a session/collection is dumped into one file
	  // otherwise the same dump file is deleted and rewritten for each document
	  // in the session/collection
	  // default is false
	  iv_bAppendFile = false;

	  // we don't dump the Document Buffer
	  iv_bDumpDocBuffer = false;
	  iv_bSaveDocBuffer = false;

	  // the representation will be in Xml-Format
	  iv_enOutputStyle = Xml;

	  // Reading the Values from the Config-Section

	  // Filename for the Output-Stream (required)
	  icu::UnicodeString us;
	  TyErrorId tyErrId = rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_OUTFILE, us);
	  if (tyErrId != UIMA_ERR_NONE) {
	    getAnnotatorContext().getLogger().logError(
	      _TEXT("Required option '" ANNOTATOR_DUMP_PARAM_OUTFILE "' not found"),
	      (long)ANNOTATOR_DUMP_ERROR_OPEN);
	    return(UIMA_ERR_USER_ANNOTATOR_CONFIG_INVALID_PARAM);
	  }

	  // Convert filename to default encoding, copy it into the member and give
	  // the string buffer back right away (the library may use a different heap).
	  // Releasing here, rather than at the end of the function, guarantees that
	  // none of the early returns below can skip the release.
	  {
	    string strFileName;
	    UnicodeStringRef usr(us);
	    usr.extract(strFileName);
	    iv_clOutputFilename = strFileName.c_str();
	    usr.release(strFileName);
	  }

	  (void) rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_DUMP_DOCBUFFER, iv_bDumpDocBuffer);
	  (void) rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_SAVE_DOCBUFFER, iv_bSaveDocBuffer);
	  (void) rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_APPEND, iv_bAppendFile);

	  // in append mode all data in a session/collection is dumped into one file
	  // otherwise the same dump file is deleted and rewritten for each document
	  // in the session/collection
	  if (iv_bAppendFile) {
	    tyErrId = openOutputFile();
	    if (tyErrId != UIMA_ERR_NONE) {
	      return tyErrId;
	    }
	  }

	  // Output Style
	  int iOutputStyle = 0;
	  tyErrId = rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_PARAM_STYLE, iOutputStyle);
	  if (tyErrId != UIMA_ERR_CONFIG_OPTION_NOT_FOUND) {
	    switch (iOutputStyle) {
	    case 0:
	      iv_enOutputStyle = Xml;
	      break;
	    case 1:
	      iv_enOutputStyle = XCas;
	      break;
	    default:
	      getAnnotatorContext().getLogger().logWarning(
	        "Invalid Output Style. Use Default",
	        (long) ANNOTATOR_DUMP_MSG_STYLE);
	      break;
	    }
	  }

	  /*
	  int iDummy;
	  if ( rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_OUTPUT_TYPES, iDummy)){
	  getAnnotatorContext().getLogger().logWarning((long) ANNOTATOR_DUMP_MSG_TYPES,
	  _TEXT("Specification of output types currently not supported. All types will be dumped."));
	  }
	  */

	  // Test getting a multi-valued parameter

	  vector<string*> vecOutputTypes;
	  tyErrId = rclAnnotatorContext.extractValue(ANNOTATOR_DUMP_OUTPUT_TYPES, vecOutputTypes);
	  if (tyErrId != UIMA_ERR_CONFIG_OPTION_NOT_FOUND) {
	    size_t len = 0;
	    for (size_t i = 0; i < vecOutputTypes.size(); ++i) {
	      len += vecOutputTypes[i]->length();
	    }
	    cout << " parameter OutputTypes has "<<vecOutputTypes.size()<<" values with a total of "<<len<<" characters." << endl;
	    // Release contents of vector allocated by extractValue in case library uses a different heap.
	    rclAnnotatorContext.release(vecOutputTypes);
	  }

	  return(TyErrorId)UIMA_ERR_NONE;
	}


	/**
	 * Retrieves all types and all features of the type system into vectors and
	 * prints their number, plus the name of the first and last entry, to stdout.
	 *
	 * The first/last names are only printed when the respective vector is not
	 * empty, so an empty type system no longer indexes into an empty vector.
	 *
	 * @param typeSystem type system to inspect
	 * @return always UIMA_ERR_NONE
	 */
	TyErrorId AnnotatorDump::typeSystemInit(uima::TypeSystem const & typeSystem) {

	  // Test that can get all types in a vector
	  std::vector<Type> allTypes;
	  typeSystem.getAllTypes(allTypes);
	  cout << " typeSystem has " << allTypes.size() << " types -" << endl;
	  if (!allTypes.empty()) {
	    cout << " from '"
	    << allTypes.front().getName() << "' to '" << allTypes.back().getName() << "'" << endl;
	  }
	  typeSystem.release(allTypes); // Release storage allocated from library's heap

	  // Same check for the features
	  std::vector<Feature> allFeats;
	  typeSystem.getAllFeatures(allFeats);
	  cout << " typeSystem has " << allFeats.size() << " features -" << endl;
	  if (!allFeats.empty()) {
	    cout << " from '"
	    << allFeats.front().getName() << "' to '" << allFeats.back().getName() << "'" << endl;
	  }
	  typeSystem.release(allFeats); // Release storage allocated from library's heap

	  return UIMA_ERR_NONE;
	}


	/**
	 * Opens iv_clOutputStream on the configured output file name, using the
	 * stream's default open mode.
	 *
	 * @return UIMA_ERR_NONE if the stream is good after opening; otherwise the
	 *         failure is logged and UIMA_ERR_USER_ANNOTATOR_IO_WRITE_PROBLEM
	 *         is returned.
	 */
	TyErrorId
	AnnotatorDump::openOutputFile() {
	  iv_clOutputStream.open(iv_clOutputFilename.getAsCString()); //overwrite

	  if (!iv_clOutputStream.good()) {
	    // Report which file could not be opened before signalling the failure.
	    getAnnotatorContext().getLogger().logError(
	      string("Failed to open Output File ")+ iv_clOutputFilename.getAsCString(),
	      (long) ANNOTATOR_DUMP_ERROR_OPEN);
	    return UIMA_ERR_USER_ANNOTATOR_IO_WRITE_PROBLEM;
	  }

	  return UIMA_ERR_NONE;
	}

	/// Closes the output stream that openOutputFile() opened.
	void
	AnnotatorDump::closeOutputFile() {
	  iv_clOutputStream.close();
	}

	/**
	 * Shuts the annotator down.
	 *
	 * In append mode the output file was opened in initialize() and is closed
	 * here; otherwise process() opens and closes it itself for every document.
	 *
	 * @return always UIMA_ERR_NONE
	 */
	TyErrorId
	AnnotatorDump::destroy() {
	  if (iv_bAppendFile) {
	    closeOutputFile();
	  }
	  return UIMA_ERR_NONE;
	}

	/**
	 * Reconfiguration hook; this annotator does nothing here.
	 *
	 * @return always UIMA_ERR_NONE
	 */
	TyErrorId
	AnnotatorDump::reconfigure() {
	  return UIMA_ERR_NONE;
	}

	/**
	 * Dumps the given CAS to the output file in the configured output style.
	 *
	 * When append mode is off the output file is opened at the start and closed
	 * at the end of every call; in append mode the stream opened by initialize()
	 * is reused. If DumpDocBuffer is set, the document text is additionally
	 * written via outputDocBuffer().
	 *
	 * @param tcas CAS to be written
	 * @return UIMA_ERR_NONE on success,
	 *         the error from openOutputFile() if the file cannot be opened,
	 *         UIMA_ERR_USER_ANNOTATOR_CONFIG_INVALID_PARAM if the stored output
	 *         style is not one of the known values
	 */
	TyErrorId
	AnnotatorDump::process(CAS & tcas,
	                       const ResultSpecification &
	                      ) {
	  // in append mode all data in a session/collection is dumped into one file
	  // otherwise the same dump file is deleted and rewritten for each document
	  // in the session/collection
	  if (!iv_bAppendFile) {
	    const TyErrorId tyErrId = openOutputFile();
	    if (tyErrId != UIMA_ERR_NONE) {
	      return tyErrId;
	    }
	  }

	  assert(iv_clOutputStream.good());

	  if (iv_bDumpDocBuffer) {
	    // Dumping the Document Buffer
	    UnicodeStringRef doc = tcas.getDocumentText();

	    outputDocBuffer(doc);
	  }

	  // The writer is owned by a smart pointer from the moment it is created.
	  unique_ptr<CASWriterABase> apWriter;
	  switch (iv_enOutputStyle) {
	  case Xml:
	    apWriter.reset(new uima::XMLDumpWriter(tcas, iv_bDumpDocBuffer));
	    break;
	  case XCas:
	    apWriter.reset(new uima::XCASWriter(tcas, iv_bDumpDocBuffer));
	    break;
	  default:
	    assert(false);
	    break;
	  }

	  // With assertions compiled out an unknown style used to fall through with
	  // a null writer and crash on the write below; report an error instead.
	  if (!apWriter) {
	    getAnnotatorContext().getLogger().logError(
	      "Unsupported Output Style",
	      (long) ANNOTATOR_DUMP_MSG_STYLE);
	    if (!iv_bAppendFile) {
	      closeOutputFile();
	    }
	    return UIMA_ERR_USER_ANNOTATOR_CONFIG_INVALID_PARAM;
	  }

	  apWriter->write(iv_clOutputStream);

	  // see above: without append mode the file is closed after every document
	  if (!iv_bAppendFile) {
	    closeOutputFile();
	  }

	  return(TyErrorId)UIMA_ERR_NONE;
	}

	/**
	 * Writes the document buffer next to the main output file.
	 *
	 * A status summary and hex dump go to a file with the output file's name and
	 * the extension ".asc" (appended to in append mode, overwritten otherwise).
	 * If SaveDocBuffer is set, the raw UChar buffer is additionally written in
	 * binary mode to a file with the extension ".ucs".
	 *
	 * @param crclDoc document text to dump; asserted to be non-empty
	 */
	void AnnotatorDump::outputDocBuffer(UnicodeStringRef const & crclDoc) {
	  assert(crclDoc.length() > 0);
	  uima::util::Filename clFilename(iv_clOutputFilename);

	  // ---- textual status + hex dump (.asc) ----
	  ofstream clDumpStream;
	  clFilename.setNewExtension(_TEXT(".asc"));
	  if (iv_bAppendFile) {
	    // open mode flags are combined with a plain bitwise OR
	    clDumpStream.open(clFilename.getAsCString(), ios::out | ios::app); //append (create, resp.)
	  } else {
	    clDumpStream.open(clFilename.getAsCString()); //overwrite
	  }

	  // Check the stream that was just opened. The previous code tested
	  // iv_clOutputStream here, which says nothing about the .asc file.
	  if (clDumpStream.good()) {
	    TyDocIndex tyIndexFirst = 0;
	    TyDocIndex tyIndexLast = crclDoc.length() - 1;

	    clDumpStream << "Document Buffer Status:" << endl
	    << "=======================" << endl;

	    clDumpStream << endl
	    << "Document buffer length...........: " << crclDoc.length() << endl
	    << "Document buffer size in memory...: " << (crclDoc.length() * sizeof(UChar)) << endl
	    << "Document buffer index first......: " << tyIndexFirst << endl
	    << "Document buffer index last.......: " << tyIndexLast << endl;

	    clDumpStream << endl
	    << "Document Buffer Dump:" << endl
	    << "=====================" << endl;
	    assertWithMsg(sizeof(WORD16) == sizeof(UChar), "Port required");
	    DUMPHEX(clDumpStream, (WORD16 const *)crclDoc.getBuffer(), crclDoc.length());
	    clDumpStream << endl;
	  }

	  // ---- raw buffer (.ucs) ----
	  if (iv_bSaveDocBuffer) {
	    // Distinct name: this stream used to shadow the .asc stream above.
	    ofstream clRawStream;

	    clFilename.setNewExtension(_TEXT(".ucs"));

	    clRawStream.open(clFilename.getAsCString(), ios::binary); //overwrite
	    /** include byte-order-mark ??
	    clRawStream << CosClConverterABase::getUCS2HostEndianId();
	    **/
	    // byte count = number of UChar units times the size of one unit
	    clRawStream.write((const char *) crclDoc.getBuffer(), (crclDoc.length() * sizeof(UChar)));
	  }
	}


	/* ----------------------------------------------------------------------- */
	/* Mapping for generic C API wrapper */
	/* ----------------------------------------------------------------------- */

	// Alias under which the generic wrapper code refers to this annotator class.
	using UserDefinedAnnotator = AnnotatorDump;
	// Annotator name used for error/exception info in annotator_generic.inl
	#define UIMA_ANNOTATOR_NAME "annotator_dump"

	/* ----------------------------------------------------------------------- */
	/* Include generic C API wrapper */
	/* ----------------------------------------------------------------------- */

	// The generic wrapper include is disabled; MAKE_AE is used instead
	// (presumably it generates the annotator entry points - see the uima headers).
	///#include "uima/annotator_generic.inl"
	MAKE_AE(AnnotatorDump);
	/* <EOF> */