src/test/src/test_language.cpp - uima-uimacpp - Git at Google

 /** @name uimatest_language.cpp

  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.

 -------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------- */
 /*       Include dependencies                                              */
 /* ----------------------------------------------------------------------- */

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <iomanip>
 #include <iostream>
 #include <string>
 using namespace std;

 #include "uima/assertmsg.h"
 #include "uima/consoleui.hpp"

 #include "uima/ftools.hpp"
 #include "uima/err_ids.h"
 #include "uima/strtools.hpp"
 #include "uima/strconvert.hpp"
 #include "uima/language.hpp"

 using namespace uima;

 /* ----------------------------------------------------------------------- */
 /*       Constants                                                         */
 /* ----------------------------------------------------------------------- */

 // Title string handed to the util::ConsoleUI constructor in main().
 #define MAIN_TITLE            _TEXT("Language Class Unit Tester")

 // One-line usage synopsis; passed as the first argument to
 // ConsoleUI::handleUsageHelp() in main().
 const TCHAR * gs_szUsage = _TEXT("\t[--verbose]"
                                 );

 // Longer help text; passed as the second argument to
 // ConsoleUI::handleUsageHelp() in main().
 static const TCHAR *          gs_szHelp = _TEXT("\t"
     "Perform some test with the Language class.\n\t"
     "\n\t"
     "[--verbose]                 verbose display mode\n\t"
                                                );

 // Set in main() from the "verbose" command-line switch and forwarded to
 // mainTest(), where it enables printing of intermediate results.
 static bool                   gs_bVerbose = false;

 // Test assertion: when x evaluates to false, report the failed expression
 // together with file and line on stderr and terminate the process with exit
 // status 1. The body is wrapped in do { } while (0) so that "ASSERT(x);"
 // behaves as exactly one statement, including inside an unbraced if/else.
 #define ASSERT(x) do { if (!(x)) { std::cerr << "FAILED ASSERTION '" << UIMA_STRINGIFY(x) \
  << "' in " << __FILE__ << " at line " << __LINE__ << std::endl; exit(1); } } while (0)

 /* ----------------------------------------------------------------------- */
 /*       Types / Classes                                                   */
 /* ----------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------- */
 /*       Implementation                                                    */
 /* ----------------------------------------------------------------------- */

 /**
  * Print a one-line description of a Language object.
  *
  * The numeric form of the language (hexadecimal and decimal) is passed as
  * the first argument to ConsoleUI::format(); the language code, optionally
  * followed by " - " and the territory code, is passed as the second.
  *
  * @param rclConsole console object that receives the formatted output
  * @param rclLang    language object to describe
  */
 void mainDisplay(util::ConsoleUI & rclConsole, Language & rclLang)
 /* ----------------------------------------------------------------------- */
 {
   char   s1[50];
   const int id = rclLang.asNumber();
   // snprintf keeps the write inside s1; %x expects an unsigned argument,
   // so the value is converted explicitly for the hexadecimal field.
   snprintf(s1, sizeof(s1), "0x%08x %d", static_cast<unsigned int>(id), id);

   // "**" is shown in place of the language code when none is set.
   string str2;
   if (rclLang.hasLanguage())
     str2 = rclLang.getLanguageCode();
   else
     str2 = "**";
   if ( rclLang.hasTerritory() )
     str2 += " - " + static_cast<string>(rclLang.getTerritoryCode());

   rclConsole.format(s1, str2.c_str());
 }

 /**
  * Display an array of UTF-16 code units as a double-quoted string.
  *
  * Code units below 128 are emitted as the corresponding single character;
  * all others are emitted as a backslash-u escape with at least four
  * lowercase hex digits. The text is accumulated in a std::string, so the
  * output length is not limited by a fixed-size buffer.
  *
  * @param rclConsole console object that receives the formatted output
  * @param tag        passed as the first argument to ConsoleUI::format()
  * @param ucbuff     code units to display; need not be zero-terminated
  * @param len        number of code units to read from ucbuff
  */
 void ucharDisplay(util::ConsoleUI & rclConsole, const char* tag, const UChar* ucbuff, int len) {
   std::string text;
   text += '\"';
   for ( int i = 0; i < len; ++i ) {
     const unsigned int codeUnit = ucbuff[i];
     if (codeUnit < 128) {
       text += static_cast<char>(codeUnit);
     } else {
       // backslash + 'u' + 4 hex digits + terminating NUL fits in 8 bytes
       // for any 16-bit code unit
       char escape[8];
       snprintf(escape, sizeof(escape), "\\u%04x", codeUnit);
       text += escape;
     }
   }
   text += '\"';
   rclConsole.format(tag, text.c_str());
 }


 /**
  * Run all checks of this test program: construction, validity, matching and
  * comparison of Language objects, followed by the copy and conversion
  * functions of UnicodeStringRef.
  *
  * Every check uses the ASSERT macro defined in this file, which prints the
  * failing expression and calls exit(1); a failed check therefore never
  * returns to the caller.
  *
  * @param rclConsole console object used for all progress output
  * @param bVerbose   when true, intermediate results are printed as well
  * @return enErrorId, which is initialized to UIMA_ERR_NONE and is not
  *         modified anywhere in this function
  */
 TyErrorId
 mainTest(
   util::ConsoleUI &       rclConsole,
   bool                  bVerbose
 ) {
   TyErrorId               enErrorId = UIMA_ERR_NONE;

   rclConsole.formatHeader(_TEXT("Performing language tests:"));

   // Inputs expected to be accepted. "En_us" differs from "en-US" only in
   // letter case and separator; the two are asserted equal further below.
   // clTestLang6 is default-constructed (no language string given).
   Language clTestLang1("en-US");
   Language clTestLang2("En_us");
   Language clTestLang3("en-GB");
   Language clTestLang4("en");
   Language clTestLang5("fr");
   Language clTestLang6;

   // Inputs expected to be rejected (each is asserted !isValid() below)
   Language clInvalid1("foo");
   Language clInvalid2("fu-bar");
   Language clInvalid3("f0-ba");
   Language clInvalid4("en us");
   Language clInvalid5("és-ca");

   ASSERT( !clInvalid1.isValid());
   ASSERT( !clInvalid2.isValid());
   ASSERT( !clInvalid3.isValid());
   ASSERT( !clInvalid4.isValid());
   ASSERT( !clInvalid5.isValid());
   ASSERT( clTestLang1.isValid());
   ASSERT( clTestLang3.hasTerritory());
   ASSERT(!clTestLang4.hasTerritory());

   // Show the numeric and textual form of each test language
   mainDisplay ( rclConsole, clTestLang1 );
   mainDisplay ( rclConsole, clTestLang2 );
   mainDisplay ( rclConsole, clTestLang3 );
   mainDisplay ( rclConsole, clTestLang4 );
   mainDisplay ( rclConsole, clTestLang5 );
   mainDisplay ( rclConsole, clTestLang6 );

   rclConsole.format("construct tests", "OK");

   // Test the prefix-match concept, e.g. "en" matches "en-US"
   // (the default-constructed clTestLang6 is expected to match as well)
   ASSERT(!clTestLang1.matches(clTestLang3));
   ASSERT( clTestLang1.matches(clTestLang2));
   ASSERT( clTestLang1.matches(clTestLang4));
   ASSERT(!clTestLang1.matches(clTestLang5));
   ASSERT( clTestLang1.matches(clTestLang6));

   rclConsole.format("match tests", "OK");

   // Test the comparison operators
   // (expected ordering: "en" < "en-GB" < "en-US")
   ASSERT(clTestLang1 == clTestLang2);
   ASSERT(clTestLang1 != clTestLang3);
   ASSERT(clTestLang4 < clTestLang3);
   ASSERT(clTestLang3 < clTestLang1);

   rclConsole.format("comparison tests", "OK");

   // Test the construction from the numeric form
   Language clTestLang7(clTestLang1.asNumber());
   ASSERT(clTestLang7 == clTestLang1);

   // Test the construction of one from just the language part of another
   Language clTestLang8(clTestLang1.getLanguageCode());
   ASSERT(clTestLang1.matches(clTestLang8));
   ASSERT(clTestLang8 == clTestLang4);

   // Same test but construct from the numeric form of the language
   Language clTestLang9(clTestLang1.getLanguage());
   ASSERT(clTestLang1.matches(clTestLang9));
   ASSERT(clTestLang9 == clTestLang4);

   rclConsole.format("numeric construct tests", "OK");

   //------------------------------------------------------
   // Test some UnicodeStringRef copy functions
   //------------------------------------------------------

   int len;
   UChar ucbuf[100];
   UErrorCode err;
   std::string ss;

   // 37 characters: 'z' is at index 25, ' ' at 26 and '0' at 27.
   // The index-based checks below depend on this exact text.
   const char* cstr = "abcdefghijklmnopqrstuvyzyz 0123456789";
   icu::UnicodeString us1(cstr);
   UnicodeStringRef usr(us1);

   // into std::string with default conversion
   usr.extract(ss);
   if (bVerbose) rclConsole.format("extract to std::string",ss.c_str());
   ASSERT (strlen(cstr) == ss.length());

   // substring into icu::UnicodeString
   // (us2 starts with other content, so the check also shows it was replaced)
   icu::UnicodeString us2("initialValue");
   icu::UnicodeString us3("z 0");
   usr.extractBetween(25,28,us2);               // Should be "z 0"
   if (bVerbose) ucharDisplay(rclConsole, "extractBetween into UChar buffer", us2.getBuffer(), us2.length());
   ASSERT ( us2 == us3 );

   // substring into UChar buffer
   err = U_ZERO_ERROR;
   len = us1.extract(ucbuf,100,err);         // Pre-fill buffer

   // Copy 3 code units starting at source index 27 ("012") to offset 5 of
   // the pre-filled buffer; its first 13 code units become "abcde012ijklm".
   usr.extract(27,3,ucbuf,5);                // extract part of USR into the buffer
   icu::UnicodeString us4("abcde012ijklm");
   if (bVerbose) ucharDisplay(rclConsole, "extract into UChar buffer", ucbuf, 13);
   ASSERT ( us4.compare(ucbuf,13) == 0 );

   // extract into too-small UChar buffer
   // (the string has 37 code units, so the capacities 36, 37 and 38 probe
   // overflow, missing terminator and exact fit respectively)
   err = U_ZERO_ERROR;
   len = usr.extract(ucbuf,36,err);                // too small
   ASSERT(err == U_BUFFER_OVERFLOW_ERROR);
   err = U_ZERO_ERROR;
   len = usr.extract(ucbuf,37,err);                // no room for final 0
   ASSERT(err == U_STRING_NOT_TERMINATED_WARNING);
   err = U_ZERO_ERROR;
   len = usr.extract(ucbuf,38,err);                // just right
   ASSERT(err == U_ZERO_ERROR);
   if (bVerbose) ucharDisplay(rclConsole, "extract into UChar buffer", ucbuf, len);
   ASSERT ( us1.compare(ucbuf,len) == 0 );


   //------------------------------------------------------
   // Test some UnicodeStringRef conversion functions
   //------------------------------------------------------

   // Create an icu::UnicodeString with 8 Arabic characters followed by a blank.
   // Also get the utf-8 form as a reference
   UChar uc[] = {0x062c, 0x0648, 0x0631, 0x062c, 0x062a, 0x0627, 0x0648, 0x0646, 0x0020};
   int nchars = sizeof(uc)/sizeof(UChar);
   icu::UnicodeString US1(uc, nchars);
   char u8[100];
   US1.extract(0, US1.length(), u8, 100, "utf-8");

   // Create two UnicodeStringRef and compare them
   UnicodeStringRef USR1(US1);
   UnicodeStringRef USR2(uc, nchars);
   if (bVerbose) ucharDisplay(rclConsole, "Construct from icu::UnicodeString", USR1.getBuffer(), USR1.length());
   if (bVerbose) ucharDisplay(rclConsole, "Construct from UChar array",   USR2.getBuffer(), USR2.length());
   ASSERT ( USR1.compare(US1) == 0 );

   // Extract into a buffer using the utf-8 converter
   char cbuf[100];
   *cbuf = 0;
   len = USR1.extract(0, USR1.length(), cbuf, 100, "utf-8");
   if (bVerbose) rclConsole.format("extract into buffer with utf-8 converter", cbuf);
   ASSERT ( strcmp(u8,cbuf) == 0 );

   // Extract into a string using the utf-8 converter
   USR1.extract(ss, "utf-8");
   if (bVerbose) rclConsole.format("extract into string with utf-8 converter", ss.c_str());
   ASSERT ( strlen(u8) == ss.length() );
   ASSERT ( strncmp(u8,ss.data(),strlen(u8)) == 0 );

   // Test the "re-try when overflows" logic in unistrref.cpp
   // Create a string that converts to >255 chars
   // (US2 ends up holding 16 copies of uc: the initial one plus 15 appended;
   // each copy is 8 two-byte characters plus a blank = 17 bytes of UTF-8,
   // 272 bytes in total)
   icu::UnicodeString US2(uc, nchars);
   for ( int i=0; i < 15; ++i )
     US2.append(uc, nchars);

   // Extract the 12th repeat
   // (copies 0..10 precede it, hence the start offset 11*nchars)
   UnicodeStringRef USR3(US2);
   USR3.extract(11*nchars, nchars, ss, "utf-8");
   if (bVerbose) rclConsole.format("extract part into string with utf-8 converter", ss.c_str());
   ASSERT ( strlen(u8) == ss.length() );

   // Extract all to string with converter
   USR3.extract(ss, "utf-8");
   ASSERT ( 16*strlen(u8) == ss.length() );

   // Explicit extract to utf-8 (no ICU converter)
   std::string ss2;
   USR3.extractUTF8(ss2);
   ASSERT ( ss2 == ss );

   // Convert to utf-8 (no ICU converter)
   std::string ss3 = USR3.asUTF8();
   ASSERT ( ss3 == ss );

   rclConsole.format("UnicodeStringRef tests", "OK");

   return enErrorId;
 }

 int main(int argc, char * argv[]) /*
 ---------------------------------- */
 {
   util::ConsoleUI         clConsole(argc, argv, MAIN_TITLE, "");
   TyErrorId               enErrorId;

   clConsole.handleUsageHelp(gs_szUsage, gs_szHelp);
   gs_bVerbose = clConsole.hasArgSwitch(_TEXT("verbose"));

   enErrorId = mainTest(clConsole, gs_bVerbose);

   if (enErrorId == UIMA_ERR_NONE) {
     clConsole.info(_TEXT("The program terminated successfully."));
   } else {
     clConsole.info(_TEXT("The program terminated with an error."));
   }
   return(int) enErrorId;
 } //lint !e529 rclResMgr not subsequently referenced

 /* <EOF> */
	/** @name uimatest_language.cpp

	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.

	-------------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------- */
	/* Include dependencies */
	/* ----------------------------------------------------------------------- */

	#include <stdlib.h>
	#include <string.h>
	#include <iostream>
	#include <iomanip>
	using namespace std;

	#include "uima/assertmsg.h"
	#include "uima/consoleui.hpp"

	#include "uima/ftools.hpp"
	#include "uima/err_ids.h"
	#include "uima/strtools.hpp"
	#include "uima/strconvert.hpp"
	#include "uima/language.hpp"

	using namespace uima;

	/* ----------------------------------------------------------------------- */
	/* Constants */
	/* ----------------------------------------------------------------------- */

	// Title string handed to the util::ConsoleUI constructor in main().
	#define MAIN_TITLE _TEXT("Language Class Unit Tester")

	// One-line usage synopsis; passed as the first argument to
	// ConsoleUI::handleUsageHelp() in main().
	const TCHAR * gs_szUsage = _TEXT("\t[--verbose]"
	);

	// Longer help text; passed as the second argument to
	// ConsoleUI::handleUsageHelp() in main().
	static const TCHAR * gs_szHelp = _TEXT("\t"
	"Perform some test with the Language class.\n\t"
	"\n\t"
	"[--verbose] verbose display mode\n\t"
	);

	// Set in main() from the "verbose" command-line switch and forwarded to
	// mainTest(), where it enables printing of intermediate results.
	static bool gs_bVerbose = false;

	// Test assertion: when x evaluates to false, report the failed expression
	// together with file and line on stderr and terminate the process with exit
	// status 1. The body is wrapped in do { } while (0) so that "ASSERT(x);"
	// behaves as exactly one statement, including inside an unbraced if/else.
	#define ASSERT(x) do { if (!(x)) { std::cerr << "FAILED ASSERTION '" << UIMA_STRINGIFY(x) \
	<< "' in " << __FILE__ << " at line " << __LINE__ << std::endl; exit(1); } } while (0)

	/* ----------------------------------------------------------------------- */
	/* Types / Classes */
	/* ----------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------- */
	/* Implementation */
	/* ----------------------------------------------------------------------- */

	/**
	 * Print a one-line description of a Language object.
	 *
	 * The numeric form of the language (hexadecimal and decimal) is passed as
	 * the first argument to ConsoleUI::format(); the language code, optionally
	 * followed by " - " and the territory code, is passed as the second.
	 *
	 * @param rclConsole console object that receives the formatted output
	 * @param rclLang    language object to describe
	 */
	void mainDisplay(util::ConsoleUI & rclConsole, Language & rclLang)
	/* ----------------------------------------------------------------------- */
	{
	  char s1[50];
	  const int id = rclLang.asNumber();
	  // snprintf keeps the write inside s1; %x expects an unsigned argument,
	  // so the value is converted explicitly for the hexadecimal field.
	  snprintf(s1, sizeof(s1), "0x%08x %d", static_cast<unsigned int>(id), id);

	  // "**" is shown in place of the language code when none is set.
	  string str2;
	  if (rclLang.hasLanguage())
	    str2 = rclLang.getLanguageCode();
	  else
	    str2 = "**";
	  if ( rclLang.hasTerritory() )
	    str2 += " - " + static_cast<string>(rclLang.getTerritoryCode());

	  rclConsole.format(s1, str2.c_str());
	}

	/**
	 * Display an array of UTF-16 code units as a double-quoted string.
	 *
	 * Code units below 128 are emitted as the corresponding single character;
	 * all others are emitted as a backslash-u escape with at least four
	 * lowercase hex digits. The text is accumulated in a std::string, so the
	 * output length is not limited by a fixed-size buffer.
	 *
	 * @param rclConsole console object that receives the formatted output
	 * @param tag        passed as the first argument to ConsoleUI::format()
	 * @param ucbuff     code units to display; need not be zero-terminated
	 * @param len        number of code units to read from ucbuff
	 */
	void ucharDisplay(util::ConsoleUI & rclConsole, const char* tag, const UChar* ucbuff, int len) {
	  std::string text;
	  text += '\"';
	  for ( int i = 0; i < len; ++i ) {
	    const unsigned int codeUnit = ucbuff[i];
	    if (codeUnit < 128) {
	      text += static_cast<char>(codeUnit);
	    } else {
	      // backslash + 'u' + 4 hex digits + terminating NUL fits in 8 bytes
	      // for any 16-bit code unit
	      char escape[8];
	      snprintf(escape, sizeof(escape), "\\u%04x", codeUnit);
	      text += escape;
	    }
	  }
	  text += '\"';
	  rclConsole.format(tag, text.c_str());
	}


	/**
	 * Run all checks of this test program: construction, validity, matching and
	 * comparison of Language objects, followed by the copy and conversion
	 * functions of UnicodeStringRef.
	 *
	 * Every check uses the ASSERT macro defined in this file, which prints the
	 * failing expression and calls exit(1); a failed check therefore never
	 * returns to the caller.
	 *
	 * @param rclConsole console object used for all progress output
	 * @param bVerbose   when true, intermediate results are printed as well
	 * @return enErrorId, which is initialized to UIMA_ERR_NONE and is not
	 *         modified anywhere in this function
	 */
	TyErrorId
	mainTest(
	util::ConsoleUI & rclConsole,
	bool bVerbose
	) {
	TyErrorId enErrorId = UIMA_ERR_NONE;

	rclConsole.formatHeader(_TEXT("Performing language tests:"));

	// Inputs expected to be accepted. "En_us" differs from "en-US" only in
	// letter case and separator; the two are asserted equal further below.
	// clTestLang6 is default-constructed (no language string given).
	Language clTestLang1("en-US");
	Language clTestLang2("En_us");
	Language clTestLang3("en-GB");
	Language clTestLang4("en");
	Language clTestLang5("fr");
	Language clTestLang6;

	// Inputs expected to be rejected (each is asserted !isValid() below)
	Language clInvalid1("foo");
	Language clInvalid2("fu-bar");
	Language clInvalid3("f0-ba");
	Language clInvalid4("en us");
	Language clInvalid5("és-ca");

	ASSERT( !clInvalid1.isValid());
	ASSERT( !clInvalid2.isValid());
	ASSERT( !clInvalid3.isValid());
	ASSERT( !clInvalid4.isValid());
	ASSERT( !clInvalid5.isValid());
	ASSERT( clTestLang1.isValid());
	ASSERT( clTestLang3.hasTerritory());
	ASSERT(!clTestLang4.hasTerritory());

	// Show the numeric and textual form of each test language
	mainDisplay ( rclConsole, clTestLang1 );
	mainDisplay ( rclConsole, clTestLang2 );
	mainDisplay ( rclConsole, clTestLang3 );
	mainDisplay ( rclConsole, clTestLang4 );
	mainDisplay ( rclConsole, clTestLang5 );
	mainDisplay ( rclConsole, clTestLang6 );

	rclConsole.format("construct tests", "OK");

	// Test the prefix-match concept, e.g. "en" matches "en-US"
	// (the default-constructed clTestLang6 is expected to match as well)
	ASSERT(!clTestLang1.matches(clTestLang3));
	ASSERT( clTestLang1.matches(clTestLang2));
	ASSERT( clTestLang1.matches(clTestLang4));
	ASSERT(!clTestLang1.matches(clTestLang5));
	ASSERT( clTestLang1.matches(clTestLang6));

	rclConsole.format("match tests", "OK");

	// Test the comparison operators
	// (expected ordering: "en" < "en-GB" < "en-US")
	ASSERT(clTestLang1 == clTestLang2);
	ASSERT(clTestLang1 != clTestLang3);
	ASSERT(clTestLang4 < clTestLang3);
	ASSERT(clTestLang3 < clTestLang1);

	rclConsole.format("comparison tests", "OK");

	// Test the construction from the numeric form
	Language clTestLang7(clTestLang1.asNumber());
	ASSERT(clTestLang7 == clTestLang1);

	// Test the construction of one from just the language part of another
	Language clTestLang8(clTestLang1.getLanguageCode());
	ASSERT(clTestLang1.matches(clTestLang8));
	ASSERT(clTestLang8 == clTestLang4);

	// Same test but construct from the numeric form of the language
	Language clTestLang9(clTestLang1.getLanguage());
	ASSERT(clTestLang1.matches(clTestLang9));
	ASSERT(clTestLang9 == clTestLang4);

	rclConsole.format("numeric construct tests", "OK");

	//------------------------------------------------------
	// Test some UnicodeStringRef copy functions
	//------------------------------------------------------

	int len;
	UChar ucbuf[100];
	UErrorCode err;
	std::string ss;

	// 37 characters: 'z' is at index 25, ' ' at 26 and '0' at 27.
	// The index-based checks below depend on this exact text.
	const char* cstr = "abcdefghijklmnopqrstuvyzyz 0123456789";
	icu::UnicodeString us1(cstr);
	UnicodeStringRef usr(us1);

	// into std::string with default conversion
	usr.extract(ss);
	if (bVerbose) rclConsole.format("extract to std::string",ss.c_str());
	ASSERT (strlen(cstr) == ss.length());

	// substring into icu::UnicodeString
	// (us2 starts with other content, so the check also shows it was replaced)
	icu::UnicodeString us2("initialValue");
	icu::UnicodeString us3("z 0");
	usr.extractBetween(25,28,us2); // Should be "z 0"
	if (bVerbose) ucharDisplay(rclConsole, "extractBetween into UChar buffer", us2.getBuffer(), us2.length());
	ASSERT ( us2 == us3 );

	// substring into UChar buffer
	err = U_ZERO_ERROR;
	len = us1.extract(ucbuf,100,err); // Pre-fill buffer

	// Copy 3 code units starting at source index 27 ("012") to offset 5 of
	// the pre-filled buffer; its first 13 code units become "abcde012ijklm".
	usr.extract(27,3,ucbuf,5); // extract part of USR into the buffer
	icu::UnicodeString us4("abcde012ijklm");
	if (bVerbose) ucharDisplay(rclConsole, "extract into UChar buffer", ucbuf, 13);
	ASSERT ( us4.compare(ucbuf,13) == 0 );

	// extract into too-small UChar buffer
	// (the string has 37 code units, so the capacities 36, 37 and 38 probe
	// overflow, missing terminator and exact fit respectively)
	err = U_ZERO_ERROR;
	len = usr.extract(ucbuf,36,err); // too small
	ASSERT(err == U_BUFFER_OVERFLOW_ERROR);
	err = U_ZERO_ERROR;
	len = usr.extract(ucbuf,37,err); // no room for final 0
	ASSERT(err == U_STRING_NOT_TERMINATED_WARNING);
	err = U_ZERO_ERROR;
	len = usr.extract(ucbuf,38,err); // just right
	ASSERT(err == U_ZERO_ERROR);
	if (bVerbose) ucharDisplay(rclConsole, "extract into UChar buffer", ucbuf, len);
	ASSERT ( us1.compare(ucbuf,len) == 0 );


	//------------------------------------------------------
	// Test some UnicodeStringRef conversion functions
	//------------------------------------------------------

	// Create an icu::UnicodeString with 8 Arabic characters followed by a blank.
	// Also get the utf-8 form as a reference
	UChar uc[] = {0x062c, 0x0648, 0x0631, 0x062c, 0x062a, 0x0627, 0x0648, 0x0646, 0x0020};
	int nchars = sizeof(uc)/sizeof(UChar);
	icu::UnicodeString US1(uc, nchars);
	char u8[100];
	US1.extract(0, US1.length(), u8, 100, "utf-8");

	// Create two UnicodeStringRef and compare them
	UnicodeStringRef USR1(US1);
	UnicodeStringRef USR2(uc, nchars);
	if (bVerbose) ucharDisplay(rclConsole, "Construct from icu::UnicodeString", USR1.getBuffer(), USR1.length());
	if (bVerbose) ucharDisplay(rclConsole, "Construct from UChar array", USR2.getBuffer(), USR2.length());
	ASSERT ( USR1.compare(US1) == 0 );

	// Extract into a buffer using the utf-8 converter
	char cbuf[100];
	*cbuf = 0;
	len = USR1.extract(0, USR1.length(), cbuf, 100, "utf-8");
	if (bVerbose) rclConsole.format("extract into buffer with utf-8 converter", cbuf);
	ASSERT ( strcmp(u8,cbuf) == 0 );

	// Extract into a string using the utf-8 converter
	USR1.extract(ss, "utf-8");
	if (bVerbose) rclConsole.format("extract into string with utf-8 converter", ss.c_str());
	ASSERT ( strlen(u8) == ss.length() );
	ASSERT ( strncmp(u8,ss.data(),strlen(u8)) == 0 );

	// Test the "re-try when overflows" logic in unistrref.cpp
	// Create a string that converts to >255 chars
	// (US2 ends up holding 16 copies of uc: the initial one plus 15 appended;
	// each copy is 8 two-byte characters plus a blank = 17 bytes of UTF-8,
	// 272 bytes in total)
	icu::UnicodeString US2(uc, nchars);
	for ( int i=0; i < 15; ++i )
	US2.append(uc, nchars);

	// Extract the 12th repeat
	// (copies 0..10 precede it, hence the start offset 11*nchars)
	UnicodeStringRef USR3(US2);
	USR3.extract(11*nchars, nchars, ss, "utf-8");
	if (bVerbose) rclConsole.format("extract part into string with utf-8 converter", ss.c_str());
	ASSERT ( strlen(u8) == ss.length() );

	// Extract all to string with converter
	USR3.extract(ss, "utf-8");
	ASSERT ( 16*strlen(u8) == ss.length() );

	// Explicit extract to utf-8 (no ICU converter)
	std::string ss2;
	USR3.extractUTF8(ss2);
	ASSERT ( ss2 == ss );

	// Convert to utf-8 (no ICU converter)
	std::string ss3 = USR3.asUTF8();
	ASSERT ( ss3 == ss );

	rclConsole.format("UnicodeStringRef tests", "OK");

	return enErrorId;
	}

	int main(int argc, char * argv[]) /*
	---------------------------------- */
	{
	util::ConsoleUI clConsole(argc, argv, MAIN_TITLE, "");
	TyErrorId enErrorId;

	clConsole.handleUsageHelp(gs_szUsage, gs_szHelp);
	gs_bVerbose = clConsole.hasArgSwitch(_TEXT("verbose"));

	enErrorId = mainTest(clConsole, gs_bVerbose);

	if (enErrorId == UIMA_ERR_NONE) {
	clConsole.info(_TEXT("The program terminated successfully."));
	} else {
	clConsole.info(_TEXT("The program terminated with an error."));
	}
	return(int) enErrorId;
	} //lint !e529 rclResMgr not subsequently referenced

	/* <EOF> */