blob: 3e740703be9ddc5651aaa765c6e7acc8ccd6bd46 [file] [log] [blame]
/** @name uimatest_language.cpp
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Include dependencies */
/* ----------------------------------------------------------------------- */
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <iomanip>
using namespace std;
#include "uima/assertmsg.h"
#include "uima/consoleui.hpp"
#include "uima/ftools.hpp"
#include "uima/err_ids.h"
#include "uima/strtools.hpp"
#include "uima/strconvert.hpp"
#include "uima/language.hpp"
using namespace uima;
/* ----------------------------------------------------------------------- */
/* Constants */
/* ----------------------------------------------------------------------- */
/* Title string handed to the util::ConsoleUI constructor in main(). */
#define MAIN_TITLE _TEXT("Language Class Unit Tester")
/* Usage line passed as the first argument of ConsoleUI::handleUsageHelp() in main(). */
const TCHAR * gs_szUsage = _TEXT("\t[--verbose]"
);
/* Help text passed as the second argument of ConsoleUI::handleUsageHelp() in main(). */
static const TCHAR * gs_szHelp = _TEXT("\t"
"Perform some test with the Language class.\n\t"
"\n\t"
"[--verbose] verbose display mode\n\t"
);
/* Set in main() from the "verbose" command-line switch and forwarded to mainTest(). */
static bool gs_bVerbose = false;
/* Test assertion: when the condition x is false, write the stringified
 * expression together with __FILE__ and __LINE__ to cerr and terminate the
 * program with exit(1).
 * The body is wrapped in do { ... } while (0) so that "ASSERT(x);" is exactly
 * one statement; this keeps the macro safe inside an unbraced if/else. */
#define ASSERT(x) do { if (!(x)) { cerr << "FAILED ASSERTION '" << UIMA_STRINGIFY(x) \
<< "' in " << __FILE__ << " at line " << __LINE__ << endl; exit(1); } } while (0)
/* ----------------------------------------------------------------------- */
/* Types / Classes */
/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Implementation */
/* ----------------------------------------------------------------------- */
/* Show one Language object on the console.
 * The first argument given to ConsoleUI::format() is the value of asNumber()
 * rendered as "0x<8 hex digits> <decimal>"; the second is the language code
 * ("**" when hasLanguage() is false), followed by " - <territory code>" when
 * hasTerritory() is true. */
void mainDisplay(util::ConsoleUI & rclConsole, Language & rclLang)
{
  // Numeric form, printed both as hex and as decimal.
  const int iNumeric = rclLang.asNumber();
  char szNumeric[50];
  sprintf(szNumeric, "0x%08x %d", iNumeric, iNumeric);

  // Textual form: start from the placeholder and replace it if a language is set.
  string strCodes("**");
  if (rclLang.hasLanguage()) {
    strCodes = rclLang.getLanguageCode();
  }
  if (rclLang.hasTerritory()) {
    strCodes += " - ";
    strCodes += string(rclLang.getTerritoryCode());
  }

  rclConsole.format(szNumeric, strCodes.c_str());
}
// Display an array of Unicode characters as a double-quoted, ASCII-only string.
//
//   rclConsole  console whose format() receives the result
//   tag         passed unchanged as the first argument of format()
//   ucbuff      code units to display (only read when len > 0)
//   len         number of code units in ucbuff
//
// Code units below 128 are emitted as the corresponding single char; every
// other unit is written as a \uXXXX escape with four lowercase hex digits.
// The text is accumulated in a std::string rather than a fixed-size char
// array, so long inputs cannot overrun a stack buffer.
void ucharDisplay(util::ConsoleUI & rclConsole, const char* tag, const UChar* ucbuff, int len) {
  static const char hexDigits[] = "0123456789abcdef";

  std::string text;
  if (len > 0) {
    // Lower bound only: one char per unit plus the two quotes.
    text.reserve(static_cast<std::string::size_type>(len) + 2);
  }

  text += '"';
  for ( int i = 0; i < len; ++i ) {
    const unsigned int unit = ucbuff[i];
    if (unit < 128) {
      text += static_cast<char>(unit);
    } else {
      // Same output as the "\\u%04x" format: UChar is a 16-bit unit, so four
      // hex digits always cover the full value.
      text += "\\u";
      text += hexDigits[(unit >> 12) & 0xF];
      text += hexDigits[(unit >> 8) & 0xF];
      text += hexDigits[(unit >> 4) & 0xF];
      text += hexDigits[unit & 0xF];
    }
  }
  text += '"';

  rclConsole.format(tag, text.c_str());
}
/* Run the whole test sequence: Language construction, matching, comparison
 * and numeric round-trips, followed by UnicodeStringRef copy and conversion
 * checks.
 *
 *   rclConsole  console used for the section header, the "OK" lines and the
 *               optional verbose output
 *   bVerbose    when true, intermediate values are also displayed
 *
 * Every check uses the ASSERT macro defined in this file, which reports the
 * failed expression and calls exit(1); the function therefore only returns
 * when all checks passed. enErrorId is never modified, so the return value is
 * always UIMA_ERR_NONE. */
TyErrorId
mainTest(
util::ConsoleUI & rclConsole,
bool bVerbose
) {
TyErrorId enErrorId = UIMA_ERR_NONE;
rclConsole.formatHeader(_TEXT("Performing language tests:"));
// Language objects the assertions below treat as valid
// (clTestLang6 is default-constructed)
Language clTestLang1("en-US");
Language clTestLang2("En_us");
Language clTestLang3("en-GB");
Language clTestLang4("en");
Language clTestLang5("fr");
Language clTestLang6;
// Strings that are expected to be rejected (isValid() must be false)
Language clInvalid1("foo");
Language clInvalid2("fu-bar");
Language clInvalid3("f0-ba");
Language clInvalid4("en us");
Language clInvalid5("és-ca");
ASSERT( !clInvalid1.isValid());
ASSERT( !clInvalid2.isValid());
ASSERT( !clInvalid3.isValid());
ASSERT( !clInvalid4.isValid());
ASSERT( !clInvalid5.isValid());
ASSERT( clTestLang1.isValid());
ASSERT( clTestLang3.hasTerritory());
ASSERT(!clTestLang4.hasTerritory());
mainDisplay ( rclConsole, clTestLang1 );
mainDisplay ( rclConsole, clTestLang2 );
mainDisplay ( rclConsole, clTestLang3 );
mainDisplay ( rclConsole, clTestLang4 );
mainDisplay ( rclConsole, clTestLang5 );
mainDisplay ( rclConsole, clTestLang6 );
rclConsole.format("construct tests", "OK");
// Test the prefix-match concept, e.g. "en" matches "en-US"
ASSERT(!clTestLang1.matches(clTestLang3));
ASSERT( clTestLang1.matches(clTestLang2));
ASSERT( clTestLang1.matches(clTestLang4));
ASSERT(!clTestLang1.matches(clTestLang5));
// The default-constructed Language is expected to match as well
ASSERT( clTestLang1.matches(clTestLang6));
rclConsole.format("match tests", "OK");
// Test the comparison operators
// ("en-US" and "En_us" must compare equal; expected order is en < en-GB < en-US)
ASSERT(clTestLang1 == clTestLang2);
ASSERT(clTestLang1 != clTestLang3);
ASSERT(clTestLang4 < clTestLang3);
ASSERT(clTestLang3 < clTestLang1);
rclConsole.format("comparison tests", "OK");
// Test the construction from the numeric form
Language clTestLang7(clTestLang1.asNumber());
ASSERT(clTestLang7 == clTestLang1);
// Test the construction of one from just the language part of another
Language clTestLang8(clTestLang1.getLanguageCode());
ASSERT(clTestLang1.matches(clTestLang8));
ASSERT(clTestLang8 == clTestLang4);
// Same test but construct from the numeric form of the language
Language clTestLang9(clTestLang1.getLanguage());
ASSERT(clTestLang1.matches(clTestLang9));
ASSERT(clTestLang9 == clTestLang4);
rclConsole.format("numeric construct tests", "OK");
//------------------------------------------------------
// Test some UnicodeStringRef copy functions
//------------------------------------------------------
int len;
UChar ucbuf[100];
UErrorCode err;
std::string ss;
// 37 characters: 26 letters, one blank, 10 digits
const char* cstr = "abcdefghijklmnopqrstuvyzyz 0123456789";
icu::UnicodeString us1(cstr);
UnicodeStringRef usr(us1);
// into std::string with default conversion
usr.extract(ss);
if (bVerbose) rclConsole.format("extract to std::string",ss.c_str());
ASSERT (strlen(cstr) == ss.length());
// substring into icu::UnicodeString
// (us2 starts with other content, which the extracted text is expected to replace)
icu::UnicodeString us2("initialValue");
icu::UnicodeString us3("z 0");
usr.extractBetween(25,28,us2); // Should be "z 0"
if (bVerbose) ucharDisplay(rclConsole, "extractBetween into UChar buffer", us2.getBuffer(), us2.length());
ASSERT ( us2 == us3 );
// substring into UChar buffer
// The buffer first receives all of us1; the second call is then expected to
// overwrite three units at buffer offset 5 with "012", giving the value of us4.
err = U_ZERO_ERROR;
len = us1.extract(ucbuf,100,err); // Pre-fill buffer
usr.extract(27,3,ucbuf,5); // extract part of USR into the buffer
icu::UnicodeString us4("abcde012ijklm");
if (bVerbose) ucharDisplay(rclConsole, "extract into UChar buffer", ucbuf, 13);
ASSERT ( us4.compare(ucbuf,13) == 0 );
// extract into too-small UChar buffer
// The string holds 37 units: capacity 36 must overflow, 37 must warn about
// the missing terminator, and 38 must succeed.
err = U_ZERO_ERROR;
len = usr.extract(ucbuf,36,err); // too small
ASSERT(err == U_BUFFER_OVERFLOW_ERROR);
err = U_ZERO_ERROR;
len = usr.extract(ucbuf,37,err); // no room for final 0
ASSERT(err == U_STRING_NOT_TERMINATED_WARNING);
err = U_ZERO_ERROR;
len = usr.extract(ucbuf,38,err); // just right
ASSERT(err == U_ZERO_ERROR);
if (bVerbose) ucharDisplay(rclConsole, "extract into UChar buffer", ucbuf, len);
ASSERT ( us1.compare(ucbuf,len) == 0 );
//------------------------------------------------------
// Test some UnicodeStringRef conversion functions
//------------------------------------------------------
// Create an icu::UnicodeString with 8 Arabic characters followed by a blank.
// Also get the utf-8 form as a reference
UChar uc[] = {0x062c, 0x0648, 0x0631, 0x062c, 0x062a, 0x0627, 0x0648, 0x0646, 0x0020};
int nchars = sizeof(uc)/sizeof(UChar);
icu::UnicodeString US1(uc, nchars);
// u8 is the reference result, produced by icu::UnicodeString itself
char u8[100];
US1.extract(0, US1.length(), u8, 100, "utf-8");
// Create two UnicodeStringRef and compare them
UnicodeStringRef USR1(US1);
UnicodeStringRef USR2(uc, nchars);
if (bVerbose) ucharDisplay(rclConsole, "Construct from icu::UnicodeString", USR1.getBuffer(), USR1.length());
if (bVerbose) ucharDisplay(rclConsole, "Construct from UChar array", USR2.getBuffer(), USR2.length());
ASSERT ( USR1.compare(US1) == 0 );
// Extract into a buffer using the utf-8 converter
char cbuf[100];
*cbuf = 0;
len = USR1.extract(0, USR1.length(), cbuf, 100, "utf-8");
if (bVerbose) rclConsole.format("extract into buffer with utf-8 converter", cbuf);
ASSERT ( strcmp(u8,cbuf) == 0 );
// Extract into a string using the utf-8 converter
USR1.extract(ss, "utf-8");
if (bVerbose) rclConsole.format("extract into string with utf-8 converter", ss.c_str());
ASSERT ( strlen(u8) == ss.length() );
ASSERT ( strncmp(u8,ss.data(),strlen(u8)) == 0 );
// Test the "re-try when overflows" logic in unistrref.cpp
// Create a string that converts to >255 chars
// (US2 ends up holding 16 copies of uc: the initial one plus 15 appended)
icu::UnicodeString US2(uc, nchars);
for ( int i=0; i < 15; ++i )
US2.append(uc, nchars);
// Extract the 12th repeat
UnicodeStringRef USR3(US2);
USR3.extract(11*nchars, nchars, ss, "utf-8");
if (bVerbose) rclConsole.format("extract part into string with utf-8 converter", ss.c_str());
ASSERT ( strlen(u8) == ss.length() );
// Extract all to string with converter
USR3.extract(ss, "utf-8");
ASSERT ( 16*strlen(u8) == ss.length() );
// Explicit extract to utf-8 (no ICU converter)
std::string ss2;
USR3.extractUTF8(ss2);
ASSERT ( ss2 == ss );
// Convert to utf-8 (no ICU converter)
std::string ss3 = USR3.asUTF8();
ASSERT ( ss3 == ss );
rclConsole.format("UnicodeStringRef tests", "OK");
return enErrorId;
}
int main(int argc, char * argv[]) /*
---------------------------------- */
{
util::ConsoleUI clConsole(argc, argv, MAIN_TITLE, "");
TyErrorId enErrorId;
clConsole.handleUsageHelp(gs_szUsage, gs_szHelp);
gs_bVerbose = clConsole.hasArgSwitch(_TEXT("verbose"));
enErrorId = mainTest(clConsole, gs_bVerbose);
if (enErrorId == UIMA_ERR_NONE) {
clConsole.info(_TEXT("The program terminated successfully."));
} else {
clConsole.info(_TEXT("The program terminated with an error."));
}
return(int) enErrorId;
} //lint !e529 rclResMgr not subsequently referenced
/* <EOF> */