// blob: f9bfe99cd08bb2b7b43590a9ba80d41ff40c51af
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_i18npool.hxx"
#include <cclass_unicode.hxx>
#include <com/sun/star/i18n/UnicodeScript.hpp>
#include <com/sun/star/i18n/UnicodeType.hpp>
#include <com/sun/star/i18n/KCharacterType.hpp>
#include <unicode/uchar.h>
#include <i18nutil/x_rtl_ustring.h>
#include <breakiteratorImpl.hxx>
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::lang;
using namespace ::rtl;
namespace com { namespace sun { namespace star { namespace i18n {
// ----------------------------------------------------
// class cclass_Unicode
// ----------------------------------------------------
// Constructs the Unicode character classification service.
//
// xSMgr is stored in xMSF; toTitle() later passes it to the break iterator
// it creates. The parser-table pointers start as NULL, both type masks as 0,
// the parser state as ssGetChar, and the separators as ',' (group) and
// '.' (decimal).
cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr )
    : xMSF( xSMgr )
    , pTable( NULL )
    , pStart( NULL )
    , pCont( NULL )
    , nStartTypes( 0 )
    , nContTypes( 0 )
    , eState( ssGetChar )
    , cGroupSep( ',' )
    , cDecimalSep( '.' )
{
    // Name reported by getImplementationName() and matched by supportsService().
    cClass = "com.sun.star.i18n.CharacterClassification_Unicode";

    // Case-mapping transliteration used by toUpper/toLower/toTitle;
    // released in the destructor.
    trans = new Transliteration_casemapping();
}
// Tears down the parser table and frees the transliteration object
// allocated by the constructor.
cclass_Unicode::~cclass_Unicode()
{
    destroyParserTable();

    // 'trans' was created with new in the constructor.
    delete trans;
}
// Returns the upper-case form of a section of Text.
//
// Text     source string
// nPos     UTF-16 index of the first character to convert
// nCount   number of UTF-16 units to convert; clamped to the end of Text
// rLocale  locale handed to the case-mapping transliteration
//
// Returns an empty string when nPos lies outside Text or nCount is not
// positive.
OUString SAL_CALL
cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
    const sal_Int32 len = Text.getLength();

    // Reject a negative start index as well as one past the end (the same
    // range check getType() and friends perform), and a non-positive count.
    if (nPos < 0 || nPos >= len || nCount <= 0)
        return OUString();

    // Clamp without computing nCount + nPos, which could overflow sal_Int32.
    if (nCount > len - nPos)
        nCount = len - nPos;

    trans->setMappingType(MappingTypeToUpper, rLocale);
    return trans->transliterateString2String(Text, nPos, nCount);
}
// Returns the lower-case form of a section of Text.
//
// Text     source string
// nPos     UTF-16 index of the first character to convert
// nCount   number of UTF-16 units to convert; clamped to the end of Text
// rLocale  locale handed to the case-mapping transliteration
//
// Returns an empty string when nPos lies outside Text or nCount is not
// positive.
OUString SAL_CALL
cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
    const sal_Int32 len = Text.getLength();

    // Reject a negative start index as well as one past the end (the same
    // range check getType() and friends perform), and a non-positive count.
    if (nPos < 0 || nPos >= len || nCount <= 0)
        return OUString();

    // Clamp without computing nCount + nPos, which could overflow sal_Int32.
    if (nCount > len - nPos)
        nCount = len - nPos;

    trans->setMappingType(MappingTypeToLower, rLocale);
    return trans->transliterateString2String(Text, nPos, nCount);
}
// Returns a copy of a section of Text in which the first character of every
// word is title-cased and all other characters are copied unchanged.
//
// Text     source string
// nPos     UTF-16 index of the first character to convert
// nCount   number of UTF-16 units to convert; clamped to the end of Text
// rLocale  locale used for word breaking and for the case mapping
//
// Returns an empty string when nPos lies outside Text or nCount is not
// positive.
OUString SAL_CALL
cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
    const sal_Int32 len = Text.getLength();

    // A negative nPos would index before the buffer in the loop below and a
    // negative nCount would reach the allocator, so reject both up front.
    if (nPos < 0 || nPos >= len || nCount <= 0)
        return OUString();

    // Clamp without computing nCount + nPos, which could overflow sal_Int32.
    if (nCount > len - nPos)
        nCount = len - nPos;

    trans->setMappingType(MappingTypeToTitle, rLocale);

    rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h
    // Hand the buffer to an OUString immediately so that it is released if
    // the break iterator or the transliteration throws while it is filled.
    OUString aResult( pStr, SAL_NO_ACQUIRE );
    sal_Unicode* out = pStr->buffer;

    BreakIteratorImpl brk(xMSF);
    Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
                WordType::ANYWORD_IGNOREWHITESPACES, sal_True);

    const sal_Int32 nEnd = nPos + nCount;   // cannot overflow after the clamp above
    for (sal_Int32 i = nPos; i < nEnd; i++, out++) {
        // Move on to the next word once the current one has been consumed.
        if (i >= bdy.endPos)
            bdy = brk.nextWord(Text, bdy.endPos, rLocale,
                        WordType::ANYWORD_IGNOREWHITESPACES);
        // Only the character at a word start is mapped.
        *out = (i == bdy.startPos) ?
            trans->transliterateChar2Char(Text[i]) : Text[i];
    }
    *out = 0;   // terminate the freshly filled buffer
    return aResult;
}
// Returns the ICU general category (u_charType) of the code point at nPos,
// or 0 when nPos is outside Text.
sal_Int16 SAL_CALL
cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
    const bool bInRange = nPos >= 0 && nPos < Text.getLength();
    if ( !bInRange )
        return 0;

    // Increment 0: read the code point without moving the index.
    const sal_uInt32 nCodePoint = Text.iterateCodePoints(&nPos, 0);
    return static_cast<sal_Int16>( u_charType(nCodePoint) );
}
// Returns the ICU bidirectional category (u_charDirection) of the code point
// at nPos, or 0 when nPos is outside Text.
sal_Int16 SAL_CALL
cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
    const bool bInRange = nPos >= 0 && nPos < Text.getLength();
    if ( !bInRange )
        return 0;

    // Increment 0: read the code point without moving the index.
    const sal_uInt32 nCodePoint = Text.iterateCodePoints(&nPos, 0);
    return static_cast<sal_Int16>( u_charDirection(nCodePoint) );
}
// Returns the Unicode block of the code point at nPos as a UnicodeScript
// value, or 0 when nPos is outside Text.
sal_Int16 SAL_CALL
cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
    const bool bInRange = nPos >= 0 && nPos < Text.getLength();
    if ( !bInRange )
        return 0;

    // Increment 0: read the code point without moving the index.
    const sal_uInt32 nCodePoint = Text.iterateCodePoints(&nPos, 0);
    const sal_Int16 nBlock = static_cast<sal_Int16>( ublock_getCode(nCodePoint) );

    // ICU's UBlockCode starts from 1 for Basic Latin, while the OO.o enum
    // UnicodeScript starts from 0, so mapping one to the other means
    // shifting by 1.
    return nBlock - 1;
}
// Maps the ICU general category of one code point to a KCharacterType
// bit mask.
//
// Text       source string
// nPos       in/out UTF-16 index; moved by 'increment' code points
// increment  0 classifies the code point at *nPos and leaves the index
//            alone; a positive value moves the index first and classifies
//            the code point found at the new position
sal_Int32 SAL_CALL
cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
    using namespace ::com::sun::star::i18n::KCharacterType;

    sal_uInt32 nCodePoint = Text.iterateCodePoints(nPos, increment);
    // After a forward move, read again so the classified code point is the
    // one at the updated index.
    if (increment > 0)
        nCodePoint = Text.iterateCodePoints(nPos, 0);

    // Flag combinations shared by several categories.
    const sal_Int32 nPrintableBase = PRINTABLE | BASE_FORM;
    const sal_Int32 nLetterBase    = LETTER | nPrintableBase;

    switch ( u_charType(nCodePoint) ) {
        // Cased letters
        case U_UPPERCASE_LETTER:
            return UPPER | nLetterBase;
        case U_LOWERCASE_LETTER:
            return LOWER | nLetterBase;
        case U_TITLECASE_LETTER:
            return TITLE_CASE | nLetterBase;

        // Letters without case
        case U_MODIFIER_LETTER:
        case U_OTHER_LETTER:
            return nLetterBase;

        // Every kind of number counts as a digit
        case U_DECIMAL_DIGIT_NUMBER:
        case U_LETTER_NUMBER:
        case U_OTHER_NUMBER:
            return DIGIT | nPrintableBase;

        // Marks
        case U_NON_SPACING_MARK:
        case U_ENCLOSING_MARK:
        case U_COMBINING_SPACING_MARK:
            return nPrintableBase;

        // Space, punctuation and symbols: printable only
        case U_SPACE_SEPARATOR:
        case U_DASH_PUNCTUATION:
        case U_INITIAL_PUNCTUATION:
        case U_FINAL_PUNCTUATION:
        case U_CONNECTOR_PUNCTUATION:
        case U_OTHER_PUNCTUATION:
        case U_MATH_SYMBOL:
        case U_CURRENCY_SYMBOL:
        case U_MODIFIER_SYMBOL:
        case U_OTHER_SYMBOL:
            return PRINTABLE;

        // Control and format characters
        case U_CONTROL_CHAR:
        case U_FORMAT_CHAR:
            return CONTROL;

        // Line and paragraph separators
        case U_LINE_SEPARATOR:
        case U_PARAGRAPH_SEPARATOR:
            return CONTROL | PRINTABLE;

        // Everything else.
        // NOTE(review): this returns an ICU category enumerator rather than
        // a KCharacterType flag - confirm that its value is the intended one.
        default:
            return U_GENERAL_OTHER_TYPES;
    }
}
// Returns the KCharacterType bit mask of the code point at nPos, or 0 when
// nPos is outside Text. The locale argument is not used.
sal_Int32 SAL_CALL
cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
    const bool bInRange = nPos >= 0 && nPos < Text.getLength();
    if ( !bInRange )
        return 0;

    // Increment 0: classify in place without moving the index.
    return getCharType(Text, &nPos, 0);
}
// Returns the union (bitwise OR) of the KCharacterType bit masks of up to
// nCount code points of Text, starting at nPos.
//
// Text    source string
// nPos    UTF-16 index of the first code point to classify
// nCount  number of code points to classify; scanning stops at the end of
//         Text. The code point at nPos is always classified, even when
//         nCount is less than 1.
//
// Returns 0 when nPos is outside Text. The locale argument is not used.
sal_Int32 SAL_CALL
cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
    const sal_Int32 nLen = Text.getLength();
    if ( nPos < 0 || nLen <= nPos ) return 0;

    sal_Int32 result = getCharType(Text, &nPos, 0);
    for (sal_Int32 i = 1; i < nCount; i++) {
        // Step to the next code point and check the NEW position before
        // classifying it. Checking before the step would let the position
        // one past the last character be classified whenever nCount exceeds
        // the remaining text, adding bits that belong to no character.
        Text.iterateCodePoints(&nPos, 1);
        if (nPos >= nLen)
            break;
        result |= getCharType(Text, &nPos, 0);
    }
    return result;
}
// Parses the token that starts at nPos in Text.
//
// Text                         string to parse
// nPos                         UTF-16 index at which parsing starts
// rLocale                      locale used to set up the parser table
// startCharTokenType           type mask for characters that may start a token
// userDefinedCharactersStart   additional characters allowed at a token start
// contCharTokenType            type mask for characters that may continue a token
// userDefinedCharactersCont    additional characters allowed inside a token
//
// Returns a default-constructed ParseResult when nPos is outside Text.
ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
            const OUString& Text,
            sal_Int32 nPos,
            const Locale& rLocale,
            sal_Int32 startCharTokenType,
            const OUString& userDefinedCharactersStart,
            sal_Int32 contCharTokenType,
            const OUString& userDefinedCharactersCont )
                throw(RuntimeException)
{
    ParseResult r;
    // Reject a negative index too, so it never reaches parseText(); this is
    // the same range check the character query methods use.
    if ( nPos < 0 || Text.getLength() <= nPos )
        return r;

    setupParserTable( rLocale,
            startCharTokenType, userDefinedCharactersStart,
            contCharTokenType, userDefinedCharactersCont );
    parseText( r, Text, nPos );

    return r;
}
// Parses the token that starts at nPos in Text, passing the requested token
// type on to the parser.
//
// nTokenType                   token type forwarded to parseText()
// Text                         string to parse
// nPos                         UTF-16 index at which parsing starts
// rLocale                      locale used to set up the parser table
// startCharTokenType           type mask for characters that may start a token
// userDefinedCharactersStart   additional characters allowed at a token start
// contCharTokenType            type mask for characters that may continue a token
// userDefinedCharactersCont    additional characters allowed inside a token
//
// Returns a default-constructed ParseResult when nPos is outside Text.
ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
            sal_Int32 nTokenType,
            const OUString& Text,
            sal_Int32 nPos,
            const Locale& rLocale,
            sal_Int32 startCharTokenType,
            const OUString& userDefinedCharactersStart,
            sal_Int32 contCharTokenType,
            const OUString& userDefinedCharactersCont )
                throw(RuntimeException)
{
    ParseResult r;
    // Reject a negative index too, so it never reaches parseText(); this is
    // the same range check the character query methods use.
    if ( nPos < 0 || Text.getLength() <= nPos )
        return r;

    setupParserTable( rLocale,
            startCharTokenType, userDefinedCharactersStart,
            contCharTokenType, userDefinedCharactersCont );
    parseText( r, Text, nPos, nTokenType );

    return r;
}
// Returns the implementation name held in cClass as an OUString.
OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
{
    const OUString aImplName( OUString::createFromAscii(cClass) );
    return aImplName;
}
// Returns true when rServiceName is equal to the name held in cClass.
sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
{
    // compareToAscii() yields 0 for equal strings.
    return rServiceName.compareToAscii(cClass) == 0;
}
// Returns a one-element sequence that contains the name held in cClass.
Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
{
    const OUString aServiceName( OUString::createFromAscii(cClass) );

    Sequence< OUString > aNames(1);
    aNames[0] = aServiceName;
    return aNames;
}
} } } }