blob: 9ca7f25450ddbc108a37a02c7302180111f0bbc3 [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
// MARKER( autogen include statement, do not remove
#include "precompiled_lnth.hxx"
#include <com/sun/star/uno/Reference.h>
#include <cppuhelper/factory.hxx> // helper for factories
#include <com/sun/star/registry/XRegistryKey.hpp>
#include <com/sun/star/beans/XPropertySet.hpp>
#include <i18npool/mslangid.hxx>
#include <tools/debug.hxx>
#include <unotools/processfactory.hxx>
#include <osl/mutex.hxx>
#include <unotools/pathoptions.hxx>
#include <unotools/lingucfg.hxx>
#include <rtl/string.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/textenc.h>
#include "nthesimp.hxx"
#include <linguistic/misc.hxx>
#include <linguistic/lngprops.hxx>
#include "nthesdta.hxx"
#include <list>
#include <set>
#include <string.h>
// values assigned to capitalization types
// XML-header to query SPELLML support
#define SPELLML_SUPPORT "<?xml?>"
using namespace utl;
using namespace osl;
using namespace rtl;
using namespace com::sun::star;
using namespace com::sun::star::beans;
using namespace com::sun::star::lang;
using namespace com::sun::star::uno;
using namespace com::sun::star::linguistic2;
using namespace linguistic;
// Obtains the linguistic service manager via the process service factory.
// Returns xRes, which starts out default-constructed and is only assigned
// from the createInstance() result below.
// NOTE(review): this text looks truncated -- the body braces are absent, the
// condition of the `if (` is missing, and the service name handed to
// createInstance() is an empty string here. Confirm against the pristine file.
static uno::Reference< XLinguServiceManager > GetLngSvcMgr_Impl()
uno::Reference< XLinguServiceManager > xRes;
uno::Reference< XMultiServiceFactory > xMgr = getProcessServiceFactory();
if (
// query the freshly created instance for the XLinguServiceManager interface
xRes = uno::Reference< XLinguServiceManager > ( xMgr->createInstance(
"" ) ) ), UNO_QUERY ) ;
return xRes;
// Constructor: initialises aEvtListeners with the mutex returned by
// GetLinguMutex() and puts every other member into its "nothing loaded yet"
// state (NULL pointers, zero dictionaries, not disposing).
// The per-locale tables are filled later by getLocales().
Thesaurus::Thesaurus() :
aEvtListeners ( GetLinguMutex() )
bDisposing = sal_False;
pPropHelper = NULL;
// parallel per-dictionary tables; all are indexed 0 .. numthes-1
aThes = NULL;
aCharSetInfo = NULL;
aTEncs = NULL;
aTLocs = NULL;
aTNames = NULL;
numthes = 0;
// NOTE(review): presumably the body of Thesaurus::~Thesaurus(); the signature
// line and the braces are not present in this text -- confirm.
// Frees every element of the per-dictionary tables, then the tables
// themselves, resets the pointers to NULL and finally deletes pPropHelper.
if (aThes)
// delete each thesaurus object before the pointer array that holds it
for (int i = 0; i < numthes; i++)
if (aThes[i]) delete aThes[i];
aThes[i] = NULL;
delete[] aThes;
aThes = NULL;
if (aCharSetInfo)
// same pattern for the per-dictionary CharClass objects
for (int i = 0; i < numthes; i++)
if (aCharSetInfo[i]) delete aCharSetInfo[i];
aCharSetInfo[i] = NULL;
delete[] aCharSetInfo;
aCharSetInfo = NULL;
// numthes is cleared only after both loops above have used it as their bound
numthes = 0;
// the remaining tables hold plain values, so only the arrays are released
if (aTEncs) delete[] aTEncs;
aTEncs = NULL;
if (aTLocs) delete[] aTLocs;
aTLocs = NULL;
if (aTNames) delete[] aTNames;
aTNames = NULL;
if (pPropHelper)
delete pPropHelper;
// Returns the property helper, creating it on first use.
// When pPropHelper is still NULL it is built from the property set returned
// by GetLinguProperties() and then registered as a property listener.
// NOTE(review): braces are missing in this text; presumably the three
// statements after the `if` form its body -- confirm.
PropertyHelper_Thesaurus& Thesaurus::GetPropHelper_Impl()
if (!pPropHelper)
Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY );
pPropHelper = new PropertyHelper_Thesaurus( (XThesaurus *) this, xPropSet );
pPropHelper->AddAsPropListener(); //! after a reference is established
return *pPropHelper;
// Returns aSuppLocales, the locales supported by the installed thesaurus
// dictionaries.
// While numthes is 0 the dictionary list is collected from the linguistic
// configuration, merged with old-style dictionaries, and the per-dictionary
// tables (aThes, aTEncs, aTLocs, aTNames, aCharSetInfo) are allocated.
// The thesaurus objects themselves are not opened here: aThes[k] is set to NULL.
// NOTE(review): braces and some control-flow lines appear to be missing from
// this text, so the statement nesting below must be confirmed against the
// original file.
Sequence< Locale > SAL_CALL Thesaurus::getLocales()
MutexGuard aGuard( GetLinguMutex() );
// this routine should return the locales supported by the installed
// dictionaries.
if (!numthes)
SvtLinguConfig aLinguCfg;
// get list of dictionaries-to-use
std::list< SvtLinguConfigDictionaryEntry > aDics;
uno::Sequence< rtl::OUString > aFormatList;
aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Thesauri"),
A2OU(""), aFormatList );
sal_Int32 nLen = aFormatList.getLength();
// append the active dictionaries of every supported format to aDics
for (sal_Int32 i = 0; i < nLen; ++i)
std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
//!! for compatibility with old dictionaries (the ones not using extensions
//!! or new configuration entries, but still using the dictionary.lst file)
//!! Get the list of old style spell checking dictionaries to use...
std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
GetOldStyleDics( "THES" ) );
// to prefer dictionaries with configuration entries we will only
// use those old style dictionaries that add a language that
// is not yet supported by the list of new style dictionaries
MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
// provisional count (number of dictionaries); recomputed per locale below
numthes = aDics.size();
if (numthes)
// get supported locales from the dictionaries-to-use...
sal_Int32 k = 0;
// the set removes duplicate locale names shared by several dictionaries
std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet;
std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
sal_Int32 nLen2 = aLocaleNames.getLength();
for (k = 0; k < nLen2; ++k)
aLocaleNamesSet.insert( aLocaleNames[k] );
// ... and add them to the resulting sequence
aSuppLocales.realloc( aLocaleNamesSet.size() );
std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB;
k = 0;
for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
// ISO string -> language id -> Locale
Locale aTmp( MsLangId::convertLanguageToLocale(
MsLangId::convertIsoStringToLanguage( *aItB )));
aSuppLocales[k++] = aTmp;
//! For each dictionary and each locale we need a separate entry.
//! If this results in more than one dictionary per locale then (for now)
//! it is undefined which dictionary gets used.
//! In the future the implementation should support using several dictionaries
//! for one locale.
// numthes becomes the total number of (dictionary, locale) pairs
numthes = 0;
for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
numthes = numthes + aDictIt->aLocaleNames.getLength();
// add dictionary information
aThes = new MyThes* [numthes];
aTEncs = new rtl_TextEncoding [numthes];
aTLocs = new Locale [numthes];
aTNames = new OUString [numthes];
aCharSetInfo = new CharClass* [numthes];
k = 0;
for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
// NOTE(review): entries without locations are skipped here although their
// locales were counted into numthes above -- confirm this is intended.
if (aDictIt->aLocaleNames.getLength() > 0 &&
aDictIt->aLocations.getLength() > 0)
uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
sal_Int32 nLocales = aLocaleNames.getLength();
// currently only one language per dictionary is supported in the actual implementation...
// Thus here we work-around this by adding the same dictionary several times.
// Once for each of its supported locales.
for (sal_Int32 i = 0; i < nLocales; ++i)
// the thesaurus object stays NULL until queryMeanings() needs it
aThes[k] = NULL;
aTLocs[k] = MsLangId::convertLanguageToLocale(
MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] ));
aCharSetInfo[k] = new CharClass( aTLocs[k] );
// also both files have to be in the same directory and the
// file names must only differ in the extension (.aff/.dic).
// Thus we use the first location only and strip the extension part.
// NOTE(review): queryMeanings() appends ".dat"/".idx" to this name, so the
// ".aff/.dic" wording above may be left over from other code -- confirm.
rtl::OUString aLocation = aDictIt->aLocations[0];
sal_Int32 nPos = aLocation.lastIndexOf( '.' );
aLocation = aLocation.copy( 0, nPos );
aTNames[k] = aLocation;
// NOTE(review): no increment of k is visible inside the loop in this text,
// yet the assertion below expects k to have reached numthes -- confirm.
DBG_ASSERT( k == numthes, "index mismatch?" );
/* no dictionary found so register no dictionaries */
// NOTE(review): presumably the `else` branch of `if (numthes)`; the `else`
// line itself is not present in this text.
numthes = 0;
aThes = NULL;
aTEncs = NULL;
aTLocs = NULL;
aTNames = NULL;
aCharSetInfo = NULL;
return aSuppLocales;
// Reports whether rLocale is one of the entries of aSuppLocales.
// Returns sal_True when an equal Locale is found, sal_False otherwise.
sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale)
MutexGuard aGuard( GetLinguMutex() );
sal_Bool bRes = sal_False;
// NOTE(review): the statement guarded by this `if` is not present in this
// text; presumably it fills aSuppLocales (e.g. via getLocales()) -- confirm.
if (!aSuppLocales.getLength())
sal_Int32 nLen = aSuppLocales.getLength();
// linear search through the supported locales
for (sal_Int32 i = 0; i < nLen; ++i)
const Locale *pLocale = aSuppLocales.getConstArray();
if (rLocale == pLocale[i])
bRes = sal_True;
return bRes;
// Looks up the meanings (a definition plus its synonyms) of qTerm for rLocale.
// Returns an empty sequence when the language is LANGUAGE_NONE, the term is
// empty, or a later step gives up; returns the cached prevMeanings when the
// term and language equal those of the last successful query.
// An IllegalArgumentException is thrown for a locale hasLocale() rejects.
// If the direct lookup finds nothing, the spell checker is asked (through
// "<?xml?>" queries) for a stem of the term and the lookup runs again.
// NOTE(review): this text is missing braces and several lines or line tails
// (bare `if (` conditions, `case` labels, `#else`/`#endif`, some
// declarations), so nesting and conditions must be confirmed against the
// original file before relying on the comments below.
Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings(
const OUString& qTerm, const Locale& rLocale,
const PropertyValues& rProperties)
throw(IllegalArgumentException, RuntimeException)
MutexGuard aGuard( GetLinguMutex() );
uno::Sequence< Reference< XMeaning > > aMeanings( 1 );
uno::Sequence< Reference< XMeaning > > noMeanings( 0 );
uno::Reference< XLinguServiceManager > xLngSvcMgr( GetLngSvcMgr_Impl() );
uno::Reference< XSpellChecker1 > xSpell;
// rTerm: term that is actually looked up (may be replaced by its stem)
// pTerm: word sent to the 'analyze' query (last word of a multi-word term)
OUString rTerm(qTerm);
OUString pTerm(qTerm);
sal_uInt16 ct = CAPTYPE_UNKNOWN;
// stem: set once the stemming fallback has been started
// stem2: set once the 'analyze' query has delivered a result
sal_Int32 stem = 0;
sal_Int32 stem2 = 0;
sal_Int16 nLanguage = LocaleToLanguage( rLocale );
if (nLanguage == LANGUAGE_NONE || !rTerm.getLength())
return noMeanings;
if (!hasLocale( rLocale ))
// NOTE(review): as shown, the return directly follows the throw; presumably
// the two were alternatives under a preprocessor switch -- confirm.
throw( IllegalArgumentException() );
return noMeanings;
// same query as last time: hand back the cached result
if (prevTerm == qTerm && prevLocale == nLanguage)
return prevMeanings;
mentry * pmean = NULL;
sal_Int32 nmean = 0;
PropertyHelper_Thesaurus &rHelper = GetPropHelper();
rHelper.SetTmpPropVals( rProperties );
MyThes * pTH = NULL;
CharClass * pCC = NULL;
// NOTE(review): eEnc is assigned and read below but its declaration is not
// present in this text.
// find the first thesaurus that matches the locale
for (int i =0; i < numthes; i++)
if (rLocale == aTLocs[i])
// open up and initialize this thesaurus if need be
if (!aThes[i])
OUString datpath = aTNames[i] + A2OU(".dat");
OUString idxpath = aTNames[i] + A2OU(".idx");
// NOTE(review): ndat/nidx are never assigned in this text; presumably the
// lines converting datpath/idxpath into them are missing -- confirm.
OUString ndat;
OUString nidx;
OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding()));
OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding()));
#if defined(WNT)
// workaround for Windows specific problem that the
// path length in calls to 'fopen' is limited to somewhat
// about 120+ characters which will usually be exceeded when
// using dictionaries as extensions.
aTmpidx = Win_GetShortPathName( nidx );
aTmpdat = Win_GetShortPathName( ndat );
// NOTE(review): the matching #endif is not present in this text.
aThes[i] = new MyThes(aTmpidx.getStr(),aTmpdat.getStr());
if (aThes[i])
// remember the text encoding the thesaurus file declares
aTEncs[i] = getTextEncodingFromCharset(aThes[i]->get_th_encoding());
pTH = aThes[i];
eEnc = aTEncs[i];
pCC = aCharSetInfo[i];
if (pTH)
// we don't want to work with a default text encoding since following incorrect
// results may occur only for specific text and thus may be hard to notice.
// Thus better always make a clean exit here if the text encoding is in question.
// Hopefully something not working at all will raise proper attention quickly. ;-)
DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
// NOTE(review): presumably this return is guarded by a check for an unknown
// encoding; the guarding line is not present in this text.
return noMeanings;
// lookup loop: first pass with the term as given, later pass with its stem
while (pTH)
// convert word to all lower case for searching
// the capitalization type is taken from the unstemmed term only
if (!stem)
ct = capitalType(rTerm, pCC);
OUString nTerm(makeLowerCase(rTerm, pCC));
OString aTmp( OU2ENC(nTerm, eEnc) );
nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean);
if (nmean)
aMeanings.realloc( nmean );
mentry * pe = pmean;
OUString codeTerm = qTerm;
Reference< XSpellAlternatives > xTmpRes2;
if (stem)
// ask the spell checker for a morphological analysis of the original word;
// its first alternative (codeTerm) is fed into the 'generate' queries below
xTmpRes2 = xSpell->spell( A2OU("<?xml?><query type='analyze'><word>") +
pTerm + A2OU("</word></query>"), nLanguage, rProperties );
// NOTE(review): condition truncated in this text.
if (
Sequence<OUString>seq = xTmpRes2->getAlternatives();
if (seq.getLength() > 0)
codeTerm = seq[0];
stem2 = 1;
#if 0
OString o = OUStringToOString(codeTerm, RTL_TEXTENCODING_UTF8);
fprintf(stderr, "CODETERM: %s\n", o.pData->buffer);
// one XMeaning per entry returned by the lookup
for (int j = 0; j < nmean; j++)
int count = pe->count;
if (count)
Sequence< OUString > aStr( count );
OUString *pStr = aStr.getArray();
// convert every synonym of this meaning
for (int i=0; i < count; i++)
OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc );
sal_Int32 catpos = sTerm.indexOf('(');
sal_Int32 catpos2 = 0;
OUString catst;
OUString catst2;
if (catpos > 2)
// remove category name for affixation and casing
catst = A2OU(" ") + sTerm.copy(catpos);
sTerm = sTerm.copy(0, catpos);
sTerm = sTerm.trim();
// generate synonyms with affixes
if (stem && stem2)
Reference< XSpellAlternatives > xTmpRes;
xTmpRes = xSpell->spell( A2OU("<?xml?><query type='generate'><word>") +
sTerm + A2OU("</word>") + codeTerm + A2OU("</query>"), nLanguage, rProperties );
// NOTE(review): condition truncated in this text.
if (
Sequence<OUString>seq = xTmpRes->getAlternatives();
if (seq.getLength() > 0)
sTerm = seq[0];
// NOTE(review): catpos2/catst2 are never changed from their initial values
// in this text, so this branch looks inactive -- confirm.
if (catpos2)
sTerm = catst2 + sTerm;
// a mixed-case synonym overrides the capitalization type of the query
sal_uInt16 ct1 = capitalType(sTerm, pCC);
if (CAPTYPE_MIXED == ct1)
ct = ct1;
OUString cTerm;
// NOTE(review): the case labels and breaks of this switch are not present
// in this text; the three assignments are presumably separate cases.
switch (ct)
cTerm = makeUpperCase(sTerm, pCC);
cTerm = makeInitCap(sTerm, pCC);
cTerm = sTerm;
// re-attach the category text that was split off above
OUString aAlt( cTerm + catst);
pStr[i] = aAlt;
// NOTE(review): the two constructions below are presumably #if/#else
// alternatives; the #else/#endif lines are not present in this text.
#if 0
Meaning * pMn = new Meaning(rTerm,nLanguage,rHelper);
Meaning * pMn = new Meaning(rTerm,nLanguage);
OUString dTerm(pe->defn,strlen(pe->defn),eEnc );
// NOTE(review): nothing visible here hands dTerm or aStr to pMn, and pe is
// never advanced to the next entry -- lines are probably missing.
Reference<XMeaning>* pMeaning = aMeanings.getArray();
pMeaning[j] = pMn;
if (nmean)
// remember the successful query so that a repeat is served from the cache
prevTerm = qTerm;
prevMeanings = aMeanings;
prevLocale = nLanguage;
return aMeanings;
// nothing found: give up if stemming was already tried.
// NOTE(review): second half of the condition is truncated in this text.
if (stem || !
return noMeanings;
stem = 1;
xSpell = uno::Reference< XSpellChecker1 >( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
// the spell checker must accept the SPELLML_SUPPORT probe string.
// NOTE(review): first operand of the condition is truncated in this text.
if (! || !xSpell->isValid( A2OU(SPELLML_SUPPORT), nLanguage, rProperties ))
return noMeanings;
Reference< XSpellAlternatives > xTmpRes;
xTmpRes = xSpell->spell( A2OU("<?xml?><query type='stem'><word>") +
rTerm + A2OU("</word></query>"), nLanguage, rProperties );
// NOTE(review): condition truncated in this text.
if (
Sequence<OUString>seq = xTmpRes->getAlternatives();
#if 0
for (int i = 0; i < seq.getLength(); i++)
OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8);
fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
if (seq.getLength() > 0)
rTerm = seq[0]; // XXX Use only the first stem
// stem the last word of the synonym (for categories after affixation)
rTerm = rTerm.trim();
sal_Int32 pos = rTerm.lastIndexOf(' ');
// gives up only when the last space sits at index 0
if (!pos)
return noMeanings;
xTmpRes = xSpell->spell( A2OU("<?xml?><query type='stem'><word>") +
rTerm.copy(pos + 1) + A2OU("</word></query>"), nLanguage, rProperties );
// NOTE(review): condition truncated in this text.
if (
Sequence<OUString>seq = xTmpRes->getAlternatives();
if (seq.getLength() > 0)
// keep the unstemmed last word for the 'analyze' query and replace it in
// rTerm with its first stem
pTerm = rTerm.copy(pos + 1);
rTerm = rTerm.copy(0, pos + 1) + seq[0];
#if 0
for (int i = 0; i < seq.getLength(); i++)
OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8);
fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
return noMeanings;
// Factory function: allocates a new Thesaurus and returns it as an
// XInterface reference. The service-manager argument is unused (its name is
// commented out in the signature).
Reference< XInterface > SAL_CALL Thesaurus_CreateInstance(
const Reference< XMultiServiceFactory > & /*rSMgr*/ )
Reference< XInterface > xService = (cppu::OWeakObject*) new Thesaurus;
return xService;
// Returns the fixed display name "New Thesaurus"; the locale argument is
// ignored (its name is commented out in the signature).
OUString SAL_CALL Thesaurus::getServiceDisplayName( const Locale& /*rLocale*/ )
MutexGuard aGuard( GetLinguMutex() );
return A2OU( "New Thesaurus" );
// Creates the property helper from the caller-supplied arguments, unless one
// already exists. Exactly one argument is expected, from which an
// XPropertySet reference is extracted.
// NOTE(review): braces and presumably an `else` line are missing in this
// text; the DBG_ERROR at the end most likely belongs to the "argument count
// is not 1" case -- confirm.
void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments )
throw(Exception, RuntimeException)
MutexGuard aGuard( GetLinguMutex() );
if (!pPropHelper)
sal_Int32 nLen = rArguments.getLength();
if (1 == nLen)
Reference< XPropertySet > xPropSet;
rArguments.getConstArray()[0] >>= xPropSet;
//! Pointer allows for access of the non-UNO functions.
//! And the reference to the UNO-functions while increasing
//! the ref-count and will implicitly free the memory
//! when the object is not longer used.
pPropHelper = new PropertyHelper_Thesaurus( (XThesaurus *) this, xPropSet );
pPropHelper->AddAsPropListener(); //! after a reference is established
DBG_ERROR( "wrong number of arguments in sequence" );
// Classifies the capitalization of aTerm by counting the characters that pCC
// reports as upper case:
//   none            -> CAPTYPE_NOCAP
//   all             -> CAPTYPE_ALLCAP
//   exactly one ... -> CAPTYPE_INITCAP (see the note on the truncated test)
//   anything else   -> CAPTYPE_MIXED
// Returns CAPTYPE_UNKNOWN when pCC is NULL or the term is empty.
sal_uInt16 SAL_CALL Thesaurus::capitalType(const OUString& aTerm, CharClass * pCC)
sal_Int32 tlen = aTerm.getLength();
if ((pCC) && (tlen))
String aStr(aTerm);
// nc = number of upper-case characters in the term
sal_Int32 nc = 0;
for (sal_uInt16 tindex = 0; tindex < tlen; tindex++)
if (pCC->getCharacterType(aStr,tindex) &
::com::sun::star::i18n::KCharacterType::UPPER) nc++;
if (nc == 0)
return (sal_uInt16) CAPTYPE_NOCAP;
if (nc == tlen)
return (sal_uInt16) CAPTYPE_ALLCAP;
// NOTE(review): the tail of this condition is truncated in this text;
// presumably it tests the first character against the same UPPER flag used
// in the loop above -- confirm.
if ((nc == 1) && (pCC->getCharacterType(aStr,0) &
return (sal_uInt16) CAPTYPE_INITCAP;
return (sal_uInt16) CAPTYPE_MIXED;
return (sal_uInt16) CAPTYPE_UNKNOWN;
// Returns aTerm converted to lower case over its whole length using pCC.
// When pCC is NULL the term is returned unchanged.
OUString SAL_CALL Thesaurus::makeLowerCase(const OUString& aTerm, CharClass * pCC)
if (pCC)
return pCC->toLower_rtl(aTerm, 0, aTerm.getLength());
return aTerm;
// Returns aTerm converted to upper case over its whole length using pCC.
// When pCC is NULL the term is returned unchanged.
OUString SAL_CALL Thesaurus::makeUpperCase(const OUString& aTerm, CharClass * pCC)
if (pCC)
return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength());
return aTerm;
// Returns aTerm with its first character upper-cased and the remainder
// lower-cased, using pCC for both conversions.
// When pCC is NULL or the term is empty the term is returned unchanged.
OUString SAL_CALL Thesaurus::makeInitCap(const OUString& aTerm, CharClass * pCC)
sal_Int32 tlen = aTerm.getLength();
if ((pCC) && (tlen))
// bTemp holds just the first character
OUString bTemp = aTerm.copy(0,1);
if (tlen > 1)
return ( pCC->toUpper_rtl(bTemp, 0, 1)
+ pCC->toLower_rtl(aTerm,1,(tlen-1)) );
// single-character term: there is no remainder to lower-case
return pCC->toUpper_rtl(bTemp, 0, 1);
return aTerm;
// Marks the object as disposing and calls disposeAndClear() on aEvtListeners
// with an EventObject whose source is this object.
// bDisposing is tested first so that a repeated call does nothing new.
// NOTE(review): braces are missing in this text; presumably all three
// statements after the `if` form its body -- confirm.
void SAL_CALL Thesaurus::dispose()
MutexGuard aGuard( GetLinguMutex() );
if (!bDisposing)
bDisposing = sal_True;
EventObject aEvtObj( (XThesaurus *) this );
aEvtListeners.disposeAndClear( aEvtObj );
// Adds rxListener to aEvtListeners unless the object is already disposing.
// NOTE(review): the second operand of the condition is truncated in this
// text -- confirm what else is checked before the listener is added.
void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener )
MutexGuard aGuard( GetLinguMutex() );
if (!bDisposing &&
aEvtListeners.addInterface( rxListener );
// Removes rxListener from aEvtListeners unless the object is already
// disposing.
// NOTE(review): the second operand of the condition is truncated in this
// text -- confirm what else is checked before the listener is removed.
void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener )
MutexGuard aGuard( GetLinguMutex() );
if (!bDisposing &&
aEvtListeners.removeInterface( rxListener );
// Service specific part
// Returns the implementation name by forwarding to
// getImplementationName_Static().
OUString SAL_CALL Thesaurus::getImplementationName()
MutexGuard aGuard( GetLinguMutex() );
return getImplementationName_Static();
// Returns sal_True when ServiceName equals one of the names returned by
// getSupportedServiceNames(), sal_False otherwise.
sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName )
MutexGuard aGuard( GetLinguMutex() );
Sequence< OUString > aSNL = getSupportedServiceNames();
const OUString * pArray = aSNL.getConstArray();
// linear scan; stops at the first match
for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
if( pArray[i] == ServiceName )
return sal_True;
return sal_False;
// Returns the supported service names by forwarding to
// getSupportedServiceNames_Static().
Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames()
MutexGuard aGuard( GetLinguMutex() );
return getSupportedServiceNames_Static();
// Builds the list of supported service names: a one-element sequence holding
// SN_THESAURUS.
Sequence< OUString > Thesaurus::getSupportedServiceNames_Static()
MutexGuard aGuard( GetLinguMutex() );
Sequence< OUString > aSNS( 1 ); // more than one service would also be possible
aSNS.getArray()[0] = A2OU( SN_THESAURUS );
return aSNS;
// Component entry point: when pImplName equals the Thesaurus implementation
// name (compareToAscii() yields 0), returns the raw pointer of a
// single-service factory; otherwise returns 0.
// NOTE(review): the initialiser of xFactory is truncated in this text, and
// the acquire call announced by the comment below is not visible either --
// confirm both against the original file.
void * SAL_CALL Thesaurus_getFactory( const sal_Char * pImplName,
XMultiServiceFactory * pServiceManager, void * )
void * pRet = 0;
if ( !Thesaurus::getImplementationName_Static().compareToAscii( pImplName ) )
Reference< XSingleServiceFactory > xFactory =
// acquire, because we return an interface pointer instead of a reference
pRet = xFactory.get();
return pRet;