blob: 238cdb5a6c4cc1536031d8b0f5c9888dea41ebcb [file] [log] [blame]
/** \file internal_capability_container.cpp .
-----------------------------------------------------------------------------
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-----------------------------------------------------------------------------
Description:
-----------------------------------------------------------------------------
02/05/2003 Initial creation
-------------------------------------------------------------------------- */
//#define DEBUG_VERBOSE
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include "uima/pragmas.hpp"
using namespace std;
#include "uima/macros.h"
#include "uima/internal_capability_container.hpp"
#include "uima/err_ids.h"
#include "uima/msg.h"
namespace uima {
UIMA_EXC_CLASSIMPLEMENT(CapabilityException, Exception);
namespace internal {
CapabilityContainer::CapabilityContainer( )
: iv_psetInputTOFsForUnspecifiedLang(NULL),
iv_psetOutputTOFsForUnspecifiedLang(NULL),
iv_bHasEmptyOutputTOFsForUnspecifiedLang(false) {}
/*static*/
void
CapabilityContainer::initTypeOrFeatures(
TyMapLang2TypeOrFeatures & rMap, // output argument
Capability::TyVecCapabilityTofs const & vecTofs,
Capability::TyVecCapabilityLanguages const & vecLangs,
TypeSystem const & typeSystem) {
const int BUF_SIZE = 0x100;
char cBuf[BUF_SIZE];
Capability::TyVecCapabilityLanguages::const_iterator itLang;
for (itLang = vecLangs.begin(); itLang != vecLangs.end(); ++itLang) {
// convert unicode lang string to single-byte lang string
assert((*itLang).length() < BUF_SIZE);
// Note: this conversion can be used only for invariant characters
u_UCharsToChars((*itLang).getBuffer(), cBuf, (*itLang).length());
// zero terminate single-byte buffer
cBuf[(*itLang).length()] = '\0';
// Special workaround code for the new way to specify the unspecified language
if (strcasecmp(cBuf, "x-unspecified") == 0) {
strcpy(cBuf, Language::UNSPECIFIED);
}
// create a language object based on single-byte lang string
Language lang(cBuf);
if (!lang.isValid()) {
/* taph 06.02.2003: once we have more detailed information about
the origin of the error we need to replace "unknown configuration file"
with the filename of the XML file (and maybe line number?) */
UIMA_EXC_THROW_NEW(uima::CapabilityException, // exc-type
UIMA_ERR_ENGINE_LANGUAGE_INVALID, // error code
ErrorMessage(UIMA_MSG_ID_EXC_INVALID_LANGUAGE, cBuf), // error message
ErrorMessage(UIMA_MSG_ID_EXCON_CHECKING_CAPABILITY_SPEC,"unknown configuration file"), // error context message
ErrorInfo::recoverable);
}
// create a new empty vector in the map for this lang
TySetTypeOrFeatures & setTofs = rMap[lang];
// now fill the vector with tof objects created from tof strings
Capability::TyVecCapabilityTofs::const_iterator itTof;
for (itTof = vecTofs.begin(); itTof != vecTofs.end(); ++itTof) {
TypeOrFeature tof;
// the tof string may be a type...
Type t = typeSystem.getType(*itTof);
if (t.isValid()) {
tof = TypeOrFeature(t);
} else {
// or the tof string may be a feature
Feature f = typeSystem.getFeatureByFullName(*itTof);
if (f.isValid()) {
tof = TypeOrFeature(f);
} else {
/* taph 06.02.2003: once we have more detailed information about
the origin of the error we need to replace "unknown configuration file"
with the filename of the XML file (and maybe line number?) */
if (tof.isType()) {
UIMA_EXC_THROW_NEW(uima::CapabilityException, // exc-type
UIMA_ERR_INVALID_FSTYPE_OBJECT, // error code
ErrorMessage(UIMA_MSG_ID_EXC_UNKNOWN_TYPE_NAME, *itTof), // error message
ErrorMessage(UIMA_MSG_ID_EXCON_CHECKING_CAPABILITY_SPEC, "unknown configuration file"), // error context message
ErrorInfo::recoverable);
} else {
UIMA_EXC_THROW_NEW(uima::CapabilityException, // exc-type
UIMA_ERR_INVALID_FSFEATURE_OBJECT,// error code
ErrorMessage(UIMA_MSG_ID_EXC_UNKNOWN_FEATURE_NAME, *itTof), // error message
ErrorMessage(UIMA_MSG_ID_EXCON_CHECKING_CAPABILITY_SPEC, "unknown configuration file"), // error context message
ErrorInfo::recoverable);
}
}
}
assert(tof.isValid());
setTofs.insert(tof);
} // for all tof strings
} // for all lang strings
}
void CapabilityContainer::computeClosure(TyMapLang2TypeOrFeatures & rMap, TyMapLang2TypeOrFeatures & noTerritoryMap) {
assert( noTerritoryMap.empty() );
// 1. all original TOFS from iv_mapLang2*TypesOrFeatures for language codes without territory
// are copied as iv_mapLang2*TypesOrFeaturesNoTerritory
TyMapLang2TypeOrFeatures::const_iterator cit;
for (cit = rMap.begin(); cit != rMap.end(); ++cit) {
Language const & lang = (*cit).first;
if ( !lang.hasTerritory() ) {
noTerritoryMap[lang] = (*cit).second;
}
}
// 2. all TOFs from en are appended to en_US, en_GB etc. for iv_mapLang2*TypesOrFeatures
TyMapLang2TypeOrFeatures::iterator it;
for (it = rMap.begin(); it != rMap.end(); ++it) {
Language const & lang = (*it).first;
if ( lang.hasTerritory() ) {
// put all TOFs "down" from pt to pt_BR
Language langWithoutTerritory(lang.getLanguage());
TySetTypeOrFeatures & rSet = rMap[langWithoutTerritory];
TySetTypeOrFeatures & specificSet = (*it).second;
// MSV6 compile error: specificSet.insert( rSet.begin(), rSet.end() );
TySetTypeOrFeatures::const_iterator setit;
for (setit = rSet.begin(); setit != rSet.end(); ++setit) {
specificSet.insert( *setit );
}
}
}
// 3. all TOFs from en_GB, en_US, etc. are appended to en for iv_mapLang2*TypesOrFeatures
for (it = rMap.begin(); it != rMap.end(); ++it) {
Language const & lang = (*it).first;
if ( lang.hasTerritory() ) {
// put all TOFs "up" from pt_BR to pt
Language langWithoutTerritory(lang.getLanguage());
TySetTypeOrFeatures & rSet = (*it).second;
// MSV6 compile error: rMap[langWithoutTerritory].insert( rSet.begin(), rSet.end() );
TySetTypeOrFeatures::const_iterator setit;
for (setit = rSet.begin(); setit != rSet.end(); ++setit) {
rMap[langWithoutTerritory].insert(*setit);
}
}
}
#ifdef DEBUG_VERBOSE
UIMA_TPRINT("Standard Map:");
for (cit = rMap.begin(); cit != rMap.end(); ++cit) {
UIMA_TPRINT("Language: " << (*cit).first);
UIMA_TPRINT(" TOFs:");
TySetTypeOrFeatures const & set = (*cit).second;
TySetTypeOrFeatures::const_iterator setcit;
for (setcit = set.begin(); setcit != set.end(); ++setcit) {
UIMA_TPRINT(" " << (*setcit).getName());
}
}
#endif
}
void
CapabilityContainer::init( vector<Capability*> const & vecCap, TypeSystem const & typeSystem, EnMatchPolicy enMatchPolicy ) {
iv_enMatchPolicy = enMatchPolicy;
vector<Capability*>::const_iterator it;
assert(iv_mapLang2InputTypesOrFeatures.size() == 0);
assert(iv_mapLang2OutputTypesOrFeatures.size() == 0);
for (it = vecCap.begin(); it != vecCap.end(); ++it) {
assert(EXISTS(*it));
Capability const & cap = (*(*it));
initTypeOrFeatures(
iv_mapLang2InputTypesOrFeatures,
cap.getCapabilityTypes(Capability::INPUT),
cap.getSupportedLanguages(),
typeSystem);
initTypeOrFeatures(
iv_mapLang2InputTypesOrFeatures,
cap.getCapabilityFeatures(Capability::INPUT),
cap.getSupportedLanguages(),
typeSystem);
initTypeOrFeatures(
iv_mapLang2OutputTypesOrFeatures,
cap.getCapabilityTypes(Capability::OUTPUT),
cap.getSupportedLanguages(),
typeSystem);
initTypeOrFeatures(
iv_mapLang2OutputTypesOrFeatures,
cap.getCapabilityFeatures(Capability::OUTPUT),
cap.getSupportedLanguages(),
typeSystem);
}
// try to find input tofs for unspecifed language
assert(iv_psetInputTOFsForUnspecifiedLang == NULL);
TySetTypeOrFeatures const * pSetInputTOFs =
findInputTOFs(Language(Language::UNSPECIFIED));
if (pSetInputTOFs != NULL) {
iv_psetInputTOFsForUnspecifiedLang = pSetInputTOFs;
}
// try to find output tofs for unspecifed language
assert(iv_psetOutputTOFsForUnspecifiedLang == NULL);
TySetTypeOrFeatures const * pSetOutputTOFs =
findOutputTOFs(Language(Language::UNSPECIFIED));
if (pSetOutputTOFs != NULL) {
iv_psetOutputTOFsForUnspecifiedLang = pSetOutputTOFs;
}
copyEntriesForUnspecifedLanguage();
if (hasEmptyOutputTypeOrFeatures(Language(Language::UNSPECIFIED))) {
iv_bHasEmptyOutputTOFsForUnspecifiedLang = true;
}
computeClosure(iv_mapLang2OutputTypesOrFeatures, iv_mapLang2OutputTypesOrFeaturesNoTerritory );
computeClosure(iv_mapLang2InputTypesOrFeatures, iv_mapLang2InputTypesOrFeaturesNoTerritory );
}
void CapabilityContainer::copyEntriesForUnspecifedLanguage() {
if (iv_psetInputTOFsForUnspecifiedLang == NULL) {
// if it is not in the input it can't be in the output either
assert(findOutputTOFs(Language(Language::UNSPECIFIED)) == NULL);
return;
}
assert(EXISTS(iv_psetInputTOFsForUnspecifiedLang));
if (iv_psetInputTOFsForUnspecifiedLang->size() != 0) {
TyMapLang2TypeOrFeatures::iterator it;
for (it = iv_mapLang2InputTypesOrFeatures.begin(); it != iv_mapLang2InputTypesOrFeatures.end(); ++it) {
if (&(*it).second != iv_psetInputTOFsForUnspecifiedLang) { // don't append onto ourselves
if ((*it).second.size() > 0) { // don't append to empty
#ifndef NDEBUG
size_t uiOldSize = (*it).second.size();
#endif /* NDEBUG */
// MSV6 compile error: (*it).second.insert(iv_psetInputTOFsForUnspecifiedLang->begin(), iv_psetInputTOFsForUnspecifiedLang->end());
TySetTypeOrFeatures::const_iterator tofit;
for (tofit = iv_psetInputTOFsForUnspecifiedLang->begin(); tofit != iv_psetInputTOFsForUnspecifiedLang->end(); ++tofit) {
(*it).second.insert( *tofit );
}
assert((*it).second.size() >= uiOldSize);
}
}
}
// Now do the same copying for the output
// if it is in the input it must be in the output too
assert(findOutputTOFs(Language(Language::UNSPECIFIED)) != NULL);
assert(EXISTS(iv_psetOutputTOFsForUnspecifiedLang));
for (it = iv_mapLang2OutputTypesOrFeatures.begin(); it != iv_mapLang2OutputTypesOrFeatures.end(); ++it) {
if (&(*it).second != iv_psetOutputTOFsForUnspecifiedLang) { // don't append onto ourselves
if ((*it).second.size() > 0) { // don't append to empty
#ifndef NDEBUG
size_t uiOldSize = (*it).second.size();
#endif /* NDEBUG */
// MSV6 compile error: (*it).second.insert(iv_psetOutputTOFsForUnspecifiedLang->begin(), iv_psetOutputTOFsForUnspecifiedLang->end());
TySetTypeOrFeatures::const_iterator tofit;
for (tofit = iv_psetOutputTOFsForUnspecifiedLang->begin(); tofit != iv_psetOutputTOFsForUnspecifiedLang->end(); ++tofit) {
(*it).second.insert(*tofit);
}
assert((*it).second.size() >= uiOldSize);
}
}
}
}
}
/*static*/ CapabilityContainer::TySetTypeOrFeatures const *
CapabilityContainer::findTOFs(TyMapLang2TypeOrFeatures const & crMap,
TyMapLang2TypeOrFeatures const & noTerritoryMap,
Language const & lang, EnMatchPolicy enMatchPolicy) {
// temporary
assert( enMatchPolicy == enStrictThenLooseMatch );
//first try strict match using built-in find function
TyMapLang2TypeOrFeatures::const_iterator itMap = crMap.find(lang);
if (itMap != crMap.end()) {
return (&(*itMap).second);
}
if (enMatchPolicy == enStrictMatchOnly) {
return NULL;
}
assert(enMatchPolicy == enStrictThenLooseMatch);
// language was not found directly
// now try without territory
Language newLang(lang.getLanguage());
assert( !newLang.hasTerritory() );
for (itMap = noTerritoryMap.begin(); itMap != noTerritoryMap.end(); ++itMap) {
assert( !(*itMap).first.hasTerritory() );
if ((*itMap).first.getLanguage() == lang.getLanguage()) {
return (&(*itMap).second);
}
}
return NULL;
}
CapabilityContainer::TySetTypeOrFeatures const *
CapabilityContainer::findInputTOFs(Language const & lang) const {
// try to find the set for the language
TySetTypeOrFeatures const * pSetTOFs =
findTOFs(iv_mapLang2InputTypesOrFeatures, iv_mapLang2InputTypesOrFeaturesNoTerritory, lang, iv_enMatchPolicy);
// if there is one return that one
if (pSetTOFs != NULL) {
return pSetTOFs;
}
// else return whatever we may have for the unspecified language
return (iv_psetInputTOFsForUnspecifiedLang);
}
CapabilityContainer::TySetTypeOrFeatures const *
CapabilityContainer::findOutputTOFs(Language const & lang) const {
// try to find the set for the language
TySetTypeOrFeatures const * pSetTOFs =
findTOFs(iv_mapLang2OutputTypesOrFeatures, iv_mapLang2OutputTypesOrFeaturesNoTerritory, lang, iv_enMatchPolicy);
// if there is one return that one
if (pSetTOFs != NULL) {
return pSetTOFs;
}
// else return whatever we may have for the unspecified language
return (iv_psetOutputTOFsForUnspecifiedLang);
}
CapabilityContainer::TySetTypeOrFeatures const &
CapabilityContainer::getInputTypeOrFeatures( Language const & lang) const {
// try to find it
TySetTypeOrFeatures const * pSetTOFs = findInputTOFs(lang);
if (pSetTOFs != NULL) {
return (*pSetTOFs);
}
// don't use the [] operator because would insert and return a
// newly created empty vector
assert(iv_setEmptyTypeOrFeaturesSet.size() == 0);
return iv_setEmptyTypeOrFeaturesSet;
}
bool
CapabilityContainer::hasInputTypeOrFeature( TypeOrFeature const & tof, Language const & lang) const {
// try to find it
TySetTypeOrFeatures const * pSetTOFs = findInputTOFs(lang);
if (pSetTOFs == NULL) {
return false;
}
TySetTypeOrFeatures::const_iterator itSet = pSetTOFs->find(tof);
if (itSet == pSetTOFs->end()) {
return false;
}
return true;
}
CapabilityContainer::TySetTypeOrFeatures const &
CapabilityContainer::getOutputTypeOrFeatures( Language const & lang) const {
// try to find it
TySetTypeOrFeatures const * pSetTOFs = findOutputTOFs(lang);
if (pSetTOFs != NULL) {
return (*pSetTOFs);
}
// don't use the [] operator because will insert and return a
// newly created empty vector
assert(iv_setEmptyTypeOrFeaturesSet.size() == 0);
return iv_setEmptyTypeOrFeaturesSet;
}
bool
CapabilityContainer::hasOutputTypeOrFeature( TypeOrFeature const & tof, Language const & lang) const {
// try to find it
TySetTypeOrFeatures const * pSetTOFs = findOutputTOFs(lang);
if (pSetTOFs == NULL) {
return false;
}
TySetTypeOrFeatures::const_iterator itSet = pSetTOFs->find(tof);
if (itSet == pSetTOFs->end()) {
return false;
}
return true;
}
bool
CapabilityContainer::hasEmptyOutputTypeOrFeatures( Language const & lang) const {
// try to find it in output
TySetTypeOrFeatures const * pSetTOFs = findOutputTOFs(lang);
if (pSetTOFs == NULL) {
// we must have an explicit entry for that language in our map
// or maybe we have an empty entry for the unspecifed lang
return iv_bHasEmptyOutputTOFsForUnspecifiedLang;
}
// but that explicit entry must be empty
if (pSetTOFs->size() > 0) {
return false;
}
return true;
}
} // namespace internal
} // namespace uima