// src/test/src/test_typepriority.cpp - uima-uimacpp - Git at Google

 /** \file test_typepriority.cpp .

  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.

 -------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------- */
 /*       Include dependencies                                              */
 /* ----------------------------------------------------------------------- */
 #include "uima/engine.hpp"
 #include "uima/typesystem.hpp"
 #include "uima/tt_types.hpp"
 #include "uima/cas.hpp"
 #include "uima/fsindexrepository.hpp"
 #include "uima/resmgr.hpp"

 /* ----------------------------------------------------------------------- */
 /*       Constants                                                         */
 /* ----------------------------------------------------------------------- */
 #ifndef NDEBUG
 #define ASSERT_OR_THROWEXCEPTION(x) assert(x)
 #else
 #define ASSERT_OR_THROWEXCEPTION(x) if (!(x)) { cerr << __FILE__ << ": Error in line " << __LINE__ << endl; exit(1); }
 #endif

 #define LOG(x) cout << __FILE__ << __LINE__ << ": " << x << endl

 using namespace uima;
 using namespace std;
 /* ----------------------------------------------------------------------- */
 /*       Forward declarations                                              */
 /* ----------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------- */
 /*       Types / Classes                                                   */
 /* ----------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------- */
 /*       Implementation                                                    */
 /* ----------------------------------------------------------------------- */
 /**
  * Finds the last entry of a type list that a given type subsumes.
  *
  * @param types  sequence of types to scan, in the order they were collected
  * @param t      type tested against every entry via t.subsumes(entry)
  * @return       index of the last entry for which t.subsumes(entry) is true,
  *               or -1 if no entry qualifies
  */
 int findLastPosition(vector<uima::Type> const & types, Type t) {
   int result = -1;
   // Index with the container's own unsigned size type so the loop bound is
   // not a signed/unsigned comparison; narrow to int only when storing a hit.
   for (vector<uima::Type>::size_type i = 0; i < types.size(); ++i) {
     if (t.subsumes( types[i] ) ) {
       result = static_cast<int>(i);
     }
   }
   return result;
 }


 /**
  * Asserts that the last entry matching t1 comes before the last entry
  * matching t2 in the given type list.
  *
  * Both positions are obtained with findLastPosition(). The check fails
  * (through ASSERT_OR_THROWEXCEPTION) when either type has no matching entry
  * or when t1's last position is not strictly smaller than t2's.
  *
  * @param types  list of types in index iteration order
  * @param t1     type expected to end earlier in the list
  * @param t2     type expected to end later in the list
  */
 void checkPriority(vector<uima::Type> const & types, Type t1, Type t2) {
   LOG("Checking priority between " << t1.getName() << " and " << t2.getName());

   // Each type has to be represented by at least one entry.
   int const iPos1 = findLastPosition(types, t1);
   ASSERT_OR_THROWEXCEPTION(iPos1 >= 0);

   int const iPos2 = findLastPosition(types, t2);
   ASSERT_OR_THROWEXCEPTION(iPos2 >= 0);

   LOG("Last position of annotation of type " << t1.getName() << ": " << iPos1);
   LOG("Last position of annotation of type " << t2.getName() << ": " << iPos2);

   // The actual priority check: t1 must end before t2.
   ASSERT_OR_THROWEXCEPTION(iPos1 < iPos2);
 }

 /**
  * Checks that the "TestIndex" index of a CAS orders annotations according
  * to the type priorities this test expects.
  *
  * Iterates the index obtained for the annotation type, records (in
  * iteration order) the type of every annotation whose begin position is 0,
  * and then verifies with checkPriority() that
  *   - the lexical annotation type ends before the doc-structure type, and
  *   - the sentence type ends before the paragraph type.
  *
  * @param rTCAS  CAS whose index repository provides "TestIndex"
  */
 void checkIndex(CAS const & rTCAS) {
   // Only the types that actually take part in the checks are looked up.
   uima::Type annType = rTCAS.getTypeSystem().getType(uima::CAS::TYPE_NAME_ANNOTATION);
   uima::Type sentType = rTCAS.getTypeSystem().getType(uima::TT::TYPE_NAME_SENTENCE_ANNOTATION);
   uima::Type parType = rTCAS.getTypeSystem().getType(uima::TT::TYPE_NAME_PARAGRAPH_ANNOTATION);
   uima::Type lexType = rTCAS.getTypeSystem().getType(uima::TT::TYPE_NAME_LEXICAL_ANNOTATION);
   uima::Type docStructType = rTCAS.getTypeSystem().getType(uima::TT::TYPE_NAME_DOC_STRUCTURE_ANNOTATION);

   FSIndex ix = rTCAS.getIndexRepository().getIndex("TestIndex", annType);
   FSIterator it = ix.iterator();

   vector<uima::Type> types;
   for (it.moveToFirst(); it.isValid(); it.moveToNext() ) {
     // Fetch the current feature structure once and reuse it below.
     AnnotationFS annFS = static_cast<AnnotationFS>( it.get() );
     LOG("Annotation " << annFS.getType().getName());
     if (annFS.getBeginPosition() == 0) {
       types.push_back( annFS.getType() );
       // The entry just appended is the last one; its index is size() - 1.
       LOG("Type " << (types.size() - 1) << ": " << types.back().getName() );
     }
   }

   /*
   Normally longer annots come first when starting at same position.
   Override this for token (lextype) over sent & para (docstructtype)
   */
   checkPriority( types, lexType, docStructType);

   // Override again for sentence over paragraph
   checkPriority( types, sentType, parType );

 }

 /* ----------------------------------------------------------------------- */
 /*       Main routine                                                      */
 /* ----------------------------------------------------------------------- */

 /**
  * Test entry point.
  *
  * Creates a text analysis engine from an inline aggregate descriptor
  * (delegates "frost" and "dump", two type priority lists and a sorted
  * "TestIndex" keyed on begin and type priority), processes a short English
  * document and validates the resulting index order with checkIndex().
  *
  * @param argc  unused
  * @param argv  unused
  * @return 0 when the run completes, 1 when an exception was caught
  */
 int main(int argc, char * argv[])
 {
   LOG("UIMATEST_TYPE_PRIORITY started");
   int exitCode = 0;
   try {
     // Aggregate descriptor, assembled from adjacent string literals.
     char const * descriptorXml =
       "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
       "<taeDescription"
       "   xmlns=\"http://uima.apache.org/resourceSpecifier\""
       "   xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
       "   xsi:schemaLocation=\"http://uima.apache.org/resourceSpecifier http://uima.apache.org/resourceSpecifierSchema.xsd\""
       "   >"
       "    <frameworkImplementation>UIMA</frameworkImplementation>"
       "    <primitive>false</primitive>"
       "    <delegateAnalysisEngineSpecifiers>"
       "        <delegateAnalysisEngine key=\"frost\">"
   //ee???      "            <includeFile>tok.xml</includeFile>"
       "            <import location=\"tok.xml\" />"
       "        </delegateAnalysisEngine>"
       /* ee - reducing number of annotators used for testing
             "        <delegateAnalysisEngine key=\"doxtract\">"
             "            <includeFile>doxtract.xml</includeFile>"
             "        </delegateAnalysisEngine>"
       */
       "        <delegateAnalysisEngine key=\"dump\">"
   //ee      "            <includeFile>dump.xml</includeFile>"
       "            <import location=\"dump.xml\" />"
       "        </delegateAnalysisEngine>"
       "    </delegateAnalysisEngineSpecifiers>"
       "    <analysisEngineMetaData>"
       "        <name>UIMAAnnotator</name>"
       "        <description></description>"
       "        <version>1.0</version>"
       "        <vendor>IBM Corporation</vendor>"
       "        <flowConstraints>"
       "            <fixedFlow>"
       "                <node>frost</node>"
   //ee      "                <node>doxtract</node>"
       "                <node>dump</node>"
       "            </fixedFlow>"
       "        </flowConstraints>"
       "        <typePriorities>"

       "         <priorityList>"
       "             <type>uima.tt.LexicalAnnotation</type>"
       "             <type>uima.tt.DocStructureAnnotation</type>"
       "         </priorityList>"

       "         <priorityList>"
       "             <type>uima.tt.SentenceAnnotation</type> "
       "             <type>uima.tt.ParagraphAnnotation</type>"
       "         </priorityList>"
       /*
             "         <priorityList>"
             "             <type>uima.tt.MultiTokenAnnotation</type>"
             "             <type>uima.tt.TokenAnnotation</type>"
             "         </priorityList>"
             "            <priorityList>"
             "                <type>uima.tt.TokenAnnotation</type>"
             "                <type>uima.tt.SentenceAnnotation</type>"
             "                <type>uima.tt.ParagraphAnnotation</type>"
             "            </priorityList>"
             "            <priorityList>"
             "                <type>uima.tt.TokenAnnotation</type>"
             "                <type>uima.tcas.DocumentAnnotation</type>"
             "                <type>uima.tt.ParagraphAnnotation</type>"
             "            </priorityList>"
             "            <priorityList>"
             "                <type>uima.tt.TokenAnnotation</type>"
             "                <type>uima.tt.PlaceName</type>"
             "            </priorityList>"
       */
       "        </typePriorities>"
       "        <fsIndexCollection>"
       "          <imports> "
       "            <import location=\"tt_indexes.xml\"/>"
       "           </imports> "
       "           <fsIndexes> "
       "            <fsIndexDescription>"
       "                <label>TestIndex</label>"
       "                <typeName>uima.tcas.Annotation</typeName>"
       "                <kind>sorted</kind>"
       "                <keys>"
       "                    <fsIndexKey>"
       "                        <featureName>begin</featureName>"
       "                        <comparator>standard</comparator>"
       "                    </fsIndexKey>"
       "                    <fsIndexKey>"
       "                        <typePriority></typePriority>"
       "                    </fsIndexKey>"
       "                </keys>"
       "            </fsIndexDescription>"
       "        </fsIndexes> "
       "        </fsIndexCollection> "
       "        <capabilities>"
       "            <capability>"
       "                <inputs></inputs>"
       "                <outputs>"
       "                    <type>uima.tt.TokenAnnotation</type>"
       "                    <type>uima.tt.SentenceAnnotation</type>"
       "                    <type>uima.tt.ParagraphAnnotation</type>"
       "                    <type>uima.tt.NameAnnotation</type>"
       "                </outputs>"
       "                <languagesSupported>"
       "                    <language>en</language>"
       "                </languagesSupported>"
       "            </capability>"
       "        </capabilities>"
       "    </analysisEngineMetaData>"
       "</taeDescription>";

     // The engine factory takes the descriptor as a UChar buffer plus length.
     icu::UnicodeString descriptorUS(descriptorXml);
     UChar const * descriptorBuf = descriptorUS.getBuffer();
     size_t descriptorLen = descriptorUS.length();

     uima::ResourceManager::createInstance("test");

     ErrorInfo errInfo;
     uima::TextAnalysisEngine * pEngine =
       TextAnalysisEngine::createTextAnalysisEngine(descriptorBuf, descriptorLen, errInfo);
     if (!pEngine) {
       // Report the creation error before the assertion stops the test.
       LOG("Error: " << errInfo.asString());
       ASSERT_OR_THROWEXCEPTION(false);
     }
     ASSERT_OR_THROWEXCEPTION(EXISTS(pEngine));
     ASSERT_OR_THROWEXCEPTION( errInfo.getErrorId() == UIMA_ERR_NONE );

     CAS * tcas = pEngine->newCAS();
     ASSERT_OR_THROWEXCEPTION( EXISTS(tcas) );

     // Feed the test document and run the engine over it.
     icu::UnicodeString documentText("London calling. The Clash.");
     tcas->setDocumentText(documentText.getBuffer(), documentText.length());
     tcas->getDocumentAnnotation().setLanguage("en");
     TyErrorId err = pEngine->process(*tcas);
     ASSERT_OR_THROWEXCEPTION( err == UIMA_ERR_NONE );

     // The actual test: verify the index order against the type priorities.
     checkIndex( *tcas );

     err = pEngine->destroy();
     ASSERT_OR_THROWEXCEPTION( err == UIMA_ERR_NONE );

     delete tcas;
     delete pEngine;
     LOG("UIMATEST_TYPE_PRIORITY finished");
   } catch (Exception & ex) {
     cerr << ex.asString() << endl;
     exitCode = 1;
   } catch (...) {
     cerr << "Unexpected exception " << endl;
     exitCode = 1;
   }
   return exitCode;
 }

 /* <EOF> */
	/** \file test_typepriority.cpp .

	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.

	-------------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------- */
	/* Include dependencies */
	/* ----------------------------------------------------------------------- */
	#include "uima/engine.hpp"
	#include "uima/typesystem.hpp"
	#include "uima/tt_types.hpp"
	#include "uima/cas.hpp"
	#include "uima/fsindexrepository.hpp"
	#include "uima/resmgr.hpp"

	/* ----------------------------------------------------------------------- */
	/* Constants */
	/* ----------------------------------------------------------------------- */
	#ifndef NDEBUG
	#define ASSERT_OR_THROWEXCEPTION(x) assert(x)
	#else
	#define ASSERT_OR_THROWEXCEPTION(x) if (!(x)) { cerr << __FILE__ << ": Error in line " << __LINE__ << endl; exit(1); }
	#endif

	#define LOG(x) cout << __FILE__ << __LINE__ << ": " << x << endl

	using namespace uima;
	using namespace std;
	/* ----------------------------------------------------------------------- */
	/* Forward declarations */
	/* ----------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------- */
	/* Types / Classes */
	/* ----------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------- */
	/* Implementation */
	/* ----------------------------------------------------------------------- */
	/**
	 * Finds the last entry of a type list that a given type subsumes.
	 *
	 * @param types  sequence of types to scan, in the order they were collected
	 * @param t      type tested against every entry via t.subsumes(entry)
	 * @return       index of the last entry for which t.subsumes(entry) is true,
	 *               or -1 if no entry qualifies
	 */
	int findLastPosition(vector<uima::Type> const & types, Type t) {
		int result = -1;
		// Index with the container's own unsigned size type so the loop bound is
		// not a signed/unsigned comparison; narrow to int only when storing a hit.
		for (vector<uima::Type>::size_type i = 0; i < types.size(); ++i) {
			if (t.subsumes( types[i] ) ) {
				result = static_cast<int>(i);
			}
		}
		return result;
	}


	/**
	 * Asserts that the last entry matching t1 comes before the last entry
	 * matching t2 in the given type list.
	 *
	 * Both positions are obtained with findLastPosition(). The check fails
	 * (through ASSERT_OR_THROWEXCEPTION) when either type has no matching entry
	 * or when t1's last position is not strictly smaller than t2's.
	 *
	 * @param types  list of types in index iteration order
	 * @param t1     type expected to end earlier in the list
	 * @param t2     type expected to end later in the list
	 */
	void checkPriority(vector<uima::Type> const & types, Type t1, Type t2) {
		LOG("Checking priority between " << t1.getName() << " and " << t2.getName());

		// Each type has to be represented by at least one entry.
		int const iPos1 = findLastPosition(types, t1);
		ASSERT_OR_THROWEXCEPTION(iPos1 >= 0);

		int const iPos2 = findLastPosition(types, t2);
		ASSERT_OR_THROWEXCEPTION(iPos2 >= 0);

		LOG("Last position of annotation of type " << t1.getName() << ": " << iPos1);
		LOG("Last position of annotation of type " << t2.getName() << ": " << iPos2);

		// The actual priority check: t1 must end before t2.
		ASSERT_OR_THROWEXCEPTION(iPos1 < iPos2);
	}

	/**
	 * Checks that the "TestIndex" index of a CAS orders annotations according
	 * to the type priorities this test expects.
	 *
	 * Iterates the index obtained for the annotation type, records (in
	 * iteration order) the type of every annotation whose begin position is 0,
	 * and then verifies with checkPriority() that
	 *   - the lexical annotation type ends before the doc-structure type, and
	 *   - the sentence type ends before the paragraph type.
	 *
	 * @param rTCAS  CAS whose index repository provides "TestIndex"
	 */
	void checkIndex(CAS const & rTCAS) {
		// Only the types that actually take part in the checks are looked up.
		uima::Type annType = rTCAS.getTypeSystem().getType(uima::CAS::TYPE_NAME_ANNOTATION);
		uima::Type sentType = rTCAS.getTypeSystem().getType(uima::TT::TYPE_NAME_SENTENCE_ANNOTATION);
		uima::Type parType = rTCAS.getTypeSystem().getType(uima::TT::TYPE_NAME_PARAGRAPH_ANNOTATION);
		uima::Type lexType = rTCAS.getTypeSystem().getType(uima::TT::TYPE_NAME_LEXICAL_ANNOTATION);
		uima::Type docStructType = rTCAS.getTypeSystem().getType(uima::TT::TYPE_NAME_DOC_STRUCTURE_ANNOTATION);

		FSIndex ix = rTCAS.getIndexRepository().getIndex("TestIndex", annType);
		FSIterator it = ix.iterator();

		vector<uima::Type> types;
		for (it.moveToFirst(); it.isValid(); it.moveToNext() ) {
			// Fetch the current feature structure once and reuse it below.
			AnnotationFS annFS = static_cast<AnnotationFS>( it.get() );
			LOG("Annotation " << annFS.getType().getName());
			if (annFS.getBeginPosition() == 0) {
				types.push_back( annFS.getType() );
				// The entry just appended is the last one; its index is size() - 1.
				LOG("Type " << (types.size() - 1) << ": " << types.back().getName() );
			}
		}

		/*
		Normally longer annots come first when starting at same position.
		Override this for token (lextype) over sent & para (docstructtype)
		*/
		checkPriority( types, lexType, docStructType);

		// Override again for sentence over paragraph
		checkPriority( types, sentType, parType );

	}

	/* ----------------------------------------------------------------------- */
	/* Main routine */
	/* ----------------------------------------------------------------------- */

	/**
	 * Test entry point.
	 *
	 * Creates a text analysis engine from an inline aggregate descriptor
	 * (delegates "frost" and "dump", two type priority lists and a sorted
	 * "TestIndex" keyed on begin and type priority), processes a short English
	 * document and validates the resulting index order with checkIndex().
	 *
	 * @param argc  unused
	 * @param argv  unused
	 * @return 0 when the run completes, 1 when an exception was caught
	 */
	int main(int argc, char * argv[])
	{
		LOG("UIMATEST_TYPE_PRIORITY started");
		int exitCode = 0;
		try {
			// Aggregate descriptor, assembled from adjacent string literals.
			char const * descriptorXml =
				"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
				"<taeDescription"
				" xmlns=\"http://uima.apache.org/resourceSpecifier\""
				" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
				" xsi:schemaLocation=\"http://uima.apache.org/resourceSpecifier http://uima.apache.org/resourceSpecifierSchema.xsd\""
				" >"
				" <frameworkImplementation>UIMA</frameworkImplementation>"
				" <primitive>false</primitive>"
				" <delegateAnalysisEngineSpecifiers>"
				" <delegateAnalysisEngine key=\"frost\">"
				//ee??? " <includeFile>tok.xml</includeFile>"
				" <import location=\"tok.xml\" />"
				" </delegateAnalysisEngine>"
				/* ee - reducing number of annotators used for testing
				" <delegateAnalysisEngine key=\"doxtract\">"
				" <includeFile>doxtract.xml</includeFile>"
				" </delegateAnalysisEngine>"
				*/
				" <delegateAnalysisEngine key=\"dump\">"
				//ee " <includeFile>dump.xml</includeFile>"
				" <import location=\"dump.xml\" />"
				" </delegateAnalysisEngine>"
				" </delegateAnalysisEngineSpecifiers>"
				" <analysisEngineMetaData>"
				" <name>UIMAAnnotator</name>"
				" <description></description>"
				" <version>1.0</version>"
				" <vendor>IBM Corporation</vendor>"
				" <flowConstraints>"
				" <fixedFlow>"
				" <node>frost</node>"
				//ee " <node>doxtract</node>"
				" <node>dump</node>"
				" </fixedFlow>"
				" </flowConstraints>"
				" <typePriorities>"

				" <priorityList>"
				" <type>uima.tt.LexicalAnnotation</type>"
				" <type>uima.tt.DocStructureAnnotation</type>"
				" </priorityList>"

				" <priorityList>"
				" <type>uima.tt.SentenceAnnotation</type> "
				" <type>uima.tt.ParagraphAnnotation</type>"
				" </priorityList>"
				/*
				" <priorityList>"
				" <type>uima.tt.MultiTokenAnnotation</type>"
				" <type>uima.tt.TokenAnnotation</type>"
				" </priorityList>"
				" <priorityList>"
				" <type>uima.tt.TokenAnnotation</type>"
				" <type>uima.tt.SentenceAnnotation</type>"
				" <type>uima.tt.ParagraphAnnotation</type>"
				" </priorityList>"
				" <priorityList>"
				" <type>uima.tt.TokenAnnotation</type>"
				" <type>uima.tcas.DocumentAnnotation</type>"
				" <type>uima.tt.ParagraphAnnotation</type>"
				" </priorityList>"
				" <priorityList>"
				" <type>uima.tt.TokenAnnotation</type>"
				" <type>uima.tt.PlaceName</type>"
				" </priorityList>"
				*/
				" </typePriorities>"
				" <fsIndexCollection>"
				" <imports> "
				" <import location=\"tt_indexes.xml\"/>"
				" </imports> "
				" <fsIndexes> "
				" <fsIndexDescription>"
				" <label>TestIndex</label>"
				" <typeName>uima.tcas.Annotation</typeName>"
				" <kind>sorted</kind>"
				" <keys>"
				" <fsIndexKey>"
				" <featureName>begin</featureName>"
				" <comparator>standard</comparator>"
				" </fsIndexKey>"
				" <fsIndexKey>"
				" <typePriority></typePriority>"
				" </fsIndexKey>"
				" </keys>"
				" </fsIndexDescription>"
				" </fsIndexes> "
				" </fsIndexCollection> "
				" <capabilities>"
				" <capability>"
				" <inputs></inputs>"
				" <outputs>"
				" <type>uima.tt.TokenAnnotation</type>"
				" <type>uima.tt.SentenceAnnotation</type>"
				" <type>uima.tt.ParagraphAnnotation</type>"
				" <type>uima.tt.NameAnnotation</type>"
				" </outputs>"
				" <languagesSupported>"
				" <language>en</language>"
				" </languagesSupported>"
				" </capability>"
				" </capabilities>"
				" </analysisEngineMetaData>"
				"</taeDescription>";

			// The engine factory takes the descriptor as a UChar buffer plus length.
			icu::UnicodeString descriptorUS(descriptorXml);
			UChar const * descriptorBuf = descriptorUS.getBuffer();
			size_t descriptorLen = descriptorUS.length();

			uima::ResourceManager::createInstance("test");

			ErrorInfo errInfo;
			uima::TextAnalysisEngine * pEngine =
				TextAnalysisEngine::createTextAnalysisEngine(descriptorBuf, descriptorLen, errInfo);
			if (!pEngine) {
				// Report the creation error before the assertion stops the test.
				LOG("Error: " << errInfo.asString());
				ASSERT_OR_THROWEXCEPTION(false);
			}
			ASSERT_OR_THROWEXCEPTION(EXISTS(pEngine));
			ASSERT_OR_THROWEXCEPTION( errInfo.getErrorId() == UIMA_ERR_NONE );

			CAS * tcas = pEngine->newCAS();
			ASSERT_OR_THROWEXCEPTION( EXISTS(tcas) );

			// Feed the test document and run the engine over it.
			icu::UnicodeString documentText("London calling. The Clash.");
			tcas->setDocumentText(documentText.getBuffer(), documentText.length());
			tcas->getDocumentAnnotation().setLanguage("en");
			TyErrorId err = pEngine->process(*tcas);
			ASSERT_OR_THROWEXCEPTION( err == UIMA_ERR_NONE );

			// The actual test: verify the index order against the type priorities.
			checkIndex( *tcas );

			err = pEngine->destroy();
			ASSERT_OR_THROWEXCEPTION( err == UIMA_ERR_NONE );

			delete tcas;
			delete pEngine;
			LOG("UIMATEST_TYPE_PRIORITY finished");
		} catch (Exception & ex) {
			cerr << ex.asString() << endl;
			exitCode = 1;
		} catch (...) {
			cerr << "Unexpected exception " << endl;
			exitCode = 1;
		}
		return exitCode;
	}

	/* <EOF> */