// blob: 112333334a8953a3a5e3293b9a5646ed3ea7f4c7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.test;
import junit.framework.TestCase;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.admin.CASAdminException;
import org.apache.uima.cas.admin.FSIndexComparator;
import org.apache.uima.cas.admin.FSIndexRepositoryMgr;
import org.apache.uima.cas.admin.LinearTypeOrderBuilder;
import org.apache.uima.cas.admin.TypeSystemMgr;
public class CASTestSetup implements AnnotatorInitializer {
// Type system constants.
public static final String TOKEN_TYPE = "Token";
public static final String TOKEN_TYPE_FEAT = "type";
public static final String TOKEN_TYPE_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
+ TOKEN_TYPE_FEAT;
public static final String TOKEN_TYPE_TYPE = "TokenType";
public static final String WORD_TYPE = "Word";
public static final String ARRAYFSWITHSUBTYPE_TYPE = "ArrayFsWithSubtype";
public static final String ARRAYFSWITHSUBTYPE_TYPE_FEAT = "subArrayOfAnnot";
public static final String ARRAYFSWITHSUBTYPE_TYPE_FEAT_Q = ARRAYFSWITHSUBTYPE_TYPE + TypeSystem.FEATURE_SEPARATOR
+ ARRAYFSWITHSUBTYPE_TYPE_FEAT;
public static final String SEP_TYPE = "Separator";
public static final String EOS_TYPE = "EndOfSentence";
public static final String SENT_TYPE = "Sentence";
// public static final String INT_ARRAY_SUB = "IntArraySub";
public static final String INT_SUB_NAME = "intArrayName";
public static final String LEMMA_FEAT = "lemma";
public static final String LEMMA_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR + LEMMA_FEAT;
public static final String SENT_LEN_FEAT = "sentenceLength";
public static final String SENT_LEN_FEAT_Q = SENT_TYPE + TypeSystem.FEATURE_SEPARATOR
+ SENT_LEN_FEAT;
public static final String TOKEN_FLOAT_FEAT = "tokenFloatFeat";
public static final String TOKEN_FLOAT_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
+ TOKEN_FLOAT_FEAT;
public static final String LEMMA_LIST_FEAT = "lemmaList";
public static final String LEMMA_LIST_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
+ LEMMA_LIST_FEAT;
public static final String LANG_PAIR = "org.apache.lang.LanguagePair";
public static final String LANG1 = "lang1";
public static final String LANG2 = "lang2";
public static final String DESCR_FEAT = "description";
public static final String GROUP_1 = "org.apache.lang.Group1";
public static final String GROUP_2 = "org.apache.lang.Group2";
public static final String[] GROUP_1_LANGUAGES = { "Chinese", "Japanese", "Korean", "English",
"French", "German", "Italian", "Spanish", "Portuguese" };
public static final String[] GROUP_2_LANGUAGES = { "Arabic", "Czech", "Danish", "Dutch",
"Finnish", "Greek", "Hebrew", "Hungarian", "Norwegian", "Polish", "Portuguese", "Russian",
"Turkish" };
// Index name constants.
public static final String ANNOT_SET_INDEX = "Annotation Set Index";
public static final String ANNOT_BAG_INDEX = "Annotation Bag Index";
public static final String ANNOT_SORT_INDEX = "Annotation Sort Index";
/**
* Constructor for CASTestSetup.
*/
public CASTestSetup() {
super();
}
/**
* @see org.apache.uima.cas.test.AnnotatorInitializer#initTypeSystem(TypeSystemMgr)
*/
/* Types:
* TOP
* Token TOKEN_TYPE
* Word
* Separator
* EndOfSentence
* ArrayFSwithSubtype
* Annotation
* Sentence
*
*/
public void initTypeSystem(TypeSystemMgr tsm) {
// Add new types and features.
Type topType = tsm.getTopType();
Type annotType = tsm.getType(CAS.TYPE_NAME_ANNOTATION);
// assert(annotType != null);
tsm.addType(SENT_TYPE, annotType);
Type tokenType = tsm.addType(TOKEN_TYPE, annotType);
Type tokenTypeType = tsm.addType(TOKEN_TYPE_TYPE, topType);
tsm.addType(WORD_TYPE, tokenTypeType);
Type arrayFsWithSubtypeType = tsm.addType(ARRAYFSWITHSUBTYPE_TYPE, topType);
Type arrayOfAnnot = tsm.getArrayType(annotType);
tsm.addFeature(ARRAYFSWITHSUBTYPE_TYPE_FEAT, arrayFsWithSubtypeType, arrayOfAnnot);
tsm.addType(SEP_TYPE, tokenTypeType);
tsm.addType(EOS_TYPE, tokenTypeType);
tsm.addFeature(TOKEN_TYPE_FEAT, tokenType, tokenTypeType);
tsm.addFeature(TOKEN_FLOAT_FEAT, tokenType, tsm.getType(CAS.TYPE_NAME_FLOAT));
// Add a type that inherits from IntArray.
// tsm.addType(INT_ARRAY_SUB, tsm.getType(CAS.TYPE_NAME_INTEGER_ARRAY));
// tsm.addFeature(
// INT_SUB_NAME,
// tsm.getType(INT_ARRAY_SUB),
// tsm.getType(CAS.TYPE_NAME_STRING));
tsm.addFeature(LEMMA_FEAT, tokenType, tsm.getType(CAS.TYPE_NAME_STRING));
tsm.addFeature(SENT_LEN_FEAT, tsm.getType(SENT_TYPE), tsm.getType(CAS.TYPE_NAME_INTEGER));
tsm.addFeature(LEMMA_LIST_FEAT, tsm.getType(TOKEN_TYPE), tsm
.getType(CAS.TYPE_NAME_STRING_ARRAY));
Type group1 = tsm.addStringSubtype(GROUP_1, GROUP_1_LANGUAGES);
Type group2 = tsm.addStringSubtype(GROUP_2, GROUP_2_LANGUAGES);
Type langPair = tsm.addType(LANG_PAIR, topType);
tsm.addFeature(LANG1, langPair, group1);
tsm.addFeature(LANG2, langPair, group2);
Type stringType = tsm.getType(CAS.TYPE_NAME_STRING);
tsm.addFeature(DESCR_FEAT, langPair, stringType);
boolean exc = false;
try {
tsm.addType("some.new.Name", group1);
} catch (CASAdminException e) {
TestCase.assertTrue(e.getError() == CASAdminException.TYPE_IS_INH_FINAL);
exc = true;
}
TestCase.assertTrue(exc);
exc = false;
try {
tsm.addFeature("some.new.Name", group1, stringType);
} catch (CASAdminException e) {
TestCase.assertTrue(e.getError() == CASAdminException.TYPE_IS_FEATURE_FINAL);
exc = true;
}
TestCase.assertTrue(exc);
}
public void initIndexes(FSIndexRepositoryMgr irm, TypeSystem ts) {
FSIndexComparator comp = irm.createComparator();
Type annotation = ts.getType(CAS.TYPE_NAME_ANNOTATION);
comp.setType(annotation);
comp.addKey(annotation.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_BEGIN),
FSIndexComparator.STANDARD_COMPARE);
comp.addKey(annotation.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END),
FSIndexComparator.REVERSE_STANDARD_COMPARE);
LinearTypeOrderBuilder tob = irm.createTypeSortOrder();
try {
tob.add(new String[] { CAS.TYPE_NAME_ANNOTATION, SENT_TYPE, TOKEN_TYPE });
comp.addKey(tob.getOrder(), FSIndexComparator.STANDARD_COMPARE);
} catch (CASException e) {
TestCase.assertTrue(false);
}
irm.createIndex(comp, ANNOT_BAG_INDEX, FSIndex.BAG_INDEX);
irm.createIndex(comp, ANNOT_SET_INDEX, FSIndex.SET_INDEX);
irm.createIndex(comp, ANNOT_SORT_INDEX, FSIndex.SORTED_INDEX);
}
}