| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.cas.test; |
| |
| import java.util.ArrayList; |
| |
| import junit.framework.TestCase; |
| |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.cas.CASRuntimeException; |
| import org.apache.uima.cas.ConstraintFactory; |
| import org.apache.uima.cas.FSIndex; |
| import org.apache.uima.cas.FSIterator; |
| import org.apache.uima.cas.FSMatchConstraint; |
| import org.apache.uima.cas.FSStringConstraint; |
| import org.apache.uima.cas.FSTypeConstraint; |
| import org.apache.uima.cas.Feature; |
| import org.apache.uima.cas.FeaturePath; |
| import org.apache.uima.cas.FeatureStructure; |
| import org.apache.uima.cas.Type; |
| import org.apache.uima.cas.TypeSystem; |
| import org.apache.uima.cas.text.AnnotationFS; |
| |
| /** |
| * Class comment for FilteredIteratorTest.java goes here. |
| * |
| */ |
| public class FilteredIteratorTest extends TestCase { |
| |
| private CAS cas; |
| |
| private TypeSystem ts; |
| |
| private Type stringType; |
| |
| private Type tokenType; |
| |
| private Type intType; |
| |
| private Type tokenTypeType; |
| |
| private Type wordType; |
| |
| private Type sepType; |
| |
| private Type eosType; |
| |
| private Feature tokenTypeFeat; |
| |
| private Feature lemmaFeat; |
| |
| private Feature sentLenFeat; |
| |
| private Feature tokenFloatFeat; |
| |
| private Feature startFeature; |
| |
| private Type sentenceType; |
| |
| private Type annotationType; |
| |
| /** |
| * Constructor for FilteredIteratorTest. |
| * |
| * @param arg0 |
| */ |
| public FilteredIteratorTest(String arg0) { |
| super(arg0); |
| } |
| |
| public void setUp() { |
| try { |
| this.cas = CASInitializer.initCas(new CASTestSetup()); |
| assertTrue(cas != null); |
| this.ts = this.cas.getTypeSystem(); |
| assertTrue(ts != null); |
| } catch (Exception e) { |
| e.printStackTrace(); |
| assertTrue(false); |
| } |
| this.stringType = ts.getType(CAS.TYPE_NAME_STRING); |
| assertTrue(stringType != null); |
| this.tokenType = ts.getType(CASTestSetup.TOKEN_TYPE); |
| assertTrue(stringType != null); |
| this.intType = ts.getType(CAS.TYPE_NAME_INTEGER); |
| assertTrue(intType != null); |
| this.tokenTypeType = ts.getType(CASTestSetup.TOKEN_TYPE_TYPE); |
| assertTrue(tokenTypeType != null); |
| this.wordType = ts.getType(CASTestSetup.WORD_TYPE); |
| assertTrue(wordType != null); |
| this.sepType = ts.getType(CASTestSetup.SEP_TYPE); |
| assertTrue(sepType != null); |
| this.eosType = ts.getType(CASTestSetup.EOS_TYPE); |
| assertTrue(eosType != null); |
| this.tokenTypeFeat = ts.getFeatureByFullName(CASTestSetup.TOKEN_TYPE_FEAT_Q); |
| assertTrue(tokenTypeFeat != null); |
| this.lemmaFeat = ts.getFeatureByFullName(CASTestSetup.LEMMA_FEAT_Q); |
| assertTrue(lemmaFeat != null); |
| this.sentLenFeat = ts.getFeatureByFullName(CASTestSetup.SENT_LEN_FEAT_Q); |
| assertTrue(sentLenFeat != null); |
| this.tokenFloatFeat = ts.getFeatureByFullName(CASTestSetup.TOKEN_FLOAT_FEAT_Q); |
| assertTrue(tokenFloatFeat != null); |
| this.startFeature = ts.getFeatureByFullName(CAS.FEATURE_FULL_NAME_BEGIN); |
| assertTrue(startFeature != null); |
| this.sentenceType = ts.getType(CASTestSetup.SENT_TYPE); |
| assertTrue(sentenceType != null); |
| this.annotationType = ts.getType(CAS.TYPE_NAME_ANNOTATION); |
| assertTrue(annotationType != null); |
| } |
| |
| public void tearDown() { |
| this.cas = null; |
| this.ts = null; |
| this.stringType = null; |
| this.tokenType = null; |
| this.intType = null; |
| this.tokenTypeType = null; |
| this.wordType = null; |
| this.sepType = null; |
| this.eosType = null; |
| this.tokenTypeFeat = null; |
| this.lemmaFeat = null; |
| this.sentLenFeat = null; |
| this.tokenFloatFeat = null; |
| this.startFeature = null; |
| this.sentenceType = null; |
| this.annotationType = null; |
| } |
| |
| public void testIterator1() { |
| |
| try { |
| // cas.setDocumentText("A test."); can't set document text twice |
| } catch (CASRuntimeException e) { |
| assertTrue(false); |
| } |
| |
| try { |
| cas.setDocumentText("This is a test."); |
| } catch (CASRuntimeException e) { |
| assertTrue(false); |
| } |
| // create token and sentence annotations |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 0, 4)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 5, 7)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 8, 9)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 10, 14)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 14, 15)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(sentenceType, 0, 15)); |
| |
| // create filtered iterator over Tokens only |
| FSIterator<AnnotationFS> it = cas.getAnnotationIndex().iterator(); |
| FSTypeConstraint constraint = cas.getConstraintFactory().createTypeConstraint(); |
| constraint.add(tokenType); |
| |
| it = cas.createFilteredIterator(it, constraint); |
| |
| // do iteration |
| while (it.isValid()) { |
| AnnotationFS a = (AnnotationFS) it.get(); |
| assertTrue(a.getType().equals(tokenType)); |
| // System.out.println("Annotation type: " + a.getType().getName()); |
| // System.out.println("Covered text: " + a.getCoveredText()); |
| it.moveToNext(); |
| } |
| |
| // Count number of annotations. |
| it = cas.getAnnotationIndex().iterator(); |
| int countAll = 0; |
| for (it.moveToFirst(); it.isValid(); it.moveToNext()) { |
| ++countAll; |
| } |
| |
| // create filtered iterator over annotations |
| it = cas.getAnnotationIndex().iterator(); |
| constraint = cas.getConstraintFactory().createTypeConstraint(); |
| constraint.add(annotationType); |
| it = cas.createFilteredIterator(it, constraint); |
| |
| // do iteration |
| int countFiltered = 0; |
| while (it.isValid()) { |
| AnnotationFS a = (AnnotationFS) it.get(); |
| assertTrue(ts.subsumes(annotationType, a.getType())); |
| // System.out.println("Annotation type: " + a.getType().getName()); |
| // System.out.println("Covered text: " + a.getCoveredText()); |
| it.moveToNext(); |
| ++countFiltered; |
| } |
| assertTrue(countAll == countFiltered); |
| } |
| |
| public void testIterator1a() { |
| |
| try { |
| // cas.setDocumentText("A test."); can't set document text twice! |
| } catch (CASRuntimeException e) { |
| assertTrue(false); |
| } |
| |
| try { |
| cas.setDocumentText("This is a test."); |
| } catch (CASRuntimeException e) { |
| assertTrue(false); |
| } |
| // create token and sentence annotations |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 0, 4)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 5, 7)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 8, 9)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 10, 14)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 14, 15)); |
| cas.getIndexRepository().addFS(cas.createAnnotation(sentenceType, 0, 15)); |
| |
| // create filtered iterator over Tokens only |
| FSIterator<AnnotationFS> it = cas.getAnnotationIndex().iterator(); |
| FSTypeConstraint constraint = cas.getConstraintFactory().createTypeConstraint(); |
| constraint.add(tokenType.getName()); |
| it = cas.createFilteredIterator(it, constraint); |
| |
| // do iteration |
| while (it.isValid()) { |
| AnnotationFS a = (AnnotationFS) it.get(); |
| assertTrue(a.getType().equals(tokenType)); |
| // System.out.println("Annotation type: " + a.getType().getName()); |
| // System.out.println("Covered text: " + a.getCoveredText()); |
| it.moveToNext(); |
| } |
| } |
| |
| // test uses constraint compiler |
| /* |
| * public void testIterator1b() { |
| * |
| * try { cas.setDocumentText("A test."); } catch (CASRuntimeException e) { assertTrue(false); } |
| * ((CASMgr) cas).enableSetText(false); boolean exc = false; try { cas.setDocumentText("A |
| * test."); } catch (CASRuntimeException e) { assertTrue(e.getError() == |
| * CASRuntimeException.SET_DOC_TEXT_DISABLED); exc = true; } assertTrue(exc); ((CASMgr) |
| * cas).enableSetText(true); |
| * |
| * try { ((CASMgr) cas).setDocumentText("This is a test."); } catch (CASRuntimeException e) { |
| * assertTrue(false); } //create token and sentence annotations |
| * cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 0, 4)); |
| * cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 5, 7)); |
| * cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 8, 9)); |
| * cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 10, 14)); |
| * cas.getIndexRepository().addFS(cas.createAnnotation(tokenType, 14, 15)); |
| * cas.getIndexRepository().addFS(cas.createAnnotation(sentenceType, 0, 15)); |
| * |
| * //create filtered iterator over Tokens only FSIterator it = |
| * cas.getAnnotationIndex().iterator(); // FSTypeConstraint constraint = // |
| * cas.getConstraintFactory().createTypeConstraint(); // constraint.add(tokenType.getName()); |
| * |
| * FSMatchConstraint constraint = null; try { ConstraintParser parser = |
| * ConstraintParserFactory.getDefaultConstraintParser(); constraint = parser.parse("isa " + |
| * tokenType.getName()); } catch (Exception e) { e.printStackTrace(); assertTrue(false); } |
| * |
| * it = cas.createFilteredIterator(it, constraint); |
| * |
| * //do iteration while (it.isValid()) { AnnotationFS a = (AnnotationFS) it.get(); |
| * assertTrue(a.getType().equals(tokenType)); // System.out.println("Annotation type: " + |
| * a.getType().getName()); // System.out.println("Covered text: " + a.getCoveredText()); |
| * it.moveToNext(); } } |
| */ |
| |
| public void testIterator2() { |
| try { |
| cas.setDocumentText("This is a test with the word \"the\" in it."); |
| |
| // create token and sentence annotations |
| String type1 = "type1"; |
| String type2 = "type2"; |
| AnnotationFS token; |
| token = cas.createAnnotation(tokenType, 0, 4); |
| token.setStringValue(lemmaFeat, type1); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 5, 7); |
| token.setStringValue(lemmaFeat, "the"); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 8, 9); |
| token.setStringValue(lemmaFeat, type2); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 10, 14); |
| token.setStringValue(lemmaFeat, type1); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 14, 15); |
| token.setStringValue(lemmaFeat, type1); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 0, 15); |
| token.setStringValue(lemmaFeat, type1); |
| cas.getIndexRepository().addFS(token); |
| |
| String lemma = "the"; |
| // create filtered iterator over Tokens of type 1 |
| FSIterator<AnnotationFS> it = cas.getAnnotationIndex(tokenType).iterator(); |
| FSStringConstraint type1Constraint = cas.getConstraintFactory().createStringConstraint(); |
| type1Constraint.equals(lemma); |
| FeaturePath path = cas.createFeaturePath(); |
| path.addFeature(lemmaFeat); |
| FSMatchConstraint cons = cas.getConstraintFactory().embedConstraint(path, type1Constraint); |
| it = cas.createFilteredIterator(it, cons); |
| |
| int count = 0; |
| for (it.moveToFirst(); it.isValid(); it.moveToNext()) { |
| ++count; |
| } |
| |
| // ///////////////////////////////////////////////////////////// |
| // Count instances of tokens with lemma "the". |
| |
| // Create an iterator over Token annotations. |
| FSIndex<AnnotationFS> tokenIndex = cas.getAnnotationIndex(tokenType); |
| FSIterator<AnnotationFS> tokenIt = tokenIndex.iterator(); |
| // Create a counter. |
| int theCount = 0; |
| // Iterate over the tokens. |
| for (tokenIt.moveToFirst(); tokenIt.isValid(); tokenIt.moveToNext()) { |
| AnnotationFS tok = (AnnotationFS) tokenIt.get(); |
| if (tok.getStringValue(lemmaFeat).equals(lemma)) { |
| ++theCount; |
| // System.out.println("Found token: " + tok.getCoveredText()); |
| } |
| } |
| assertTrue(count == theCount); |
| // System.out.println( |
| // "Number of tokens with \"" + lemma + "\": " + theCount); |
| // System.out.println("Number of tokens overall: " + tokenIndex.size()); |
| |
| // System.out.println("Count: " + count); |
| // assertTrue(count == 4); |
| |
| } catch (Exception e) { |
| e.printStackTrace(); |
| assertTrue(false); |
| } |
| } |
| |
| public void testIterator2a() { |
| try { |
| cas.setDocumentText("This is a test with the word \"the\" in it."); |
| |
| // create token and sentence annotations |
| String type1 = "type1"; |
| String type2 = "type2"; |
| AnnotationFS token; |
| token = cas.createAnnotation(tokenType, 0, 4); |
| token.setStringValue(lemmaFeat, type1); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 5, 7); |
| token.setStringValue(lemmaFeat, "the"); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 8, 9); |
| token.setStringValue(lemmaFeat, type2); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 10, 14); |
| token.setStringValue(lemmaFeat, type1); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 14, 15); |
| token.setStringValue(lemmaFeat, type1); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 0, 15); |
| token.setStringValue(lemmaFeat, type1); |
| cas.getIndexRepository().addFS(token); |
| |
| String lemma = "the"; |
| FSIterator<AnnotationFS> it = cas.getAnnotationIndex(tokenType).iterator(); |
| FSStringConstraint type1Constraint = cas.getConstraintFactory().createStringConstraint(); |
| type1Constraint.equals(lemma); |
| ArrayList<String> path = new ArrayList<String>(); |
| path.add(lemmaFeat.getShortName()); |
| FSMatchConstraint cons = cas.getConstraintFactory().embedConstraint(path, type1Constraint); |
| it = cas.createFilteredIterator(it, cons); |
| |
| int count = 0; |
| for (it.moveToFirst(); it.isValid(); it.moveToNext()) { |
| ++count; |
| } |
| |
| // ///////////////////////////////////////////////////////////// |
| // Count instances of tokens with lemma "the". |
| |
| // Create an iterator over Token annotations. |
| FSIndex<AnnotationFS> tokenIndex = cas.getAnnotationIndex(tokenType); |
| FSIterator<AnnotationFS> tokenIt = tokenIndex.iterator(); |
| // Create a counter. |
| int theCount = 0; |
| // Iterate over the tokens. |
| for (tokenIt.moveToFirst(); tokenIt.isValid(); tokenIt.moveToNext()) { |
| AnnotationFS tok = (AnnotationFS) tokenIt.get(); |
| if (tok.getStringValue(lemmaFeat).equals(lemma)) { |
| ++theCount; |
| // System.out.println("Found token: " + tok.getCoveredText()); |
| } |
| } |
| assertTrue(count == theCount); |
| // System.out.println( |
| // "Number of tokens with \"" + lemma + "\": " + theCount); |
| // System.out.println("Number of tokens overall: " + tokenIndex.size()); |
| |
| // System.out.println("Count: " + count); |
| // assertTrue(count == 4); |
| |
| } catch (Exception e) { |
| e.printStackTrace(); |
| assertTrue(false); |
| } |
| } |
| |
| public void testIterator2b() { |
| try { |
| cas.setDocumentText("This is a test with the word \"the\" in it."); |
| |
| FeatureStructure wordFS = this.cas.createFS(wordType); |
| FeatureStructure sepFS = this.cas.createFS(sepType); |
| FeatureStructure eosFS = this.cas.createFS(eosType); |
| |
| // create token and sentence annotations |
| String type1 = "type1"; |
| String type2 = "type2"; |
| AnnotationFS token; |
| token = cas.createAnnotation(tokenType, 0, 4); |
| token.setStringValue(lemmaFeat, type1); |
| token.setFeatureValue(tokenTypeFeat, wordFS); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 5, 7); |
| token.setStringValue(lemmaFeat, "the"); |
| token.setFeatureValue(tokenTypeFeat, sepFS); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 8, 9); |
| token.setStringValue(lemmaFeat, type2); |
| token.setFeatureValue(tokenTypeFeat, eosFS); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 10, 14); |
| token.setStringValue(lemmaFeat, type1); |
| token.setFeatureValue(tokenTypeFeat, wordFS); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 14, 15); |
| token.setStringValue(lemmaFeat, type1); |
| token.setFeatureValue(tokenTypeFeat, sepFS); |
| cas.getIndexRepository().addFS(token); |
| token = cas.createAnnotation(tokenType, 0, 15); |
| token.setStringValue(lemmaFeat, type1); |
| token.setFeatureValue(tokenTypeFeat, eosFS); |
| cas.getIndexRepository().addFS(token); |
| |
| FSIterator<AnnotationFS> it = cas.getAnnotationIndex(tokenType).iterator(); |
| |
| ConstraintFactory cf = this.cas.getConstraintFactory(); |
| FSTypeConstraint tc = cf.createTypeConstraint(); |
| tc.add(sepType); |
| tc.add(eosType.getName()); |
| ArrayList<String> path = new ArrayList<String>(); |
| path.add(tokenTypeFeat.getShortName()); |
| FSMatchConstraint cons = cf.embedConstraint(path, tc); |
| it = this.cas.createFilteredIterator(it, cons); |
| int count = 0; |
| for (it.moveToFirst(); it.isValid(); it.moveToNext()) { |
| ++count; |
| } |
| assertTrue(count == 4); |
| |
| } catch (Exception e) { |
| e.printStackTrace(); |
| assertTrue(false); |
| } |
| } |
| |
| // test uses constraint compiler |
| /* |
| * public void testIterator2c() { try { ((CASMgr) cas).setDocumentText( "This is a test with the |
| * word \"the\" in it."); |
| * |
| * FeatureStructure wordFS = this.cas.createFS(wordType); FeatureStructure sepFS = |
| * this.cas.createFS(sepType); FeatureStructure eosFS = this.cas.createFS(eosType); |
| * |
| * //create token and sentence annotations String type1 = "type1"; String type2 = "type2"; |
| * AnnotationFS token; token = cas.createAnnotation(tokenType, 0, 4); |
| * token.setStringValue(lemmaFeat, type1); token.setFeatureValue(tokenTypeFeat, wordFS); |
| * cas.getIndexRepository().addFS(token); token = cas.createAnnotation(tokenType, 5, 7); |
| * token.setStringValue(lemmaFeat, "the"); token.setFeatureValue(tokenTypeFeat, sepFS); |
| * cas.getIndexRepository().addFS(token); token = cas.createAnnotation(tokenType, 8, 9); |
| * token.setStringValue(lemmaFeat, type2); token.setFeatureValue(tokenTypeFeat, eosFS); |
| * cas.getIndexRepository().addFS(token); token = cas.createAnnotation(tokenType, 10, 14); |
| * token.setStringValue(lemmaFeat, type1); token.setFeatureValue(tokenTypeFeat, wordFS); |
| * cas.getIndexRepository().addFS(token); token = cas.createAnnotation(tokenType, 14, 15); |
| * token.setStringValue(lemmaFeat, type1); token.setFeatureValue(tokenTypeFeat, sepFS); |
| * cas.getIndexRepository().addFS(token); token = cas.createAnnotation(tokenType, 0, 15); |
| * token.setStringValue(lemmaFeat, type1); token.setFeatureValue(tokenTypeFeat, eosFS); |
| * cas.getIndexRepository().addFS(token); |
| * |
| * FSIterator it = cas.getAnnotationIndex(tokenType).iterator(); |
| * |
| * FSMatchConstraint cons = null; try { ConstraintParser parser = |
| * ConstraintParserFactory.getDefaultConstraintParser(); cons = parser.parse( |
| * tokenTypeFeat.getShortName() + " isa (" + sepType.getName() + "|" + eosType.getName() + ")"); } |
| * catch (Exception e) { assertTrue(false); } it = this.cas.createFilteredIterator(it, cons); int |
| * count = 0; for (it.moveToFirst(); it.isValid(); it.moveToNext()) { ++count; } assertTrue(count == |
| * 4); } catch (Exception e) { e.printStackTrace(); assertTrue(false); } } |
| * |
| * public static void main(String[] args) { FilteredIteratorTest test = new |
| * FilteredIteratorTest(null); test.run(); } |
| */ |
| |
| } |