| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.uima.cas.test; |
| |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.cas.CASException; |
| import org.apache.uima.cas.CASRuntimeException; |
| import org.apache.uima.cas.FSIndex; |
| import org.apache.uima.cas.FSIndexRepository; |
| import org.apache.uima.cas.FSIterator; |
| import org.apache.uima.cas.Feature; |
| import org.apache.uima.cas.FeatureStructure; |
| import org.apache.uima.cas.Type; |
| import org.apache.uima.cas.TypeSystem; |
| import org.apache.uima.cas.impl.FSIndexRepositoryImpl; |
| import org.apache.uima.cas.impl.FeatureImpl; |
| import org.apache.uima.cas.text.AnnotationFS; |
| import org.apache.uima.cas.text.AnnotationIndex; |
| import org.apache.uima.jcas.JCas; |
| import org.apache.uima.jcas.tcas.Annotation; |
| |
| import junit.framework.TestCase; |
| |
| |
| public class IndexRepositoryTest extends TestCase { |
| |
| CAS cas; |
| |
| TypeSystem typeSystem; |
| |
| FSIndexRepository indexRep; |
| |
| private String running; |
| |
| /* |
| * (non-Javadoc) |
| * |
| * @see junit.framework.TestCase#setUp() |
| */ |
| protected void setUp() throws Exception { |
| super.setUp(); |
| this.cas = CASInitializer.initCas(new CASTestSetup(), null); |
| this.typeSystem = this.cas.getTypeSystem(); |
| this.indexRep = this.cas.getIndexRepository(); |
| } |
| |
| public void tearDown() { |
| cas = null; |
| typeSystem = null; |
| indexRep = null; |
| } |
| |
| public void testMissingSofaRef() throws Exception { |
| JCas jcas = cas.getJCas(); |
| Annotation a = new Annotation(jcas, 0, 4); |
| FeatureImpl feat = (FeatureImpl) cas.getTypeSystem().getType(CAS.TYPE_NAME_ANNOTATION_BASE) |
| .getFeatureByBaseName(CAS.FEATURE_BASE_NAME_SOFA); |
| a._setFeatureValueNcNj(feat, null); |
| try { |
| jcas.addFsToIndexes(a); |
| } catch (CASRuntimeException e) { |
| assertEquals("SOFAREF_NOT_SET", e.getMessageKey()); |
| return; |
| } |
| fail("required exception not thrown"); // fail |
| } |
| |
| public void testDefaultBagIndex() throws Exception { |
| // create an instance of a non-annotation type |
| Type tokenTypeType = this.typeSystem.getType(CASTestSetup.TOKEN_TYPE_TYPE); |
| FeatureStructure tokenTypeFs1 = this.cas.createFS(tokenTypeType); |
| assertFalse(tokenTypeFs1 instanceof AnnotationFS); |
| |
| // add to indexes |
| this.indexRep.addFS(tokenTypeFs1); |
| |
| // now try to retrieve |
| FSIterator<FeatureStructure> iter = this.indexRep.getAllIndexedFS(tokenTypeType); |
| assertTrue(iter.hasNext()); |
| assertEquals(tokenTypeFs1, iter.next()); |
| assertFalse(iter.hasNext()); |
| |
| // add a second instance |
| FeatureStructure tokenTypeFs2 = this.cas.createFS(tokenTypeType); |
| assertFalse(tokenTypeFs2 instanceof AnnotationFS); |
| this.indexRep.addFS(tokenTypeFs2); |
| |
| // now there should be two instances in the index |
| FSIterator<FeatureStructure> iter2 = this.indexRep.getAllIndexedFS(tokenTypeType); |
| assertTrue(iter2.hasNext()); |
| iter2.next(); |
| assertTrue(iter2.hasNext()); |
| iter2.next(); |
| assertFalse(iter.hasNext()); |
| } |
| |
| public void testSetIndex() throws Exception { |
| Feature beginFeat = this.typeSystem.getFeatureByFullName(CASTestSetup.TOKEN_TYPE + ":begin"); |
| // create an instance of an annotation type |
| Type tokenTypeType = this.typeSystem.getType(CASTestSetup.TOKEN_TYPE); |
| FeatureStructure tokenTypeFs1 = this.cas.createFS(tokenTypeType); |
| assertTrue(tokenTypeFs1 instanceof AnnotationFS); |
| tokenTypeFs1.setIntValue(beginFeat, 17); |
| |
| FeatureStructure tokenTypeFs2 = this.cas.createFS(tokenTypeType); |
| assertTrue(tokenTypeFs2 instanceof AnnotationFS); |
| tokenTypeFs2.setIntValue(beginFeat, 17); |
| |
| cas.addFsToIndexes(tokenTypeFs1); |
| cas.addFsToIndexes(tokenTypeFs2); |
| |
| FSIndexRepository ir = cas.getIndexRepository(); |
| FSIndex<FeatureStructure> index = ir.getIndex(CASTestSetup.ANNOT_SET_INDEX); |
| assertEquals(1, index.size()); |
| |
| index = ir.getIndex(CASTestSetup.ANNOT_SORT_INDEX); |
| assertEquals(2, index.size()); |
| |
| } |
| |
| /** |
| * To test non-normal case, change Eclipse run config by adding the jvm arg: |
| * -Duima.allow_duplicate_add_to_indexes |
| * @throws CASException |
| */ |
| public void testDupFsIndex() throws CASException { |
| cas.setSofaDataString("something", "text"); // otherwise triggers failure in addFsToIndex - no sofa ref |
| JCas jcas = cas.getJCas(); |
| Annotation a = new Annotation(jcas, 0, 4); |
| cas.addFsToIndexes(a); |
| cas.addFsToIndexes(a); |
| cas.addFsToIndexes(a); |
| int expected = /*FSIndexRepositoryImpl.IS_ALLOW_DUP_ADD_2_INDEXES ? 4 :*/ 2; |
| assertEquals(expected, cas.getIndexRepository().getIndex(CASTestSetup.ANNOT_SORT_INDEX).size()); |
| assertEquals(expected, cas.getIndexRepository().getIndex(CASTestSetup.ANNOT_BAG_INDEX).size()); |
| assertEquals(expected, cas.getIndexRepository().getIndex(CAS.STD_ANNOTATION_INDEX).size()); |
| } |
| |
| public static int NBR_ITEMS = 40000; |
| |
| public void testRemovalSpeed() throws Exception { |
| // create an instance of an annotation type |
| Feature beginFeat = this.typeSystem.getFeatureByFullName(CASTestSetup.TOKEN_TYPE + ":begin"); |
| Type fsType = this.typeSystem.getType(CASTestSetup.TOKEN_TYPE); |
| FeatureStructure[] fsa = new FeatureStructure[NBR_ITEMS]; |
| // create 40000 tokens |
| for (int i = 0; i < fsa.length; i++) { |
| fsa[i] = this.cas.createFS(fsType); |
| fsa[i].setIntValue(beginFeat, i); |
| } |
| |
| // warmup and jit |
| timeAdd2Indexes(fsa, false); |
| timeRemoveFromIndexes(fsa); |
| |
| long a2i = timeAdd2Indexes(fsa, false); |
| long rfi = timeRemoveFromIndexes(fsa); |
| |
| long a2i2 = timeAdd2Indexes(fsa, false); |
| long rfir = timeRemoveFromIndexesReverse(fsa); |
| |
| System.out.format("Timing add/remv from indexes: add1: %,d microsec, add2: %,d microsec, rmv: %,d microsec, rmvReversed: %,d microsec%n", |
| a2i/1000, a2i2/1000, rfi/1000, rfir/1000); |
| // big loop for doing profiling by hand and checking space recovery by hand |
| |
| // for (int i = 0; i < 10000; i++) { |
| // timeAdd2Indexes(fsa); |
| // timeRemoveFromIndexesReverse(fsa); |
| // } |
| } |
| |
| public void testAddSpeed() { |
| running = "testAddSpeed - 2 sorted, 1 set, 1 bag"; |
| runAddSpeed(); |
| } |
| |
| public void testAddSpeedSorted() { |
| FSIndexRepositoryImpl ir = (FSIndexRepositoryImpl) cas.getIndexRepository(); |
| ir.removeIndex(CASTestSetup.ANNOT_SET_INDEX); |
| ir.removeIndex(CASTestSetup.ANNOT_SORT_INDEX); |
| ir.removeIndex(CASTestSetup.ANNOT_BAG_INDEX); |
| // ir.removeIndex(CAS.STD_ANNOTATION_INDEX); |
| running = "testAddSpeedSorted"; |
| runAddSpeed(); |
| } |
| |
| private void runAddSpeed() { |
| // create an instance of an annotation type |
| Feature beginFeat = this.typeSystem.getFeatureByFullName(CASTestSetup.TOKEN_TYPE + ":begin"); |
| Type fsType = this.typeSystem.getType(CASTestSetup.TOKEN_TYPE); |
| FeatureStructure[] fsa = new FeatureStructure[NBR_ITEMS]; |
| // create 40000 tokens |
| for (int i = 0; i < fsa.length; i++) { |
| fsa[i] = this.cas.createFS(fsType); |
| fsa[i].setIntValue(beginFeat, i); |
| } |
| |
| // warmup and jit |
| long prev = Long.MAX_VALUE; |
| for (int i = 0; i < 10; i++) { |
| cas.getIndexRepository().removeAllIncludingSubtypes(cas.getTypeSystem().getTopType()); |
| long t = timeAdd2Indexes(fsa, false); |
| if (t < prev) { |
| System.out.format("%s Iteration %,d Add Forward 40K took %,d microsec%n", running, i, t/1000); |
| prev = t; |
| } |
| } |
| |
| prev = Long.MAX_VALUE; |
| for (int i = 0; i < 10; i++) { |
| cas.getIndexRepository().removeAllIncludingSubtypes(cas.getTypeSystem().getTopType()); |
| long t = timeAdd2Indexes(fsa, true); |
| if (t < prev) { |
| System.out.format("%s Iteration %,d Add Reverse 40K took %,d microsec%n", running, i, t/1000); |
| prev = t; |
| } |
| } |
| |
| } |
| |
| public void testRemovalSpeedBagAlone() throws Exception { |
| |
| FSIndexRepositoryImpl ir = (FSIndexRepositoryImpl) cas.getIndexRepository(); |
| // run with bag only |
| ir.removeIndex(CASTestSetup.ANNOT_SET_INDEX); |
| ir.removeIndex(CASTestSetup.ANNOT_SORT_INDEX); |
| ir.removeIndex(CAS.STD_ANNOTATION_INDEX); |
| |
| // create 40000 token-types |
| Type fsType = this.typeSystem.getType(CASTestSetup.TOKEN_TYPE_TYPE); |
| // Feature beginFeat = typeSystem.getFeatureByFullName("Token:begin"); |
| FeatureStructure[] fsa = new FeatureStructure[NBR_ITEMS]; |
| for (int i = 0; i < fsa.length; i++) { |
| fsa[i] = this.cas.createFS(fsType); |
| // fsa[i].setIntValue(beginFeat, i); |
| } |
| |
| for (int iii = 0; iii < 3 /*10000*/; iii++) { // change to 10000 for iterations |
| |
| cas.getIndexRepository().removeAllIncludingSubtypes(cas.getTypeSystem().getTopType()); |
| |
| // this.cas = CASInitializer.initCas(new CASTestSetup()); |
| // this.typeSystem = this.cas.getTypeSystem(); |
| // this.indexRep = this.cas.getIndexRepository(); |
| |
| // warmup and jit |
| timeAdd2Indexes(fsa, false); |
| timeRemoveFromIndexes(fsa); |
| // timeAdd2Indexes(fsa); |
| // timeRemoveFromIndexes(fsa); |
| cas.getIndexRepository().removeAllIncludingSubtypes(cas.getTypeSystem().getTopType()); |
| System.gc(); |
| |
| long a2i = timeAdd2Indexes(fsa, false); |
| // Thread.currentThread().sleep(1000*60*60); // for using yourkit to investigate memory sizes |
| long rfi = timeRemoveFromIndexes(fsa); |
| |
| long a2i2 = timeAdd2Indexes(fsa, false); |
| long rfir = timeRemoveFromIndexesReverse(fsa); |
| |
| // if (iii == 600) { |
| // System.out.println("debug stop"); |
| // } |
| if (iii < 10 || (iii % 200) == 0) { |
| System.out.format("%,d Timing add/remv from bag indexes: add1: %,d microsec, add2: %,d microsec, rmv: %,d microsec, rmvReversed: %,d microsec%n", |
| iii, a2i/1000, a2i2/1000, rfi/1000, rfir/1000); |
| } |
| } |
| } |
| |
| private long timeAdd2Indexes (FeatureStructure[] fsa, boolean reverse) { |
| long start = System.nanoTime(); |
| if (reverse) { |
| AnnotationIndex<AnnotationFS> annotIndex = cas.getAnnotationIndex(); |
| for (int i = fsa.length - 1; i >= 0; i--) { |
| cas.addFsToIndexes(fsa[i]); |
| if ((i % 10000) == 9999) { |
| annotIndex.size(); // forces batch add to indexes |
| } |
| } |
| } else { |
| for (int i = 0; i < fsa.length; i++) { |
| cas.addFsToIndexes(fsa[i]); |
| } |
| } |
| return System.nanoTime() - start; |
| } |
| |
| private long timeRemoveFromIndexes (FeatureStructure[] fsa) { |
| long start = System.nanoTime(); |
| for (int i = 0; i < fsa.length; i++) { |
| cas.removeFsFromIndexes(fsa[i]); |
| } |
| return System.nanoTime() - start; |
| } |
| |
| private long timeRemoveFromIndexesReverse (FeatureStructure[] fsa) { |
| long start = System.nanoTime(); |
| for (int i = fsa.length -1; i >= 0; i--) { |
| cas.removeFsFromIndexes(fsa[i]); |
| } |
| return System.nanoTime() - start; |
| } |
| |
| } |