Merge pull request #136 from apache/feature/UIMA-6291-Improve-uimaFIT-benchmarking-module
[UIMA-6291] Improve uimaFIT benchmarking module
diff --git a/uimafit-benchmark/pom.xml b/uimafit-benchmark/pom.xml
index 4b7d651..6275e24 100644
--- a/uimafit-benchmark/pom.xml
+++ b/uimafit-benchmark/pom.xml
@@ -56,7 +56,7 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<scope>test</scope>
- </dependency>
+ </dependency>
</dependencies>
<licenses>
<license>
diff --git a/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/Batch.java b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/Batch.java
index a4fd351..feb7a8b 100644
--- a/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/Batch.java
+++ b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/Batch.java
@@ -63,6 +63,7 @@
sb.append(String.format("min: %4.0f ", stats.getMin()));
sb.append(String.format("max: %4.0f ", stats.getMax()));
sb.append(String.format("median: %4.0f ", stats.getPercentile(50)));
+ sb.append(String.format("cumulative: %6.0f ", stats.getSum()));
sb.append(String.format("fail: %4d ", failures));
sb.append("]");
return sb.toString();
diff --git a/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/Benchmark.java b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/Benchmark.java
index b589e92..b99a9ab 100644
--- a/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/Benchmark.java
+++ b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/Benchmark.java
@@ -18,6 +18,8 @@
*/
package org.apache.uima.fit.benchmark;
+import static java.util.Comparator.comparing;
+
import java.util.ArrayList;
import java.util.List;
import java.util.function.IntConsumer;
@@ -27,123 +29,146 @@
import org.apache.commons.lang3.StringUtils;
public class Benchmark {
- private IntConsumer initializer = t -> {};
- private RunnableWithExceptions subject;
-
- private String name;
- private int baseRepeat = 20;
- private int repeatIncrementTimes;
-
- private int baseMagnitude = 1;
- private int incrementTimes;
- private IntFunction<Integer> magnitudeIncrement = t -> t;
- private LongSupplier timer = () -> System.currentTimeMillis();
-
- private List<Batch> batches = new ArrayList<>();
+ private IntConsumer initializer = t -> {
+ };
+ private RunnableWithExceptions subject;
- public Benchmark(String aName, Benchmark aTemplate) {
- name = aName;
-
- initializer = aTemplate.initializer;
- subject = aTemplate.subject;
-
- baseRepeat = aTemplate.baseRepeat;
- repeatIncrementTimes = aTemplate.repeatIncrementTimes;
-
- baseMagnitude = aTemplate.baseMagnitude;
- incrementTimes = aTemplate.incrementTimes;
- magnitudeIncrement = aTemplate.magnitudeIncrement;
- timer = aTemplate.timer;
- }
+ private final String name;
- public Benchmark(String aName) {
- name = aName;
- }
+ private boolean verbose = false;
- public Benchmark timer(LongSupplier aTimer)
- {
- timer = aTimer;
- return this;
- }
+ private int baseRepeat = 20;
+ private int repeatIncrementTimes;
- public Benchmark repeat(int aRepeat)
- {
- baseRepeat = aRepeat;
- return this;
- }
+ private int baseMagnitude = 1;
+ private int incrementTimes;
+ private IntFunction<Integer> magnitudeIncrement = t -> t;
+ private LongSupplier timer = () -> System.currentTimeMillis();
- public Benchmark magnitude(int aMagnitude)
- {
- baseMagnitude = aMagnitude;
- return this;
- }
+ private List<Batch> batches = new ArrayList<>();
- public Benchmark magnitudeIncrement(IntFunction<Integer> aIncrement)
- {
- magnitudeIncrement = aIncrement;
- return this;
- }
+ public Benchmark(String aName, Benchmark aTemplate) {
+ name = aName;
- public Benchmark incrementTimes(int aTimes)
- {
- incrementTimes = aTimes;
- return this;
- }
+ initializer = aTemplate.initializer;
+ subject = aTemplate.subject;
- public Benchmark initialize(IntConsumer aPieceOfCode)
- {
- initializer = aPieceOfCode;
- return this;
- }
-
- public Benchmark measure(RunnableWithExceptions aPieceOfCode)
- {
- subject = aPieceOfCode;
- return this;
- }
-
- private Batch runBatch(int aMagnitude)
- {
- Batch batch = new Batch(aMagnitude);
-
- initializer.accept(aMagnitude);
- for (int i = 0; i < baseRepeat; i++) {
-
- long startTime = timer.getAsLong();
- try {
- subject.run();
- batch.addMeasurement(new Measurement(i, timer.getAsLong() - startTime));
- }
- catch (Exception e) {
- batch.addMeasurement(new Measurement(i, timer.getAsLong() - startTime, e));
- }
+ baseRepeat = aTemplate.baseRepeat;
+ repeatIncrementTimes = aTemplate.repeatIncrementTimes;
+
+ baseMagnitude = aTemplate.baseMagnitude;
+ incrementTimes = aTemplate.incrementTimes;
+ magnitudeIncrement = aTemplate.magnitudeIncrement;
+ timer = aTemplate.timer;
+ }
+
+  public Benchmark(String aName) {
+    this.name = aName; // every other setting keeps the default from its field initializer
+  }
+
+  public String getName() {
+    return this.name; // the label given to the constructor; run() prints it
+  }
+
+  public Benchmark timer(LongSupplier aTimer) {
+    this.timer = aTimer; // clock read right before and right after each measured run
+    return this;
+  }
+
+  public Benchmark repeat(int aRepeat) {
+    this.baseRepeat = aRepeat; // how many times the subject is executed and timed per batch
+    return this;
+  }
+
+  public Benchmark magnitude(int aMagnitude) {
+    this.baseMagnitude = aMagnitude; // magnitude of the first batch; later ones are derived from it
+    return this;
+  }
+
+  public Benchmark magnitudeIncrement(IntFunction<Integer> aIncrement) {
+    this.magnitudeIncrement = aIncrement; // applied to the current magnitude after every batch
+    return this;
+  }
+
+  public Benchmark incrementTimes(int aTimes) {
+    this.incrementTimes = aTimes; // run() loops do/while, so one batch runs even for values below 1
+    return this;
+  }
+
+  public Benchmark initialize(IntConsumer aPieceOfCode) {
+    this.initializer = aPieceOfCode; // called once per batch with that batch's magnitude, before timing
+    return this;
+  }
+
+  public Benchmark measure(RunnableWithExceptions aPieceOfCode) {
+    this.subject = aPieceOfCode; // code under test; an Exception it throws is stored on the measurement
+    return this;
+  }
+
+ private Batch runBatch(int aMagnitude) {
+ Batch batch = new Batch(aMagnitude);
+
+ initializer.accept(aMagnitude);
+ for (int i = 0; i < baseRepeat; i++) {
+
+ long startTime = timer.getAsLong();
+ try {
+ subject.run();
+ batch.addMeasurement(new Measurement(i, timer.getAsLong() - startTime));
+ } catch (Exception e) {
+ batch.addMeasurement(new Measurement(i, timer.getAsLong() - startTime, e));
}
-
- return batch;
}
-
- public void run()
- {
+
+ return batch;
+ }
+
+ public void run() {
+ if (verbose) {
System.out.printf("%n%s%n", StringUtils.repeat("=", name.length()));
System.out.printf("%s%n", name);
System.out.printf("%s%n", StringUtils.repeat("=", name.length()));
-
- int magnitude = baseMagnitude;
- int n = 0;
-
- System.out.print("Running benchmark... ");
- do {
- if (magnitude > 0) {
- System.out.printf("%d ", magnitude);
- }
- batches.add(runBatch(magnitude));
- magnitude = magnitudeIncrement.apply(magnitude);
- n++;
- } while (n < incrementTimes);
- System.out.printf("%n%n");
-
+ }
+ else {
+ System.out.printf("%s: ", name);
+ }
+
+ int magnitude = baseMagnitude;
+ int n = 0;
+
+ System.out.print("Running benchmark... ");
+ do {
+ if (magnitude > 0) {
+ System.out.printf("%d ", magnitude);
+ }
+ batches.add(runBatch(magnitude));
+ magnitude = magnitudeIncrement.apply(magnitude);
+ n++;
+ } while (n < incrementTimes);
+ System.out.printf("%n");
+
+ if (verbose) {
for (Batch b : batches) {
System.out.printf("%s%n", b);
}
}
- }
\ No newline at end of file
+ }
+
+  public List<Batch> getBatches() {
+    return this.batches; // the live internal list, not a copy; run() appends one Batch per step
+  }
+
+  public long getMaxDuration() {
+    // Duration of the slowest measurement; delegates rather than repeating the same stream pipeline.
+    return getSlowestMeasurement().getDuration();
+  }
+
+  public long getCumulativeDuration() { // sum over every measurement of every batch; 0 if there are none
+    return getBatches().stream().flatMap(batch -> batch.getMeasurements().stream()).mapToLong(Measurement::getDuration).sum();
+  }
+
+  public Measurement getSlowestMeasurement() { // throws NoSuchElementException if nothing was measured yet
+    return getBatches().stream().flatMap(b -> b.getMeasurements().stream()).max(comparing(Measurement::getDuration))
+            .orElseThrow(() -> new java.util.NoSuchElementException("No measurements recorded for benchmark: " + name));
+  }
+}
\ No newline at end of file
diff --git a/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/BenchmarkGroup.java b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/BenchmarkGroup.java
new file mode 100644
index 0000000..e6bf4e9
--- /dev/null
+++ b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/BenchmarkGroup.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.fit.benchmark;
+
+import static java.util.Comparator.comparing;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class BenchmarkGroup {
+  private final String name; // label printed in the group header
+  private final List<Benchmark> benchmarks = new ArrayList<>(); // kept in registration order
+
+  public BenchmarkGroup(String aName) {
+    this.name = aName;
+  }
+
+  public BenchmarkGroup add(Benchmark aBenchmark) {
+    this.benchmarks.add(aBenchmark);
+    return this; // fluent, so add(...) calls can be chained
+  }
+
+  public void runAll() {
+    System.out.printf(">>>>>>>>>>>>>>>>>>%n");
+    System.out.printf("GROUP: %s%n", name);
+
+    benchmarks.forEach(Benchmark::run); // executes in registration order
+
+    System.out.printf("%n%nSorted by execution time:%n");
+    // Rank a copy (ascending cumulative duration) so the registration order stays untouched.
+    List<Benchmark> ranked = new ArrayList<>(benchmarks);
+    ranked.sort(comparing(Benchmark::getCumulativeDuration));
+    for (Benchmark current : ranked) {
+      Measurement slowest = current.getSlowestMeasurement();
+      // Columns: cumulative duration / slowest single measurement -- benchmark name
+      System.out.printf("%6dms / %4dms -- %s%n", current.getCumulativeDuration(), slowest.getDuration(),
+              current.getName());
+    }
+
+    System.out.printf(">>>>>>>>>>>>>>>>>>%n%n");
+  }
+}
diff --git a/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/CachingRandomJCasProvider.java b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/CachingRandomJCasProvider.java
new file mode 100644
index 0000000..1272dd4
--- /dev/null
+++ b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/CachingRandomJCasProvider.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.fit.benchmark;
+
+import static org.apache.uima.fit.benchmark.CasInitializationUtils.initRandomCas;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+public class CachingRandomJCasProvider {
+  private static final long RANDOM_SEED = 12345L; // fixed seed handed to initRandomCas for every size
+
+  private final Map<Integer, JCas> cache = new HashMap<>(); // one initialized JCas per requested size
+
+  private JCas preparedJCas; // set by the latest prepare(); null until prepare() has been called
+
+  public void prepare(int size) {
+    JCas cachedJCas = cache.get(size);
+
+    if (cachedJCas == null) { // first request for this size: create and fill a fresh JCas
+      try {
+        cachedJCas = JCasFactory.createJCas();
+      } catch (ResourceInitializationException | CASException e) {
+        throw new IllegalStateException("Unable to create a JCas for size " + size, e);
+      }
+
+      initRandomCas(cachedJCas.getCas(), size, RANDOM_SEED);
+      cache.put(size, cachedJCas);
+    }
+
+    preparedJCas = cachedJCas; // get() hands out this instance until the next prepare()
+  }
+
+  public JCas get() {
+    return preparedJCas;
+  }
+}
diff --git a/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/CasInitializationUtils.java b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/CasInitializationUtils.java
index e9e53c2..72149bb 100644
--- a/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/CasInitializationUtils.java
+++ b/uimafit-benchmark/src/main/java/org/apache/uima/fit/benchmark/CasInitializationUtils.java
@@ -30,17 +30,15 @@
public final class CasInitializationUtils
{
- private static final long RANDOM_SEED = 12345l;
-
private CasInitializationUtils()
{
// No instances
}
- public static void initRandomCas(CAS cas, int size)
+ public static void initRandomCas(CAS cas, int size, long seed)
{
cas.reset();
- Random rnd = new Random(RANDOM_SEED);
+ Random rnd = new Random(seed);
List<Type> types = new ArrayList<Type>();
types.add(cas.getTypeSystem().getType(Token.class.getName()));
types.add(cas.getTypeSystem().getType(Sentence.class.getName()));
diff --git a/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/CasUtilBenchmark.java b/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/CasUtilBenchmark.java
deleted file mode 100644
index fc39c1f..0000000
--- a/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/CasUtilBenchmark.java
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.uima.fit.benchmark;
-
-import static org.apache.uima.fit.benchmark.CasInitializationUtils.initRandomCas;
-import static org.apache.uima.fit.factory.TypeSystemDescriptionFactory.createTypeSystemDescription;
-import static org.apache.uima.fit.util.CasUtil.getType;
-import static org.apache.uima.fit.util.CasUtil.indexCovered;
-import static org.apache.uima.fit.util.CasUtil.indexCovering;
-import static org.apache.uima.fit.util.CasUtil.select;
-import static org.apache.uima.fit.util.CasUtil.selectAll;
-import static org.apache.uima.fit.util.CasUtil.selectCovered;
-import static org.apache.uima.fit.util.CasUtil.selectCovering;
-import static org.apache.uima.fit.util.CasUtil.selectFS;
-import static org.apache.uima.fit.util.CasUtil.selectOverlapping;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.cas.text.AnnotationPredicates;
-import org.apache.uima.util.CasCreationUtils;
-import org.junit.Before;
-import org.junit.Test;
-
-public class CasUtilBenchmark {
- private CAS cas;
-
- private static final String TYPE_NAME_TOKEN = "org.apache.uima.fit.type.Token";
- private static final String TYPE_NAME_SENTENCE = "org.apache.uima.fit.type.Sentence";
-
- @Before
- public void setup() throws Exception {
- if (cas == null) {
- cas = CasCreationUtils.createCas(createTypeSystemDescription(), null, null);
- }
- else {
- cas.reset();
- }
- }
-
- @Test
- public void benchmarkSelect() {
- Benchmark template = new Benchmark("TEMPLATE")
- .initialize(n -> initRandomCas(cas, n))
- .magnitude(10)
- .magnitudeIncrement(count -> count * 10)
- .incrementTimes(5);
-
- new Benchmark("CAS select Token", template)
- .measure(() -> select(cas, getType(cas, TYPE_NAME_TOKEN)))
- .run();
-
- new Benchmark("CAS select Token v3", template)
- .measure(() -> cas.select(getType(cas, TYPE_NAME_TOKEN)))
- .run();
-
- new Benchmark("CAS select Token and iterate", template)
- .measure(() -> select(cas, getType(cas, TYPE_NAME_TOKEN)).forEach(v -> {}))
- .run();
-
- new Benchmark("CAS select Token and iterate v3", template)
- .measure(() -> cas.select(getType(cas, TYPE_NAME_TOKEN)).forEach(v -> {}))
- .run();
-
- new Benchmark("CAS select Sentence", template)
- .measure(() -> select(cas, getType(cas, TYPE_NAME_SENTENCE)))
- .run();
-
- new Benchmark("CAS select Sentence and iterate", template)
- .measure(() -> select(cas, getType(cas, TYPE_NAME_SENTENCE)).forEach(v -> {}))
- .run();
-
- new Benchmark("CAS select TOP", template)
- .measure(() -> selectFS(cas, getType(cas, CAS.TYPE_NAME_TOP)))
- .run();
-
- new Benchmark("CAS select TOP and iterate", template)
- .measure(() -> selectFS(cas, getType(cas, CAS.TYPE_NAME_TOP)).forEach(v -> {}))
- .run();
-
- new Benchmark("CAS select TOP and iterate v3", template)
- .measure(() -> cas.select(getType(cas, CAS.TYPE_NAME_TOP)).forEach(v -> {}))
- .run();
-
- new Benchmark("CAS select ALL", template)
- .measure(() -> selectAll(cas))
- .run();
-
- new Benchmark("CAS select ALL and iterate", template)
- .measure(() -> selectAll(cas).forEach(v -> {}))
- .run();
-
- new Benchmark("CAS select ALL and iterate v3", template)
- .measure(() -> cas.select().forEach(v -> {}))
- .run();
- }
-
- @Test
- public void benchmarkSelectOverlapping() {
- Benchmark template = new Benchmark("TEMPLATE")
- .initialize(n -> initRandomCas(cas, n))
- .magnitude(10)
- .magnitudeIncrement(count -> count * 10)
- .incrementTimes(4);
-
- new Benchmark("CAS selectOverlapping", template)
- .measure(() -> {
- Type sentenceType = getType(cas, TYPE_NAME_SENTENCE);
- Type tokenType = getType(cas, TYPE_NAME_TOKEN);
- select(cas, sentenceType).forEach(s -> selectOverlapping(cas, tokenType, s).forEach(t -> {}));
- })
- .run();
-
- new Benchmark("CAS selectOverlapping v3", template)
- .measure(() -> {
- Type sentenceType = getType(cas, TYPE_NAME_SENTENCE);
- Type tokenType = getType(cas, TYPE_NAME_TOKEN);
- cas.select(sentenceType).forEach(s -> cas.select(tokenType)
- .filter(t -> AnnotationPredicates.overlapping((AnnotationFS) t, (AnnotationFS)s))
- .forEach(t -> {}));
- })
- .run();
- }
-
- @Test
- public void benchmarkSelectCovered() {
- Benchmark template = new Benchmark("TEMPLATE")
- .initialize(n -> initRandomCas(cas, n))
- .magnitude(10)
- .magnitudeIncrement(count -> count * 10)
- .incrementTimes(4);
-
- new Benchmark("CAS selectCovered", template)
- .measure(() -> {
- Type sentenceType = getType(cas, TYPE_NAME_SENTENCE);
- Type tokenType = getType(cas, TYPE_NAME_TOKEN);
- select(cas, sentenceType).forEach(s -> selectCovered(tokenType, s).forEach(t -> {}));
- })
- .run();
-
- new Benchmark("CAS selectCovered v3", template)
- .measure(() -> {
- Type sentenceType = getType(cas, TYPE_NAME_SENTENCE);
- Type tokenType = getType(cas, TYPE_NAME_TOKEN);
- cas.select(sentenceType).forEach(s -> cas.select(tokenType).coveredBy((AnnotationFS) s).forEach(t -> {}));
- })
- .run();
-
- new Benchmark("CAS indexCovered", template)
- .measure(() -> indexCovered(cas, getType(cas, TYPE_NAME_SENTENCE), getType(cas, TYPE_NAME_TOKEN))
- .forEach((s, l) -> l.forEach(t -> {})))
- .run();
- }
-
- @Test
- public void benchmarkSelectCovering() {
- Benchmark template = new Benchmark("TEMPLATE")
- .initialize(n -> initRandomCas(cas, n))
- .magnitude(10)
- .magnitudeIncrement(count -> count * 10)
- .incrementTimes(3);
-
- new Benchmark("CAS selectCovering", template)
- .measure(() -> {
- Type sentenceType = getType(cas, TYPE_NAME_SENTENCE);
- Type tokenType = getType(cas, TYPE_NAME_TOKEN);
- select(cas, tokenType).forEach(t -> selectCovering(sentenceType, t));
- })
- .run();
-
- new Benchmark("CAS selectCovering", template)
- .measure(() -> {
- Type sentenceType = getType(cas, TYPE_NAME_SENTENCE);
- Type tokenType = getType(cas, TYPE_NAME_TOKEN);
- select(cas, tokenType).forEach(s -> selectCovering(sentenceType, s));
- })
- .run();
-
- new Benchmark("CAS selectCovering v3", template)
- .measure(() -> {
- Type sentenceType = getType(cas, TYPE_NAME_SENTENCE);
- Type tokenType = getType(cas, TYPE_NAME_TOKEN);
- cas.select(tokenType).forEach(t -> cas.select(sentenceType).covering((AnnotationFS) t).forEach(s -> {}));
- })
- .run();
-
- new Benchmark("CAS indexCovering", template)
- .measure(() -> indexCovering(cas, getType(cas, TYPE_NAME_TOKEN), getType(cas, TYPE_NAME_SENTENCE))
- .forEach((t, l) -> l.forEach(s -> {})))
- .run();
- }
-}
diff --git a/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/JCasUtilBenchmark.java b/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/JCasUtilBenchmark.java
deleted file mode 100644
index 9b065ac..0000000
--- a/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/JCasUtilBenchmark.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.uima.fit.benchmark;
-
-import static org.apache.uima.fit.benchmark.CasInitializationUtils.initRandomCas;
-import static org.apache.uima.fit.util.JCasUtil.indexCovered;
-import static org.apache.uima.fit.util.JCasUtil.indexCovering;
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.apache.uima.fit.util.JCasUtil.selectAll;
-import static org.apache.uima.fit.util.JCasUtil.selectCovered;
-import static org.apache.uima.fit.util.JCasUtil.selectCovering;
-import static org.apache.uima.fit.util.JCasUtil.selectOverlapping;
-
-import org.apache.uima.cas.text.AnnotationPredicates;
-import org.apache.uima.fit.factory.JCasFactory;
-import org.apache.uima.fit.type.Sentence;
-import org.apache.uima.fit.type.Token;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.TOP;
-import org.junit.Before;
-import org.junit.Test;
-
-public class JCasUtilBenchmark {
- private JCas jcas;
-
- @Before
- public void setup() throws Exception {
- if (jcas == null) {
- jcas = JCasFactory.createJCas();
- }
- else {
- jcas.reset();
- }
- }
-
- @Test
- public void benchmarkSelect() {
- Benchmark template = new Benchmark("TEMPLATE")
- .initialize(n -> initRandomCas(jcas.getCas(), n))
- .magnitude(10)
- .magnitudeIncrement(count -> count * 10)
- .incrementTimes(5);
-
- new Benchmark("JCas select Token", template)
- .measure(() -> select(jcas, Token.class))
- .run();
-
- new Benchmark("JCas select Token v3", template)
- .measure(() -> jcas.select(Token.class))
- .run();
-
- new Benchmark("JCas select Token and iterate", template)
- .measure(() -> select(jcas, Token.class).forEach(v -> {}))
- .run();
-
- new Benchmark("JCas select Token and iterate v3", template)
- .measure(() -> jcas.select(Token.class).forEach(v -> {}))
- .run();
-
- new Benchmark("JCas select Sentence", template)
- .measure(() -> select(jcas, Sentence.class))
- .run();
-
- new Benchmark("JCas select Sentence and iterate", template)
- .measure(() -> select(jcas, Sentence.class).forEach(v -> {}))
- .run();
-
- new Benchmark("JCas select TOP", template)
- .measure(() -> select(jcas, TOP.class))
- .run();
-
- new Benchmark("JCas select TOP and iterate", template)
- .measure(() -> select(jcas, TOP.class).forEach(v -> {}))
- .run();
-
- new Benchmark("JCas select TOP and iterate v3", template)
- .measure(() -> jcas.select(TOP.class).forEach(v -> {}))
- .run();
-
- new Benchmark("JCas select ALL", template)
- .measure(() -> selectAll(jcas))
- .run();
-
- new Benchmark("JCas select ALL and iterate", template)
- .measure(() -> selectAll(jcas).forEach(v -> {}))
- .run();
- new Benchmark("JCas select ALL and iterate v3", template)
- .measure(() -> jcas.select().forEach(v -> {}))
- .run();
- }
-
- @Test
- public void benchmarkSelectOverlapping() {
- Benchmark template = new Benchmark("TEMPLATE")
- .initialize(n -> initRandomCas(jcas.getCas(), n))
- .magnitude(10)
- .magnitudeIncrement(count -> count * 10)
- .incrementTimes(4);
-
- new Benchmark("CAS selectOverlapping", template)
- .measure(() -> {
- select(jcas, Sentence.class).forEach(s -> selectOverlapping(jcas, Token.class, s)
- .forEach(t -> {}));
- })
- .run();
-
- new Benchmark("CAS overlapping via index v3 (stream)", template)
- .measure(() -> {
- jcas.getAnnotationIndex(Sentence.class).forEach(s -> jcas.getAnnotationIndex(Token.class)
- .stream()
- .filter(t -> AnnotationPredicates.overlapping(t, s))
- .forEach(t -> {}));
- })
- .run();
-
- new Benchmark("CAS overlapping via index v3 (forEach)", template)
- .measure(() -> {
- jcas.getAnnotationIndex(Sentence.class).forEach(s -> jcas.getAnnotationIndex(Token.class)
- .forEach(t -> AnnotationPredicates.overlapping(t, s)));
- })
- .run();
-
- new Benchmark("CAS selectOverlapping v3", template)
- .measure(() -> {
- jcas.select(Sentence.class).forEach(s -> jcas.select(Token.class)
- .filter(t -> AnnotationPredicates.overlapping(t, s))
- .forEach(t -> {}));
- })
- .run();
- }
-
- @Test
- public void benchmarkSelectCovered() {
- Benchmark template = new Benchmark("TEMPLATE")
- .initialize(n -> initRandomCas(jcas.getCas(), n))
- .magnitude(10)
- .magnitudeIncrement(count -> count * 10)
- .incrementTimes(4);
-
- new Benchmark("JCas selectCovered", template)
- .measure(() -> select(jcas, Sentence.class).forEach(s -> selectCovered(Token.class, s)))
- .run();
-
- new Benchmark("JCas selectCovered v3", template)
- .measure(() -> {
- jcas.select(Sentence.class).forEach(s -> jcas.select(Token.class).coveredBy(s).forEach(t -> {}));
- })
- .run();
-
- new Benchmark("JCas indexCovered", template)
- .measure(() -> indexCovered(jcas, Sentence.class, Token.class).forEach((s, l) -> l.forEach(t -> {})))
- .run();
- }
-
- @Test
- public void benchmarkSelectCovering() {
- Benchmark template = new Benchmark("TEMPLATE")
- .initialize(n -> initRandomCas(jcas.getCas(), n))
- .magnitude(10)
- .magnitudeIncrement(count -> count * 10)
- .incrementTimes(3);
-
- new Benchmark("JCas selectCovering", template)
- .measure(() -> select(jcas, Token.class).forEach(t -> selectCovering(Sentence.class, t)))
- .run();
-
- new Benchmark("JCas selectCovering v3", template)
- .measure(() -> {
- jcas.select(Token.class).forEach(t -> jcas.select(Sentence.class).covering(t).forEach(s -> {}));
- })
- .run();
-
- new Benchmark("JCas indexCovering", template)
- .measure(() -> indexCovering(jcas, Token.class, Sentence.class).forEach((t, l) -> l.forEach(s -> {})))
- .run();
- }
-}
diff --git a/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/SelectBenchmark.java b/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/SelectBenchmark.java
new file mode 100644
index 0000000..4db586f
--- /dev/null
+++ b/uimafit-benchmark/src/test/java/org/apache/uima/fit/benchmark/SelectBenchmark.java
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.fit.benchmark;
+
+import static org.apache.uima.cas.text.AnnotationPredicates.colocated;
+import static org.apache.uima.cas.text.AnnotationPredicates.coveredBy;
+import static org.apache.uima.cas.text.AnnotationPredicates.covering;
+import static org.apache.uima.cas.text.AnnotationPredicates.overlapping;
+import static org.apache.uima.fit.util.CasUtil.getType;
+import static org.apache.uima.fit.util.CasUtil.select;
+import static org.apache.uima.fit.util.JCasUtil.select;
+import static org.apache.uima.fit.util.JCasUtil.selectCovered;
+import static org.apache.uima.fit.util.JCasUtil.selectOverlapping;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.fit.type.Sentence;
+import org.apache.uima.fit.type.Token;
+import org.apache.uima.fit.util.CasUtil;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.cas.TOP;
+import org.junit.Test;
+
+/** Benchmarks comparing uimaFIT {@code JCasUtil}/{@code CasUtil} selectors with {@code select()} calls and index streams. */
+public class SelectBenchmark {
+  private static final String TYPE_NAME_TOKEN = "org.apache.uima.fit.type.Token";
+  private static final String TYPE_NAME_SENTENCE = "org.apache.uima.fit.type.Sentence";
+  /** Compares iterating over everything, over {@code TOP} and over {@code Token} via JCasUtil and via JCas.select. */
+  @Test
+  public void benchmarkSelect() {
+    CachingRandomJCasProvider casProvider = new CachingRandomJCasProvider();
+    // Every benchmark below is constructed from this template and so shares its settings.
+    Benchmark template = new Benchmark("TEMPLATE")
+      .initialize(casProvider::prepare)
+      .repeat(1_000)
+      .magnitude(10)
+      .magnitudeIncrement(count -> count * 10)
+      .incrementTimes(5);
+    // The WARM-UP entry is added first; every other label spells out the expression that is measured.
+    new BenchmarkGroup("select")
+      .add(new Benchmark("WARM-UP", template)
+        .measure(() -> casProvider.get().select().forEach(t -> {})))
+      .add(new Benchmark("JCasUtil.selectAll(JCAS).forEach(x -> {})", template)
+        .measure(() -> JCasUtil.selectAll(casProvider.get()).forEach(x -> {})))
+      .add(new Benchmark("JCAS.select().forEach(x -> {})", template)
+        .measure(() -> casProvider.get().select().forEach(x -> {})))
+      .add(new Benchmark("JCasUtil.select(JCAS, TOP.class).forEach(x -> {})", template)
+        .measure(() -> JCasUtil.select(casProvider.get(), TOP.class).forEach(x -> {})))
+      .add(new Benchmark("JCAS.select(TOP.class).forEach(x -> {})", template)
+        .measure(() -> casProvider.get().select(TOP.class).forEach(x -> {})))
+      .add(new Benchmark("JCasUtil.select(JCAS, Token.class).forEach(x -> {})", template)
+        .measure(() -> JCasUtil.select(casProvider.get(), Token.class).forEach(x -> {})))
+      .add(new Benchmark("JCAS.select(Token.class).forEach(x -> {})", template)
+        .measure(() -> casProvider.get().select(Token.class).forEach(x -> {})))
+      .runAll();
+  }
+  /** Compares ways of visiting, for every {@code Sentence}, the {@code Token}s overlapping it. */
+  @Test
+  public void benchmarkSelectOverlapping() {
+    CachingRandomJCasProvider casProvider = new CachingRandomJCasProvider();
+
+    Benchmark template = new Benchmark("TEMPLATE")
+      .initialize(casProvider::prepare)
+      .repeat(25)
+      .magnitude(10)
+      .magnitudeIncrement(count -> count * 10)
+      .incrementTimes(3);
+
+    new BenchmarkGroup("select overlapping")
+      .add(new Benchmark("WARM-UP", template)
+        .measure(() -> casProvider.get().select().forEach(t -> {})))
+      .add(new Benchmark("selectOverlapping(Token.class, s).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s -> selectOverlapping(Token.class, s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.select(Token.class).filter(t -> overlapping(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().select(Token.class)
+              .filter(t -> overlapping(t, s))
+              .forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.getAnnotationIndex(Token.class).stream().filter(t -> overlapping(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().getAnnotationIndex(Token.class).stream()
+              .filter(t -> overlapping(t, s))
+              .forEach(t -> {}));
+        }))
+      .runAll();
+  }
+  /** Compares ways of visiting, for every {@code Sentence}, the {@code Token}s covered by it. */
+  @Test
+  public void benchmarkSelectCoveredBy() {
+    CachingRandomJCasProvider casProvider = new CachingRandomJCasProvider();
+
+    Benchmark template = new Benchmark("TEMPLATE")
+      .initialize(casProvider::prepare)
+      .repeat(25)
+      .magnitude(10)
+      .magnitudeIncrement(count -> count * 10)
+      .incrementTimes(3);
+
+    new BenchmarkGroup("select covered by")
+      .add(new Benchmark("WARM-UP", template)
+        .measure(() -> casProvider.get().select().forEach(t -> {})))
+      .add(new Benchmark("selectCovered(Token.class, s).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            selectCovered(Token.class, s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.select(Token.class).coveredBy(s).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().select(Token.class).coveredBy(s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.getAnnotationIndex(Token.class).select().coveredBy(s).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().getAnnotationIndex(Token.class).select().coveredBy(s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.select(Token.class).filter(t -> coveredBy(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().select(Token.class)
+              .filter(t -> coveredBy(t, s))
+              .forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.getAnnotationIndex(Token.class).stream().filter(t -> coveredBy(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().getAnnotationIndex(Token.class).stream()
+              .filter(t -> coveredBy(t, s))
+              .forEach(t -> {}));
+        }))
+      .runAll();
+  }
+  /** Compares ways of visiting, for every {@code Sentence}, the {@code Token}s covering it. */
+  @Test
+  public void benchmarkSelectCovering() {
+    CachingRandomJCasProvider casProvider = new CachingRandomJCasProvider();
+
+    Benchmark template = new Benchmark("TEMPLATE")
+      .initialize(casProvider::prepare)
+      .repeat(25)
+      .magnitude(10)
+      .magnitudeIncrement(count -> count * 10)
+      .incrementTimes(3);
+    // Entries labelled CasUtil/CAS resolve types by name and call the CAS; the others use the JCas classes.
+    new BenchmarkGroup("select covering")
+      .add(new Benchmark("WARM-UP", template)
+        .measure(() -> casProvider.get().select().forEach(t -> {})))
+      .add(new Benchmark("JCasUtil.selectCovering(Token.class, s).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            JCasUtil.selectCovering(Token.class, s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("CasUtil.selectCovering(tokenType, s).forEach(t -> {})", template)
+        .measure(() -> {
+          CAS cas = casProvider.get().getCas();
+          select(cas, getType(cas, TYPE_NAME_SENTENCE)).forEach(s ->
+            CasUtil.selectCovering(getType(cas, TYPE_NAME_TOKEN), s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.select(Token.class).covering(s).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().select(Token.class).covering(s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("CAS.getAnnotationIndex(getType(cas, TYPE_NAME_TOKEN)).select().covering(s).forEach(t -> {})", template)
+        .measure(() -> {
+          CAS cas = casProvider.get().getCas();
+          select(cas, getType(cas, TYPE_NAME_SENTENCE)).forEach(s ->
+            cas.getAnnotationIndex(getType(cas, TYPE_NAME_TOKEN)).select().covering(s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.getAnnotationIndex(Token.class).select().covering(s).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().getAnnotationIndex(Token.class).select().covering(s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.select(Token.class).filter(t -> covering(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().select(Token.class)
+              .filter(t -> covering(t, s))
+              .forEach(t -> {}));
+        }))
+      .add(new Benchmark("CAS.getAnnotationIndex(getType(cas, TYPE_NAME_TOKEN)).stream().filter(t -> covering(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          CAS cas = casProvider.get().getCas();
+          select(cas, getType(cas, TYPE_NAME_SENTENCE)).forEach(s ->
+            cas.getAnnotationIndex(getType(cas, TYPE_NAME_TOKEN)).stream()
+              .filter(t -> covering(t, s))
+              .forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.getAnnotationIndex(Token.class).stream().filter(t -> covering(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().getAnnotationIndex(Token.class).stream()
+              .filter(t -> covering(t, s))
+              .forEach(t -> {}));
+        }))
+      .runAll();
+  }
+  /** Compares ways of visiting, for every {@code Sentence}, the {@code Token}s at the same begin/end offsets. */
+  @Test
+  public void benchmarkSelectAt() {
+    CachingRandomJCasProvider casProvider = new CachingRandomJCasProvider();
+
+    Benchmark template = new Benchmark("TEMPLATE")
+      .initialize(casProvider::prepare)
+      .repeat(25)
+      .magnitude(10)
+      .magnitudeIncrement(count -> count * 10)
+      .incrementTimes(3);
+
+    new BenchmarkGroup("select at")
+      .add(new Benchmark("WARM-UP", template)
+        .measure(() -> casProvider.get().select().forEach(t -> {})))
+      .add(new Benchmark("JCasUtil.selectAt(JCAS, Token.class, s.getBegin(), s.getEnd()).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            JCasUtil.selectAt(casProvider.get(), Token.class, s.getBegin(), s.getEnd()).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.select(Token.class).at(s.getBegin(), s.getEnd()).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().select(Token.class).at(s.getBegin(), s.getEnd()).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.select(Token.class).at(s).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().select(Token.class).at(s).forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.select(Token.class).filter(t -> colocated(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().select(Token.class)
+              .filter(t -> colocated(t, s))
+              .forEach(t -> {}));
+        }))
+      .add(new Benchmark("JCAS.getAnnotationIndex(Token.class).stream().filter(t -> colocated(t, s)).forEach(t -> {})", template)
+        .measure(() -> {
+          select(casProvider.get(), Sentence.class).forEach(s ->
+            casProvider.get().getAnnotationIndex(Token.class).stream()
+              .filter(t -> colocated(t, s))
+              .forEach(t -> {}));
+        }))
+      .runAll();
+  }
+}