blob: f548e20287bd7deb4093d2bfb80cf842b8f9efcf [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.jmh;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.text.similarity.LongestCommonSubsequence;
import org.apache.commons.text.similarity.SimilarityScore;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
/**
* Performance analysis for LongestCommonSubsequence
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(value = 1, jvmArgs = {"-server", "-Xms512M", "-Xmx512M"})
public class LongestCommonSubsequencePerformance {
@State(Scope.Benchmark)
public static class InputData {
final List<Pair<CharSequence, CharSequence>> inputs = new ArrayList<>();
@Setup(Level.Trial)
public void setup() {
final ClassLoader classloader = Thread.currentThread().getContextClassLoader();
try (InputStream is = classloader.getResourceAsStream("lcs-perf-analysis-inputs.csv");
InputStreamReader isr = new InputStreamReader(Objects.requireNonNull(is));
BufferedReader br = new BufferedReader(isr)) {
String line;
while ((line = br.readLine()) != null && !line.trim().isEmpty()) {
line = line.trim();
final int indexOfComma = line.indexOf(',');
final String inputA = line.substring(0, indexOfComma);
final String inputB = line.substring(1 + indexOfComma);
this.inputs.add(ImmutablePair.of(inputA, inputB));
}
} catch (final IOException exception) {
throw new RuntimeException(exception.getMessage(), exception.getCause());
}
}
}
@Benchmark
public void testLCSLenBaseline(final InputData data) {
final BaselineLongestCommonSubsequence lcs = new BaselineLongestCommonSubsequence();
for (final Pair<CharSequence, CharSequence> input : data.inputs) {
lcs.apply(input.getLeft(), input.getRight());
}
}
@Benchmark
public void testLCSBaseline(final InputData data) {
final BaselineLongestCommonSubsequence lcs = new BaselineLongestCommonSubsequence();
for (final Pair<CharSequence, CharSequence> input : data.inputs) {
lcs.longestCommonSubsequence(input.getLeft(), input.getRight());
}
}
@Benchmark
public void testLCSLen(final InputData data) {
final LongestCommonSubsequence lcs = new LongestCommonSubsequence();
for (final Pair<CharSequence, CharSequence> input : data.inputs) {
lcs.apply(input.getLeft(), input.getRight());
}
}
@Benchmark
public void testLCS(final InputData data) {
final LongestCommonSubsequence lcs = new LongestCommonSubsequence();
for (final Pair<CharSequence, CharSequence> input : data.inputs) {
lcs.longestCommonSubsequence(input.getLeft(), input.getRight());
}
}
/**
* Older implementation of LongestCommonSubsequence.
* Code is copied from Apache Commons Text version 1.10.0-SNAPSHOT
*/
private static class BaselineLongestCommonSubsequence implements SimilarityScore<Integer> {
@Override
public Integer apply(final CharSequence left, final CharSequence right) {
if (left == null || right == null) {
throw new IllegalArgumentException("Inputs must not be null");
}
return longestCommonSubsequence(left, right).length();
}
public CharSequence longestCommonSubsequence(final CharSequence left, final CharSequence right) {
if (left == null || right == null) {
throw new IllegalArgumentException("Inputs must not be null");
}
final StringBuilder longestCommonSubstringArray = new StringBuilder(Math.max(left.length(), right.length()));
final int[][] lcsLengthArray = longestCommonSubstringLengthArray(left, right);
int i = left.length() - 1;
int j = right.length() - 1;
int k = lcsLengthArray[left.length()][right.length()] - 1;
while (k >= 0) {
if (left.charAt(i) == right.charAt(j)) {
longestCommonSubstringArray.append(left.charAt(i));
i = i - 1;
j = j - 1;
k = k - 1;
} else if (lcsLengthArray[i + 1][j] < lcsLengthArray[i][j + 1]) {
i = i - 1;
} else {
j = j - 1;
}
}
return longestCommonSubstringArray.reverse().toString();
}
public int[][] longestCommonSubstringLengthArray(final CharSequence left, final CharSequence right) {
final int[][] lcsLengthArray = new int[left.length() + 1][right.length() + 1];
for (int i = 0; i < left.length(); i++) {
for (int j = 0; j < right.length(); j++) {
if (i == 0) {
lcsLengthArray[i][j] = 0;
}
if (j == 0) {
lcsLengthArray[i][j] = 0;
}
if (left.charAt(i) == right.charAt(j)) {
lcsLengthArray[i + 1][j + 1] = lcsLengthArray[i][j] + 1;
} else {
lcsLengthArray[i + 1][j + 1] = Math.max(lcsLengthArray[i + 1][j], lcsLengthArray[i][j + 1]);
}
}
}
return lcsLengthArray;
}
}
}