| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.commons.text.diff; |
| |
| /** |
| * <p> |
| * It is guaranteed that the comparisons will always be done as |
| * {@code o1.equals(o2)} where {@code o1} belongs to the first |
| * sequence and {@code o2} belongs to the second sequence. This can |
| * be important if subclassing is used for some elements in the first |
| * sequence and the {@code equals} method is specialized. |
| * </p> |
| * <p> |
| * Comparison can be seen from two points of view: either as giving the smallest |
| * modification allowing to transform the first sequence into the second one, or |
| * as giving the longest sequence which is a subsequence of both initial |
| * sequences. The {@code equals} method is used to compare objects, so any |
| * object can be put into sequences. Modifications include deleting, inserting |
| * or keeping one object, starting from the beginning of the first sequence. |
| * </p> |
| * <p> |
| * This class implements the comparison algorithm, which is the very efficient |
| * algorithm from Eugene W. Myers |
| * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps"> |
| * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces |
| * the shortest possible {@link EditScript edit script} containing all the |
| * {@link EditCommand commands} needed to transform the first sequence into |
| * the second one. |
| * |
| * <p> |
| * This code has been adapted from Apache Commons Collections 4.0. |
| * </p> |
| * |
| * @see EditScript |
| * @see EditCommand |
| * @see CommandVisitor |
| * @since 1.0 |
| */ |
| public class StringsComparator { |
| |
| /** |
| * First character sequence. |
| */ |
| private final String left; |
| /** |
| * Second character sequence. |
| */ |
| private final String right; |
| /** |
| * Temporary array. |
| */ |
| private final int[] vDown; |
| /** |
| * Temporary array. |
| */ |
| private final int[] vUp; |
| |
| /** |
| * Simple constructor. |
| * <p> |
| * Creates a new instance of StringsComparator. |
| * </p> |
| * <p> |
| * It is <em>guaranteed</em> that the comparisons will always be done as |
| * {@code o1.equals(o2)} where {@code o1} belongs to the first |
| * sequence and {@code o2} belongs to the second sequence. This can be |
| * important if subclassing is used for some elements in the first sequence |
| * and the {@code equals} method is specialized. |
| * </p> |
| * |
| * @param left first character sequence to be compared |
| * @param right second character sequence to be compared |
| */ |
| public StringsComparator(final String left, final String right) { |
| this.left = left; |
| this.right = right; |
| |
| final int size = left.length() + right.length() + 2; |
| vDown = new int[size]; |
| vUp = new int[size]; |
| } |
| |
| /** |
| * Get the {@link EditScript} object. |
| * <p> |
| * It is guaranteed that the objects embedded in the {@link InsertCommand |
| * insert commands} come from the second sequence and that the objects |
| * embedded in either the {@link DeleteCommand delete commands} or |
| * {@link KeepCommand keep commands} come from the first sequence. This can |
| * be important if subclassing is used for some elements in the first |
| * sequence and the {@code equals} method is specialized. |
| * </p> |
| * |
| * @return The edit script resulting from the comparison of the two |
| * sequences |
| */ |
| public EditScript<Character> getScript() { |
| final EditScript<Character> script = new EditScript<>(); |
| buildScript(0, left.length(), 0, right.length(), script); |
| return script; |
| } |
| |
| /** |
| * Build an edit script. |
| * |
| * @param start1 the begin of the first sequence to be compared |
| * @param end1 the end of the first sequence to be compared |
| * @param start2 the begin of the second sequence to be compared |
| * @param end2 the end of the second sequence to be compared |
| * @param script the edited script |
| */ |
| private void buildScript(final int start1, final int end1, final int start2, final int end2, |
| final EditScript<Character> script) { |
| final Snake middle = getMiddleSnake(start1, end1, start2, end2); |
| |
| if (middle == null |
| || middle.getStart() == end1 && middle.getDiag() == end1 - end2 |
| || middle.getEnd() == start1 && middle.getDiag() == start1 - start2) { |
| |
| int i = start1; |
| int j = start2; |
| while (i < end1 || j < end2) { |
| if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) { |
| script.append(new KeepCommand<>(left.charAt(i))); |
| ++i; |
| ++j; |
| } else { |
| if (end1 - start1 > end2 - start2) { |
| script.append(new DeleteCommand<>(left.charAt(i))); |
| ++i; |
| } else { |
| script.append(new InsertCommand<>(right.charAt(j))); |
| ++j; |
| } |
| } |
| } |
| |
| } else { |
| |
| buildScript(start1, middle.getStart(), |
| start2, middle.getStart() - middle.getDiag(), |
| script); |
| for (int i = middle.getStart(); i < middle.getEnd(); ++i) { |
| script.append(new KeepCommand<>(left.charAt(i))); |
| } |
| buildScript(middle.getEnd(), end1, |
| middle.getEnd() - middle.getDiag(), end2, |
| script); |
| } |
| } |
| |
| /** |
| * Get the middle snake corresponding to two subsequences of the |
| * main sequences. |
| * <p> |
| * The snake is found using the MYERS Algorithm (this algorithms has |
| * also been implemented in the GNU diff program). This algorithm is |
| * explained in Eugene Myers article: |
| * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps"> |
| * An O(ND) Difference Algorithm and Its Variations</a>. |
| * </p> |
| * |
| * @param start1 the begin of the first sequence to be compared |
| * @param end1 the end of the first sequence to be compared |
| * @param start2 the begin of the second sequence to be compared |
| * @param end2 the end of the second sequence to be compared |
| * @return The middle snake |
| */ |
| private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) { |
| // Myers Algorithm |
| // Initialisations |
| final int m = end1 - start1; |
| final int n = end2 - start2; |
| if (m == 0 || n == 0) { |
| return null; |
| } |
| |
| final int delta = m - n; |
| final int sum = n + m; |
| final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2; |
| vDown[1 + offset] = start1; |
| vUp[1 + offset] = end1 + 1; |
| |
| for (int d = 0; d <= offset; ++d) { |
| // Down |
| for (int k = -d; k <= d; k += 2) { |
| // First step |
| |
| final int i = k + offset; |
| if (k == -d || k != d && vDown[i - 1] < vDown[i + 1]) { |
| vDown[i] = vDown[i + 1]; |
| } else { |
| vDown[i] = vDown[i - 1] + 1; |
| } |
| |
| int x = vDown[i]; |
| int y = x - start1 + start2 - k; |
| |
| while (x < end1 && y < end2 && left.charAt(x) == right.charAt(y)) { |
| vDown[i] = ++x; |
| ++y; |
| } |
| // Second step |
| if (delta % 2 != 0 && delta - d <= k && k <= delta + d) { |
| if (vUp[i - delta] <= vDown[i]) { // NOPMD |
| return buildSnake(vUp[i - delta], k + start1 - start2, end1, end2); |
| } |
| } |
| } |
| |
| // Up |
| for (int k = delta - d; k <= delta + d; k += 2) { |
| // First step |
| final int i = k + offset - delta; |
| if (k == delta - d |
| || k != delta + d && vUp[i + 1] <= vUp[i - 1]) { |
| vUp[i] = vUp[i + 1] - 1; |
| } else { |
| vUp[i] = vUp[i - 1]; |
| } |
| |
| int x = vUp[i] - 1; |
| int y = x - start1 + start2 - k; |
| while (x >= start1 && y >= start2 |
| && left.charAt(x) == right.charAt(y)) { |
| vUp[i] = x--; |
| y--; |
| } |
| // Second step |
| if (delta % 2 == 0 && -d <= k && k <= d) { |
| if (vUp[i] <= vDown[i + delta]) { // NOPMD |
| return buildSnake(vUp[i], k + start1 - start2, end1, end2); |
| } |
| } |
| } |
| } |
| |
| // this should not happen |
| throw new RuntimeException("Internal Error"); |
| } |
| |
| /** |
| * Build a snake. |
| * |
| * @param start the value of the start of the snake |
| * @param diag the value of the diagonal of the snake |
| * @param end1 the value of the end of the first sequence to be compared |
| * @param end2 the value of the end of the second sequence to be compared |
| * @return The snake built |
| */ |
| private Snake buildSnake(final int start, final int diag, final int end1, final int end2) { |
| int end = start; |
| while (end - diag < end2 |
| && end < end1 |
| && left.charAt(end) == right.charAt(end - diag)) { |
| ++end; |
| } |
| return new Snake(start, end, diag); |
| } |
| |
| /** |
| * This class is a simple placeholder to hold the end part of a path |
| * under construction in a {@link StringsComparator StringsComparator}. |
| */ |
| private static class Snake { |
| |
| /** Start index. */ |
| private final int start; |
| |
| /** End index. */ |
| private final int end; |
| |
| /** Diagonal number. */ |
| private final int diag; |
| |
| /** |
| * Simple constructor. Creates a new instance of Snake with specified indices. |
| * |
| * @param start start index of the snake |
| * @param end end index of the snake |
| * @param diag diagonal number |
| */ |
| Snake(final int start, final int end, final int diag) { |
| this.start = start; |
| this.end = end; |
| this.diag = diag; |
| } |
| |
| /** |
| * Get the start index of the snake. |
| * |
| * @return start index of the snake |
| */ |
| public int getStart() { |
| return start; |
| } |
| |
| /** |
| * Get the end index of the snake. |
| * |
| * @return end index of the snake |
| */ |
| public int getEnd() { |
| return end; |
| } |
| |
| /** |
| * Get the diagonal number of the snake. |
| * |
| * @return diagonal number of the snake |
| */ |
| public int getDiag() { |
| return diag; |
| } |
| } |
| |
| } |