wicket-examples/src/test/wicket/examples/displaytag/list/Diff.java - wicket - Git at Google

 /*
  * $Log$
  * Revision 1.1  2005/03/25 16:28:47  eelco12
  * moved packages from examples/contrib to wicket-stuff project
  *
  * Revision 1.2  2005/02/28 22:06:07  eelco12
  * System.err.println & printStackTrace -> log.error
  *
  * Revision 1.1  2005/02/28 21:38:56  eelco12
  * issue 1145270
  * moved classes around a bit to make clear that they are used for certain tests only.
  *
  * Revision 1.1  2005/02/21 08:58:30  dashorst
  * Preparing for release 0.9.18-beta
  *
  * Revision 1.1  2005/01/26 09:27:54  eelco12
  * refactoring:
  * package rename table -> list
  * rename *Table* -> (Pageable)ListView
  *
  * Revision 1.2  2004/12/28 12:27:18  eelco12
  * javadoc fixes (to stop Eclipse from complaining)
  *
  * Revision 1.1  2004/12/19 17:19:41  eelco12
  * package rename
  *
  * Revision 1.1  2004/11/29 22:10:27  jdonnerstag
  * replaced Table with ListView
  *
  * Revision 1.6  2003/03/06 22:51:32  stuart
  * Convert to CVS
  *
  * Revision 1.5  2002/07/19  19:14:40  stuart
  * fix reverseScript, make change ctor public, update docs
  *
  * Revision 1.4  2002/04/09  17:53:39  stuart
  * More flexible interface for diff() function.
  *
  * Revision 1.3  2000/03/03 21:58:03  stuart
  * move discard_confusing_lines and shift_boundaries to class file_data
  *
  * Revision 1.2  2000/03/02  16:37:38  stuart
  * Add GPL and copyright
  *
  */
 package wicket.examples.displaytag.list;

 import java.util.Hashtable;

 /**
  * A class to compare vectors of objects. The result of comparison is a list of
  * <code>change</code> objects which form an edit script. The objects compared are
  * traditionally lines of text from two files. Comparison options such as "ignore
  * whitespace" are implemented by modifying the <code>equals</code> and
  * <code>hashcode</code> methods for the objects compared.
  * <p>
  * The basic algorithm is described in:</br> "An O(ND) Difference Algorithm and its
  * Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, p 251.
  * <p>
  * This class outputs different results from GNU diff 1.15 on some inputs. Our results are
  * actually better (smaller change list, smaller total size of changes), but it would be
  * nice to know why. Perhaps there is a memory overwrite bug in GNU diff 1.15.
  * @author Stuart D. Gathman, translated from GNU diff 1.15 Copyright (C) 2000 Business
  *         Management Systems, Inc.
  *         <p>
  *         This program is free software; you can redistribute it and/or modify it under
  *         the terms of the GNU General Public License as published by the Free Software
  *         Foundation; either version 1, or (at your option) any later version.
  *         <p>
  *         This program is distributed in the hope that it will be useful, but WITHOUT ANY
  *         WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
  *         PARTICULAR PURPOSE. See the GNU General Public License for more details.
  *         <p>
  *         You should have received a copy of the <a href=COPYING.txt> GNU General Public
  *         License </a> along with this program; if not, write to the Free Software
  *         Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */

 class Diff
 {

 	/**
 	 * Prepare to find differences between two arrays. Each element of the arrays is
 	 * translated to an "equivalence number" based on the result of <code>equals</code>.
 	 * The original Object arrays are no longer needed for computing the differences. They
 	 * will be needed again later to print the results of the comparison as an edit script,
 	 * if desired.
 	 * @param a
 	 * @param b
 	 */
 	public Diff(Object[] a, Object[] b)
 	{
 		Hashtable h = new Hashtable(a.length + b.length);
 		filevec[0] = new file_data(a, h);
 		filevec[1] = new file_data(b, h);
 	}

 	/**
 	 * 1 more than the maximum equivalence value used for this or its sibling file.
 	 */
 	private int equiv_max = 1;

 	/**
 	 * When set to true, the comparison uses a heuristic to speed it up. With this
 	 * heuristic, for files with a constant small density of changes, the algorithm is
 	 * linear in the file size.
 	 */
 	public boolean heuristic = false;

 	/**
 	 * When set to true, the algorithm returns a guarranteed minimal set of changes. This
 	 * makes things slower, sometimes much slower.
 	 */
 	public boolean no_discards = false;

 	private int[] xvec, yvec; /* Vectors being compared. */

 	private int[] fdiag; /*
 								 * Vector, indexed by diagonal, containing the X coordinate of the
 								 * point furthest along the given diagonal in the forward search
 								 * of the edit matrix.
 								 */

 	private int[] bdiag; /*
 								 * Vector, indexed by diagonal, containing the X coordinate of the
 								 * point furthest along the given diagonal in the backward search
 								 * of the edit matrix.
 								 */

 	private int fdiagoff, bdiagoff;

 	private final file_data[] filevec = new file_data[2];

 	private int cost;

 	/**
 	 * Find the midpoint of the shortest edit script for a specified portion of the two
 	 * files. We scan from the beginnings of the files, and simultaneously from the ends,
 	 * doing a breadth-first search through the space of edit-sequence. When the two
 	 * searches meet, we have found the midpoint of the shortest edit sequence. The value
 	 * returned is the number of the diagonal on which the midpoint lies. The diagonal
 	 * number equals the number of inserted lines minus the number of deleted lines
 	 * (counting only lines before the midpoint). The edit cost is stored into COST; this
 	 * is the total number of lines inserted or deleted (counting only lines before the
 	 * midpoint). This function assumes that the first lines of the specified portions of
 	 * the two files do not match, and likewise that the last lines do not match. The
 	 * caller must trim matching lines from the beginning and end of the portions it is
 	 * going to specify. Note that if we return the "wrong" diagonal value, or if the value
 	 * of bdiag at that diagonal is "wrong", the worst this can do is cause suboptimal diff
 	 * output. It cannot cause incorrect diff output.
 	 * @param xoff x offset
 	 * @param xlim x limit
 	 * @param yoff y offset
 	 * @param ylim y limit
 	 * @return the midpoint of the shortest edit script for a specified portion of the two
 	 *         files
 	 */

 	private int diag(int xoff, int xlim, int yoff, int ylim)
 	{
 		final int[] fd = fdiag; // Give the compiler a chance.
 		final int[] bd = bdiag; // Additional help for the compiler.
 		final int[] xv = xvec; // Still more help for the compiler.
 		final int[] yv = yvec; // And more and more . . .
 		final int dmin = xoff - ylim; // Minimum valid diagonal.
 		final int dmax = xlim - yoff; // Maximum valid diagonal.
 		final int fmid = xoff - yoff; // Center diagonal of top-down search.
 		final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
 		int fmin = fmid, fmax = fmid; // Limits of top-down search.
 		int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
 		/*
 		 * True if southeast corner is on an odd diagonal with respect to the northwest.
 		 */
 		final boolean odd = (fmid - bmid & 1) != 0;

 		fd[fdiagoff + fmid] = xoff;
 		bd[bdiagoff + bmid] = xlim;

 		for (int c = 1;; ++c)
 		{
 			int d; /* Active diagonal. */
 			boolean big_snake = false;

 			/* Extend the top-down search by an edit step in each diagonal. */
 			if (fmin > dmin)
 				fd[fdiagoff + --fmin - 1] = -1;
 			else
 				++fmin;
 			if (fmax < dmax)
 				fd[fdiagoff + ++fmax + 1] = -1;
 			else
 				--fmax;
 			for (d = fmax; d >= fmin; d -= 2)
 			{
 				int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];

 				if (tlo >= thi)
 					x = tlo + 1;
 				else
 					x = thi;
 				oldx = x;
 				y = x - d;
 				while (x < xlim && y < ylim && xv[x] == yv[y])
 				{
 					++x;
 					++y;
 				}
 				if (x - oldx > 20)
 					big_snake = true;
 				fd[fdiagoff + d] = x;
 				if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
 				{
 					cost = 2 * c - 1;
 					return d;
 				}
 			}

 			/* Similar extend the bottom-up search. */
 			if (bmin > dmin)
 				bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
 			else
 				++bmin;
 			if (bmax < dmax)
 				bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
 			else
 				--bmax;
 			for (d = bmax; d >= bmin; d -= 2)
 			{
 				int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];

 				if (tlo < thi)
 					x = tlo;
 				else
 					x = thi - 1;
 				oldx = x;
 				y = x - d;
 				while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
 				{
 					--x;
 					--y;
 				}
 				if (oldx - x > 20)
 					big_snake = true;
 				bd[bdiagoff + d] = x;
 				if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
 				{
 					cost = 2 * c;
 					return d;
 				}
 			}

 			/*
 			 * Heuristic: check occasionally for a diagonal that has made lots of progress
 			 * compared with the edit distance. If we have any such, find the one that has
 			 * made the most progress and return it as if it had succeeded. With this
 			 * heuristic, for files with a constant small density of changes, the algorithm
 			 * is linear in the file size.
 			 */

 			if (c > 200 && big_snake && heuristic)
 			{
 				int best = 0;
 				int bestpos = -1;

 				for (d = fmax; d >= fmin; d -= 2)
 				{
 					int dd = d - fmid;
 					if ((fd[fdiagoff + d] - xoff) * 2 - dd > 12 * (c + (dd > 0 ? dd : -dd)))
 					{
 						if (fd[fdiagoff + d] * 2 - dd > best && fd[fdiagoff + d] - xoff > 20
 								&& fd[fdiagoff + d] - d - yoff > 20)
 						{
 							int k;
 							int x = fd[fdiagoff + d];

 							/*
 							 * We have a good enough best diagonal; now insist that it end with a
 							 * significant snake.
 							 */
 							for (k = 1; k <= 20; k++)
 								if (xvec[x - k] != yvec[x - d - k])
 									break;

 							if (k == 21)
 							{
 								best = fd[fdiagoff + d] * 2 - dd;
 								bestpos = d;
 							}
 						}
 					}
 				}
 				if (best > 0)
 				{
 					cost = 2 * c - 1;
 					return bestpos;
 				}

 				best = 0;
 				for (d = bmax; d >= bmin; d -= 2)
 				{
 					int dd = d - bmid;
 					if ((xlim - bd[bdiagoff + d]) * 2 + dd > 12 * (c + (dd > 0 ? dd : -dd)))
 					{
 						if ((xlim - bd[bdiagoff + d]) * 2 + dd > best && xlim - bd[bdiagoff + d] > 20
 								&& ylim - (bd[bdiagoff + d] - d) > 20)
 						{
 							/*
 							 * We have a good enough best diagonal; now insist that it end with a
 							 * significant snake.
 							 */
 							int k;
 							int x = bd[bdiagoff + d];

 							for (k = 0; k < 20; k++)
 								if (xvec[x + k] != yvec[x - d + k])
 									break;
 							if (k == 20)
 							{
 								best = (xlim - bd[bdiagoff + d]) * 2 + dd;
 								bestpos = d;
 							}
 						}
 					}
 				}
 				if (best > 0)
 				{
 					cost = 2 * c - 1;
 					return bestpos;
 				}
 			}
 		}
 	}

 	/**
 	 * Compare in detail contiguous subsequences of the two files which are known, as a
 	 * whole, to match each other. The results are recorded in the vectors
 	 * filevec[N].changed_flag, by storing a 1 in the element for each line that is an
 	 * insertion or deletion. The subsequence of file 0 is [XOFF, XLIM) and likewise for
 	 * file 1. Note that XLIM, YLIM are exclusive bounds. All line numbers are origin-0 and
 	 * discarded lines are not counted.
 	 * @param xoff
 	 * @param xlim
 	 * @param yoff
 	 * @param ylim
 	 */

 	private void compareseq(int xoff, int xlim, int yoff, int ylim)
 	{
 		/* Slide down the bottom initial diagonal. */
 		while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff])
 		{
 			++xoff;
 			++yoff;
 		}
 		/* Slide up the top initial diagonal. */
 		while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1])
 		{
 			--xlim;
 			--ylim;
 		}

 		/* Handle simple cases. */
 		if (xoff == xlim)
 			while (yoff < ylim)
 				filevec[1].changed_flag[1 + filevec[1].realindexes[yoff++]] = true;
 		else if (yoff == ylim)
 			while (xoff < xlim)
 				filevec[0].changed_flag[1 + filevec[0].realindexes[xoff++]] = true;
 		else
 		{
 			/* Find a point of correspondence in the middle of the files. */

 			int d = diag(xoff, xlim, yoff, ylim);
 			int c = cost;
 			int f = fdiag[fdiagoff + d];
 			int b = bdiag[bdiagoff + d];

 			if (c == 1)
 			{
 				/*
 				 * This should be impossible, because it implies that one of the two
 				 * subsequences is empty, and that case was handled above without calling
 				 * `diag'. Let's verify that this is true.
 				 */
 				throw new IllegalArgumentException("Empty subsequence");
 			}
 			else
 			{
 				/* Use that point to split this problem into two subproblems. */
 				compareseq(xoff, b, yoff, b - d);
 				/*
 				 * This used to use f instead of b, but that is incorrect! It is not
 				 * necessarily the case that diagonal d has a snake from b to f.
 				 */
 				compareseq(b, xlim, b - d, ylim);
 			}
 		}
 	}

 	/**
 	 * Discard lines from one file that have no matches in the other file.
 	 */

 	private void discard_confusing_lines()
 	{
 		filevec[0].discard_confusing_lines(filevec[1]);
 		filevec[1].discard_confusing_lines(filevec[0]);
 	}

 	private boolean inhibit = false;

 	/**
 	 * Adjust inserts/deletes of blank lines to join changes as much as possible.
 	 */

 	private void shift_boundaries()
 	{
 		if (inhibit)
 			return;
 		filevec[0].shift_boundaries(filevec[1]);
 		filevec[1].shift_boundaries(filevec[0]);
 	}

 	/**
 	 *
 	 */
 	public interface ScriptBuilder
 	{
 		/**
 		 * Scan the tables of which lines are inserted and deleted, producing an edit
 		 * script.
 		 * @param changed0 true for lines in first file which do not match 2nd
 		 * @param len0 number of lines in first file
 		 * @param changed1 true for lines in 2nd file which do not match 1st
 		 * @param len1 number of lines in 2nd file
 		 * @return a linked list of changes - or null
 		 */
 		public change build_script(boolean[] changed0, int len0, boolean[] changed1, int len1);
 	}

 	/**
 	 * Scan the tables of which lines are inserted and deleted, producing an edit script in
 	 * reverse order.
 	 */

 	static class ReverseScript implements ScriptBuilder
 	{
 		/**
 		 * @see wicket.examples.displaytag.list.Diff.ScriptBuilder#build_script(boolean[], int,
 		 *      boolean[], int)
 		 */
 		public change build_script(final boolean[] changed0, int len0, final boolean[] changed1,
 				int len1)
 		{
 			change script = null;
 			int i0 = 0, i1 = 0;
 			while (i0 < len0 || i1 < len1)
 			{
 				if (changed0[1 + i0] || changed1[1 + i1])
 				{
 					int line0 = i0, line1 = i1;

 					/* Find # lines changed here in each file. */
 					while (changed0[1 + i0])
 						++i0;
 					while (changed1[1 + i1])
 						++i1;

 					/* Record this change. */
 					script = new change(line0, line1, i0 - line0, i1 - line1, script);
 				}

 				/* We have reached lines in the two files that match each other. */
 				i0++;
 				i1++;
 			}

 			return script;
 		}
 	}

 	static class ForwardScript implements ScriptBuilder
 	{
 		/**
 		 * Scan the tables of which lines are inserted and deleted, producing an edit script
 		 * in forward order.
 		 * @param changed0
 		 * @param len0
 		 * @param changed1
 		 * @param len1
 		 * @return change
 		 */
 		public change build_script(final boolean[] changed0, int len0, final boolean[] changed1,
 				int len1)
 		{
 			change script = null;
 			int i0 = len0, i1 = len1;

 			while (i0 >= 0 || i1 >= 0)
 			{
 				if (changed0[i0] || changed1[i1])
 				{
 					int line0 = i0, line1 = i1;

 					/* Find # lines changed here in each file. */
 					while (changed0[i0])
 						--i0;
 					while (changed1[i1])
 						--i1;

 					/* Record this change. */
 					script = new change(i0, i1, line0 - i0, line1 - i1, script);
 				}

 				/* We have reached lines in the two files that match each other. */
 				i0--;
 				i1--;
 			}

 			return script;
 		}
 	}

 	/** forward builder. */
 	public final static ScriptBuilder forwardScript = new ForwardScript();

 	/** reverse builder. */
 	public final static ScriptBuilder reverseScript = new ReverseScript();

 	/**
 	 * Report the differences of two files. DEPTH is the current directory depth.
 	 * @param reverse
 	 * @return change
 	 */
 	public final change diff_2(final boolean reverse)
 	{
 		return diff(reverse ? reverseScript : forwardScript);
 	}

 	/**
 	 * Get the results of comparison as an edit script. The script is described by a list
 	 * of changes. The standard ScriptBuilder implementations provide for forward and
 	 * reverse edit scripts. Alternate implementations could, for instance, list common
 	 * elements instead of differences.
 	 * @param bld an object to build the script from change flags
 	 * @return the head of a list of changes
 	 */
 	public change diff(final ScriptBuilder bld)
 	{

 		/*
 		 * Some lines are obviously insertions or deletions because they don't match
 		 * anything. Detect them now, and avoid even thinking about them in the main
 		 * comparison algorithm.
 		 */

 		discard_confusing_lines();

 		/*
 		 * Now do the main comparison algorithm, considering just the undiscarded lines.
 		 */

 		xvec = filevec[0].undiscarded;
 		yvec = filevec[1].undiscarded;

 		int diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
 		fdiag = new int[diags];
 		fdiagoff = filevec[1].nondiscarded_lines + 1;
 		bdiag = new int[diags];
 		bdiagoff = filevec[1].nondiscarded_lines + 1;

 		compareseq(0, filevec[0].nondiscarded_lines, 0, filevec[1].nondiscarded_lines);
 		fdiag = null;
 		bdiag = null;

 		/*
 		 * Modify the results slightly to make them prettier in cases where that can validly
 		 * be done.
 		 */

 		shift_boundaries();

 		/*
 		 * Get the results of comparison in the form of a chain of `struct change's -- an
 		 * edit script.
 		 */
 		return bld.build_script(filevec[0].changed_flag, filevec[0].buffered_lines,
 				filevec[1].changed_flag, filevec[1].buffered_lines);

 	}

 	/**
 	 * The result of comparison is an "edit script": a chain of change objects. Each change
 	 * represents one place where some lines are deleted and some are inserted. LINE0 and
 	 * LINE1 are the first affected lines in the two files (origin 0). DELETED is the
 	 * number of lines deleted here from file 0. INSERTED is the number of lines inserted
 	 * here in file 1. If DELETED is 0 then LINE0 is the number of the line before which
 	 * the insertion was done; vice versa for INSERTED and LINE1.
 	 */

 	public static class change
 	{
 		/** Previous or next edit command. */
 		public change link;

 		/** # lines of file 1 changed here. */
 		public final int inserted;

 		/** # lines of file 0 changed here. */
 		public final int deleted;

 		/** Line number of 1st deleted line. */
 		public final int line0;

 		/** Line number of 1st inserted line. */
 		public final int line1;

 		/**
 		 * Cons an additional entry onto the front of an edit script OLD. LINE0 and LINE1
 		 * are the first affected lines in the two files (origin 0). DELETED is the number
 		 * of lines deleted here from file 0. INSERTED is the number of lines inserted here
 		 * in file 1. If DELETED is 0 then LINE0 is the number of the line before which the
 		 * insertion was done; vice versa for INSERTED and LINE1.
 		 * @param line0
 		 * @param line1
 		 * @param deleted
 		 * @param inserted
 		 * @param old
 		 */
 		public change(int line0, int line1, int deleted, int inserted, change old)
 		{
 			this.line0 = line0;
 			this.line1 = line1;
 			this.inserted = inserted;
 			this.deleted = deleted;
 			this.link = old;
 		}
 	}

 	/**
 	 * Data on one input file being compared.
 	 */

 	class file_data
 	{

 		/** Allocate changed array for the results of comparison. */
 		void clear()
 		{
 			/*
 			 * Allocate a flag for each line of each file, saying whether that line is an
 			 * insertion or deletion. Allocate an extra element, always zero, at each end of
 			 * each vector.
 			 */
 			changed_flag = new boolean[buffered_lines + 2];
 		}

 		/**
 		 * Return equiv_count[I] as the number of lines in this file that fall in
 		 * equivalence class I.
 		 * @return the array of equivalence class counts.
 		 */
 		int[] equivCount()
 		{
 			int[] equiv_count = new int[equiv_max];
 			for (int i = 0; i < buffered_lines; ++i)
 				++equiv_count[equivs[i]];
 			return equiv_count;
 		}

 		/**
 		 * Discard lines that have no matches in another file. A line which is discarded
 		 * will not be considered by the actual comparison algorithm; it will be as if that
 		 * line were not in the file. The file's `realindexes' table maps virtual line
 		 * numbers (which don't count the discarded lines) into real line numbers; this is
 		 * how the actual comparison algorithm produces results that are comprehensible when
 		 * the discarded lines are counted.
 		 * <p>
 		 * When we discard a line, we also mark it as a deletion or insertion so that it
 		 * will be printed in the output.
 		 * @param f the other file
 		 */
 		void discard_confusing_lines(file_data f)
 		{
 			clear();
 			/* Set up table of which lines are going to be discarded. */
 			final byte[] discarded = discardable(f.equivCount());

 			/*
 			 * Don't really discard the provisional lines except when they occur in a run of
 			 * discardables, with nonprovisionals at the beginning and end.
 			 */
 			filterDiscards(discarded);

 			/* Actually discard the lines. */
 			discard(discarded);
 		}

 		/**
 		 * Mark to be discarded each line that matches no line of another file. If a line
 		 * matches many lines, mark it as provisionally discardable.
 		 * @param counts The count of each equivalence number for the other file.
 		 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each
 		 *         line
 		 */

 		private byte[] discardable(final int[] counts)
 		{
 			final int end = buffered_lines;
 			final byte[] discards = new byte[end];
 			final int[] equivs = this.equivs;
 			int many = 5;
 			int tem = end / 64;

 			/*
 			 * Multiply MANY by approximate square root of number of lines. That is the
 			 * threshold for provisionally discardable lines.
 			 */
 			while ((tem = tem >> 2) > 0)
 				many *= 2;

 			for (int i = 0; i < end; i++)
 			{
 				int nmatch;
 				if (equivs[i] == 0)
 					continue;
 				nmatch = counts[equivs[i]];
 				if (nmatch == 0)
 					discards[i] = 1;
 				else if (nmatch > many)
 					discards[i] = 2;
 			}
 			return discards;
 		}

 		/**
 		 * Don't really discard the provisional lines except when they occur in a run of
 		 * discardables, with nonprovisionals at the beginning and end.
 		 * @param discards to discard
 		 */
 		private void filterDiscards(final byte[] discards)
 		{
 			final int end = buffered_lines;

 			for (int i = 0; i < end; i++)
 			{
 				/* Cancel provisional discards not in middle of run of discards. */
 				if (discards[i] == 2)
 					discards[i] = 0;
 				else if (discards[i] != 0)
 				{
 					/* We have found a nonprovisional discard. */
 					int j;
 					int length;
 					int provisional = 0;

 					/*
 					 * Find end of this run of discardable lines. Count how many are
 					 * provisionally discardable.
 					 */
 					for (j = i; j < end; j++)
 					{
 						if (discards[j] == 0)
 							break;
 						if (discards[j] == 2)
 							++provisional;
 					}

 					/* Cancel provisional discards at end, and shrink the run. */
 					while (j > i && discards[j - 1] == 2)
 					{
 						discards[--j] = 0;
 						--provisional;
 					}

 					/*
 					 * Now we have the length of a run of discardable lines whose first and
 					 * last are not provisional.
 					 */
 					length = j - i;

 					/*
 					 * If 1/4 of the lines in the run are provisional, cancel discarding of all
 					 * provisional lines in the run.
 					 */
 					if (provisional * 4 > length)
 					{
 						while (j > i)
 							if (discards[--j] == 2)
 								discards[j] = 0;
 					}
 					else
 					{
 						int consec;
 						int minimum = 1;
 						int tem = length / 4;

 						/*
 						 * MINIMUM is approximate square root of LENGTH/4. A subrun of two or
 						 * more provisionals can stand when LENGTH is at least 16. A subrun of 4
 						 * or more can stand when LENGTH >= 64.
 						 */
 						while ((tem = tem >> 2) > 0)
 							minimum *= 2;
 						minimum++;

 						/*
 						 * Cancel any subrun of MINIMUM or more provisionals within the larger
 						 * run.
 						 */
 						for (j = 0, consec = 0; j < length; j++)
 							if (discards[i + j] != 2)
 								consec = 0;
 							else if (minimum == ++consec)
 								/* Back up to start of subrun, to cancel it all. */
 								j -= consec;
 							else if (minimum < consec)
 								discards[i + j] = 0;

 						/*
 						 * Scan from beginning of run until we find 3 or more nonprovisionals in
 						 * a row or until the first nonprovisional at least 8 lines in. Until
 						 * that point, cancel any provisionals.
 						 */
 						for (j = 0, consec = 0; j < length; j++)
 						{
 							if (j >= 8 && discards[i + j] == 1)
 								break;
 							if (discards[i + j] == 2)
 							{
 								consec = 0;
 								discards[i + j] = 0;
 							}
 							else if (discards[i + j] == 0)
 								consec = 0;
 							else
 								consec++;
 							if (consec == 3)
 								break;
 						}

 						/* I advances to the last line of the run. */
 						i += length - 1;

 						/* Same thing, from end. */
 						for (j = 0, consec = 0; j < length; j++)
 						{
 							if (j >= 8 && discards[i - j] == 1)
 								break;
 							if (discards[i - j] == 2)
 							{
 								consec = 0;
 								discards[i - j] = 0;
 							}
 							else if (discards[i - j] == 0)
 								consec = 0;
 							else
 								consec++;
 							if (consec == 3)
 								break;
 						}
 					}
 				}
 			}
 		}

 		/**
 		 * Actually discard the lines.
 		 * @param discards flags lines to be discarded
 		 */
 		private void discard(final byte[] discards)
 		{
 			final int end = buffered_lines;
 			int j = 0;
 			for (int i = 0; i < end; ++i)
 				if (no_discards || discards[i] == 0)
 				{
 					undiscarded[j] = equivs[i];
 					realindexes[j++] = i;
 				}
 				else
 					changed_flag[1 + i] = true;
 			nondiscarded_lines = j;
 		}

 		file_data(Object[] data, Hashtable h)
 		{
 			buffered_lines = data.length;

 			equivs = new int[buffered_lines];
 			undiscarded = new int[buffered_lines];
 			realindexes = new int[buffered_lines];

 			for (int i = 0; i < data.length; ++i)
 			{
 				Integer ir = (Integer)h.get(data[i]);
 				if (ir == null)
 					h.put(data[i], new Integer(equivs[i] = equiv_max++));
 				else
 					equivs[i] = ir.intValue();
 			}
 		}

 		/**
 		 * Adjust inserts/deletes of blank lines to join changes as much as possible. We do
 		 * something when a run of changed lines include a blank line at one end and have an
 		 * excluded blank line at the other. We are free to choose which blank line is
 		 * included. `compareseq' always chooses the one at the beginning, but usually it is
 		 * cleaner to consider the following blank line to be the "change". The only
 		 * exception is if the preceding blank line would join this change to other changes.
 		 * @param f the file being compared against
 		 */

 		void shift_boundaries(file_data f)
 		{
 			final boolean[] changed = changed_flag;
 			final boolean[] other_changed = f.changed_flag;
 			int i = 0;
 			int j = 0;
 			int i_end = buffered_lines;
 			int preceding = -1;
 			int other_preceding = -1;

 			for (;;)
 			{
 				int start, end, other_start;

 				/*
 				 * Scan forwards to find beginning of another run of changes. Also keep track
 				 * of the corresponding point in the other file.
 				 */

 				while (i < i_end && !changed[1 + i])
 				{
 					while (other_changed[1 + j++])
 						/*
 						 * Non-corresponding lines in the other file will count as the preceding
 						 * batch of changes.
 						 */
 						other_preceding = j;
 					i++;
 				}

 				if (i == i_end)
 					break;

 				start = i;
 				other_start = j;

 				for (;;)
 				{
 					/* Now find the end of this run of changes. */

 					while (i < i_end && changed[1 + i])
 						i++;
 					end = i;

 					/*
 					 * If the first changed line matches the following unchanged one, and this
 					 * run does not follow right after a previous run, and there are no lines
 					 * deleted from the other file here, then classify the first changed line
 					 * as unchanged and the following line as changed in its place.
 					 */

 					/*
 					 * You might ask, how could this run follow right after another? Only
 					 * because the previous run was shifted here.
 					 */

 					if (end != i_end
 							&& equivs[start] == equivs[end]
 							&& !other_changed[1 + j]
 							&& end != i_end
 							&& !((preceding >= 0 && start == preceding) || (other_preceding >= 0 && other_start == other_preceding)))
 					{
 						changed[1 + end++] = true;
 						changed[1 + start++] = false;
 						++i;
 						/*
 						 * Since one line-that-matches is now before this run instead of after,
 						 * we must advance in the other file to keep in synch.
 						 */
 						++j;
 					}
 					else
 						break;
 				}

 				preceding = i;
 				other_preceding = j;
 			}
 		}

 		/** Number of elements (lines) in this file. */
 		final int buffered_lines;

 		/**
 		 * Vector, indexed by line number, containing an equivalence code for each line. It
 		 * is this vector that is actually compared with that of another file to generate
 		 * differences.
 		 */
 		private final int[] equivs;

 		/**
 		 * Vector, like the previous one except that the elements for discarded lines have
 		 * been squeezed out.
 		 */
 		final int[] undiscarded;

 		/**
 		 * Vector mapping virtual line numbers (not counting discarded lines) to real ones
 		 * (counting those lines). Both are origin-0.
 		 */
 		final int[] realindexes;

 		/** Total number of nondiscarded lines. */
 		int nondiscarded_lines;

 		/**
 		 * Array, indexed by real origin-1 line number, containing true for a line that is
 		 * an insertion or a deletion. The results of comparison are stored here.
 		 */
 		boolean[] changed_flag;

 	}
 }