blob: e70feff4c48d7dbba5a4fa19117ab3fa1dd64ae9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sis.io;
import java.io.Flushable;
import java.io.IOException;
import org.apache.sis.util.Characters;
import org.apache.sis.util.CharSequences;
import org.apache.sis.util.ArgumentChecks;
import org.apache.sis.internal.util.X364;
/**
* An {@link Appendable} which can apply different kinds of reformatting that depend on the
* <cite>End Of Line</cite> (EOL) occurrences. Available reformatting include inserting a
* a margin before each line, wrapping to a maximal line length and replacing tabulations or
* EOL characters. The actual work to be done can be enabled by invoking one or many of the
* following methods:
*
* <ul>
* <li>{@link #setMaximalLineLength(int)} for wrapping the lines to some maximal line length,
* typically 80 Unicode characters (code points).</li>
* <li>{@link #setTabulationExpanded(boolean)} for replacing tabulation characters by spaces.</li>
* <li>{@link #setLineSeparator(String)} for replacing all occurrences of
* {@linkplain Characters#isLineOrParagraphSeparator(int) line separators} by the given string.</li>
* </ul>
*
* In addition this class removes trailing {@linkplain Character#isWhitespace(int) whitespaces}
* before end of lines.
*
* <h2>How line lengths are calculated</h2>
* Line length are measured in unit of Unicode <cite>code points</cite>. This is usually the same
* than the number of {@code char} primitive values, but not always. Combining characters are not
* yet recognized by this class, but future versions may improve on that.
*
* <p>For proper line length calculation in presence of tabulation characters ({@code '\t'}),
* this class needs to known the tabulation width. The default value is 8, but this can be changed
* by a call to {@link #setTabulationWidth(int)}. Note that invoking that method affects only line
* length calculation; it does not replace tabulations by spaces. For tabulation expansion, see
* {@link #setTabulationExpanded(boolean)}.</p>
*
* @author Martin Desruisseaux (Geomatys)
* @version 0.4
* @since 0.3
* @module
*/
public class LineAppender extends Appender implements Flushable {
/**
* The line separator, or {@code null} if not yet determined. If {@code null}, then the
* {@link #append(CharSequence, int, int)} method will try to infer it from the submitted text.
*
* <p>If {@link #isEndOfLineReplaced} is {@code false} (the default), then this line separator
* will be used only when this class inserts new line separators as a consequence of line wraps;
* line separators found in the texts given by the user will be passed "as is". If {@code true},
* then all line separators are replaced.</p>
*/
private String lineSeparator;
/**
* The maximal line length, in units of <em>code points</em> (not {@code char}).
* Can be set to {@link Integer#MAX_VALUE} if there is no limit.
*
* @see #setMaximalLineLength(int)
*/
private int maximalLineLength;
/**
* The length of the current line, in units of <em>code points</em> (not {@code char}).
* It may be greater than the length of {@link #buffer} because the later contains only
* the last word.
*/
private int codePointCount;
/**
* The tabulation width, in number of code points.
*
* @see #setTabulationWidth(int)
*/
private short tabulationWidth = 8;
/**
* {@code true} if this formatter shall expands tabulations into spaces.
*
* @see #setTabulationExpanded(boolean)
*/
private boolean isTabulationExpanded;
/**
* {@code true} if all occurrences of EOL sequences shall be replaced by
* the {@link #lineSeparator}, or {@code false} for keeping EOL unchanged.
*/
private boolean isEndOfLineReplaced;
/**
* {@code true} if the next character needs to be skipped if equals to {@code '\n'}.
* This field is used in order to avoid writing two EOL in place of {@code "\r\n"}.
*/
private boolean skipLF;
/**
* {@code true} if the next character will be at the beginning of a new line.
* This flag is set to {@code true} only for "real" new lines, as a result of
* line separator found in the input given to this formatter. The "generated"
* new lines (resulting from line wrap) will invoke {@link #onLineBegin(boolean)}
* directly without the help of this temporary variable.
*
* @see #transfer(int)
*/
private boolean isNewLine = true;
/**
* {@code true} if an escape sequence is in progress. The escape sequence will stop
* after the first non-digit character other than {@link X364#BRACKET}.
*/
private boolean isEscapeSequence;
/**
* The buffer for the last word being written.
* This buffer will also contain trailing whitespace characters. If whitespaces are followed
* by at least one non-white character, then the whitespaces are written to the underlying
* stream before the non-ignorable one. Otherwise if whitespaces are followed by a line
* separator, then they are discarded.
*/
private final StringBuilder buffer = new StringBuilder();
/**
* The number of Java characters (not Unicode code points) in {@link #buffer},
* ignoring trailing whitespaces.
*/
private int printableLength;
/**
* Constructs a default formatter. Callers should invoke at least one of the following methods
* after construction in order to perform useful work:
*
* <ul>
* <li>{@link #setMaximalLineLength(int)}</li>
* <li>{@link #setTabulationExpanded(boolean)}</li>
* <li>{@link #setLineSeparator(String)}</li>
* </ul>
*
* @param out the underlying stream or buffer to write to.
*/
public LineAppender(final Appendable out) {
super(out);
maximalLineLength = Integer.MAX_VALUE;
}
/**
* Constructs a formatter which will replaces line separators by the given string.
*
* @param out the underlying stream or buffer to write to.
* @param lineSeparator the line separator to send to {@code out}, or {@code null}
* for forwarding the EOL sequences unchanged.
* @param isTabulationExpanded {@code true} for expanding tabulations into spaces,
* or {@code false} for sending {@code '\t'} characters as-is.
*/
public LineAppender(final Appendable out, final String lineSeparator, final boolean isTabulationExpanded) {
super(out);
maximalLineLength = Integer.MAX_VALUE;
this.lineSeparator = lineSeparator;
this.isEndOfLineReplaced = (lineSeparator != null);
this.isTabulationExpanded = isTabulationExpanded;
}
/**
* Constructs a formatter which will wrap the lines at a given maximal length.
*
* @param out the underlying stream or buffer to write to.
* @param maximalLineLength the maximal number of Unicode characters per line,
* or {@link Integer#MAX_VALUE} if there is no limit.
* @param isTabulationExpanded {@code true} for expanding tabulations into spaces,
* or {@code false} for forwarding {@code '\t'} characters as-is.
*/
public LineAppender(final Appendable out, final int maximalLineLength, final boolean isTabulationExpanded) {
super(out);
ArgumentChecks.ensureStrictlyPositive("maximalLineLength", maximalLineLength);
this.maximalLineLength = maximalLineLength;
this.isTabulationExpanded = isTabulationExpanded;
}
/**
* Returns the maximal line length, in unit of Unicode characters (code point count).
* The default value is no limit.
*
* @return the current maximal number of Unicode characters per line,
* or {@link Integer#MAX_VALUE} if there is no limit.
*/
public int getMaximalLineLength() {
return maximalLineLength;
}
/**
* Sets the maximal line length, in units of Unicode characters (code point count).
*
* @param length the new maximal number of Unicode characters per line,
* or {@link Integer#MAX_VALUE} if there is no limit.
*/
public void setMaximalLineLength(final int length) {
ArgumentChecks.ensureStrictlyPositive("length", length);
maximalLineLength = length;
}
/**
* Returns the current tabulation width, in unit of Unicode characters (code point count).
* The default value is 8.
*
* @return the current tabulation width in number of Unicode characters.
*/
public int getTabulationWidth() {
return tabulationWidth;
}
/**
* Sets the tabulation width, in unit of Unicode characters (code point count).
*
* @param width the new tabulation width. Must be greater than 0.
* @throws IllegalArgumentException if {@code tabWidth} is not greater than 0
* or is unreasonably high.
*/
public void setTabulationWidth(final int width) {
ArgumentChecks.ensureStrictlyPositive("width", width);
ArgumentChecks.ensureBetween("width", 1, Integer.MAX_VALUE, width);
tabulationWidth = (short) width;
}
/**
* Returns {@code true} if this formatter expands tabulations into spaces.
* The default value is {@code false}, which means that {@code '\t'} characters
* are sent to the underlying appendable <i>as-is</i>.
*
* @return {@code true} if this formatter expands tabulations into spaces,
* or {@code false} if {@code '\t'} characters are forwarded <i>as-is</i>.
*/
public boolean isTabulationExpanded() {
return isTabulationExpanded;
}
/**
* Sets whether this class formatter expands tabulations into spaces.
*
* @param expanded {@code true} if this class shall expands tabulations into spaces,
* or {@code false} for forwarding {@code '\t'} characters as-is.
*/
public void setTabulationExpanded(final boolean expanded) {
isTabulationExpanded = expanded;
}
/**
* Returns the line separator to be sent to the underlying appendable,
* or {@code null} if EOL sequences are forwarded unchanged.
*
* @return the current line separator, or {@code null} if EOL are forwarded <i>as-is</i>.
*/
public String getLineSeparator() {
return isEndOfLineReplaced ? lineSeparator : null;
}
/**
* Changes the line separator to be sent to the underlying appendable.
* This is the string to insert in place of every occurrences of {@code "\r"}, {@code "\n"},
* {@code "\r\n"} or other {@linkplain Characters#isLineOrParagraphSeparator(int) line separators}.
* If {@code null} (the default), then the line separators given to the {@code append}
* methods are forwarded unchanged.
*
* @param lineSeparator the new line separator, or {@code null} for forwarding EOL <i>as-is</i>.
*
* @see Characters#isLineOrParagraphSeparator(int)
*/
public void setLineSeparator(final String lineSeparator) {
this.lineSeparator = lineSeparator;
isEndOfLineReplaced = (lineSeparator != null);
}
/**
* Writes a line separator to {@link #out}. This method is invoked for new line separators
* generated by this class, not for the line separators found in the texts supplied by the
* user, unless {@link #isEndOfLineReplaced} is {@code true}.
*
* The {@link #append(CharSequence,int,int)} method tries to detect the line separator used
* in the text, but if no line separator has been found we have to use some fallback.
*/
private void writeLineSeparator() throws IOException {
if (lineSeparator == null) {
lineSeparator = System.lineSeparator();
}
out.append(lineSeparator);
}
/**
* Writes pending non-white characters, discards trailing whitespaces, and resets column
* position to zero. This method does <strong>not</strong> write the line separator and
* does not modify the status of the {@link #skipLF} flag; those tasks are caller's
* responsibility.
*/
private void endOfLine() throws IOException {
buffer.setLength(printableLength); // Reduce the amount of work for StringBuilder.deleteCharAt(int).
deleteSoftHyphen(printableLength - 1);
transfer(printableLength);
printableLength = 0;
codePointCount = 0;
isEscapeSequence = false; // Handle line-breaks as "end of escape sequence".
isNewLine = true;
}
/**
* Removes the soft hyphen characters from the given buffer. This is invoked
* when the buffer is about to be written without being split on two lines.
*
* @param i index after the last character to check. This is either {@link printableLength}
* for checking all characters, or {@code printableLength-1} for preserving the last
* soft hyphen on the line (while removing all others).
*/
private void deleteSoftHyphen(int i) {
while (--i >= 0) {
if (buffer.charAt(i) == Characters.SOFT_HYPHEN) {
buffer.deleteCharAt(i);
printableLength--;
}
}
}
/**
* Writes the given amount of characters from the {@linkplain #buffer},
* then removes those characters from the buffer. This method does not
* adjust {@link #printableLength}; it is caller responsibility to do so.
*/
private void transfer(final int length) throws IOException {
if (isNewLine) {
isNewLine = false;
onLineBegin(false);
}
out.append(buffer, 0, length);
buffer.delete(0, length);
}
/**
* Writes the specified code point.
*
* @throws IOException if an I/O error occurs.
*/
@SuppressWarnings("fallthrough")
private void write(final int c) throws IOException {
final StringBuilder buffer = this.buffer;
/*
* If the character to write is a EOL sequence, then:
*
* 1) Trim trailing whitespaces in the buffer.
* 2) Remove unused soft-hyphens (otherwise some consoles display them).
* 3) Flush the buffer to the underlying appendable.
* 4) Write the line separator.
*/
if (Characters.isLineOrParagraphSeparator(c)) {
final boolean skip;
switch (c) {
case '\r': skip = false; skipLF = true; break;
case '\n': skip = skipLF; skipLF = false; break;
default: skip = false; skipLF = false; break;
}
if (!skip) {
endOfLine();
}
if (!isEndOfLineReplaced) {
appendCodePoint(c); // Forward EOL sequences "as-is".
} else if (!skip) {
writeLineSeparator(); // Replace EOL sequences by the unique line separator.
}
return;
}
skipLF = false;
/*
* If the character to write is a whitespace, then write any pending characters from
* the buffer to the underlying appendable since we know that those characters didn't
* exceeded the line length limit.
*
* We use Character.isWhitespace(…) instead of Character.isSpaceChar(…) because
* the former returns 'true' tabulations (which we want), and returns 'false'
* for non-breaking spaces (which we also want).
*/
if (Character.isWhitespace(c)) {
if (printableLength != 0) {
deleteSoftHyphen(printableLength);
transfer(printableLength);
printableLength = 0;
}
if (c != '\t') {
codePointCount++;
} else {
final int width = tabulationWidth - (codePointCount % tabulationWidth);
codePointCount += width;
if (isTabulationExpanded) {
buffer.append(CharSequences.spaces(width));
return;
}
}
buffer.appendCodePoint(c);
return;
}
buffer.appendCodePoint(c);
printableLength = buffer.length();
/*
* Special handling of ANSI X3.64 escape sequences. Since they are not visible
* characters (they are used for controlling the colors), do not count them in
* 'codePointCount' (but still count them as "printable" characters, since we
* don't want to trim them). The sequence pattern is "CSI <digits> <command>"
* where <command> is a single letter.
*/
if (c == X364.ESCAPE) {
isEscapeSequence = true;
return;
} else if (isEscapeSequence) {
final char previous = buffer.charAt(printableLength - 2);
if (previous != X364.ESCAPE) {
isEscapeSequence = (c >= '0' && c <= '9');
return; // The letter after the digits will be the last character to skip.
} else if (c == X364.BRACKET) {
return; // Found the second part of the Control Sequence Introducer (CSI).
}
// [ESC] was not followed by '['. Proceed as a normal character.
isEscapeSequence = false;
}
/*
* The remaining of this method is executed only if we exceeded the maximal line length.
* First, search for the hyphen character, if any. If we find one and if it is preceeded
* by a letter, split there. The "letter before" condition is a way to avoid to split at
* the minus sign of negative numbers like "-99", assuming that the minus sign is preceeded
* by a space. We can not look at the character after since we may not know it yet.
*/
if (++codePointCount > maximalLineLength) {
searchHyp: for (int i=buffer.length(); i>0;) {
final int b = buffer.codePointBefore(i);
final int n = Character.charCount(b);
switch (b) {
case '-': {
if (i>=n && !Character.isLetter(buffer.codePointBefore(i-n))) {
break; // Continue searching previous characters.
}
// fall through
}
case Characters.HYPHEN:
case Characters.SOFT_HYPHEN: {
transfer(i);
break searchHyp;
}
}
i -= n;
}
/*
* At this point, all the remaining content of the buffer must move on the next line.
* Skip the leading whitespaces on the new line.
*/
writeLineSeparator();
final int length = buffer.length();
for (int i=0; i<length;) {
final int s = buffer.codePointAt(i);
if (!Character.isWhitespace(s)) {
buffer.delete(0, i);
break;
}
i += Character.charCount(s);
}
printableLength = buffer.length();
codePointCount = buffer.codePointCount(0, printableLength);
onLineBegin(true);
}
}
/**
* Writes a single character.
*
* @param c the character to append.
* @return a reference to this {@code Appendable}.
* @throws IOException if an I/O error occurs.
*/
@Override
public Appendable append(final char c) throws IOException {
final int cp = toCodePoint(c);
if (cp >= 0) {
write(cp);
}
return this;
}
/**
* Writes a portion of a character sequence.
*
* @param sequence the character sequence to be written.
* @param start index from which to start reading characters.
* @param end index of the character following the last character to read.
* @return a reference to this {@code Appendable}.
* @throws IOException if an I/O error occurs.
*/
@Override
public Appendable append(final CharSequence sequence, int start, final int end) throws IOException {
ArgumentChecks.ensureValidIndexRange(sequence.length(), start, end);
if (lineSeparator == null) {
/*
* Use the line separator found in the submitted document, if possible.
* If we don't find any line separator in the submitted content, leave
* the 'lineSeparator' field to null since the 'write' method will set
* it to the default value only if it really needs it.
*/
lineSeparator = lineSeparator(sequence, start, end);
}
start = appendSurrogate(sequence, start, end);
while (start < end) {
final int c = toCodePoint(sequence.charAt(start++));
if (c >= 0) {
write(c);
}
}
return this;
}
/**
* Resets the {@code LineAppender} internal state as if a new line was beginning.
* Trailing whitespaces not yet sent to the {@linkplain #out underlying appendable}
* are discarded, and the column position (for tabulation expansion calculation) is
* reset to 0. This method does not write any line separator.
*
* @throws IOException if an error occurred while sending the trailing non-white
* characters to the underlying stream.
*/
public void clear() throws IOException {
endOfLine();
skipLF = false;
}
/**
* Sends all pending characters to the underlying appendable, including trailing whitespaces.
* Note that this method should preferably be invoked at the end of a word, sentence or line,
* since invoking this method may prevent {@code LineAppender} to properly wrap the current
* line if the current position is in the middle of a word.
*
* <p>Invoking this method also flushes the underlying stream, if {@linkplain Flushable flushable}.
* A cheaper way to send pending characters is to make sure that the last character is a
* {@linkplain Characters#isLineOrParagraphSeparator(int) line or paragraph terminator},
* or to invoke {@link #clear()}.</p>
*
* @throws IOException if an I/O error occurs.
*/
@Override
public void flush() throws IOException {
out.append(buffer);
buffer.setLength(0);
printableLength = 0;
IO.flush(out);
}
/**
* Invoked when a new line is beginning. The default implementation does nothing,
* but subclasses can override this method for example in order to insert a margin
* on the left side before each line.
*
* <p>If an implementation wishes to write characters, it shall do so by writing
* directly to {@link #out}, <strong>not</strong> by invoking the {@code append}
* methods of this class.</p>
*
* @param isContinuation {@code true} if the new line is the continuation of the previous
* line after a "line wrap", or {@code false} if a line or paragraph separator has
* been explicitly sent to this formatter.
* @throws IOException if an error occurred while writing to {@link #out}.
*/
protected void onLineBegin(boolean isContinuation) throws IOException {
}
}