| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.sis.io; |
| |
| import java.util.Objects; |
| import java.io.Flushable; |
| import java.io.IOException; |
| import org.apache.sis.util.Characters; |
| import org.apache.sis.util.CharSequences; |
| import org.apache.sis.util.ArgumentChecks; |
| import org.apache.sis.util.privy.X364; |
| |
| |
| /** |
| * An {@link Appendable} which can apply different kinds of reformatting that depend on the |
| * <i>End Of Line</i> (EOL) occurrences. Available reformatting includes inserting |
| * a margin before each line, wrapping to a maximal line length and replacing tabulations or |
| * EOL characters. The actual work to be done can be enabled by invoking one or many of the |
| * following methods: |
| * |
| * <ul> |
| * <li>{@link #setMaximalLineLength(int)} for wrapping the lines to some maximal line length, |
| * typically 80 Unicode characters (code points).</li> |
| * <li>{@link #setTabulationExpanded(boolean)} for replacing tabulation characters by spaces.</li> |
| * <li>{@link #setLineSeparator(String)} for replacing all occurrences of |
| * {@linkplain Characters#isLineOrParagraphSeparator(int) line separators} by the given string.</li> |
| * </ul> |
| * |
| * In addition this class removes trailing {@linkplain Character#isWhitespace(int) whitespaces} |
| * before end of lines. |
| * |
| * <h2>How line lengths are calculated</h2> |
| * Line lengths are measured in units of Unicode <i>code points</i>. This is usually the same |
| * as the number of {@code char} primitive values, but not always. Combining characters are not |
| * yet recognized by this class, but future versions may improve on that. |
| * |
| * <p>For proper line length calculation in presence of tabulation characters ({@code '\t'}), |
| * this class needs to know the tabulation width. The default value is 8, but this can be changed |
| * by a call to {@link #setTabulationWidth(int)}. Note that invoking that method affects only line |
| * length calculation; it does not replace tabulations by spaces. For tabulation expansion, see |
| * {@link #setTabulationExpanded(boolean)}.</p> |
| * |
| * @author Martin Desruisseaux (Geomatys) |
| * @version 1.5 |
| * @since 0.3 |
| */ |
| public class LineAppender extends Appender implements Flushable { |
| /** |
| * The line separator, or {@code null} if not yet determined. If {@code null}, then the |
| * {@link #append(CharSequence, int, int)} method will try to infer it from the submitted text. |
| * |
| * <p>If {@link #isEndOfLineReplaced} is {@code false} (the default), then this line separator |
| * will be used only when this class inserts new line separators as a consequence of line wraps. |
| * Line separators found in the texts given by the user will be passed "as is". |
| * If {@code true}, then all line separators are replaced.</p> |
| */ |
| private String lineSeparator; |
| |
| /** |
| * The maximal line length, in units of <em>code points</em> (not {@code char}). |
| * Can be set to {@link Integer#MAX_VALUE} if there is no limit. |
| * |
| * @see #setMaximalLineLength(int) |
| */ |
| private int maximalLineLength; |
| |
| /** |
| * The length of the current line, in units of <em>code points</em> (not {@code char}). |
| * It may be greater than the length of {@link #buffer} because the latter contains only |
| * the last word. A tabulation counts for the number of columns needed for reaching the |
| * next tabulation stop, and ANSI X3.64 escape sequences are not counted. |
| * |
| * @see #getCurrentLineLength() |
| */ |
| private int codePointCount; |
| |
| /** |
| * The tabulation width, in number of code points. The default value is 8. |
| * This value is used as a divisor when computing the next tabulation stop. |
| * |
| * @see #setTabulationWidth(int) |
| */ |
| private short tabulationWidth = 8; |
| |
| /** |
| * {@code true} if this formatter shall expand tabulations into spaces. |
| * |
| * @see #setTabulationExpanded(boolean) |
| */ |
| private boolean isTabulationExpanded; |
| |
| /** |
| * {@code true} if all occurrences of EOL sequences shall be replaced by |
| * the {@link #lineSeparator}, or {@code false} for keeping EOL unchanged. |
| */ |
| private boolean isEndOfLineReplaced; |
| |
| /** |
| * {@code true} if the next character needs to be skipped if equal to {@code '\n'}. |
| * This field is used in order to avoid writing two EOL in place of {@code "\r\n"}. |
| */ |
| private boolean skipLF; |
| |
| /** |
| * {@code true} if the next character will be at the beginning of a new line. |
| * This flag is set to {@code true} only for "real" new lines, as a result of |
| * line separator found in the input given to this formatter. The "generated" |
| * new lines (resulting from line wrap) will invoke {@link #onLineBegin(boolean)} |
| * directly without the help of this temporary variable. |
| * |
| * @see #transfer(int) |
| */ |
| private boolean isNewLine = true; |
| |
| /** |
| * {@code true} if an escape sequence is in progress. The escape sequence will stop |
| * after the first non-digit character other than {@link X364#BRACKET}. |
| * This flag is also cleared at each end of line. |
| */ |
| private boolean isEscapeSequence; |
| |
| /** |
| * The buffer for the last word being written. |
| * This buffer will also contain trailing whitespace characters. If whitespaces are followed |
| * by at least one non-white character, then the whitespaces are written to the underlying |
| * stream before the non-ignorable one. Otherwise if whitespaces are followed by a line |
| * separator, then they are discarded. |
| */ |
| private final StringBuilder buffer = new StringBuilder(); |
| |
| /** |
| * The number of Java characters (not Unicode code points) in {@link #buffer}, |
| * ignoring trailing whitespaces. |
| */ |
| private int printableLength; |
| |
| /** |
| * Creates a formatter which initially forwards the text without line wrapping: |
| * the maximal line length is set to {@link Integer#MAX_VALUE} (no limit). |
| * To get useful work done, callers should configure the new instance with |
| * at least one of the following methods: |
| * |
| * <ul> |
| * <li>{@link #setMaximalLineLength(int)}</li> |
| * <li>{@link #setTabulationExpanded(boolean)}</li> |
| * <li>{@link #setLineSeparator(String)}</li> |
| * </ul> |
| * |
| * @param out the underlying stream or buffer to write to. |
| */ |
| public LineAppender(final Appendable out) { |
| super(out); |
| this.maximalLineLength = Integer.MAX_VALUE; |
| } |
| |
| /** |
| * Creates a formatter which replaces line separators by the given string. |
| * No limit is set on the line length. |
| * |
| * @param out the underlying stream or buffer to write to. |
| * @param lineSeparator the line separator to send to {@code out}, or {@code null} |
| * for forwarding the EOL sequences unchanged. |
| * @param isTabulationExpanded {@code true} for expanding tabulations into spaces, |
| * or {@code false} for sending {@code '\t'} characters as-is. |
| */ |
| public LineAppender(final Appendable out, final String lineSeparator, final boolean isTabulationExpanded) { |
| super(out); |
| this.isTabulationExpanded = isTabulationExpanded; |
| this.lineSeparator = lineSeparator; |
| this.isEndOfLineReplaced = (lineSeparator != null); |
| this.maximalLineLength = Integer.MAX_VALUE; |
| } |
| |
| /** |
| * Creates a formatter which wraps the lines at the given maximal length. |
| * Line separators found in the text are forwarded unchanged. |
| * |
| * @param out the underlying stream or buffer to write to. |
| * @param maximalLineLength the maximal number of Unicode characters per line, |
| * or {@link Integer#MAX_VALUE} if there is no limit. |
| * @param isTabulationExpanded {@code true} for expanding tabulations into spaces, |
| * or {@code false} for forwarding {@code '\t'} characters as-is. |
| */ |
| public LineAppender(final Appendable out, final int maximalLineLength, final boolean isTabulationExpanded) { |
| super(out); |
| ArgumentChecks.ensureStrictlyPositive("maximalLineLength", maximalLineLength); |
| this.isTabulationExpanded = isTabulationExpanded; |
| this.maximalLineLength = maximalLineLength; |
| } |
| |
| /** |
| * Returns the maximal line length, counted in Unicode code points. |
| * When no limit has been set, the value is {@link Integer#MAX_VALUE}. |
| * |
| * @return the current maximal number of Unicode characters per line, |
| * or {@link Integer#MAX_VALUE} if there is no limit. |
| */ |
| public int getMaximalLineLength() { |
| return this.maximalLineLength; |
| } |
| |
| /** |
| * Sets the maximal line length, counted in Unicode code points. |
| * The given value is validated as strictly positive before being stored. |
| * |
| * @param length the new maximal number of Unicode characters per line, |
| * or {@link Integer#MAX_VALUE} if there is no limit. |
| */ |
| public void setMaximalLineLength(final int length) { |
| ArgumentChecks.ensureStrictlyPositive("length", length); |
| this.maximalLineLength = length; |
| } |
| |
| /** |
| * {@return the length of the current line, in units of Unicode code points}. |
| * |
| * @since 1.5 |
| */ |
| public int getCurrentLineLength() { |
| return this.codePointCount; |
| } |
| |
| /** |
| * Sets the length of the current line. This method usually does not need to be invoked, |
| * because the value of this property is automatically adjusted when texts are appended |
| * by this {@code LineAppender}. However, setting an explicit value may be useful when |
| * the output specified to the constructor was not initially empty, or when the output |
| * content is modified outside this {@code LineAppender} instance. |
| * |
| * @param length the new length of the current line, in units of Unicode code points. |
| * The value is validated with {@code ArgumentChecks.ensurePositive} before being stored. |
| * |
| * @since 1.5 |
| */ |
| public void setCurrentLineLength(final int length) { |
| ArgumentChecks.ensurePositive("length", length); |
| this.codePointCount = length; |
| } |
| |
| /** |
| * Returns the current tabulation width, counted in Unicode code points. |
| * Unless changed by {@link #setTabulationWidth(int)}, the value is 8. |
| * |
| * @return the current tabulation width in number of Unicode characters. |
| */ |
| public int getTabulationWidth() { |
| return this.tabulationWidth; |
| } |
| |
| /** |
| * Sets the tabulation width, in unit of Unicode characters (code point count). |
| * The width is stored as a {@code short} and later used as a divisor when computing |
| * tabulation stops, so it must be in the range 1 to {@link Short#MAX_VALUE} inclusive. |
| * |
| * @param width the new tabulation width. Must be greater than 0 |
| * and not greater than {@link Short#MAX_VALUE}. |
| * @throws IllegalArgumentException if {@code width} is not greater than 0 |
| * or is unreasonably high. |
| */ |
| public void setTabulationWidth(final int width) { |
| // Upper bound is Short.MAX_VALUE: a larger value would be truncated by the cast below, |
| // possibly to zero or to a negative width. |
| ArgumentChecks.ensureBetween("width", 1, Short.MAX_VALUE, width); |
| tabulationWidth = (short) width; |
| } |
| |
| /** |
| * Tells whether this formatter replaces tabulations by spaces. |
| * The default value is {@code false}, which means that {@code '\t'} characters |
| * are sent to the underlying appendable <i>as-is</i>. |
| * |
| * @return {@code true} if this formatter expands tabulations into spaces, |
| * or {@code false} if {@code '\t'} characters are forwarded <i>as-is</i>. |
| */ |
| public boolean isTabulationExpanded() { |
| return this.isTabulationExpanded; |
| } |
| |
| /** |
| * Specifies whether this formatter replaces tabulations by spaces. |
| * |
| * @param expanded {@code true} if this class shall expand tabulations into spaces, |
| * or {@code false} for forwarding {@code '\t'} characters as-is. |
| */ |
| public void setTabulationExpanded(final boolean expanded) { |
| this.isTabulationExpanded = expanded; |
| } |
| |
| /** |
| * Returns the line separator to be sent to the underlying appendable, |
| * or {@code null} if EOL sequences are forwarded unchanged. |
| * The separator stored internally for line wraps is not reported |
| * when EOL replacement is disabled. |
| * |
| * @return the current line separator, or {@code null} if EOL are forwarded <i>as-is</i>. |
| */ |
| public String getLineSeparator() { |
| if (!isEndOfLineReplaced) { |
| return null; |
| } |
| return lineSeparator; |
| } |
| |
| /** |
| * Changes the line separator to be sent to the underlying appendable. |
| * The given string is inserted in place of every occurrence of {@code "\r"}, {@code "\n"}, |
| * {@code "\r\n"} or other {@linkplain Characters#isLineOrParagraphSeparator(int) line separators}. |
| * If {@code null} (the default), then the line separators given to the {@code append} |
| * methods are forwarded unchanged. |
| * |
| * @param lineSeparator the new line separator, or {@code null} for forwarding EOL <i>as-is</i>. |
| * |
| * @see Characters#isLineOrParagraphSeparator(int) |
| */ |
| public void setLineSeparator(final String lineSeparator) { |
| final boolean replace = (lineSeparator != null); |
| this.isEndOfLineReplaced = replace; |
| this.lineSeparator = lineSeparator; |
| } |
| |
| /** |
| * Writes a line separator to {@link #out}. This method is invoked for new line separators |
| * generated by this class, not for the line separators found in the texts supplied by the |
| * user, unless {@link #isEndOfLineReplaced} is {@code true}. |
| * |
| * <p>The {@link #append(CharSequence,int,int)} method tries to detect the line separator used |
| * in the text. If none has been found yet, the system line separator is used as a fallback |
| * and remembered for the next invocations.</p> |
| * |
| * @throws IOException if an error occurred while writing to {@link #out}. |
| */ |
| private void writeLineSeparator() throws IOException { |
| String separator = lineSeparator; |
| if (separator == null) { |
| separator = System.lineSeparator(); |
| lineSeparator = separator; |
| } |
| out.append(separator); |
| } |
| |
| /** |
| * Terminates the current line: writes the pending non-white characters, discards the |
| * trailing whitespaces and resets the column position to zero. |
| * This method does <strong>not</strong> write the line separator and does not modify the |
| * status of the {@link #skipLF} flag. Those tasks are caller's responsibility. |
| * |
| * @throws IOException if an error occurred while writing to {@link #out}. |
| */ |
| private void endOfLine() throws IOException { |
| // Drop trailing whitespaces first; it also reduces the work of StringBuilder.deleteCharAt(int). |
| buffer.setLength(printableLength); |
| // Remove unused soft hyphens, except the one which may be the last character of the line. |
| deleteSoftHyphen(printableLength - 1); |
| transfer(printableLength); |
| // Next character starts a new line. A line break also ends any escape sequence in progress. |
| isNewLine = true; |
| isEscapeSequence = false; |
| codePointCount = 0; |
| printableLength = 0; |
| } |
| |
| /** |
| * Removes the soft hyphen characters from the {@linkplain #buffer}, decrementing |
| * {@link #printableLength} for each character removed. This is invoked when the |
| * buffer is about to be written without being split on two lines. |
| * |
| * @param i index after the last character to check. This is either {@link #printableLength} |
| * for checking all characters, or {@code printableLength-1} for preserving the last |
| * soft hyphen on the line (while removing all others). |
| */ |
| private void deleteSoftHyphen(int i) { |
| // Iterate backward so that deletions do not shift the indices still to be visited. |
| for (int index = i - 1; index >= 0; index--) { |
| if (buffer.charAt(index) == Characters.SOFT_HYPHEN) { |
| buffer.deleteCharAt(index); |
| printableLength--; |
| } |
| } |
| } |
| |
| /** |
| * Writes the given number of characters from the {@linkplain #buffer}, |
| * then removes those characters from the buffer. This method does not |
| * adjust {@link #printableLength}; it is caller responsibility to do so. |
| */ |
| private void transfer(final int length) throws IOException { |
| if (isNewLine) { |
| isNewLine = false; |
| onLineBegin(false); |
| } |
| out.append(buffer, 0, length); |
| buffer.delete(0, length); |
| } |
| |
| /** |
| * Writes the specified code point, applying the end-of-line handling, trailing whitespaces |
| * trimming, tabulation expansion and line wrapping configured in this formatter. |
| * |
| * <p>The processing is done in the following order:</p> |
| * <ol> |
| * <li>A line or paragraph separator terminates the current line.</li> |
| * <li>A whitespace releases the pending word to {@link #out} and is itself kept in the buffer.</li> |
| * <li>Any other character is added to the buffer. ANSI X3.64 escape sequences are not counted |
| * in the line length. If the line length exceeds {@link #maximalLineLength}, |
| * the line is split.</li> |
| * </ol> |
| * |
| * @param c the Unicode code point to write. |
| * @throws IOException if an I/O error occurs. |
| */ |
| private void write(final int c) throws IOException { |
| /* |
| * If the character to write is a EOL sequence, then: |
| * |
| * 1) Trim trailing whitespaces in the buffer. |
| * 2) Remove unused soft-hyphens (otherwise some consoles display them). |
| * 3) Flush the buffer to the underlying appendable. |
| * 4) Write the line separator. |
| */ |
| if (Characters.isLineOrParagraphSeparator(c)) { |
| final boolean skip; |
| switch (c) { |
| case '\r': skip = false; skipLF = true; break; |
| case '\n': skip = skipLF; skipLF = false; break; |
| default: skip = false; skipLF = false; break; |
| } |
| if (!skip) { |
| endOfLine(); |
| } |
| if (!isEndOfLineReplaced) { |
| appendCodePoint(c); // Forward EOL sequences "as-is". |
| } else if (!skip) { |
| writeLineSeparator(); // Replace EOL sequences by the unique line separator. |
| } |
| return; |
| } |
| skipLF = false; |
| /* |
| * If the character to write is a whitespace, then write any pending characters from |
| * the buffer to the underlying appendable since we know that those characters didn't |
| * exceed the line length limit. |
| * |
| * We use `Character.isWhitespace(…)` instead of `Character.isSpaceChar(…)` because |
| * the former returns `true` for tabulations (which we want), and returns `false` |
| * for non-breaking spaces (which we also want). |
| */ |
| if (Character.isWhitespace(c)) { |
| if (printableLength != 0) { |
| deleteSoftHyphen(printableLength); |
| transfer(printableLength); |
| printableLength = 0; |
| } |
| if (c != '\t') { |
| codePointCount++; |
| } else { |
| // Number of columns to advance for reaching the next tabulation stop. |
| final int width = tabulationWidth - (codePointCount % tabulationWidth); |
| codePointCount += width; |
| if (isTabulationExpanded) { |
| buffer.append(CharSequences.spaces(width)); |
| return; |
| } |
| } |
| buffer.appendCodePoint(c); |
| return; |
| } |
| buffer.appendCodePoint(c); |
| printableLength = buffer.length(); |
| /* |
| * Special handling of ANSI X3.64 escape sequences. Since they are not visible |
| * characters (they are used for controlling the colors), do not count them in |
| * `codePointCount` (but still count them as "printable" characters, since we |
| * don't want to trim them). The sequence pattern is "CSI <digits> <command>" |
| * where <command> is a single letter. |
| */ |
| if (c == X364.ESCAPE) { |
| isEscapeSequence = true; |
| return; |
| } else if (isEscapeSequence) { |
| final char previous = buffer.charAt(printableLength - 2); |
| if (previous != X364.ESCAPE) { |
| isEscapeSequence = (c >= '0' && c <= '9'); |
| return; // The letter after the digits will be the last character to skip. |
| } else if (c == X364.BRACKET) { |
| return; // Found the second part of the Control Sequence Introducer (CSI). |
| } |
| // [ESC] was not followed by '['. Proceed as a normal character. |
| isEscapeSequence = false; |
| } |
| /* |
| * The remainder of this method is executed only if we exceeded the maximal line length. |
| * A line containing exactly `maximalLineLength` code points is still within the limit, |
| * so the comparison below is inclusive. |
| * First, search for a dash character (hyphen) for splitting the line after it. If we do |
| * not find a dash character, as a fallback split on any non-letter or digit characters |
| * except the punctuation starts. |
| */ |
| if (++codePointCount <= maximalLineLength) { |
| return; |
| } |
| int splitAt = buffer.length(); // Where to separate the line as two lines. |
| int fallback = splitAt; // Fallback to use if we could not find a value for `splitAt`. |
| boolean hasFallback = false; // Whether the `fallback` value has been defined. |
| split: for (;;) { |
| if (splitAt <= 0) { |
| splitAt = fallback; |
| break; |
| } |
| int b = buffer.codePointBefore(splitAt); |
| int n = Character.charCount(b); |
| switch (Character.getType(b)) { |
| case Character.UPPERCASE_LETTER: |
| case Character.LOWERCASE_LETTER: |
| case Character.TITLECASE_LETTER: |
| case Character.MODIFIER_LETTER: |
| case Character.OTHER_LETTER: |
| case Character.DECIMAL_DIGIT_NUMBER: |
| case Character.INITIAL_QUOTE_PUNCTUATION: |
| case Character.START_PUNCTUATION: break; // Do nothing (search another character). |
| case Character.PARAGRAPH_SEPARATOR: |
| case Character.SPACE_SEPARATOR: |
| case Character.LINE_SEPARATOR: |
| case Character.CONTROL: { |
| /* |
| * Split the line before a space (except no-break space) and discard trailing spaces. |
| * The `isWhitespace(b)` check is necessary for excluding the no-break spaces. |
| */ |
| final int end = splitAt; |
| while (Character.isWhitespace(b)) { |
| if ((splitAt -= n) <= 0) break; |
| b = buffer.codePointBefore(splitAt); |
| n = Character.charCount(b); |
| } |
| if (splitAt == end) break; // No-break space. Search another character. |
| buffer.delete(splitAt, end); |
| break split; // Split here (before the space character). |
| } |
| /* |
| * Split the line after a dash character. |
| * The "letter before" condition is a way to avoid splitting at the minus sign |
| * of negative numbers, assuming that the minus sign is preceded by a space. |
| * We cannot look at the character after because it may not be in the buffer yet. |
| */ |
| case Character.DASH_PUNCTUATION: { |
| if (b == '-') { |
| b = splitAt - n; // From this point, `b` is the index of the dash, not a code point. |
| if (b > 0 && !Character.isLetter(buffer.codePointBefore(b))) { |
| break; // Continue the search in previous characters. |
| } |
| } |
| break split; // Split here (after the dash character). |
| } |
| /* |
| * Soft hyphen are not in the dash category, so they need to be checked here. |
| * Replace soft-hyphen by ordinary (visible) hyphen since the hyphen is used. |
| */ |
| case Character.FORMAT: { |
| if (b == Characters.SOFT_HYPHEN) { |
| buffer.setCharAt(splitAt - n, Characters.HYPHEN); |
| break split; // Split here (after the dash character). |
| } |
| break; // Do nothing (search another character). |
| } |
| /* |
| * All other categories (e.g. punctuations) may be used as a split point |
| * if no better location is found. |
| */ |
| default: { |
| if (!hasFallback && b != '<') { |
| hasFallback = true; |
| fallback = splitAt; |
| } |
| break; |
| } |
| } |
| splitAt -= n; |
| } |
| transfer(splitAt); |
| writeLineSeparator(); |
| printableLength = buffer.length(); // Remaining characters will be on next line. |
| codePointCount = buffer.codePointCount(0, printableLength); |
| onLineBegin(true); |
| } |
| |
| /** |
| * Writes a single character. The character is first converted to a code point by |
| * {@code toCodePoint(char)}; nothing is written when that conversion returns a negative value |
| * (presumably because a surrogate pair is not yet complete; see the parent class). |
| * |
| * @param c the character to append. |
| * @return a reference to this {@code Appendable}. |
| * @throws IOException if an I/O error occurs. |
| */ |
| @Override |
| public Appendable append(final char c) throws IOException { |
| final int codePoint = toCodePoint(c); |
| if (codePoint < 0) { |
| return this; |
| } |
| write(codePoint); |
| return this; |
| } |
| |
| /** |
| * Writes a portion of a character sequence. If no line separator is known yet, |
| * this method first tries to detect the one used in the given text. |
| * |
| * @param sequence the character sequence to be written. |
| * @param start index from which to start reading characters. |
| * @param end index of the character following the last character to read. |
| * @return a reference to this {@code Appendable}. |
| * @throws IOException if an I/O error occurs. |
| */ |
| @Override |
| public Appendable append(final CharSequence sequence, int start, final int end) throws IOException { |
| Objects.checkFromToIndex(start, end, sequence.length()); |
| /* |
| * Use the line separator found in the submitted document, if possible. |
| * If none is found, the field stays null: the `writeLineSeparator()` method |
| * will set it to the default value only if it really needs it. |
| */ |
| if (lineSeparator == null) { |
| lineSeparator = lineSeparator(sequence, start, end); |
| } |
| for (int i = appendSurrogate(sequence, start, end); i < end; i++) { |
| final int codePoint = toCodePoint(sequence.charAt(i)); |
| if (codePoint >= 0) { |
| write(codePoint); |
| } |
| } |
| return this; |
| } |
| |
| /** |
| * Resets the {@code LineAppender} internal state as if a new line was beginning. |
| * Pending non-white characters are sent to the {@linkplain #out underlying appendable}, |
| * trailing whitespaces not yet sent are discarded, and the column position |
| * (for tabulation expansion calculation) is reset to 0. |
| * This method does not write any line separator. |
| * |
| * @throws IOException if an error occurred while sending the trailing non-white |
| * characters to the underlying stream. |
| */ |
| public void clear() throws IOException { |
| endOfLine(); |
| // Reset only after a successful end of line: a following '\n' is no longer part of "\r\n". |
| this.skipLF = false; |
| } |
| |
| /** |
| * Sends all pending characters to the underlying appendable, including trailing whitespaces. |
| * This method should preferably be invoked at the end of a word, sentence or line, |
| * since invoking it may prevent {@code LineAppender} from properly wrapping the current |
| * line if the current position is in the middle of a word. |
| * |
| * <p>Invoking this method also flushes the underlying stream, if {@linkplain Flushable flushable}. |
| * A cheaper way to send pending characters is to make sure that the last character is a |
| * {@linkplain Characters#isLineOrParagraphSeparator(int) line or paragraph terminator}, |
| * or to invoke {@link #clear()}.</p> |
| * |
| * @throws IOException if an I/O error occurs. |
| */ |
| @Override |
| public void flush() throws IOException { |
| out.append(buffer); |
| // The buffer content has been fully written; forget it. |
| printableLength = 0; |
| buffer.setLength(0); |
| IO.flush(out); |
| } |
| |
| /** |
| * Invoked when a new line is beginning. The default implementation does nothing, |
| * but subclasses can override this method for example in order to insert a margin |
| * on the left side before each line. |
| * |
| * <p>This method is invoked with {@code false} just before the first characters of a line |
| * are sent to {@link #out}, and with {@code true} just after this formatter wrote a line |
| * separator for wrapping a line which exceeded the maximal length.</p> |
| * |
| * <p>If an implementation wishes to write characters, it shall do so by writing |
| * directly to {@link #out}, <strong>not</strong> by invoking the {@code append} |
| * methods of this class.</p> |
| * |
| * @param isContinuation {@code true} if the new line is the continuation of the previous |
| * line after a "line wrap", or {@code false} if a line or paragraph separator has |
| * been explicitly sent to this formatter. |
| * @throws IOException if an error occurred while writing to {@link #out}. |
| */ |
| protected void onLineBegin(boolean isContinuation) throws IOException { |
| } |
| } |