| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.sis.io; |
| |
| import java.util.Locale; |
| import java.util.TimeZone; |
| import java.text.ParsePosition; |
| import java.util.regex.Pattern; |
| import java.util.regex.Matcher; |
| import org.apache.sis.util.StringBuilders; |
| import org.apache.sis.util.ArgumentChecks; |
| import org.apache.sis.util.resources.Errors; |
| |
| |
| /** |
| * Base class for parser and formatter of tabular data, providing control on line and column separators. |
| * The line separator is specified by a string. But the column separator is specified by a pattern which |
| * provide some control on the character to repeat, and on the strings to insert before and after the |
| * repeated character. See the following methods for details: |
| * |
| * <ul> |
| * <li>{@link #setLineSeparator(String)}</li> |
| * <li>{@link #setColumnSeparatorPattern(String)}</li> |
| * </ul> |
| * |
| * <h2>Note for subclass implementions</h2> |
| * This base class takes care of splitting a column separator pattern into its components |
| * ({@link #beforeFill}, {@link #fillCharacter} and {@link #columnSeparator}) |
| * for easier usage in {@code format(…)} method implementations. |
| * Subclasses can use those fields like below: |
| * |
| * <p><b>Formatting table without border:</b></p> |
| * {@preformat java |
| * TableAppender table = new TableAppender(out, ""); |
| * // ... do some work, then add a column separator: |
| * table.append(beforeFill); |
| * table.nextColumn(fillCharacter); |
| * table.append(columnSeparator); |
| * } |
| * |
| * <p><b>Formatting table with a border:</b></p> |
| * {@preformat java |
| * TableAppender table = new TableAppender(out, columnSeparator); |
| * // ... do some work, then add a column separator: |
| * table.append(beforeFill); |
| * table.nextColumn(fillCharacter); |
| * } |
| * |
| * @author Martin Desruisseaux (Geomatys) |
| * @version 0.3 |
| * |
| * @param <T> the base type of objects parsed and formatted by this class. |
| * |
| * @see TableAppender |
| * |
| * @since 0.3 |
| * @module |
| */ |
| public abstract class TabularFormat<T> extends CompoundFormat<T> { |
| /** |
| * For cross-version compatibility. |
| */ |
| private static final long serialVersionUID = -4556687020021477908L; |
| |
| /** |
| * The line separator to use for formatting the tree. |
| * The default value is system-dependent. |
| * |
| * @see #getLineSeparator() |
| * @see #setLineSeparator(String) |
| */ |
| protected String lineSeparator; |
| |
| /** |
| * The string to write after the {@link #fillCharacter}, or an empty string if none. |
| * This is the sequence of characters after the "{@code [ ]}" pair of brackets in the |
| * pattern given to the {@link #setColumnSeparatorPattern(String)} method. |
| */ |
| protected String columnSeparator; |
| |
| /** |
| * The character to repeat after the content of a cell for alignment with the next column. |
| * This is the character between the "{@code [ ]}" pair of brackets in the pattern given |
| * to the {@link #setColumnSeparatorPattern(String)} method. |
| * |
| * <p>Subclasses will typically use this value in calls to {@link TableAppender#nextColumn(char)}.</p> |
| */ |
| protected char fillCharacter; |
| |
| /** |
| * The string to write before the {@link #fillCharacter}, or an empty string if none. |
| * This is the sequence of characters before the "{@code [ ]}" pair of brackets in the |
| * pattern given to the {@link #setColumnSeparatorPattern(String)} method. |
| */ |
| protected String beforeFill; |
| |
| /** |
| * {@code true} if the trailing {@code null} values shall be omitted at formatting time. |
| * This flag is controlled by the presence or absence of the {@code '?'} character at the |
| * beginning of the pattern given to the {@link #setColumnSeparatorPattern(String)} method. |
| */ |
| protected boolean omitTrailingNulls; |
| |
| /** |
| * {@code true} if the user defined the parsing pattern explicitly. |
| */ |
| private boolean isParsePatternDefined; |
| |
| /** |
| * The pattern used at parsing time for finding the column separators, or {@code null} if not |
| * yet constructed. This field is serialized because it may be a user-specified pattern. |
| * The same {@code Pattern} instance can be safely shared by many {@code TabularFormat} instances. |
| */ |
| private Pattern parsePattern; |
| |
| /** |
| * Creates a new tabular format. |
| * |
| * @param locale the locale to use for numbers, dates and angles formatting, |
| * or {@code null} for the {@linkplain Locale#ROOT root locale}. |
| * @param timezone the timezone, or {@code null} for UTC. |
| */ |
| public TabularFormat(final Locale locale, final TimeZone timezone) { |
| super(locale, timezone); |
| beforeFill = ""; |
| fillCharacter = ' '; |
| columnSeparator = " "; |
| lineSeparator = System.lineSeparator(); |
| } |
| |
| /** |
| * Returns the current line separator. The default value is system-dependent. |
| * |
| * @return the current line separator. |
| */ |
| public String getLineSeparator() { |
| return lineSeparator; |
| } |
| |
| /** |
| * Sets the line separator. Can not be a null or empty string. |
| * |
| * @param separator the new line separator. |
| */ |
| public void setLineSeparator(final String separator) { |
| ArgumentChecks.ensureNonEmpty("separator", separator); |
| lineSeparator = separator; |
| } |
| |
| /** |
| * Returns the pattern of characters used in column separators. Those characters will be used |
| * only if more than one column is formatted. See {@link #setColumnSeparatorPattern(String)} |
| * for a description of the pattern syntax. |
| * |
| * @return the pattern of the current column separator. |
| */ |
| public String getColumnSeparatorPattern() { |
| final StringBuilder buffer = new StringBuilder(8); |
| buffer.append(beforeFill).append('\uFFFF').append(columnSeparator); |
| StringBuilders.replace(buffer, "\\", "\\\\"); |
| StringBuilders.replace(buffer, "?", "\\?"); |
| StringBuilders.replace(buffer, "[", "\\["); |
| StringBuilders.replace(buffer, "]", "\\]"); |
| StringBuilders.replace(buffer, "/", "\\/"); |
| if (omitTrailingNulls) { |
| buffer.insert(0, '?'); |
| } |
| final int insertAt = buffer.indexOf("\uFFFF"); |
| buffer.replace(insertAt, insertAt+1, "[\uFFFF]").setCharAt(insertAt+1, fillCharacter); |
| if (isParsePatternDefined) { |
| buffer.append('/').append(parsePattern.pattern()); |
| } |
| return buffer.toString(); |
| } |
| |
| /** |
| * Sets the pattern of the characters to insert between the columns. The pattern shall contain |
| * exactly one occurrence of the {@code "[ ]"} pair of bracket, with exactly one character |
| * between them. This character will be repeated as many time as needed for columns alignment. |
| * |
| * <p>The formatting pattern can optionally be followed by a regular expression to be used at |
| * parsing time. If omitted, the parsing pattern will be inferred from the formatting pattern. |
| * If specified, then the {@link #parse(CharSequence, ParsePosition) parse} method will invoke |
| * the {@link Matcher#find()} method for determining the column boundaries.</p> |
| * |
| * <p>The characters listed below have special meaning in the pattern. |
| * Other characters are appended <cite>as-is</cite> between the columns.</p> |
| * |
| * <table class="sis"> |
| * <caption>Reserved characters</caption> |
| * <tr><th>Character(s)</th> <th>Meaning</th></tr> |
| * <tr><td>{@code '?'}</td> <td>Omit the column separator for trailing null values.</td></tr> |
| * <tr><td>{@code "[ ]"}</td><td>Repeat the character between bracket as needed.</td></tr> |
| * <tr><td>{@code '/'}</td> <td>Separate the formatting pattern from the parsing pattern.</td></tr> |
| * <tr><td>{@code '\\'}</td> <td>Escape any of the characters listed in this table.</td></tr> |
| * </table> |
| * |
| * <h4>Restrictions</h4> |
| * <ul> |
| * <li>If present, {@code '?'} shall be the first character in the pattern.</li> |
| * <li>The repeated character (specified inside the pair of brackets) is mandatory.</li> |
| * <li>In the current implementation, the repeated character must be in the |
| * {@linkplain Character#isBmpCodePoint(int) Basic Multilanguage Plane}.</li> |
| * <li>If {@code '/'} is present, anything on its right side shall be compliant |
| * with the {@link Pattern} syntax.</li> |
| * </ul> |
| * |
| * <div class="note"><b>Example:</b> |
| * The {@code "?……[…] "} pattern means <cite>"If the next value is non-null, then insert the |
| * {@code "……"} string, repeat the {@code '…'} character as many time as needed (may be zero), |
| * then insert a space"</cite>. |
| * </div> |
| * |
| * @param pattern the pattern of the new column separator. |
| * @throws IllegalArgumentException if the given pattern is illegal. |
| */ |
| public void setColumnSeparatorPattern(final String pattern) throws IllegalArgumentException { |
| ArgumentChecks.ensureNonEmpty("pattern", pattern); |
| final int length = pattern.length(); |
| final StringBuilder buffer = new StringBuilder(length); |
| boolean escape = false; |
| boolean trim = false; |
| String prefix = null; |
| String regex = null; |
| int separatorIndex = -1; |
| scan: for (int i=0; i<length; i++) { |
| final char c = pattern.charAt(i); |
| switch (c) { |
| case '\uFFFF': { // This "character" is reserved. |
| prefix = null; |
| break scan; // This will cause IllegalArgumentException to be thrown. |
| } |
| case '\\': { |
| if (i != separatorIndex) { |
| if (escape) break; |
| escape = true; |
| } |
| continue; |
| } |
| case '?': { |
| if (i != 0) { |
| prefix = null; |
| break scan; |
| } |
| trim = true; |
| continue; |
| } |
| case '[': { |
| if (escape) break; |
| if (i != separatorIndex) { |
| if (separatorIndex >= 0) { |
| prefix = null; |
| break scan; // This will cause IllegalArgumentException to be thrown. |
| } |
| separatorIndex = i+1; |
| } |
| continue; |
| } |
| case ']': { |
| if (escape) break; |
| switch (i - separatorIndex) { |
| case 0: continue; |
| case 1: prefix = buffer.toString(); buffer.setLength(0); continue; |
| default: prefix = null; break scan; |
| } |
| } |
| case '/': { |
| if (escape) break; |
| regex = pattern.substring(i+1); |
| break scan; |
| } |
| } |
| if (i != separatorIndex) { |
| buffer.append(c); |
| } |
| } |
| if (prefix == null) { |
| throw new IllegalArgumentException(Errors.format( |
| Errors.Keys.IllegalFormatPatternForClass_2, getValueType(), pattern)); |
| } |
| /* |
| * Finally store the result. The parsing pattern must be first because the call to |
| * Pattern.compile(regex) may thrown PatternSyntaxException. In such case, we want |
| * it to happen before we modified anything else. |
| */ |
| if (regex != null) { |
| parsePattern = Pattern.compile(regex); |
| isParsePatternDefined = true; |
| } else { |
| parsePattern = null; |
| isParsePatternDefined = false; |
| } |
| omitTrailingNulls = trim; |
| beforeFill = prefix; |
| columnSeparator = buffer.toString(); |
| fillCharacter = pattern.charAt(separatorIndex); |
| } |
| |
| /** |
| * Returns a matcher for the column separators in the given text. |
| * This method is invoked by subclasses in their {@code parse(…)} implementations. |
| * |
| * @param text the text for which to get a matcher. |
| * @return a matcher for the column separators in the given text. |
| */ |
| protected Matcher getColumnSeparatorMatcher(final CharSequence text) { |
| if (parsePattern == null) { |
| final StringBuilder pattern = new StringBuilder(beforeFill).append(fillCharacter); |
| String tmp = pattern.toString(); |
| pattern.setLength(0); |
| pattern.append(Pattern.quote(tmp)).append('*'); |
| tmp = columnSeparator; |
| if (tmp.length() != 0) { |
| pattern.append(Pattern.quote(tmp)); |
| } |
| parsePattern = Pattern.compile(pattern.toString()); |
| } |
| return parsePattern.matcher(text); |
| } |
| |
| /** |
| * Returns a clone of this format. |
| * |
| * @return a clone of this format. |
| */ |
| @Override |
| public TabularFormat<T> clone() { |
| return (TabularFormat<T>) super.clone(); |
| } |
| } |