| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.sis.io; |
| |
| import java.util.Map; |
| import java.util.IdentityHashMap; |
| import java.util.Locale; |
| import java.util.TimeZone; |
| import java.util.Date; |
| import java.io.IOException; |
| import java.io.UncheckedIOException; |
| import java.text.Format; |
| import java.text.DateFormat; |
| import java.text.NumberFormat; |
| import java.text.FieldPosition; |
| import java.text.ParsePosition; |
| import java.text.ParseException; |
| import java.text.SimpleDateFormat; |
| import javax.measure.Unit; |
| |
| import org.opengis.referencing.IdentifiedObject; |
| import org.opengis.geometry.DirectPosition; |
| import org.apache.sis.measure.Angle; |
| import org.apache.sis.measure.AngleFormat; |
| import org.apache.sis.measure.Range; |
| import org.apache.sis.measure.RangeFormat; |
| import org.apache.sis.measure.UnitFormat; |
| import org.apache.sis.util.Numbers; |
| import org.apache.sis.util.Classes; |
| import org.apache.sis.util.Localized; |
| import org.apache.sis.util.ArraysExt; |
| import org.apache.sis.util.ArgumentChecks; |
| import org.apache.sis.internal.util.MetadataServices; |
| import org.apache.sis.internal.util.LocalizedParseException; |
| |
| import static org.apache.sis.internal.util.StandardDateFormat.UTC; |
| |
| |
| /** |
| * Base class of {@link Format} implementations which delegate part of their work to other |
| * {@code Format} instances. {@code CompoundFormat} subclasses typically work on relatively |
| * large blocks of data, for example a metadata tree or a <cite>Well Known Text</cite> (WKT). |
| * Those blocks of data usually contain smaller elements like numbers and dates, whose parsing |
| * and formatting can be delegated to {@link NumberFormat} and {@link DateFormat} respectively. |
| * Subclasses can obtain instances of those formats by call to {@link #getFormat(Class)} where |
| * the argument is the type of the value to parse or format. |
| * {@code CompoundFormat} supports at least the following value types, but subclasses may add more types: |
| * |
| * <table class="sis"> |
| * <caption>Supported value types</caption> |
| * <tr><th>Value type</th> <th>Format type</th> <th>Remarks</th></tr> |
| * <tr><td>{@link DirectPosition}</td> <td>{@link org.apache.sis.geometry.CoordinateFormat}</td> <td>Requires {@code sis-referencing} module.</td></tr> |
| * <tr><td>{@link Angle}</td> <td>{@link AngleFormat}</td> <td></td></tr> |
| * <tr><td>{@link Date}</td> <td>{@link DateFormat}</td> <td>Timezone specified by {@link #getTimeZone()}.</td></tr> |
| * <tr><td>{@link Number}</td> <td>{@link NumberFormat}</td> <td></td></tr> |
| * <tr><td>{@link Unit}</td> <td>{@link UnitFormat}</td> <td></td></tr> |
| * <tr><td>{@link Range}</td> <td>{@link RangeFormat}</td> <td></td></tr> |
| * <tr><td>{@link Class}</td> <td>(internal)</td> <td></td></tr> |
| * </table> |
| * |
| * <div class="section">Sources and destinations</div> |
| * Since {@code CompoundFormat} may work on larger texts than the usual {@code Format} classes, |
| * it defines {@code parse} and {@code format} methods working with arbitrary {@link CharSequence} |
| * and {@link Appendable} instances. The standard {@code Format} methods redirect to the above-cited |
| * methods. |
| * |
| * <div class="section">Sub-classing</div> |
| * The abstract methods to be defined by subclasses are: |
| * <ul> |
| * <li>{@link #getValueType()}</li> |
| * <li>{@link #format(Object, Appendable)}</li> |
| * <li>{@link #parse(CharSequence, ParsePosition)}</li> |
| * </ul> |
| * |
| * <div class="note"><b>API note:</b> |
| * in the standard {@link Format} class, the {@code parse} methods either accept a {@link ParsePosition} argument |
| * and returns {@code null} on error, or does not take position argument and throws a {@link ParseException} on error. |
| * In this {@code CompoundFormat} class, the {@code parse} method both takes a {@code ParsePosition} argument and |
| * throws a {@code ParseException} on error. This allows both substring parsing and more accurate exception message |
| * in case of error.</div> |
| * |
| * @author Martin Desruisseaux (Geomatys) |
| * @version 1.0 |
| * |
| * @param <T> the base type of objects parsed and formatted by this class. |
| * |
| * @since 0.3 |
| * @module |
| */ |
| public abstract class CompoundFormat<T> extends Format implements Localized { |
| /** |
| * For cross-version compatibility. |
| */ |
| private static final long serialVersionUID = -689151528653024968L; |
| |
| /** |
| * The locale given at construction time, or {@link Locale#ROOT} (never {@code null}) for |
| * unlocalized format. See {@link #getLocale()} for more information on {@code ROOT} locale. |
| * |
| * @see #getLocale() |
| */ |
| private final Locale locale; |
| |
| /** |
| * The timezone given at construction time, or {@code null} for UTC. |
| * |
| * @see #getTimeZone() |
| */ |
| private final TimeZone timezone; |
| |
| /** |
| * The formats for smaller unit of information, created when first needed. |
| * {@code null} is used as a sentinel value meaning "no format". |
| */ |
| private transient Map<Class<?>, Format> formats; |
| |
| /** |
| * Creates a new format for the given locale. The given locale can be {@code null} or |
| * {@link Locale#ROOT} if this format shall parse and format "unlocalized" strings. |
| * See {@link #getLocale()} for more information about the {@code ROOT} locale. |
| * |
| * @param locale the locale for the new {@code Format}, or {@code null} for {@code Locale.ROOT}. |
| * @param timezone the timezone, or {@code null} for UTC. |
| */ |
| protected CompoundFormat(final Locale locale, final TimeZone timezone) { |
| this.locale = (locale != null) ? locale : Locale.ROOT; |
| this.timezone = timezone; |
| } |
| |
| /** |
| * Returns the locale used by this format. The returned value may be {@link Locale#ROOT} |
| * if this format does not apply any localization. The definition of "unlocalized string" |
| * is implementation-dependent, but some typical examples are: |
| * |
| * <ul> |
| * <li>Format {@link Number} instances using {@code toString()} instead than {@code NumberFormat}.</li> |
| * <li>Format {@link Date} instances using the ISO pattern instead than the English one.</li> |
| * </ul> |
| * |
| * @return the locale of this {@code Format}, or {@code Locale.ROOT} for unlocalized format. |
| */ |
| @Override |
| public Locale getLocale() { |
| return locale; |
| } |
| |
| /** |
| * Returns the locale for the given category. Subclasses may override this method in order to assign |
| * different roles to the different locale categories. A typical (but not mandatory) mapping is: |
| * |
| * <ul> |
| * <li>{@link java.util.Locale.Category#FORMAT} specifies the locale to use for numbers, dates and angles formatting.</li> |
| * <li>{@link java.util.Locale.Category#DISPLAY} specifies the locale to use for {@link org.opengis.util.CodeList} labels |
| * and {@link org.opengis.util.InternationalString} contents.</li> |
| * </ul> |
| * |
| * <div class="note"><b>Example:</b> |
| * The ISO 19162 (<cite>Well Known Text</cite>) standard requires a number format similar to the one defined by |
| * {@code Locale.ROOT} while it allows informative texts (remarks, <i>etc.</i>) to be formatted according the |
| * user's locale. Consequently {@code WKTFormat} fixes (usually) the locale for {@code Category.FORMAT} to |
| * {@code Locale.ROOT} and let {@code Category.DISPLAY} be any locale.</div> |
| * |
| * For subclasses that do not override this method, the default implementation returns {@link #getLocale()}. |
| * |
| * @param category the category for which a locale is desired. |
| * @return the locale for the given category (never {@code null}). |
| * |
| * @since 0.4 |
| */ |
| public Locale getLocale(final Locale.Category category) { |
| ArgumentChecks.ensureNonNull("category", category); |
| return getLocale(); |
| } |
| |
| /** |
| * Returns the timezone used by this format. |
| * |
| * @return the timezone used for this format, or UTC for unlocalized format. |
| */ |
| public TimeZone getTimeZone() { |
| return (timezone != null) ? (TimeZone) timezone.clone() : TimeZone.getTimeZone(UTC); |
| } |
| |
| /** |
| * Returns the base type of values parsed and formatted by this {@code Format} instance. |
| * The returned type may be a subclass of {@code <T>} if the format is configured in a way |
| * that restrict the kind value to be parsed. |
| * |
| * <div class="note"><b>Example:</b> |
| * <ul> |
| * <li>{@code StatisticsFormat} unconditionally returns {@code Statistics.class}.</li> |
| * <li>{@code TreeTableFormat} unconditionally returns {@code TreeTable.class}.</li> |
| * </ul> |
| * </div> |
| * |
| * @return the base type of values parsed and formatted by this {@code Format} instance. |
| */ |
| public abstract Class<? extends T> getValueType(); |
| |
| /** |
| * Creates an object from the given character sequence. |
| * The parsing begins at the index given by the {@code pos} argument. |
| * If parsing succeeds, then: |
| * |
| * <ul> |
| * <li>The {@code pos} {@linkplain ParsePosition#getIndex() index} is updated to the index |
| * after the last successfully parsed character.</li> |
| * <li>The parsed object is returned.</li> |
| * </ul> |
| * |
| * If parsing fails, then: |
| * |
| * <ul> |
| * <li>The {@code pos} index is left unchanged</li> |
| * <li>The {@code pos} {@linkplain ParsePosition#getErrorIndex() error index} |
| * is set to the beginning of the unparsable character sequence.</li> |
| * <li>One of the following actions is taken (at implementation choice): |
| * <ul> |
| * <li>this method returns {@code null}, or</li> |
| * <li>a {@code ParseException} is thrown with an {@linkplain ParseException#getErrorOffset() error offset} |
| * set to the index of the first unparsable character.</li> |
| * </ul> |
| * </li> |
| * </ul> |
| * |
| * <div class="note"><b>Note:</b> |
| * if a {@code ParseException} is thrown, its error offset is usually the same than the {@code ParsePosition} |
| * error index, but implementations are free to adopt a slightly different policy. For example |
| * if parsing of the {@code "30.0 40,0"} coordinate fails on the coma in the last number, then the {@code pos} |
| * {@linkplain ParsePosition#getErrorIndex() error index} may be set to 5 (the beginning of the {@code "40.0"} |
| * character sequence) or to 7 (the coma position), depending on the implementation.</div> |
| * |
| * Most implementations never return {@code null}. However some implementations may choose to return {@code null} |
| * if they can determine that the given text is not a supported format and reserve {@code ParseException} for the |
| * cases where the text seems to be the expected format but contains a malformed element. |
| * |
| * @param text the character sequence for the object to parse. |
| * @param pos the position where to start the parsing. |
| * On return, the position where the parsing stopped or where an error occurred. |
| * @return the parsed object, or {@code null} if the text is not recognized. |
| * @throws ParseException if an error occurred while parsing the object. |
| */ |
| public abstract T parse(CharSequence text, ParsePosition pos) throws ParseException; |
| |
| /** |
| * Creates an object from the given string representation, or returns {@code null} if an error |
| * occurred while parsing. The parsing begins at the index given by the {@code pos} argument. |
| * If parsing succeeds, then: |
| * |
| * <ul> |
| * <li>The {@code pos} {@linkplain ParsePosition#getIndex() index} is updated to the index |
| * after the last successfully parsed character.</li> |
| * <li>The parsed object is returned.</li> |
| * </ul> |
| * |
| * If parsing fails, then: |
| * |
| * <ul> |
| * <li>The {@code pos} index is left unchanged</li> |
| * <li>The {@code pos} {@linkplain ParsePosition#getErrorIndex() error index} |
| * is set to the index of the character where the error occurred.</li> |
| * <li>{@code null} is returned.</li> |
| * </ul> |
| * |
| * The default implementation delegates to {@link #parse(CharSequence, ParsePosition)}. |
| * |
| * @param text the string representation of the object to parse. |
| * @param pos the position where to start the parsing. |
| * @return the parsed object, or {@code null} if the given string can not be parsed. |
| */ |
| @Override |
| public T parseObject(final String text, final ParsePosition pos) { |
| try { |
| return parse(text, pos); |
| } catch (ParseException e) { |
| if (pos.getErrorIndex() < 0) { |
| pos.setErrorIndex(e.getErrorOffset()); |
| } |
| return null; |
| } |
| } |
| |
| /** |
| * Creates an object from the given string representation. |
| * The default implementation delegates to {@link #parse(CharSequence, ParsePosition)} |
| * and ensures that the given string has been fully used, ignoring trailing |
| * {@linkplain Character#isSpaceChar(int) spaces} and |
| * {@linkplain Character#isISOControl(int) ISO control characters}. |
| * |
| * <div class="note"><b>Note:</b> |
| * The usual SIS policy, as documented in the {@link org.apache.sis.util.CharSequences} class, is to test for |
| * whitespaces using the {@code Character.isWhitespace(…)} method. The combination of {@code isSpaceChar(…)} |
| * and {@code isISOControl(…)} done in this {@code parseObject(…)} method is more permissive since it encompasses |
| * all whitespace characters, plus non-breaking spaces and non-white ISO controls.</div> |
| * |
| * @param text the string representation of the object to parse. |
| * @return the parsed object. |
| * @throws ParseException if an error occurred while parsing the object. |
| */ |
| @Override |
| public T parseObject(final String text) throws ParseException { |
| final ParsePosition pos = new ParsePosition(0); |
| final T value = parse(text, pos); |
| if (value != null) { |
| final int length = text.length(); |
| int c, n=0, i=pos.getIndex(); |
| do { |
| if ((i += n) >= length) { |
| return value; |
| } |
| c = text.codePointAt(i); |
| n = Character.charCount(c); |
| } while (Character.isSpaceChar(c) || Character.isISOControl(c)); |
| pos.setErrorIndex(i); |
| } |
| throw new LocalizedParseException(getLocale(Locale.Category.DISPLAY), getValueType(), text, pos); |
| } |
| |
| /** |
| * Writes a textual representation of the given object in the given stream or buffer. |
| * |
| * @param object the object to format. |
| * @param toAppendTo where to format the object. |
| * @throws IOException if an error occurred while writing to the given appendable. |
| */ |
| public abstract void format(T object, Appendable toAppendTo) throws IOException; |
| |
| /** |
| * Writes a textual representation of the specified object in the given buffer. |
| * This method delegates its work to {@link #format(Object, Appendable)}, but |
| * without propagating {@link IOException}. The I/O exception should never |
| * occur since we are writing in a {@link StringBuffer}. |
| * |
| * <div class="note"><b>Note:</b> |
| * Strictly speaking, an {@link IOException} could still occur if a subclass overrides the above {@code format} |
| * method and performs some I/O operation outside the given {@link StringBuffer}. However this is not the intended |
| * usage of this class and implementers should avoid such unexpected I/O operation.</div> |
| * |
| * @param object the object to format. |
| * @param toAppendTo where to format the object. |
| * @param pos ignored in current implementation. |
| * @return the given buffer, returned for convenience. |
| */ |
| @Override |
| public StringBuffer format(final Object object, final StringBuffer toAppendTo, final FieldPosition pos) { |
| final Class<? extends T> valueType = getValueType(); |
| ArgumentChecks.ensureCanCast("object", valueType, object); |
| try { |
| format(valueType.cast(object), toAppendTo); |
| } catch (IOException e) { |
| /* |
| * Should never happen when writing into a StringBuffer, unless the user |
| * override the format(Object, Appendable) method. We do not rethrow an |
| * AssertionError because of this possibility. |
| */ |
| throw new UncheckedIOException(e); |
| } |
| return toAppendTo; |
| } |
| |
| /** |
| * Returns the format to use for parsing and formatting values of the given type. |
| * This method applies the following algorithm: |
| * |
| * <ol> |
| * <li>If a format is cached for the given type, return that format.</li> |
| * <li>Otherwise if a format can be {@linkplain #createFormat(Class) created} |
| * for the given type, cache the newly created format and return it.</li> |
| * <li>Otherwise do again the same checks for the {@linkplain Class#getSuperclass() super class}.</li> |
| * <li>If no format is found for a concrete class, search again for |
| * {@linkplain Classes#getAllInterfaces(Class) all implemented interfaces}.</li> |
| * <li>If no format can be created, return {@code null}.</li> |
| * </ol> |
| * |
| * See {@link #createFormat(Class)} for the list of value types recognized by the default |
| * {@code CompoundFormat} implementation. |
| * |
| * @param valueType the base type of values to parse or format, or {@code null} if unknown. |
| * @return the format to use for parsing and formatting values of the given type or any parent type, |
| * or {@code null} if none. |
| */ |
| protected Format getFormat(final Class<?> valueType) { |
| if (formats == null) { |
| formats = new IdentityHashMap<>(4); |
| } |
| Format format = formats.get(valueType); |
| if (format == null && !formats.containsKey(valueType)) { |
| format = createFormat(valueType); |
| if (format == null) { |
| Class<?> type = valueType; |
| Class<?>[] interfaces = null; |
| for (int i=-1;;) { |
| if (i >= 0 || (type = type.getSuperclass()) == null) { // Try parent classes first. |
| if (interfaces == null) { |
| interfaces = Classes.getAllInterfaces(valueType); // Try interfaces after we tried all parent classes. |
| } |
| if (++i >= interfaces.length) break; // No format found - stop the search with format = null. |
| type = interfaces[i]; |
| } |
| format = formats.get(type); |
| if (format != null) break; // Intentionally no formats.containsKey(type) check here. |
| format = createFormat(type); |
| if (format != null) { |
| formats.put(type, format); |
| break; |
| } |
| } |
| } |
| formats.put(valueType, format); // Store result even null. |
| } |
| return format; |
| } |
| |
| /** |
| * Creates a new format to use for parsing and formatting values of the given type. |
| * This method is invoked by {@link #getFormat(Class)} the first time that a format |
| * is needed for the given type. |
| * The class given in argument can be any of the classes listed in the "Value type" column below: |
| * |
| * <table class="sis"> |
| * <caption>Supported value types</caption> |
| * <tr><th>Value type</th> <th>Format type</th></tr> |
| * <tr><td>{@link DirectPosition}</td> <td>{@link org.apache.sis.geometry.CoordinateFormat}</td></tr> |
| * <tr><td>{@link Angle}</td> <td>{@link AngleFormat}</td></tr> |
| * <tr><td>{@link Date}</td> <td>{@link DateFormat}</td></tr> |
| * <tr><td>{@link Number}</td> <td>{@link NumberFormat}</td></tr> |
| * <tr><td>{@link Unit}</td> <td>{@link UnitFormat}</td></tr> |
| * <tr><td>{@link Range}</td> <td>{@link RangeFormat}</td></tr> |
| * <tr><td>{@link Class}</td> <td>(internal)</td></tr> |
| * </table> |
| * |
| * Subclasses can override this method for adding more types, or for configuring the |
| * newly created {@link Format} instances. Note that implementations shall check the |
| * type using the {@code expected == type} comparator, not |
| * <code>expected.{@linkplain Class#isAssignableFrom(Class) isAssignableFrom}(type)</code>, |
| * because the check for parent types is done by the {@link #getFormat(Class)} method. |
| * This approach allows subclasses to create specialized formats for different value |
| * sub-types. For example a subclass may choose to format {@link Double} values differently |
| * than other types of number. |
| * |
| * @param valueType the base type of values to parse or format. |
| * @return the format to use for parsing of formatting values of the given type, or {@code null} if none. |
| */ |
| protected Format createFormat(final Class<?> valueType) { |
| /* |
| * The first case below is an apparent exception to the 'expected == type' rule |
| * documented in this method javadoc. But actually it is not, since the call to |
| * DefaultFormat.getInstance(…) will indirectly perform this kind of comparison. |
| */ |
| final Locale locale = getLocale(Locale.Category.FORMAT); |
| if (Number.class.isAssignableFrom(valueType)) { |
| if (Locale.ROOT.equals(locale)) { |
| return DefaultFormat.getInstance(valueType); |
| } else if (valueType == Number.class) { |
| return NumberFormat.getInstance(locale); |
| } else if (Numbers.isInteger(valueType)) { |
| return NumberFormat.getIntegerInstance(locale); |
| } |
| } else if (valueType == Date.class) { |
| final DateFormat format; |
| if (!Locale.ROOT.equals(locale)) { |
| format = DateFormat.getDateTimeInstance(DateFormat.DEFAULT, DateFormat.DEFAULT, locale); |
| } else { |
| format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT); |
| } |
| format.setTimeZone(getTimeZone()); |
| return format; |
| } else if (valueType == Angle.class) { |
| return AngleFormat.getInstance(locale); |
| } else if (valueType == Unit.class) { |
| return new UnitFormat(locale); |
| } else if (valueType == Range.class) { |
| return new RangeFormat(locale); |
| } else if (valueType == DirectPosition.class) { |
| return MetadataServices.getInstance().createCoordinateFormat(locale, getTimeZone()); |
| } else if (valueType == Class.class) { |
| return ClassFormat.INSTANCE; |
| } else { |
| final Class<?>[] interfaces = valueType.getInterfaces(); |
| if (ArraysExt.contains(interfaces, IdentifiedObject.class)) { |
| return new IdentifiedObjectFormat(locale); |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Returns a clone of this format. |
| * |
| * @return a clone of this format. |
| */ |
| @Override |
| public CompoundFormat<T> clone() { |
| @SuppressWarnings("unchecked") |
| final CompoundFormat<T> clone = (CompoundFormat<T>) super.clone(); |
| if (clone.formats != null) { |
| clone.formats = new IdentityHashMap<>(clone.formats); |
| for (final Map.Entry<Class<?>,Format> entry : clone.formats.entrySet()) { |
| entry.setValue((Format) entry.getValue().clone()); |
| } |
| } |
| return clone; |
| } |
| |
| /* |
| * Do not override equals(Object) and hashCode(). They are unlikely to be needed since we |
| * do not expect CompoundFormats to be used as keys in HashMap, especially since they are |
| * mutable. Furthermore it is difficult to check for equality since the values in the |
| * 'formats' map are created only when needed and we don't know how subclasses will |
| * configure them. |
| */ |
| } |