core/sis-utility/src/main/java/org/apache/sis/io/CompoundFormat.java - sis - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.sis.io;

 import java.util.Map;
 import java.util.IdentityHashMap;
 import java.util.Locale;
 import java.util.TimeZone;
 import java.util.Date;
 import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.text.Format;
 import java.text.DateFormat;
 import java.text.NumberFormat;
 import java.text.FieldPosition;
 import java.text.ParsePosition;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import javax.measure.Unit;

 import org.opengis.referencing.IdentifiedObject;
 import org.opengis.geometry.DirectPosition;
 import org.apache.sis.measure.Angle;
 import org.apache.sis.measure.AngleFormat;
 import org.apache.sis.measure.Range;
 import org.apache.sis.measure.RangeFormat;
 import org.apache.sis.measure.UnitFormat;
 import org.apache.sis.util.Numbers;
 import org.apache.sis.util.Classes;
 import org.apache.sis.util.Localized;
 import org.apache.sis.util.ArraysExt;
 import org.apache.sis.util.ArgumentChecks;
 import org.apache.sis.internal.util.MetadataServices;
 import org.apache.sis.internal.util.LocalizedParseException;

 import static org.apache.sis.internal.util.StandardDateFormat.UTC;


 /**
  * Base class of {@link Format} implementations which delegate part of their work to other
  * {@code Format} instances. {@code CompoundFormat} subclasses typically work on relatively
  * large blocks of data, for example a metadata tree or a <cite>Well Known Text</cite> (WKT).
  * Those blocks of data usually contain smaller elements like numbers and dates, whose parsing
  * and formatting can be delegated to {@link NumberFormat} and {@link DateFormat} respectively.
  * Subclasses can obtain instances of those formats by calling {@link #getFormat(Class)} where
  * the argument is the type of the value to parse or format.
  * {@code CompoundFormat} supports at least the following value types, but subclasses may add more types:
  *
  * <table class="sis">
  *   <caption>Supported value types</caption>
  *   <tr><th>Value type</th>              <th>Format type</th>                                      <th>Remarks</th></tr>
  *   <tr><td>{@link DirectPosition}</td>  <td>{@link org.apache.sis.geometry.CoordinateFormat}</td> <td>Requires {@code sis-referencing} module.</td></tr>
  *   <tr><td>{@link Angle}</td>           <td>{@link AngleFormat}</td>                              <td></td></tr>
  *   <tr><td>{@link Date}</td>            <td>{@link DateFormat}</td>                               <td>Timezone specified by {@link #getTimeZone()}.</td></tr>
  *   <tr><td>{@link Number}</td>          <td>{@link NumberFormat}</td>                             <td></td></tr>
  *   <tr><td>{@link Unit}</td>            <td>{@link UnitFormat}</td>                               <td></td></tr>
  *   <tr><td>{@link Range}</td>           <td>{@link RangeFormat}</td>                              <td></td></tr>
  *   <tr><td>{@link Class}</td>           <td>(internal)</td>                                       <td></td></tr>
  * </table>
  *
  * <div class="section">Sources and destinations</div>
  * Since {@code CompoundFormat} may work on larger texts than the usual {@code Format} classes,
  * it defines {@code parse} and {@code format} methods working with arbitrary {@link CharSequence}
  * and {@link Appendable} instances. The standard {@code Format} methods redirect to the above-cited
  * methods.
  *
  * <div class="section">Sub-classing</div>
  * The abstract methods to be defined by subclasses are:
  * <ul>
  *   <li>{@link #getValueType()}</li>
  *   <li>{@link #format(Object, Appendable)}</li>
  *   <li>{@link #parse(CharSequence, ParsePosition)}</li>
  * </ul>
  *
  * <div class="note"><b>API note:</b>
  * in the standard {@link Format} class, the {@code parse} methods either accept a {@link ParsePosition} argument
  * and return {@code null} on error, or do not take a position argument and throw a {@link ParseException} on error.
  * In this {@code CompoundFormat} class, the {@code parse} method both takes a {@code ParsePosition} argument and
  * throws a {@code ParseException} on error. This allows both substring parsing and more accurate exception message
  * in case of error.</div>
  *
  * @author  Martin Desruisseaux (Geomatys)
  * @version 1.0
  *
  * @param <T>  the base type of objects parsed and formatted by this class.
  *
  * @since 0.3
  * @module
  */
 public abstract class CompoundFormat<T> extends Format implements Localized {
     /**
      * For cross-version compatibility.
      */
     private static final long serialVersionUID = -689151528653024968L;

     /**
      * The locale given at construction time, or {@link Locale#ROOT} (never {@code null}) for
      * unlocalized format. See {@link #getLocale()} for more information on {@code ROOT} locale.
      *
      * @see #getLocale()
      */
     private final Locale locale;

     /**
      * The timezone given at construction time, or {@code null} for UTC.
      *
      * @see #getTimeZone()
      */
     private final TimeZone timezone;

     /**
      * The formats for smaller unit of information, created when first needed.
      * {@code null} is used as a sentinel value meaning "no format".
      * The same {@code Format} instance may be associated to more than one key,
      * since a format created for a parent type is also cached for the requested subtype.
      */
     private transient Map<Class<?>, Format> formats;

     /**
      * Creates a new format for the given locale. The given locale can be {@code null} or
      * {@link Locale#ROOT} if this format shall parse and format "unlocalized" strings.
      * See {@link #getLocale()} for more information about the {@code ROOT} locale.
      *
      * @param  locale    the locale for the new {@code Format}, or {@code null} for {@code Locale.ROOT}.
      * @param  timezone  the timezone, or {@code null} for UTC.
      */
     protected CompoundFormat(final Locale locale, final TimeZone timezone) {
         this.locale   = (locale != null) ? locale : Locale.ROOT;
         this.timezone = timezone;
     }

     /**
      * Returns the locale used by this format. The returned value may be {@link Locale#ROOT}
      * if this format does not apply any localization. The definition of "unlocalized string"
      * is implementation-dependent, but some typical examples are:
      *
      * <ul>
      *   <li>Format {@link Number} instances using {@code toString()} instead of {@code NumberFormat}.</li>
      *   <li>Format {@link Date} instances using the ISO pattern instead of the English one.</li>
      * </ul>
      *
      * @return the locale of this {@code Format}, or {@code Locale.ROOT} for unlocalized format.
      */
     @Override
     public Locale getLocale() {
         return locale;
     }

     /**
      * Returns the locale for the given category. Subclasses may override this method in order to assign
      * different roles to the different locale categories. A typical (but not mandatory) mapping is:
      *
      * <ul>
      *   <li>{@link java.util.Locale.Category#FORMAT} specifies the locale to use for numbers, dates and angles formatting.</li>
      *   <li>{@link java.util.Locale.Category#DISPLAY} specifies the locale to use for {@link org.opengis.util.CodeList} labels
      *       and {@link org.opengis.util.InternationalString} contents.</li>
      * </ul>
      *
      * <div class="note"><b>Example:</b>
      * The ISO 19162 (<cite>Well Known Text</cite>) standard requires a number format similar to the one defined by
      * {@code Locale.ROOT} while it allows informative texts (remarks, <i>etc.</i>) to be formatted according the
      * user's locale. Consequently {@code WKTFormat} fixes (usually) the locale for {@code Category.FORMAT} to
      * {@code Locale.ROOT} and lets {@code Category.DISPLAY} be any locale.</div>
      *
      * For subclasses that do not override this method, the default implementation returns {@link #getLocale()}.
      *
      * @param  category  the category for which a locale is desired.
      * @return the locale for the given category (never {@code null}).
      *
      * @since 0.4
      */
     public Locale getLocale(final Locale.Category category) {
         ArgumentChecks.ensureNonNull("category", category);
         return getLocale();
     }

     /**
      * Returns the timezone used by this format.
      * The returned object is a copy; changes to it do not affect this format.
      *
      * @return the timezone used for this format, or UTC for unlocalized format.
      */
     public TimeZone getTimeZone() {
         return (timezone != null) ? (TimeZone) timezone.clone() : TimeZone.getTimeZone(UTC);
     }

     /**
      * Returns the base type of values parsed and formatted by this {@code Format} instance.
      * The returned type may be a subclass of {@code <T>} if the format is configured in a way
      * that restricts the kind of value to be parsed.
      *
      * <div class="note"><b>Example:</b>
      *   <ul>
      *     <li>{@code StatisticsFormat} unconditionally returns {@code Statistics.class}.</li>
      *     <li>{@code TreeTableFormat} unconditionally returns {@code TreeTable.class}.</li>
      *   </ul>
      * </div>
      *
      * @return the base type of values parsed and formatted by this {@code Format} instance.
      */
     public abstract Class<? extends T> getValueType();

     /**
      * Creates an object from the given character sequence.
      * The parsing begins at the index given by the {@code pos} argument.
      * If parsing succeeds, then:
      *
      * <ul>
      *   <li>The {@code pos} {@linkplain ParsePosition#getIndex() index} is updated to the index
      *       after the last successfully parsed character.</li>
      *   <li>The parsed object is returned.</li>
      * </ul>
      *
      * If parsing fails, then:
      *
      * <ul>
      *   <li>The {@code pos} index is left unchanged</li>
      *   <li>The {@code pos} {@linkplain ParsePosition#getErrorIndex() error index}
      *       is set to the beginning of the unparsable character sequence.</li>
      *   <li>One of the following actions is taken (at implementation choice):
      *     <ul>
      *       <li>this method returns {@code null}, or</li>
      *       <li>a {@code ParseException} is thrown with an {@linkplain ParseException#getErrorOffset() error offset}
      *           set to the index of the first unparsable character.</li>
      *     </ul>
      *   </li>
      * </ul>
      *
      * <div class="note"><b>Note:</b>
      * if a {@code ParseException} is thrown, its error offset is usually the same as the {@code ParsePosition}
      * error index, but implementations are free to adopt a slightly different policy. For example
      * if parsing of the {@code "30.0 40,0"} coordinate fails on the comma in the last number, then the {@code pos}
      * {@linkplain ParsePosition#getErrorIndex() error index} may be set to 5 (the beginning of the {@code "40,0"}
      * character sequence) or to 7 (the comma position), depending on the implementation.</div>
      *
      * Most implementations never return {@code null}. However some implementations may choose to return {@code null}
      * if they can determine that the given text is not a supported format and reserve {@code ParseException} for the
      * cases where the text seems to be the expected format but contains a malformed element.
      *
      * @param  text  the character sequence for the object to parse.
      * @param  pos   the position where to start the parsing.
      *               On return, the position where the parsing stopped or where an error occurred.
      * @return the parsed object, or {@code null} if the text is not recognized.
      * @throws ParseException if an error occurred while parsing the object.
      */
     public abstract T parse(CharSequence text, ParsePosition pos) throws ParseException;

     /**
      * Creates an object from the given string representation, or returns {@code null} if an error
      * occurred while parsing. The parsing begins at the index given by the {@code pos} argument.
      * If parsing succeeds, then:
      *
      * <ul>
      *   <li>The {@code pos} {@linkplain ParsePosition#getIndex() index} is updated to the index
      *       after the last successfully parsed character.</li>
      *   <li>The parsed object is returned.</li>
      * </ul>
      *
      * If parsing fails, then:
      *
      * <ul>
      *   <li>The {@code pos} index is left unchanged</li>
      *   <li>The {@code pos} {@linkplain ParsePosition#getErrorIndex() error index}
      *       is set to the index of the character where the error occurred.</li>
      *   <li>{@code null} is returned.</li>
      * </ul>
      *
      * The default implementation delegates to {@link #parse(CharSequence, ParsePosition)}.
      *
      * @param  text  the string representation of the object to parse.
      * @param  pos   the position where to start the parsing.
      * @return the parsed object, or {@code null} if the given string can not be parsed.
      */
     @Override
     public T parseObject(final String text, final ParsePosition pos) {
         try {
             return parse(text, pos);
         } catch (ParseException e) {
             /*
              * Report the failure through the ParsePosition, as required by the Format contract.
              * The error index set by the parse method (if any) has precedence over the exception offset.
              */
             if (pos.getErrorIndex() < 0) {
                 pos.setErrorIndex(e.getErrorOffset());
             }
             return null;
         }
     }

     /**
      * Creates an object from the given string representation.
      * The default implementation delegates to {@link #parse(CharSequence, ParsePosition)}
      * and ensures that the given string has been fully used, ignoring trailing
      * {@linkplain Character#isSpaceChar(int) spaces} and
      * {@linkplain Character#isISOControl(int) ISO control characters}.
      *
      * <div class="note"><b>Note:</b>
      * The usual SIS policy, as documented in the {@link org.apache.sis.util.CharSequences} class, is to test for
      * whitespaces using the {@code Character.isWhitespace(…)} method. The combination of {@code isSpaceChar(…)}
      * and {@code isISOControl(…)} done in this {@code parseObject(…)} method is more permissive since it encompasses
      * all whitespace characters, plus non-breaking spaces and non-white ISO controls.</div>
      *
      * @param  text  the string representation of the object to parse.
      * @return the parsed object.
      * @throws ParseException if an error occurred while parsing the object.
      */
     @Override
     public T parseObject(final String text) throws ParseException {
         final ParsePosition pos = new ParsePosition(0);
         final T value = parse(text, pos);
         if (value != null) {
             /*
              * Skip trailing spaces and control characters, one code point at a time.
              * The parsing is a success only if nothing else remains after the parsed value.
              */
             final int length = text.length();
             int c, n=0, i=pos.getIndex();
             do {
                 if ((i += n) >= length) {
                     return value;
                 }
                 c = text.codePointAt(i);
                 n = Character.charCount(c);
             } while (Character.isSpaceChar(c) || Character.isISOControl(c));
             pos.setErrorIndex(i);                       // First character which is not ignorable.
         }
         throw new LocalizedParseException(getLocale(Locale.Category.DISPLAY), getValueType(), text, pos);
     }

     /**
      * Writes a textual representation of the given object in the given stream or buffer.
      *
      * @param  object      the object to format.
      * @param  toAppendTo  where to format the object.
      * @throws IOException if an error occurred while writing to the given appendable.
      */
     public abstract void format(T object, Appendable toAppendTo) throws IOException;

     /**
      * Writes a textual representation of the specified object in the given buffer.
      * This method delegates its work to {@link #format(Object, Appendable)}, but
      * without propagating {@link IOException}. The I/O exception should never
      * occur since we are writing in a {@link StringBuffer}.
      *
      * <div class="note"><b>Note:</b>
      * Strictly speaking, an {@link IOException} could still occur if a subclass overrides the above {@code format}
      * method and performs some I/O operation outside the given {@link StringBuffer}. However this is not the intended
      * usage of this class and implementers should avoid such unexpected I/O operation.</div>
      *
      * @param  object      the object to format.
      * @param  toAppendTo  where to format the object.
      * @param  pos         ignored in current implementation.
      * @return the given buffer, returned for convenience.
      * @throws UncheckedIOException if {@link #format(Object, Appendable)} threw an {@link IOException}.
      */
     @Override
     public StringBuffer format(final Object object, final StringBuffer toAppendTo, final FieldPosition pos) {
         final Class<? extends T> valueType = getValueType();
         ArgumentChecks.ensureCanCast("object", valueType, object);
         try {
             format(valueType.cast(object), toAppendTo);
         } catch (IOException e) {
             /*
              * Should never happen when writing into a StringBuffer, unless the user
              * overrides the format(Object, Appendable) method. We do not rethrow an
              * AssertionError because of this possibility.
              */
             throw new UncheckedIOException(e);
         }
         return toAppendTo;
     }

     /**
      * Returns the format to use for parsing and formatting values of the given type.
      * This method applies the following algorithm:
      *
      * <ol>
      *   <li>If a format is cached for the given type, return that format.</li>
      *   <li>Otherwise if a format can be {@linkplain #createFormat(Class) created}
      *       for the given type, cache the newly created format and return it.</li>
      *   <li>Otherwise do again the same checks for the {@linkplain Class#getSuperclass() super class}.</li>
      *   <li>If no format is found for a concrete class, search again for
      *       {@linkplain Classes#getAllInterfaces(Class) all implemented interfaces}.</li>
      *   <li>If no format can be created, return {@code null}.</li>
      * </ol>
      *
      * If the given type is {@code null}, then only the first two steps are applied
      * since there is no class hierarchy to explore.
      *
      * See {@link #createFormat(Class)} for the list of value types recognized by the default
      * {@code CompoundFormat} implementation.
      *
      * @param  valueType  the base type of values to parse or format, or {@code null} if unknown.
      * @return the format to use for parsing and formatting values of the given type or any parent type,
      *         or {@code null} if none.
      */
     protected Format getFormat(final Class<?> valueType) {
         if (formats == null) {
             formats = new IdentityHashMap<>(4);
         }
         Format format = formats.get(valueType);
         if (format == null && !formats.containsKey(valueType)) {
             format = createFormat(valueType);
             if (format == null && valueType != null) {                          // No hierarchy to walk for a null type.
                 Class<?>   type = valueType;
                 Class<?>[] interfaces = null;
                 for (int i=-1;;) {
                     if (i >= 0 || (type = type.getSuperclass()) == null) {      // Try parent classes first.
                         if (interfaces == null) {
                             interfaces = Classes.getAllInterfaces(valueType);   // Try interfaces after we tried all parent classes.
                         }
                         if (++i >= interfaces.length) break;                    // No format found - stop the search with format = null.
                         type = interfaces[i];
                     }
                     format = formats.get(type);
                     if (format != null) break;                                  // Intentionally no formats.containsKey(type) check here.
                     format = createFormat(type);
                     if (format != null) {
                         formats.put(type, format);
                         break;
                     }
                 }
             }
             formats.put(valueType, format);                                     // Store result even null.
         }
         return format;
     }

     /**
      * Creates a new format to use for parsing and formatting values of the given type.
      * This method is invoked by {@link #getFormat(Class)} the first time that a format
      * is needed for the given type.
      * The class given in argument can be any of the classes listed in the "Value type" column below:
      *
      * <table class="sis">
      *   <caption>Supported value types</caption>
      *   <tr><th>Value type</th>              <th>Format type</th></tr>
      *   <tr><td>{@link DirectPosition}</td>  <td>{@link org.apache.sis.geometry.CoordinateFormat}</td></tr>
      *   <tr><td>{@link Angle}</td>           <td>{@link AngleFormat}</td></tr>
      *   <tr><td>{@link Date}</td>            <td>{@link DateFormat}</td></tr>
      *   <tr><td>{@link Number}</td>          <td>{@link NumberFormat}</td></tr>
      *   <tr><td>{@link Unit}</td>            <td>{@link UnitFormat}</td></tr>
      *   <tr><td>{@link Range}</td>           <td>{@link RangeFormat}</td></tr>
      *   <tr><td>{@link Class}</td>           <td>(internal)</td></tr>
      * </table>
      *
      * Subclasses can override this method for adding more types, or for configuring the
      * newly created {@link Format} instances. Note that implementations shall check the
      * type using the {@code expected == type} comparator, not
      * <code>expected.{@linkplain Class#isAssignableFrom(Class) isAssignableFrom}(type)</code>,
      * because the check for parent types is done by the {@link #getFormat(Class)} method.
      * This approach allows subclasses to create specialized formats for different value
      * sub-types. For example a subclass may choose to format {@link Double} values differently
      * than other types of number.
      *
      * <p>The default implementation returns {@code null} if the given type is {@code null}.</p>
      *
      * @param  valueType  the base type of values to parse or format.
      * @return the format to use for parsing or formatting values of the given type, or {@code null} if none.
      */
     protected Format createFormat(final Class<?> valueType) {
         if (valueType == null) {
             return null;                    // Unknown type (allowed by getFormat): no format can be selected.
         }
         /*
          * The first case below is an apparent exception to the 'expected == type' rule
          * documented in this method javadoc. But actually it is not, since the call to
          * DefaultFormat.getInstance(…) will indirectly perform this kind of comparison.
          */
         final Locale locale = getLocale(Locale.Category.FORMAT);
         if (Number.class.isAssignableFrom(valueType)) {
             if (Locale.ROOT.equals(locale)) {
                 return DefaultFormat.getInstance(valueType);
             } else if (valueType == Number.class) {
                 return NumberFormat.getInstance(locale);
             } else if (Numbers.isInteger(valueType)) {
                 return NumberFormat.getIntegerInstance(locale);
             }
             // Other number types: return null, so getFormat(…) falls back on the Number.class format.
         } else if (valueType == Date.class) {
             final DateFormat format;
             if (!Locale.ROOT.equals(locale)) {
                 format = DateFormat.getDateTimeInstance(DateFormat.DEFAULT, DateFormat.DEFAULT, locale);
             } else {
                 format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT);
             }
             format.setTimeZone(getTimeZone());
             return format;
         } else if (valueType == Angle.class) {
             return AngleFormat.getInstance(locale);
         } else if (valueType == Unit.class) {
             return new UnitFormat(locale);
         } else if (valueType == Range.class) {
             return new RangeFormat(locale);
         } else if (valueType == DirectPosition.class) {
             return MetadataServices.getInstance().createCoordinateFormat(locale, getTimeZone());
         } else if (valueType == Class.class) {
             return ClassFormat.INSTANCE;
         } else {
             final Class<?>[] interfaces = valueType.getInterfaces();
             if (ArraysExt.contains(interfaces, IdentifiedObject.class)) {
                 return new IdentifiedObjectFormat(locale);
             }
         }
         return null;
     }

     /**
      * Returns a clone of this format. The cached formats for smaller units of information
      * are cloned too, so configuration changes applied on the formats of the clone do not
      * affect the formats of this instance. Types for which no format exists stay associated
      * to "no format" in the clone, and a format shared by many types in this instance is
      * shared by the same types in the clone.
      *
      * @return a clone of this format.
      */
     @Override
     public CompoundFormat<T> clone() {
         @SuppressWarnings("unchecked")
         final CompoundFormat<T> clone = (CompoundFormat<T>) super.clone();
         if (clone.formats != null) {
             clone.formats = new IdentityHashMap<>(clone.formats);
             /*
              * The same Format instance may be cached under many keys (e.g. Number.class and Double.class).
              * Remember the copy created for each original instance in order to preserve that sharing.
              */
             final Map<Format,Format> copies = new IdentityHashMap<>();
             for (final Map.Entry<Class<?>,Format> entry : clone.formats.entrySet()) {
                 final Format original = entry.getValue();
                 if (original == null) {
                     continue;               // Sentinel value meaning "no format" - nothing to clone.
                 }
                 Format copy = copies.get(original);
                 if (copy == null) {
                     copy = (Format) original.clone();
                     copies.put(original, copy);
                 }
                 entry.setValue(copy);
             }
         }
         return clone;
     }

     /*
      * Do not override equals(Object) and hashCode(). They are unlikely to be needed since we
      * do not expect CompoundFormats to be used as keys in HashMap, especially since they are
      * mutable. Furthermore it is difficult to check for equality since the values in the
      * 'formats' map are created only when needed and we don't know how subclasses will
      * configure them.
      */
 }