blob: 9c582f84c2d31ad652238ec85d25374f861754e1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sis.internal.storage.io;
import java.util.Locale;
import java.io.File;
import java.io.FileInputStream;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URISyntaxException;
import java.net.MalformedURLException;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.OpenOption;
import java.nio.file.StandardOpenOption;
import java.nio.file.FileSystemNotFoundException;
import java.nio.charset.StandardCharsets;
import javax.imageio.stream.ImageInputStream;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamReader;
import org.apache.sis.util.CharSequences;
import org.apache.sis.util.Exceptions;
import org.apache.sis.util.Static;
import org.apache.sis.util.resources.Errors;
import org.apache.sis.internal.storage.Resources;
/**
* Utility methods related to I/O operations. Many methods in this class accept arbitrary {@link Object} argument
* and perform a sequence of {@code instanceof} checks. Since this approach provides no type safety and since the
* sequence of {@code instanceof} checks is somewhat arbitrary, those methods can not be in public API.
*
* <p>Unless otherwise specified, giving an instance of unknown type or a {@code null} value cause the methods to
* return {@code null}. No exception is thrown for unknown type - callers must check that the return value is not
* null. However exceptions may be thrown for malformed URI or URL.</p>
*
* @author Martin Desruisseaux (Geomatys)
* @author Johann Sorel (Geomatys)
* @version 0.8
* @since 0.3
* @module
*/
public final class IOUtilities extends Static {
/**
* Do not allow instantiation of this class.
*/
private IOUtilities() {
}
/**
* Returns the filename from a {@link Path}, {@link File}, {@link URL}, {@link URI} or {@link CharSequence}
* instance. If the given argument is specialized type like {@code Path} or {@code File}, then this method uses
* dedicated API like {@link Path#getFileName()}. Otherwise this method gets a string representation of the path
* and returns the part after the last {@code '/'} or platform-dependent name separator character, if any.
*
* @param path the path as an instance of one of the above-cited types, or {@code null}.
* @return the filename in the given path, or {@code null} if the given object is null or of unknown type.
*/
public static String filename(final Object path) {
return part(path, false);
}
/**
* Returns the filename extension (without leading dot) from a {@link Path}, {@link File}, {@link URL},
* {@link URI} or {@link CharSequence} instance. If no extension is found, returns an empty string.
* If the given object is of unknown type, return {@code null}.
*
* @param path the path as an instance of one of the above-cited types, or {@code null}.
* @return the extension in the given path, or an empty string if none, or {@code null}
* if the given object is null or of unknown type.
*/
public static String extension(final Object path) {
return part(path, true);
}
/**
* Implementation of {@link #filename(Object)} and {@link #extension(Object)} methods.
*/
private static String part(final Object path, final boolean extension) {
int fromIndex = 0;
final String name;
if (path instanceof File) {
name = ((File) path).getName();
} else if (path instanceof Path) {
name = ((Path) path).getFileName().toString();
} else {
char separator = '/';
if (path instanceof URL) {
name = ((URL) path).getPath();
} else if (path instanceof URI) {
final URI uri = (URI) path;
name = uri.isOpaque() ? uri.getSchemeSpecificPart() : uri.getPath();
} else if (path instanceof CharSequence) {
name = path.toString();
separator = File.separatorChar;
} else {
return null;
}
fromIndex = name.lastIndexOf('/') + 1;
if (separator != '/') {
// Search for platform-specific character only if the object is neither a URL or a URI.
fromIndex = Math.max(fromIndex, CharSequences.lastIndexOf(name, separator, fromIndex, name.length()) + 1);
}
}
if (extension) {
fromIndex = CharSequences.lastIndexOf(name, '.', fromIndex, name.length()) + 1;
if (fromIndex <= 1) {
// If the dot is the first character, do not consider as a filename extension.
return "";
}
}
return name.substring(fromIndex);
}
/**
* Returns a string representation of the given path, or {@code null} if none. The current implementation
* recognizes only the {@link Path}, {@link File}, {@link URL}, {@link URI} or {@link CharSequence} types.
*
* @param path the path for which to return a string representation.
* @return the string representation, or {@code null} if none.
*/
public static String toString(final Object path) {
/*
* For the following types, the string that we want can be obtained only by toString(),
* or the class is final so we know that the toString(à behavior can not be changed.
*/
if (path instanceof CharSequence || path instanceof Path || path instanceof URL || path instanceof URI) {
return path.toString();
}
/*
* While toString() would work too on the default implementation, the following
* type is not final. So we are better to invoke the dedicated method.
*/
if (path instanceof File) {
return ((File) path).getPath();
}
return null;
}
/**
* Returns the given path without the directories and without the extension.
* For example if the given path is {@code "/Users/name/Map.png"}, then this
* method returns {@code "Map"}.
*
* @param path the path from which to get the filename without extension, or {@code null}.
* @return the filename without extension, or {@code null} if none.
*/
public static String filenameWithoutExtension(String path) {
if (path != null) {
int s = path.lastIndexOf(File.separatorChar);
if (s < 0 && File.separatorChar != '/') {
s = path.lastIndexOf('/');
}
int e = path.lastIndexOf('.');
if (e <= ++s) {
e = path.length();
}
path = path.substring(s, e);
}
return path;
}
/**
* Encodes the characters that are not legal for the {@link URI#URI(String)} constructor.
* Note that in addition to unreserved characters ("{@code _-!.~'()*}"), the reserved
* characters ("{@code ?/[]@}") and the punctuation characters ("{@code ,;:$&+=}")
* are left unchanged, so they will be processed with their special meaning by the
* URI constructor.
*
* <p>The current implementations replaces only the space characters, control characters
* and the {@code %} character. Future versions may replace more characters as we learn
* from experience.</p>
*
* @param path the path to encode, or {@code null}.
* @return the encoded path, or {@code null} if and only if the given path was null.
*/
public static String encodeURI(final String path) {
if (path == null) {
return null;
}
StringBuilder buffer = null;
final int length = path.length();
for (int i=0; i<length;) {
final int c = path.codePointAt(i);
final int n = Character.charCount(c);
if (!Character.isSpaceChar(c) && !Character.isISOControl(c) && c != '%') {
/*
* The character is valid, or is punction character, or is a reserved character.
* All those characters should be handled properly by the URI(String) constructor.
*/
if (buffer != null) {
buffer.appendCodePoint(c);
}
} else {
/*
* The character is invalid, so we need to escape it. Note that the encoding
* is fixed to UTF-8 as of java.net.URI specification (see its class javadoc).
*/
if (buffer == null) {
buffer = new StringBuilder(path);
buffer.setLength(i);
}
for (final byte b : path.substring(i, i+n).getBytes(StandardCharsets.UTF_8)) {
buffer.append('%');
final String hex = Integer.toHexString(Byte.toUnsignedInt(b)).toUpperCase(Locale.ROOT);
if (hex.length() < 2) {
buffer.append('0');
}
buffer.append(hex);
}
}
i += n;
}
return (buffer != null) ? buffer.toString() : path;
}
/**
* Converts a {@link URL} to a {@link URI}. This is equivalent to a call to the standard {@link URL#toURI()}
* method, except for the following functionalities:
*
* <ul>
* <li>Optionally decodes the {@code "%XX"} sequences, where {@code "XX"} is a number.</li>
* <li>Converts various exceptions into subclasses of {@link IOException}.</li>
* </ul>
*
* @param url the URL to convert, or {@code null}.
* @param encoding if the URL is encoded in a {@code application/x-www-form-urlencoded} MIME format,
* the character encoding (normally {@code "UTF-8"}). If the URL is not encoded,
* then {@code null}.
* @return the URI for the given URL, or {@code null} if the given URL was null.
* @throws IOException if the URL can not be converted to a URI.
*
* @see URI#URI(String)
*/
public static URI toURI(final URL url, final String encoding) throws IOException {
if (url == null) {
return null;
}
/*
* Convert the URL to a URI, taking in account the encoding if any.
*
* Note: URL.toURI() is implemented as new URI(URL.toString()) where toString()
* delegates to toExternalForm(), and all those methods are final. So we really
* don't lost anything by doing those steps ourself.
*/
String path = url.toExternalForm();
if (encoding != null) {
path = URLDecoder.decode(path, encoding);
}
path = encodeURI(path);
try {
return new URI(path);
} catch (URISyntaxException cause) {
/*
* Occurs only if the URL is not compliant with RFC 2396. Otherwise every URL
* should succeed, so a failure can actually be considered as a malformed URL.
*/
throw (MalformedURLException) new MalformedURLException(Exceptions.formatChainedMessages(null,
Errors.format(Errors.Keys.IllegalArgumentValue_2, "URL", path), cause)).initCause(cause);
}
}
/**
* Converts a {@link URL} to a {@link File}. This is equivalent to a call to the standard
* {@link URL#toURI()} method followed by a call to the {@link File#File(URI)} constructor,
* except for the following functionalities:
*
* <ul>
* <li>Optionally decodes the {@code "%XX"} sequences, where {@code "XX"} is a number.</li>
* <li>Converts various exceptions into subclasses of {@link IOException}.</li>
* </ul>
*
* @param url the URL to convert, or {@code null}.
* @param encoding if the URL is encoded in a {@code application/x-www-form-urlencoded} MIME format,
* the character encoding (normally {@code "UTF-8"}). If the URL is not encoded,
* then {@code null}.
* @return the file for the given URL, or {@code null} if the given URL was null.
* @throws IOException if the URL can not be converted to a file.
*
* @see File#File(URI)
*/
public static File toFile(final URL url, final String encoding) throws IOException {
if (url == null) {
return null;
}
final URI uri = toURI(url, encoding);
/*
* We really want to call the File constructor expecting a URI argument,
* not the constructor expecting a String argument, because the one for
* the URI argument performs additional platform-specific parsing.
*/
try {
return new File(uri);
} catch (IllegalArgumentException cause) {
/*
* Typically happen when the URI scheme is not "file". But may also happen if the
* URI contains fragment that can not be represented in a File (e.g. a Query part).
* The IllegalArgumentException does not allow us to distinguish those cases.
*/
throw new IOException(Exceptions.formatChainedMessages(null,
Errors.format(Errors.Keys.IllegalArgumentValue_2, "URL", url), cause), cause);
}
}
/**
* Converts a {@link URL} to a {@link Path}. This is equivalent to a call to the standard
* {@link URL#toURI()} method followed by a call to the {@link Paths#get(URI)} static method,
* except for the following functionalities:
*
* <ul>
* <li>Optionally decodes the {@code "%XX"} sequences, where {@code "XX"} is a number.</li>
* <li>Converts various exceptions into subclasses of {@link IOException}.</li>
* </ul>
*
* @param url the URL to convert, or {@code null}.
* @param encoding if the URL is encoded in a {@code application/x-www-form-urlencoded} MIME format,
* the character encoding (normally {@code "UTF-8"}). If the URL is not encoded,
* then {@code null}.
* @return the path for the given URL, or {@code null} if the given URL was null.
* @throws IOException if the URL can not be converted to a path.
*
* @see Paths#get(URI)
*/
public static Path toPath(final URL url, final String encoding) throws IOException {
if (url == null) {
return null;
}
final URI uri = toURI(url, encoding);
try {
return Paths.get(uri);
} catch (IllegalArgumentException | FileSystemNotFoundException cause) {
final String message = Exceptions.formatChainedMessages(null,
Errors.format(Errors.Keys.IllegalArgumentValue_2, "URL", url), cause);
/*
* If the exception is IllegalArgumentException, then the URI scheme has been recognized
* but the URI syntax is illegal for that file system. So we can consider that the URL is
* malformed in regard to the rules of that particular file system.
*/
final IOException e;
if (cause instanceof IllegalArgumentException) {
e = new MalformedURLException(message);
e.initCause(cause);
} else {
e = new IOException(message, cause);
}
throw e;
}
}
/**
* Parses the following path as a {@link File} if possible, or a {@link URL} otherwise.
* In the special case where the given {@code path} is a URL using the {@code "file"} protocol,
* the URL is converted to a {@link File} object using the given {@code encoding} for decoding
* the {@code "%XX"} sequences, if any.
*
* <div class="section">Rational</div>
* A URL can represent a file, but {@link URL#openStream()} appears to return a {@code BufferedInputStream}
* wrapping the {@link FileInputStream}, which is not a desirable feature when we want to obtain a channel.
*
* @param path the path to convert, or {@code null}.
* @param encoding if the URL is encoded in a {@code application/x-www-form-urlencoded} MIME format,
* the character encoding (normally {@code "UTF-8"}). If the URL is not encoded,
* then {@code null}. This argument is ignored if the given path does not need
* to be converted from URL to {@code File}.
* @return the path as a {@link File} if possible, or a {@link URL} otherwise.
* @throws IOException if the given path is not a file and can't be parsed as a URL.
*/
public static Object toFileOrURL(final String path, final String encoding) throws IOException {
if (path == null) {
return null;
}
/*
* Check if the path seems to be a local file. Those paths are assumed never encoded.
* The heuristic rules applied here may change in any future SIS version.
*/
if (path.indexOf('?') < 0 && path.indexOf('#') < 0) {
final int s = path.indexOf(':');
/*
* If the ':' character is found, the part before it is probably a protocol in a URL,
* except in the particular case where there is just one letter before ':'. In such
* case, it may be the drive letter of a Windows file.
*/
if (s<0 || (s==1 && Character.isLetter(path.charAt(0)) && !path.regionMatches(2, "//", 0, 2))) {
return new File(path);
}
}
final URL url = new URL(path);
final String scheme = url.getProtocol();
if (scheme != null && scheme.equalsIgnoreCase("file")) {
return toFile(url, encoding);
}
/*
* Leave the URL in its original encoding on the assumption that this is the encoding expected by
* the server. This is different than the policy for URI, because the later are always in UTF-8.
* If a URI is needed, callers should use toURI(url, encoding).
*/
return url;
}
/**
* Converts the given output stream to an input stream. It is caller's responsibility to flush
* the stream and reset its position to the beginning of file before to invoke this method.
* The data read by the input stream will be the data that have been written in the output stream
* before this method is invoked.
*
* <p>The given output stream should not be used anymore after this method invocation, but should
* not be closed neither since the returned input stream may be backed by the same channel.</p>
*
* @param stream the input or output stream to converts to an {@code InputStream}.
* @return the input stream, or {@code null} if the given stream can not be converted.
* @throws IOException if an error occurred during input stream creation.
*
* @since 0.8
*/
public static InputStream toInputStream(AutoCloseable stream) throws IOException {
if (stream != null) {
if (stream instanceof InputStream) {
return (InputStream) stream;
}
if (stream instanceof OutputStreamAdapter) {
stream = ((OutputStreamAdapter) stream).output;
}
if (stream instanceof ChannelDataOutput) {
final ChannelDataOutput c = (ChannelDataOutput) stream;
if (c.channel instanceof ReadableByteChannel) {
stream = new ChannelImageInputStream(c.filename, (ReadableByteChannel) c.channel, c.buffer, true);
}
}
if (stream instanceof ImageInputStream) {
return new InputStreamAdapter((ImageInputStream) stream);
}
}
return null;
}
/**
* Converts the given input stream to an output stream. It is caller's responsibility to reset
* the stream position to the beginning of file before to invoke this method. The data written
* by the output stream will overwrite the previous data, but the caller may need to
* {@linkplain #truncate truncate} the output stream after he finished to write in it.
*
* <p>The given input stream should not be used anymore after this method invocation, but should
* not be closed neither since the returned output stream may be backed by the same channel.</p>
*
* @param stream the input or output stream to converts to an {@code OutputStream}.
* @return the output stream, or {@code null} if the given stream can not be converted.
* @throws IOException if an error occurred during output stream creation.
*
* @since 0.8
*/
public static OutputStream toOutputStream(AutoCloseable stream) throws IOException {
if (stream != null) {
if (stream instanceof OutputStream) {
return (OutputStream) stream;
}
if (stream instanceof InputStreamAdapter) {
stream = ((InputStreamAdapter) stream).input;
}
if (stream instanceof ChannelDataInput) {
final ChannelDataInput c = (ChannelDataInput) stream;
if (c.channel instanceof WritableByteChannel) {
stream = new ChannelImageOutputStream(c.filename, (WritableByteChannel) c.channel, c.buffer);
}
}
if (stream instanceof ChannelImageOutputStream) {
return new OutputStreamAdapter((ChannelImageOutputStream) stream);
}
}
return null;
}
/**
* Truncates the given output stream at its current position.
* This method works with Apache SIS implementations backed (sometime indirectly) by {@link SeekableByteChannel}.
* Callers may need to {@linkplain java.io.Flushable#flush() flush} the stream before to invoke this method.
*
* @param stream the output stream or writable channel to truncate.
* @return whether this method has been able to truncate the given stream.
* @throws IOException if an error occurred while truncating the stream.
*/
public static boolean truncate(AutoCloseable stream) throws IOException {
if (stream instanceof OutputStreamAdapter) {
stream = ((OutputStreamAdapter) stream).output;
}
if (stream instanceof ChannelDataOutput) {
stream = ((ChannelDataOutput) stream).channel;
}
if (stream instanceof SeekableByteChannel) {
final SeekableByteChannel s = (SeekableByteChannel) stream;
s.truncate(s.position());
return true;
}
return false;
}
/**
* Returns {@code true} if the given options would open a file mostly for writing.
* This method returns {@code true} if the following conditions are true:
*
* <ul>
* <li>The array contains {@link StandardOpenOption#WRITE}.</li>
* <li>The array does not contain {@link StandardOpenOption#READ}, unless the array contains also
* {@link StandardOpenOption#CREATE_NEW} or {@link StandardOpenOption#TRUNCATE_EXISTING} in which
* case the {@code READ} option is ignored (because the caller would have no data to read).</li>
* </ul>
*
* @param options the open options to check, or {@code null} if none.
* @return {@code true} if a file opened with the given options would be mostly for write operations.
*
* @since 0.8
*/
public static boolean isWrite(final OpenOption[] options) {
boolean isRead = false;
boolean isWrite = false;
boolean truncate = false;
if (options != null) {
for (final OpenOption op : options) {
if (op instanceof StandardOpenOption) {
switch ((StandardOpenOption) op) {
case READ: isRead = true; break;
case WRITE: isWrite = true; break;
case CREATE_NEW:
case TRUNCATE_EXISTING: truncate = true; break;
}
}
}
}
return isWrite & (!isRead | truncate);
}
/**
* Reads the next character as an Unicode code point. Unless end-of-file has been reached, the returned value is
* between {@value java.lang.Character#MIN_CODE_POINT} and {@value java.lang.Character#MAX_CODE_POINT} inclusive.
*
* @param in the reader from which to read code point.
* @return the next code point, or -1 on end of file.
* @throws IOException if an error occurred while reading characters.
*
* @since 0.8
*/
public static int readCodePoint(final Reader in) throws IOException {
int c = in.read();
while (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE) {
final int low = in.read();
if (low >= Character.MIN_LOW_SURROGATE && low <= Character.MAX_LOW_SURROGATE) {
c = Character.toCodePoint((char) c, (char) low);
break;
} else {
c = low; // Discard orphan high surrogate and take the next character.
}
}
return c;
}
/**
* Returns the error message for a file that can not be parsed.
* The error message will contain the line number if available.
*
* @param locale the language for the error message.
* @param format abbreviation of the file format (e.g. "CSV", "GML", "WKT", <i>etc</i>).
* @param filename name of the file or the data store.
* @param store the input or output object, or {@code null}.
* @return the parameters for a localized error message for a file that can not be processed.
*
* @since 0.8
*/
public static String canNotReadFile(final Locale locale, final String format, final String filename, final Object store) {
final Object[] parameters = errorMessageParameters(format, filename, store);
return Resources.forLocale(locale).getString(errorMessageKey(parameters), parameters);
}
/**
* Returns the {@link Resources.Keys} value together with the parameters given by {@code errorMessageParameters(…)}.
*
* @param parameters the result of {@code errorMessageParameters(…)} method call.
* @return the {@link Resources.Keys} value to use for formatting the error message.
*
* @since 0.8
*/
public static short errorMessageKey(final Object[] parameters) {
return (parameters.length == 2) ? Resources.Keys.CanNotReadFile_2 :
(parameters.length == 3) ? Resources.Keys.CanNotReadFile_3 :
Resources.Keys.CanNotReadFile_4;
}
/**
* Returns the parameters for an error message saying that an error occurred while processing a file.
* This method uses the information provided by methods like {@link LineNumberReader#getLineNumber()}
* or {@link XMLStreamReader#getLocation()} if the given {@code store} is one of the supported types.
*
* @param format abbreviation of the file format (e.g. "CSV", "GML", "WKT", <i>etc</i>).
* @param filename name of the file or the data store.
* @param store the input or output object, or {@code null}.
* @return the parameters for a localized error message for a file that can not be processed.
*
* @since 0.8
*/
@SuppressWarnings("fallthrough")
public static Object[] errorMessageParameters(final String format, final String filename, final Object store) {
int line = 0;
int column = 0;
if (store instanceof XMLStreamReader) {
final Location location = ((XMLStreamReader) store).getLocation();
line = location.getLineNumber() + 1;
column = location.getColumnNumber() + 1;
} else if (store instanceof LineNumberReader) {
line = ((LineNumberReader) store).getLineNumber();
}
final Object[] params = new Object[(line == 0) ? 2 : (column == 0) ? 3 : 4];
switch (params.length) {
default: // Fallthrough everywhere
case 4: params[3] = column;
case 3: params[2] = line;
case 2: params[1] = filename;
case 1: params[0] = format;
case 0: break;
}
return params;
}
}