/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.commons.vfs2.provider; |
| |
| import org.apache.commons.lang3.SystemUtils; |
| import org.apache.commons.vfs2.FileName; |
| import org.apache.commons.vfs2.FileSystemException; |
| import org.apache.commons.vfs2.FileType; |
| import org.apache.commons.vfs2.VFS; |
| |
| /** |
| * Utilities for dealing with URIs. See RFC 2396 for details. |
| */ |
| public final class UriParser { |
| |
| /** |
| * The set of valid separators. These are all converted to the normalized one. Does <i>not</i> contain the |
| * normalized separator |
| */ |
| // public static final char[] separators = {'\\'}; |
| public static final char TRANS_SEPARATOR = '\\'; |
| |
| /** |
| * The normalised separator to use. |
| */ |
| private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR; |
| |
| private static final int HEX_BASE = 16; |
| |
| private static final int BITS_IN_HALF_BYTE = 4; |
| |
| private static final char LOW_MASK = 0x0F; |
| |
| /** |
| * Encodes and appends a string to a StringBuilder. |
| * |
| * @param buffer The StringBuilder to append to. |
| * @param unencodedValue The String to encode and append. |
| * @param reserved characters to encode. |
| */ |
| public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) { |
| final int offset = buffer.length(); |
| buffer.append(unencodedValue); |
| encode(buffer, offset, unencodedValue.length(), reserved); |
| } |
| |
| public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length, |
| final FileNameParser fileNameParser) throws FileSystemException { |
| int index = offset; |
| int count = length; |
| for (; count > 0; count--, index++) { |
| final char ch = buffer.charAt(index); |
| if (ch == '%') { |
| if (count < 3) { |
| throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", |
| buffer.substring(index, index + count)); |
| } |
| |
| // Decode |
| final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); |
| final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); |
| if (dig1 == -1 || dig2 == -1) { |
| throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", |
| buffer.substring(index, index + 3)); |
| } |
| final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); |
| |
| final boolean match = value == '%' || fileNameParser.encodeCharacter(value); |
| |
| if (match) { |
| // this is a reserved character, not allowed to decode |
| index += 2; |
| count -= 2; |
| continue; |
| } |
| |
| // Replace |
| buffer.setCharAt(index, value); |
| buffer.delete(index + 1, index + 3); |
| count -= 2; |
| } else if (fileNameParser.encodeCharacter(ch)) { |
| // Encode |
| final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE), |
| Character.forDigit(ch & LOW_MASK, HEX_BASE) }; |
| buffer.setCharAt(index, '%'); |
| buffer.insert(index + 1, digits); |
| index += 2; |
| } |
| } |
| } |
| |
| /** |
| * Decodes the String. |
| * |
| * @param uri The String to decode. |
| * @throws FileSystemException if an error occurs. |
| */ |
| public static void checkUriEncoding(final String uri) throws FileSystemException { |
| decode(uri); |
| } |
| |
| /** |
| * Removes %nn encodings from a string. |
| * |
| * @param encodedStr The encoded String. |
| * @return The decoded String. |
| * @throws FileSystemException if an error occurs. |
| */ |
| public static String decode(final String encodedStr) throws FileSystemException { |
| if (encodedStr == null) { |
| return null; |
| } |
| if (encodedStr.indexOf('%') < 0) { |
| return encodedStr; |
| } |
| final StringBuilder buffer = new StringBuilder(encodedStr); |
| decode(buffer, 0, buffer.length()); |
| return buffer.toString(); |
| } |
| |
| /** |
| * Removes %nn encodings from a string. |
| * |
| * @param buffer StringBuilder containing the string to decode. |
| * @param offset The position in the string to start decoding. |
| * @param length The number of characters to decode. |
| * @throws FileSystemException if an error occurs. |
| */ |
| public static void decode(final StringBuilder buffer, final int offset, final int length) |
| throws FileSystemException { |
| int index = offset; |
| int count = length; |
| for (; count > 0; count--, index++) { |
| final char ch = buffer.charAt(index); |
| if (ch != '%') { |
| continue; |
| } |
| if (count < 3) { |
| throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", |
| buffer.substring(index, index + count)); |
| } |
| |
| // Decode |
| final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); |
| final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); |
| if (dig1 == -1 || dig2 == -1) { |
| throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", |
| buffer.substring(index, index + 3)); |
| } |
| final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); |
| |
| // Replace |
| buffer.setCharAt(index, value); |
| buffer.delete(index + 1, index + 3); |
| count -= 2; |
| } |
| } |
| |
| /** |
| * Converts "special" characters to their %nn value. |
| * |
| * @param decodedStr The decoded String. |
| * @return The encoded String. |
| */ |
| public static String encode(final String decodedStr) { |
| return encode(decodedStr, null); |
| } |
| |
| /** |
| * Converts "special" characters to their %nn value. |
| * |
| * @param decodedStr The decoded String. |
| * @param reserved Characters to encode. |
| * @return The encoded String |
| */ |
| public static String encode(final String decodedStr, final char[] reserved) { |
| if (decodedStr == null) { |
| return null; |
| } |
| final StringBuilder buffer = new StringBuilder(decodedStr); |
| encode(buffer, 0, buffer.length(), reserved); |
| return buffer.toString(); |
| } |
| |
| /** |
| * Encode an array of Strings. |
| * |
| * @param strings The array of Strings to encode. |
| * @return An array of encoded Strings. |
| */ |
| public static String[] encode(final String[] strings) { |
| if (strings == null) { |
| return null; |
| } |
| for (int i = 0; i < strings.length; i++) { |
| strings[i] = encode(strings[i]); |
| } |
| return strings; |
| } |
| |
| /** |
| * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters. |
| * |
| * @param buffer The StringBuilder to append to. |
| * @param offset The position in the buffer to start encoding at. |
| * @param length The number of characters to encode. |
| * @param reserved characters to encode. |
| */ |
| public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) { |
| int index = offset; |
| int count = length; |
| for (; count > 0; index++, count--) { |
| final char ch = buffer.charAt(index); |
| boolean match = ch == '%'; |
| if (reserved != null) { |
| for (int i = 0; !match && i < reserved.length; i++) { |
| if (ch == reserved[i]) { |
| match = true; |
| break; |
| } |
| } |
| } |
| if (match) { |
| // Encode |
| final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE), |
| Character.forDigit(ch & LOW_MASK, HEX_BASE) }; |
| buffer.setCharAt(index, '%'); |
| buffer.insert(index + 1, digits); |
| index += 2; |
| } |
| } |
| } |
| |
| /** |
| * Extracts the first element of a path. |
| * |
| * @param name StringBuilder containing the path. |
| * @return The first element of the path. |
| */ |
| public static String extractFirstElement(final StringBuilder name) { |
| final int len = name.length(); |
| if (len < 1) { |
| return null; |
| } |
| int startPos = 0; |
| if (name.charAt(0) == SEPARATOR_CHAR) { |
| startPos = 1; |
| } |
| for (int pos = startPos; pos < len; pos++) { |
| if (name.charAt(pos) == SEPARATOR_CHAR) { |
| // Found a separator |
| final String elem = name.substring(startPos, pos); |
| name.delete(startPos, pos + 1); |
| return elem; |
| } |
| } |
| |
| // No separator |
| final String elem = name.substring(startPos); |
| name.setLength(0); |
| return elem; |
| } |
| |
| /** |
| * Extract the query String from the URI. |
| * |
| * @param name StringBuilder containing the URI. |
| * @return The query string, if any. null otherwise. |
| */ |
| public static String extractQueryString(final StringBuilder name) { |
| for (int pos = 0; pos < name.length(); pos++) { |
| if (name.charAt(pos) == '?') { |
| final String queryString = name.substring(pos + 1); |
| name.delete(pos, name.length()); |
| return queryString; |
| } |
| } |
| |
| return null; |
| } |
| |
| /** |
| * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. |
| * <p> |
| * The scheme is extracted based on the currently supported schemes in the system. That is to say the schemes |
| * supported by the registered providers. |
| * </p> |
| * <p> |
| * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically |
| * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them. |
| * </p> |
| * @param schemes The schemes to check. |
| * @param uri The potential URI. May also be a name. |
| * @return The scheme name. Returns null if there is no scheme. |
| * @since 2.3 |
| */ |
| public static String extractScheme(final String[] schemes, final String uri) { |
| return extractScheme(schemes, uri, null); |
| } |
| |
| /** |
| * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. |
| * <p> |
| * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes |
| * supported by the registered providers. |
| * </p> |
| * <p> |
| * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically |
| * handle scheme extraction calls for URI parameters that are not actually URI's, but may be names with ':' in them. |
| * </p> |
| * @param schemes The schemes to check. |
| * @param uri The potential URI. May also just be a name. |
| * @param buffer Returns the remainder of the URI. |
| * @return The scheme name. Returns null if there is no scheme. |
| * @since 2.3 |
| */ |
| public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) { |
| if (buffer != null) { |
| buffer.setLength(0); |
| buffer.append(uri); |
| } |
| for (final String scheme : schemes) { |
| if (uri.startsWith(scheme + ":")) { |
| if (buffer != null) { |
| buffer.delete(0, uri.indexOf(':') + 1); |
| } |
| return scheme; |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Extracts the scheme from a URI. |
| * |
| * @param uri The URI. |
| * @return The scheme name. Returns null if there is no scheme. |
| * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. |
| */ |
| @Deprecated |
| public static String extractScheme(final String uri) { |
| return extractScheme(uri, null); |
| } |
| |
| /** |
| * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. |
| * |
| * @param uri The URI. |
| * @param buffer Returns the remainder of the URI. |
| * @return The scheme name. Returns null if there is no scheme. |
| * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. |
| */ |
| @Deprecated |
| public static String extractScheme(final String uri, final StringBuilder buffer) { |
| if (buffer != null) { |
| buffer.setLength(0); |
| buffer.append(uri); |
| } |
| |
| final int maxPos = uri.length(); |
| for (int pos = 0; pos < maxPos; pos++) { |
| final char ch = uri.charAt(pos); |
| |
| if (ch == ':') { |
| // Found the end of the scheme |
| final String scheme = uri.substring(0, pos); |
| if (scheme.length() <= 1 && SystemUtils.IS_OS_WINDOWS) { |
| // This is not a scheme, but a Windows drive letter |
| return null; |
| } |
| if (buffer != null) { |
| buffer.delete(0, pos + 1); |
| } |
| return scheme.intern(); |
| } |
| |
| if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { |
| // A scheme character |
| continue; |
| } |
| if (pos > 0 && ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.')) { |
| // A scheme character (these are not allowed as the first |
| // character of the scheme, but can be used as subsequent |
| // characters. |
| continue; |
| } |
| |
| // Not a scheme character |
| break; |
| } |
| |
| // No scheme in URI |
| return null; |
| } |
| |
| /** |
| * Normalises the separators in a name. |
| * |
| * @param name The StringBuilder containing the name |
| * @return true if the StringBuilder was modified. |
| */ |
| public static boolean fixSeparators(final StringBuilder name) { |
| boolean changed = false; |
| final int maxlen = name.length(); |
| for (int i = 0; i < maxlen; i++) { |
| final char ch = name.charAt(i); |
| if (ch == TRANS_SEPARATOR) { |
| name.setCharAt(i, SEPARATOR_CHAR); |
| changed = true; |
| } |
| } |
| return changed; |
| } |
| |
| /** |
| * Normalises a path. Does the following: |
| * <ul> |
| * <li>Removes empty path elements. |
| * <li>Handles '.' and '..' elements. |
| * <li>Removes trailing separator. |
| * </ul> |
| * |
| * Its assumed that the separators are already fixed. |
| * |
| * @param path The path to normalize. |
| * @return The FileType. |
| * @throws FileSystemException if an error occurs. |
| * |
| * @see #fixSeparators |
| */ |
| public static FileType normalisePath(final StringBuilder path) throws FileSystemException { |
| FileType fileType = FileType.FOLDER; |
| if (path.length() == 0) { |
| return fileType; |
| } |
| |
| if (path.charAt(path.length() - 1) != '/') { |
| fileType = FileType.FILE; |
| } |
| |
| // Adjust separators |
| // fixSeparators(path); |
| |
| // Determine the start of the first element |
| int startFirstElem = 0; |
| if (path.charAt(0) == SEPARATOR_CHAR) { |
| if (path.length() == 1) { |
| return fileType; |
| } |
| startFirstElem = 1; |
| } |
| |
| // Iterate over each element |
| int startElem = startFirstElem; |
| int maxlen = path.length(); |
| while (startElem < maxlen) { |
| // Find the end of the element |
| int endElem = startElem; |
| for (; endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR; endElem++) { |
| } |
| |
| final int elemLen = endElem - startElem; |
| if (elemLen == 0) { |
| // An empty element - axe it |
| path.delete(endElem, endElem + 1); |
| maxlen = path.length(); |
| continue; |
| } |
| if (elemLen == 1 && path.charAt(startElem) == '.') { |
| // A '.' element - axe it |
| path.delete(startElem, endElem + 1); |
| maxlen = path.length(); |
| continue; |
| } |
| if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') { |
| // A '..' element - remove the previous element |
| if (startElem == startFirstElem) { |
| // Previous element is missing |
| throw new FileSystemException("vfs.provider/invalid-relative-path.error"); |
| } |
| |
| // Find start of previous element |
| int pos = startElem - 2; |
| for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--) { |
| } |
| startElem = pos + 1; |
| |
| path.delete(startElem, endElem + 1); |
| maxlen = path.length(); |
| continue; |
| } |
| |
| // A regular element |
| startElem = endElem + 1; |
| } |
| |
| // Remove trailing separator |
| if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) { |
| path.delete(maxlen - 1, maxlen); |
| } |
| |
| return fileType; |
| } |
| |
| private UriParser() { |
| } |
| } |