| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.commons.io; |
| |
| import java.io.File; |
| import java.util.ArrayDeque; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Deque; |
| import java.util.List; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| import java.util.stream.Stream; |
| |
| /** |
| * General file name and file path manipulation utilities. The methods in this class |
| * operate on strings that represent relative or absolute paths. Nothing in this class |
| * ever accesses the file system, or depends on whether a path points to a file that exists. |
| * <p> |
| * When dealing with file names, you can hit problems when moving from a Windows |
| * based development machine to a UNIX based production machine. |
| * This class aims to help avoid those problems. |
| * </p> |
| * <p> |
| * <b>NOTE</b>: You may be able to avoid using this class entirely simply by |
| * using JDK {@link java.io.File File} objects and the two argument constructor |
| * {@link java.io.File#File(java.io.File, String) File(File,String)}. |
| * </p> |
| * <p> |
| * Most methods in this class are designed to work the same on both UNIX and Windows. |
| * Those that don't include 'System', 'Unix', or 'Windows' in their name. |
| * </p> |
| * <p> |
| * Most methods recognize both separators (forward and backslashes), and both |
| * sets of prefixes. See the Javadoc of each method for details. |
| * </p> |
| * <p> |
| * This class defines six components within a path (sometimes called a file name or a full file name). |
| * Given an absolute Windows path such as C:\dev\project\file.txt they are: |
| * </p> |
| * <ul> |
| * <li>the full file name, or just file name - C:\dev\project\file.txt</li> |
| * <li>the prefix - C:\</li> |
| * <li>the path - dev\project\</li> |
| * <li>the full path - C:\dev\project\</li> |
| * <li>the name - file.txt</li> |
| * <li>the base name - file</li> |
| * <li>the extension - txt</li> |
| * </ul> |
| * <p> |
| * Given an absolute UNIX path such as /dev/project/file.txt they are: |
| * </p> |
| * <ul> |
| * <li>the full file name, or just file name - /dev/project/file.txt</li> |
| * <li>the prefix - /</li> |
| * <li>the path - dev/project/</li> |
| * <li>the full path - /dev/project/</li> |
| * <li>the name - file.txt</li> |
| * <li>the base name - file</li> |
| * <li>the extension - txt</li> |
| * </ul> |
| * <p> |
| * Given a relative Windows path such as dev\project\file.txt they are: |
| * </p> |
| * <ul> |
| * <li>the full file name, or just file name - dev\project\file.txt</li> |
| * <li>the prefix - "" (the empty string)</li> |
| * <li>the path - dev\project\</li> |
| * <li>the full path - dev\project\</li> |
| * <li>the name - file.txt</li> |
| * <li>the base name - file</li> |
| * <li>the extension - txt</li> |
| * </ul> |
| * <p> |
| * Given a relative UNIX path such as dev/project/file.txt they are: |
| * </p> |
| * <ul> |
| * <li>the full file name, or just file name - dev/project/file.txt</li> |
| * <li>the prefix - "" (the empty string)</li> |
| * <li>the path - dev/project/</li> |
| * <li>the full path - dev/project/</li> |
| * <li>the name - file.txt</li> |
| * <li>the base name - file</li> |
| * <li>the extension - txt</li> |
| * </ul> |
| * |
| * |
| * <p> |
| * This class works best if directory names end with a separator. |
| * If you omit the last separator, it is impossible to determine if the last component |
| * corresponds to a file or a directory. This class treats final components |
| * that do not end with a separator as files, not directories. |
| * </p> |
| * <p> |
| * This class only supports UNIX and Windows style names. |
| * Prefixes are matched as follows: |
| * </p> |
| * <pre> |
| * Windows: |
| * a\b\c.txt --> "" --> relative |
| * \a\b\c.txt --> "\" --> current drive absolute |
| * C:a\b\c.txt --> "C:" --> drive relative |
| * C:\a\b\c.txt --> "C:\" --> absolute |
| * \\server\a\b\c.txt --> "\\server\" --> UNC |
| * |
| * Unix: |
| * a/b/c.txt --> "" --> relative |
| * /a/b/c.txt --> "/" --> absolute |
| * ~/a/b/c.txt --> "~/" --> current user |
| * ~ --> "~/" --> current user (slash added) |
| * ~user/a/b/c.txt --> "~user/" --> named user |
| * ~user --> "~user/" --> named user (slash added) |
| * </pre> |
| * <p> |
| * Both prefix styles are matched, irrespective of the machine that you are |
| * currently running on. |
| * </p> |
| * |
| * @since 1.1 |
| */ |
| public class FilenameUtils { |
| |
| private static final String[] EMPTY_STRING_ARRAY = {}; |
| |
| private static final String EMPTY_STRING = ""; |
| |
| private static final int NOT_FOUND = -1; |
| |
| /** |
| * The extension separator character. |
| * @since 1.4 |
| */ |
| public static final char EXTENSION_SEPARATOR = '.'; |
| |
| /** |
| * The extension separator String. |
| * @since 1.4 |
| */ |
| public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); |
| |
| /** |
| * The UNIX separator character. |
| */ |
| private static final char UNIX_NAME_SEPARATOR = '/'; |
| |
| /** |
| * The Windows separator character. |
| */ |
| private static final char WINDOWS_NAME_SEPARATOR = '\\'; |
| |
| /** |
| * The system separator character. |
| */ |
| private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar; |
| |
| /** |
| * The separator character that is the opposite of the system separator. |
| */ |
| private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR); |
| |
| private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); |
| |
| private static final int IPV4_MAX_OCTET_VALUE = 255; |
| |
| private static final int IPV6_MAX_HEX_GROUPS = 8; |
| |
| private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; |
| |
| private static final int MAX_UNSIGNED_SHORT = 0xffff; |
| |
| private static final int BASE_16 = 16; |
| |
| private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$"); |
| |
| /** |
| * Concatenates a fileName to a base path using normal command line style rules. |
| * <p> |
| * The effect is equivalent to resultant directory after changing |
| * directory to the first argument, followed by changing directory to |
| * the second argument. |
| * </p> |
| * <p> |
| * The first argument is the base path, the second is the path to concatenate. |
| * The returned path is always normalized via {@link #normalize(String)}, |
| * thus {@code ..} is handled. |
| * </p> |
| * <p> |
| * If {@code pathToAdd} is absolute (has an absolute prefix), then |
| * it will be normalized and returned. |
| * Otherwise, the paths will be joined, normalized and returned. |
| * </p> |
| * <p> |
| * The output will be the same on both UNIX and Windows except |
| * for the separator character. |
| * </p> |
| * <pre> |
| * /foo/ + bar --> /foo/bar |
| * /foo + bar --> /foo/bar |
| * /foo + /bar --> /bar |
| * /foo + C:/bar --> C:/bar |
| * /foo + C:bar --> C:bar [1] |
| * /foo/a/ + ../bar --> /foo/bar |
| * /foo/ + ../../bar --> null |
| * /foo/ + /bar --> /bar |
| * /foo/.. + /bar --> /bar |
| * /foo + bar/c.txt --> /foo/bar/c.txt |
| * /foo/c.txt + bar --> /foo/c.txt/bar [2] |
| * </pre> |
| * <p> |
| * [1] Note that the Windows relative drive prefix is unreliable when |
| * used with this method. |
| * </p> |
| * <p> |
| * [2] Note that the first parameter must be a path. If it ends with a name, then |
| * the name will be built into the concatenated path. If this might be a problem, |
| * use {@link #getFullPath(String)} on the base path argument. |
| * </p> |
| * |
| * @param basePath the base path to attach to, always treated as a path |
| * @param fullFileNameToAdd the file name (or path) to attach to the base |
| * @return the concatenated path, or null if invalid |
| * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) |
| */ |
| public static String concat(final String basePath, final String fullFileNameToAdd) { |
| final int prefix = getPrefixLength(fullFileNameToAdd); |
| if (prefix < 0) { |
| return null; |
| } |
| if (prefix > 0) { |
| return normalize(fullFileNameToAdd); |
| } |
| if (basePath == null) { |
| return null; |
| } |
| final int len = basePath.length(); |
| if (len == 0) { |
| return normalize(fullFileNameToAdd); |
| } |
| final char ch = basePath.charAt(len - 1); |
| if (isSeparator(ch)) { |
| return normalize(basePath + fullFileNameToAdd); |
| } |
| return normalize(basePath + '/' + fullFileNameToAdd); |
| } |
| |
| /** |
| * Determines whether the {@code parent} directory contains the {@code child} (a file or directory). |
| * This does not read from the file system, and there is no guarantee or expectation that |
| * these paths actually exist. |
| * <p> |
| * The files names are expected to be normalized. |
| * </p> |
| * |
| * Edge cases: |
| * <ul> |
| * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> |
| * <li>A directory does not contain itself: return false</li> |
| * <li>A null child file is not contained in any parent: return false</li> |
| * </ul> |
| * |
| * @param canonicalParent the path string to consider as the parent. |
| * @param canonicalChild the path string to consider as the child. |
| * @return true if the candidate leaf is under the specified composite. False otherwise. |
| * @since 2.2 |
| * @see FileUtils#directoryContains(File, File) |
| */ |
| public static boolean directoryContains(final String canonicalParent, final String canonicalChild) { |
| if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) { |
| return false; |
| } |
| |
| if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { |
| return false; |
| } |
| |
| final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR); |
| final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator; |
| |
| return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator); |
| } |
| |
| /** |
| * Does the work of getting the path. |
| * |
| * @param fileName the file name |
| * @param includeSeparator true to include the end separator |
| * @return the path |
| * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) |
| */ |
| private static String doGetFullPath(final String fileName, final boolean includeSeparator) { |
| if (fileName == null) { |
| return null; |
| } |
| final int prefix = getPrefixLength(fileName); |
| if (prefix < 0) { |
| return null; |
| } |
| if (prefix >= fileName.length()) { |
| if (includeSeparator) { |
| return getPrefix(fileName); // add end slash if necessary |
| } |
| return fileName; |
| } |
| final int index = indexOfLastSeparator(fileName); |
| if (index < 0) { |
| return fileName.substring(0, prefix); |
| } |
| int end = index + (includeSeparator ? 1 : 0); |
| if (end == 0) { |
| end++; |
| } |
| return fileName.substring(0, end); |
| } |
| |
| /** |
| * Does the work of getting the path. |
| * |
| * @param fileName the file name |
| * @param separatorAdd 0 to omit the end separator, 1 to return it |
| * @return the path |
| * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) |
| */ |
| private static String doGetPath(final String fileName, final int separatorAdd) { |
| if (fileName == null) { |
| return null; |
| } |
| final int prefix = getPrefixLength(fileName); |
| if (prefix < 0) { |
| return null; |
| } |
| final int index = indexOfLastSeparator(fileName); |
| final int endIndex = index + separatorAdd; |
| if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) { |
| return EMPTY_STRING; |
| } |
| return requireNonNullChars(fileName.substring(prefix, endIndex)); |
| } |
| |
| /** |
| * Internal method to perform the normalization. |
| * |
| * @param fileName the file name |
| * @param separator The separator character to use |
| * @param keepSeparator true to keep the final separator |
| * @return the normalized fileName |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) { |
| if (fileName == null) { |
| return null; |
| } |
| |
| requireNonNullChars(fileName); |
| |
| int size = fileName.length(); |
| if (size == 0) { |
| return fileName; |
| } |
| final int prefix = getPrefixLength(fileName); |
| if (prefix < 0) { |
| return null; |
| } |
| |
| final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy |
| fileName.getChars(0, fileName.length(), array, 0); |
| |
| // fix separators throughout |
| final char otherSeparator = flipSeparator(separator); |
| for (int i = 0; i < array.length; i++) { |
| if (array[i] == otherSeparator) { |
| array[i] = separator; |
| } |
| } |
| |
| // add extra separator on the end to simplify code below |
| boolean lastIsDirectory = true; |
| if (array[size - 1] != separator) { |
| array[size++] = separator; |
| lastIsDirectory = false; |
| } |
| |
| // adjoining slashes |
| // If we get here, prefix can only be 0 or greater, size 1 or greater |
| // If prefix is 0, set loop start to 1 to prevent index errors |
| for (int i = prefix != 0 ? prefix : 1; i < size; i++) { |
| if (array[i] == separator && array[i - 1] == separator) { |
| System.arraycopy(array, i, array, i - 1, size - i); |
| size--; |
| i--; |
| } |
| } |
| |
| // dot slash |
| for (int i = prefix + 1; i < size; i++) { |
| if (array[i] == separator && array[i - 1] == '.' && |
| (i == prefix + 1 || array[i - 2] == separator)) { |
| if (i == size - 1) { |
| lastIsDirectory = true; |
| } |
| System.arraycopy(array, i + 1, array, i - 1, size - i); |
| size -=2; |
| i--; |
| } |
| } |
| |
| // double dot slash |
| outer: |
| for (int i = prefix + 2; i < size; i++) { |
| if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' && |
| (i == prefix + 2 || array[i - 3] == separator)) { |
| if (i == prefix + 2) { |
| return null; |
| } |
| if (i == size - 1) { |
| lastIsDirectory = true; |
| } |
| int j; |
| for (j = i - 4 ; j >= prefix; j--) { |
| if (array[j] == separator) { |
| // remove b/../ from a/b/../c |
| System.arraycopy(array, i + 1, array, j + 1, size - i); |
| size -= i - j; |
| i = j + 1; |
| continue outer; |
| } |
| } |
| // remove a/../ from a/../c |
| System.arraycopy(array, i + 1, array, prefix, size - i); |
| size -= i + 1 - prefix; |
| i = prefix + 1; |
| } |
| } |
| |
| if (size <= 0) { // should never be less than 0 |
| return EMPTY_STRING; |
| } |
| if (size <= prefix) { // should never be less than prefix |
| return new String(array, 0, size); |
| } |
| if (lastIsDirectory && keepSeparator) { |
| return new String(array, 0, size); // keep trailing separator |
| } |
| return new String(array, 0, size - 1); // lose trailing separator |
| } |
| |
| /** |
| * Checks whether two file names are exactly equal. |
| * <p> |
| * No processing is performed on the file names other than comparison. |
| * This is merely a null-safe case-sensitive string equality. |
| * </p> |
| * |
| * @param fileName1 the first file name, may be null |
| * @param fileName2 the second file name, may be null |
| * @return true if the file names are equal, null equals null |
| * @see IOCase#SENSITIVE |
| */ |
| public static boolean equals(final String fileName1, final String fileName2) { |
| return equals(fileName1, fileName2, false, IOCase.SENSITIVE); |
| } |
| |
| /** |
| * Checks whether two file names are equal, optionally normalizing and providing |
| * control over the case-sensitivity. |
| * |
| * @param fileName1 the first file name, may be null |
| * @param fileName2 the second file name, may be null |
| * @param normalize whether to normalize the file names |
| * @param ioCase what case sensitivity rule to use, null means case-sensitive |
| * @return true if the file names are equal, null equals null |
| * @since 1.3 |
| */ |
| public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) { |
| |
| if (fileName1 == null || fileName2 == null) { |
| return fileName1 == null && fileName2 == null; |
| } |
| if (normalize) { |
| fileName1 = normalize(fileName1); |
| if (fileName1 == null) { |
| return false; |
| } |
| fileName2 = normalize(fileName2); |
| if (fileName2 == null) { |
| return false; |
| } |
| } |
| return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2); |
| } |
| |
| /** |
| * Checks whether two file names are equal after both have been normalized. |
| * <p> |
| * Both file names are first passed to {@link #normalize(String)}. |
| * The check is then performed in a case-sensitive manner. |
| * </p> |
| * |
| * @param fileName1 the first file name, may be null |
| * @param fileName2 the second file name, may be null |
| * @return true if the file names are equal, null equals null |
| * @see IOCase#SENSITIVE |
| */ |
| public static boolean equalsNormalized(final String fileName1, final String fileName2) { |
| return equals(fileName1, fileName2, true, IOCase.SENSITIVE); |
| } |
| |
| /** |
| * Checks whether two file names are equal using the case rules of the system |
| * after both have been normalized. |
| * <p> |
| * Both file names are first passed to {@link #normalize(String)}. |
| * The check is then performed case-sensitively on UNIX and |
| * case-insensitively on Windows. |
| * </p> |
| * |
| * @param fileName1 the first file name, may be null |
| * @param fileName2 the second file name, may be null |
| * @return true if the file names are equal, null equals null |
| * @see IOCase#SYSTEM |
| */ |
| public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { |
| return equals(fileName1, fileName2, true, IOCase.SYSTEM); |
| } |
| |
| /** |
| * Checks whether two file names are equal using the case rules of the system. |
| * <p> |
| * No processing is performed on the file names other than comparison. |
| * The check is case-sensitive on UNIX and case-insensitive on Windows. |
| * </p> |
| * |
| * @param fileName1 the first file name, may be null |
| * @param fileName2 the second file name, may be null |
| * @return true if the file names are equal, null equals null |
| * @see IOCase#SYSTEM |
| */ |
| public static boolean equalsOnSystem(final String fileName1, final String fileName2) { |
| return equals(fileName1, fileName2, false, IOCase.SYSTEM); |
| } |
| |
| /** |
| * Flips the Windows name separator to Linux and vice-versa. |
| * |
| * @param ch The Windows or Linux name separator. |
| * @return The Windows or Linux name separator. |
| */ |
| static char flipSeparator(final char ch) { |
| if (ch == UNIX_NAME_SEPARATOR) { |
| return WINDOWS_NAME_SEPARATOR; |
| } |
| if (ch == WINDOWS_NAME_SEPARATOR) { |
| return UNIX_NAME_SEPARATOR; |
| } |
| throw new IllegalArgumentException(String.valueOf(ch)); |
| } |
| |
| /** |
| * Special handling for NTFS ADS: Don't accept colon in the file name. |
| * |
| * @param fileName a file name |
| * @return ADS offsets. |
| */ |
| private static int getAdsCriticalOffset(final String fileName) { |
| // Step 1: Remove leading path segments. |
| final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR); |
| final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); |
| if (offset1 == -1) { |
| if (offset2 == -1) { |
| return 0; |
| } |
| return offset2 + 1; |
| } |
| if (offset2 == -1) { |
| return offset1 + 1; |
| } |
| return Math.max(offset1, offset2) + 1; |
| } |
| |
| /** |
| * Gets the base name, minus the full path and extension, from a full file name. |
| * <p> |
| * This method will handle a path in either UNIX or Windows format. |
| * The text after the last forward or backslash and before the last dot is returned. |
| * </p> |
| * <pre> |
| * a/b/c.txt --> c |
| * a\b\c.txt --> c |
| * a/b/c.foo.txt --> c.foo |
| * a.txt --> a |
| * a/b/c --> c |
| * a/b/c/ --> "" |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * </p> |
| * |
| * @param fileName the file name, null returns null |
| * @return the name of the file without the path, or an empty string if none exists |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| public static String getBaseName(final String fileName) { |
| return removeExtension(getName(fileName)); |
| } |
| |
| /** |
| * Gets the extension of a fileName. |
| * <p> |
| * This method returns the textual part of the file name after the last dot. |
| * There must be no directory separator after the dot. |
| * </p> |
| * <pre> |
| * foo.txt --> "txt" |
| * a/b/c.jpg --> "jpg" |
| * a/b.txt/c --> "" |
| * a/b/c --> "" |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on, with the |
| * exception of a possible {@link IllegalArgumentException} on Windows (see below). |
| * </p> |
| * <p> |
| * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". |
| * In this case, the name wouldn't be the name of a file, but the identifier of an |
| * alternate data stream (bar.txt) on the file foo.exe. The method used to return |
| * ".txt" here, which would be misleading. Commons IO 2.7 and later throw |
| * an {@link IllegalArgumentException} for names like this. |
| * </p> |
| * |
| * @param fileName the file name to retrieve the extension of. |
| * @return the extension of the file or an empty string if none exists or {@code null} |
| * if the file name is {@code null}. |
| * @throws IllegalArgumentException <b>Windows only:</b> the file name parameter is, in fact, |
| * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". |
| */ |
| public static String getExtension(final String fileName) throws IllegalArgumentException { |
| if (fileName == null) { |
| return null; |
| } |
| final int index = indexOfExtension(fileName); |
| if (index == NOT_FOUND) { |
| return EMPTY_STRING; |
| } |
| return fileName.substring(index + 1); |
| } |
| |
| /** |
| * Gets the full path (prefix + path) from a full file name. |
| * <p> |
| * This method will handle a file in either UNIX or Windows format. |
| * The method is entirely text based, and returns the text before and |
| * including the last forward or backslash. |
| * </p> |
| * <pre> |
| * C:\a\b\c.txt --> C:\a\b\ |
| * ~/a/b/c.txt --> ~/a/b/ |
| * a.txt --> "" |
| * a/b/c --> a/b/ |
| * a/b/c/ --> a/b/c/ |
| * C: --> C: |
| * C:\ --> C:\ |
| * ~ --> ~/ |
| * ~/ --> ~/ |
| * ~user --> ~user/ |
| * ~user/ --> ~user/ |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * </p> |
| * |
| * @param fileName the file name, null returns null |
| * @return the path of the file, an empty string if none exists, null if invalid |
| * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) |
| */ |
| public static String getFullPath(final String fileName) { |
| return doGetFullPath(fileName, true); |
| } |
| |
| /** |
| * Gets the full path (prefix + path) from a full file name, |
| * excluding the final directory separator. |
| * <p> |
| * This method will handle a file in either UNIX or Windows format. |
| * The method is entirely text based, and returns the text before the |
| * last forward or backslash. |
| * </p> |
| * <pre> |
| * C:\a\b\c.txt --> C:\a\b |
| * ~/a/b/c.txt --> ~/a/b |
| * a.txt --> "" |
| * a/b/c --> a/b |
| * a/b/c/ --> a/b/c |
| * C: --> C: |
| * C:\ --> C:\ |
| * ~ --> ~ |
| * ~/ --> ~ |
| * ~user --> ~user |
| * ~user/ --> ~user |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * </p> |
| * |
| * @param fileName the file name, null returns null |
| * @return the path of the file, an empty string if none exists, null if invalid |
| * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) |
| */ |
| public static String getFullPathNoEndSeparator(final String fileName) { |
| return doGetFullPath(fileName, false); |
| } |
| |
| /** |
| * Gets the name minus the path from a full file name. |
| * <p> |
| * This method will handle a file in either UNIX or Windows format. |
| * The text after the last forward or backslash is returned. |
| * </p> |
| * <pre> |
| * a/b/c.txt --> c.txt |
| * a\b\c.txt --> c.txt |
| * a.txt --> a.txt |
| * a/b/c --> c |
| * a/b/c/ --> "" |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * </p> |
| * |
| * @param fileName the file name, null returns null |
| * @return the name of the file without the path, or an empty string if none exists |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| public static String getName(final String fileName) { |
| if (fileName == null) { |
| return null; |
| } |
| return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1); |
| } |
| |
| /** |
| * Gets the path from a full file name, which excludes the prefix and the name. |
| * <p> |
| * This method will handle a file in either UNIX or Windows format. |
| * The method is entirely text based, and returns the text before and |
| * including the last forward or backslash. |
| * </p> |
| * <pre> |
| * C:\a\b\c.txt --> a\b\ |
| * ~/a/b/c.txt --> a/b/ |
| * a.txt --> "" |
| * a/b/c --> a/b/ |
| * a/b/c/ --> a/b/c/ |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * </p> |
| * <p> |
| * This method drops the prefix from the result. |
| * See {@link #getFullPath(String)} for the method that retains the prefix. |
| * </p> |
| * |
| * @param fileName the file name, null returns null |
| * @return the path of the file, an empty string if none exists, null if invalid |
| * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) |
| */ |
| public static String getPath(final String fileName) { |
| return doGetPath(fileName, 1); |
| } |
| |
| /** |
| * Gets the path (which excludes the prefix) from a full file name, and |
| * also excluding the final directory separator. |
| * <p> |
| * This method will handle a file in either UNIX or Windows format. |
| * The method is entirely text based, and returns the text before the |
| * last forward or backslash. |
| * </p> |
| * <pre> |
| * C:\a\b\c.txt --> a\b |
| * ~/a/b/c.txt --> a/b |
| * a.txt --> "" |
| * a/b/c --> a/b |
| * a/b/c/ --> a/b/c |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * </p> |
| * <p> |
| * This method drops the prefix from the result. |
| * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. |
| * </p> |
| * |
| * @param fileName the file name, null returns null |
| * @return the path of the file, an empty string if none exists, null if invalid |
| * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) |
| */ |
| public static String getPathNoEndSeparator(final String fileName) { |
| return doGetPath(fileName, 0); |
| } |
| |
| /** |
| * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name, |
| * <p> |
| * This method will handle a file in either UNIX or Windows format. |
| * The prefix includes the first slash in the full file name where applicable. |
| * </p> |
| * <pre> |
| * Windows: |
| * a\b\c.txt --> "" --> relative |
| * \a\b\c.txt --> "\" --> current drive absolute |
| * C:a\b\c.txt --> "C:" --> drive relative |
| * C:\a\b\c.txt --> "C:\" --> absolute |
| * \\server\a\b\c.txt --> "\\server\" --> UNC |
| * |
| * Unix: |
| * a/b/c.txt --> "" --> relative |
| * /a/b/c.txt --> "/" --> absolute |
| * ~/a/b/c.txt --> "~/" --> current user |
| * ~ --> "~/" --> current user (slash added) |
| * ~user/a/b/c.txt --> "~user/" --> named user |
| * ~user --> "~user/" --> named user (slash added) |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * ie. both UNIX and Windows prefixes are matched regardless. |
| * </p> |
| * |
| * @param fileName the file name, null returns null |
| * @return the prefix of the file, null if invalid |
| * @throws IllegalArgumentException if the result contains the null character ({@code U+0000}) |
| */ |
| public static String getPrefix(final String fileName) { |
| if (fileName == null) { |
| return null; |
| } |
| final int len = getPrefixLength(fileName); |
| if (len < 0) { |
| return null; |
| } |
| if (len > fileName.length()) { |
| requireNonNullChars(fileName); |
| return fileName + UNIX_NAME_SEPARATOR; |
| } |
| return requireNonNullChars(fileName.substring(0, len)); |
| } |
| |
| /** |
| * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}. |
| * <p> |
| * This method will handle a file in either UNIX or Windows format. |
| * </p> |
| * <p> |
| * The prefix length includes the first slash in the full file name |
| * if applicable. Thus, it is possible that the length returned is greater |
| * than the length of the input string. |
| * </p> |
| * <pre> |
| * Windows: |
| * a\b\c.txt --> 0 --> relative |
| * \a\b\c.txt --> 1 --> current drive absolute |
| * C:a\b\c.txt --> 2 --> drive relative |
| * C:\a\b\c.txt --> 3 --> absolute |
| * \\server\a\b\c.txt --> 9 --> UNC |
| * \\\a\b\c.txt --> -1 --> error |
| * |
| * Unix: |
| * a/b/c.txt --> 0 --> relative |
| * /a/b/c.txt --> 1 --> absolute |
| * ~/a/b/c.txt --> 2 --> current user |
| * ~ --> 2 --> current user (slash added) |
| * ~user/a/b/c.txt --> 6 --> named user |
| * ~user --> 6 --> named user (slash added) |
| * //server/a/b/c.txt --> 9 |
| * ///a/b/c.txt --> -1 --> error |
| * C: --> 0 --> valid file name as only null character and / are reserved characters |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * ie. both UNIX and Windows prefixes are matched regardless. |
| * </p> |
| * <p> |
| * Note that a leading // (or \\) is used to indicate a UNC name on Windows. |
| * These must be followed by a server name, so double-slashes are not collapsed |
| * to a single slash at the start of the file name. |
| * </p> |
| * |
| * @param fileName the file name to find the prefix in, null returns -1 |
| * @return the length of the prefix, -1 if invalid or null |
| */ |
| public static int getPrefixLength(final String fileName) { |
| if (fileName == null) { |
| return NOT_FOUND; |
| } |
| final int len = fileName.length(); |
| if (len == 0) { |
| return 0; |
| } |
| char ch0 = fileName.charAt(0); |
| if (ch0 == ':') { |
| return NOT_FOUND; |
| } |
| if (len == 1) { |
| if (ch0 == '~') { |
| return 2; // return a length greater than the input |
| } |
| return isSeparator(ch0) ? 1 : 0; |
| } |
| if (ch0 == '~') { |
| int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1); |
| int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1); |
| if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { |
| return len + 1; // return a length greater than the input |
| } |
| posUnix = posUnix == NOT_FOUND ? posWin : posUnix; |
| posWin = posWin == NOT_FOUND ? posUnix : posWin; |
| return Math.min(posUnix, posWin) + 1; |
| } |
| final char ch1 = fileName.charAt(1); |
| if (ch1 == ':') { |
| ch0 = Character.toUpperCase(ch0); |
| if (ch0 >= 'A' && ch0 <= 'Z') { |
| if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) { |
| return 0; |
| } |
| if (len == 2 || !isSeparator(fileName.charAt(2))) { |
| return 2; |
| } |
| return 3; |
| } |
| if (ch0 == UNIX_NAME_SEPARATOR) { |
| return 1; |
| } |
| return NOT_FOUND; |
| |
| } |
| if (!isSeparator(ch0) || !isSeparator(ch1)) { |
| return isSeparator(ch0) ? 1 : 0; |
| } |
| int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2); |
| int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2); |
| if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { |
| return NOT_FOUND; |
| } |
| posUnix = posUnix == NOT_FOUND ? posWin : posUnix; |
| posWin = posWin == NOT_FOUND ? posUnix : posWin; |
| final int pos = Math.min(posUnix, posWin) + 1; |
| final String hostnamePart = fileName.substring(2, pos - 1); |
| return isValidHostName(hostnamePart) ? pos : NOT_FOUND; |
| } |
| |
| /** |
| * Returns the index of the last extension separator character, which is a dot. |
| * <p> |
| * This method also checks that there is no directory separator after the last dot. To do this it uses |
| * {@link #indexOfLastSeparator(String)} which will handle a file in either UNIX or Windows format. |
| * </p> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on, with the |
| * exception of a possible {@link IllegalArgumentException} on Windows (see below). |
| * </p> |
| * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". |
| * In this case, the name wouldn't be the name of a file, but the identifier of an |
| * alternate data stream (bar.txt) on the file foo.exe. The method used to return |
| * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing |
| * an {@link IllegalArgumentException} for names like this. |
| * |
| * @param fileName |
| * the file name to find the last extension separator in, null returns -1 |
| * @return the index of the last extension separator character, or -1 if there is no such character |
| * @throws IllegalArgumentException <b>Windows only:</b> the file name parameter is, in fact, |
| * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". |
| */ |
| public static int indexOfExtension(final String fileName) throws IllegalArgumentException { |
| if (fileName == null) { |
| return NOT_FOUND; |
| } |
| if (isSystemWindows()) { |
| // Special handling for NTFS ADS: Don't accept colon in the file name. |
| final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); |
| if (offset != -1) { |
| throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); |
| } |
| } |
| final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); |
| final int lastSeparator = indexOfLastSeparator(fileName); |
| return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; |
| } |
| |
| /** |
| * Returns the index of the last directory separator character. |
| * <p> |
| * This method will handle a file in either UNIX or Windows format. |
| * The position of the last forward or backslash is returned. |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * |
| * @param fileName the file name to find the last path separator in, null returns -1 |
| * @return the index of the last separator character, or -1 if there |
| * is no such character |
| */ |
| public static int indexOfLastSeparator(final String fileName) { |
| if (fileName == null) { |
| return NOT_FOUND; |
| } |
| final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR); |
| final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR); |
| return Math.max(lastUnixPos, lastWindowsPos); |
| } |
| |
| private static boolean isEmpty(final String string) { |
| return string == null || string.isEmpty(); |
| } |
| |
| /** |
| * Checks whether the extension of the file name is one of those specified. |
| * <p> |
| * This method obtains the extension as the textual part of the file name |
| * after the last dot. There must be no directory separator after the dot. |
| * The extension check is case-sensitive on all platforms. |
| * |
| * @param fileName the file name, null returns false |
| * @param extensions the extensions to check for, null checks for no extension |
| * @return true if the file name is one of the extensions |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| public static boolean isExtension(final String fileName, final Collection<String> extensions) { |
| if (fileName == null) { |
| return false; |
| } |
| requireNonNullChars(fileName); |
| |
| if (extensions == null || extensions.isEmpty()) { |
| return indexOfExtension(fileName) == NOT_FOUND; |
| } |
| return extensions.contains(getExtension(fileName)); |
| } |
| |
| /** |
| * Checks whether the extension of the file name is that specified. |
| * <p> |
| * This method obtains the extension as the textual part of the file name |
| * after the last dot. There must be no directory separator after the dot. |
| * The extension check is case-sensitive on all platforms. |
| * |
| * @param fileName the file name, null returns false |
| * @param extension the extension to check for, null or empty checks for no extension |
| * @return true if the file name has the specified extension |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| public static boolean isExtension(final String fileName, final String extension) { |
| if (fileName == null) { |
| return false; |
| } |
| requireNonNullChars(fileName); |
| |
| if (isEmpty(extension)) { |
| return indexOfExtension(fileName) == NOT_FOUND; |
| } |
| return getExtension(fileName).equals(extension); |
| } |
| |
| /** |
| * Checks whether the extension of the file name is one of those specified. |
| * <p> |
| * This method obtains the extension as the textual part of the file name |
| * after the last dot. There must be no directory separator after the dot. |
| * The extension check is case-sensitive on all platforms. |
| * |
| * @param fileName the file name, null returns false |
| * @param extensions the extensions to check for, null checks for no extension |
| * @return true if the file name is one of the extensions |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| public static boolean isExtension(final String fileName, final String... extensions) { |
| if (fileName == null) { |
| return false; |
| } |
| requireNonNullChars(fileName); |
| |
| if (extensions == null || extensions.length == 0) { |
| return indexOfExtension(fileName) == NOT_FOUND; |
| } |
| final String fileExt = getExtension(fileName); |
| return Stream.of(extensions).anyMatch(fileExt::equals); |
| } |
| |
| /** |
| * Checks whether a given string represents a valid IPv4 address. |
| * |
| * @param name the name to validate |
| * @return true if the given name is a valid IPv4 address |
| */ |
| // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address |
| private static boolean isIPv4Address(final String name) { |
| final Matcher m = IPV4_PATTERN.matcher(name); |
| if (!m.matches() || m.groupCount() != 4) { |
| return false; |
| } |
| |
| // verify that address subgroups are legal |
| for (int i = 1; i <= 4; i++) { |
| final String ipSegment = m.group(i); |
| final int iIpSegment = Integer.parseInt(ipSegment); |
| if (iIpSegment > IPV4_MAX_OCTET_VALUE) { |
| return false; |
| } |
| |
| if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { |
| return false; |
| } |
| |
| } |
| |
| return true; |
| } |
| |
| // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address |
| /** |
| * Checks whether a given string represents a valid IPv6 address. |
| * |
| * @param inet6Address the name to validate |
| * @return true if the given name is a valid IPv6 address |
| */ |
| private static boolean isIPv6Address(final String inet6Address) { |
| final boolean containsCompressedZeroes = inet6Address.contains("::"); |
| if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) { |
| return false; |
| } |
| if (inet6Address.startsWith(":") && !inet6Address.startsWith("::") |
| || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) { |
| return false; |
| } |
| String[] octets = inet6Address.split(":"); |
| if (containsCompressedZeroes) { |
| final List<String> octetList = new ArrayList<>(Arrays.asList(octets)); |
| if (inet6Address.endsWith("::")) { |
| // String.split() drops ending empty segments |
| octetList.add(""); |
| } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) { |
| octetList.remove(0); |
| } |
| octets = octetList.toArray(EMPTY_STRING_ARRAY); |
| } |
| if (octets.length > IPV6_MAX_HEX_GROUPS) { |
| return false; |
| } |
| int validOctets = 0; |
| int emptyOctets = 0; // consecutive empty chunks |
| for (int index = 0; index < octets.length; index++) { |
| final String octet = octets[index]; |
| if (octet.isEmpty()) { |
| emptyOctets++; |
| if (emptyOctets > 1) { |
| return false; |
| } |
| } else { |
| emptyOctets = 0; |
| // Is last chunk an IPv4 address? |
| if (index == octets.length - 1 && octet.contains(".")) { |
| if (!isIPv4Address(octet)) { |
| return false; |
| } |
| validOctets += 2; |
| continue; |
| } |
| if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) { |
| return false; |
| } |
| final int octetInt; |
| try { |
| octetInt = Integer.parseInt(octet, BASE_16); |
| } catch (final NumberFormatException e) { |
| return false; |
| } |
| if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) { |
| return false; |
| } |
| } |
| validOctets++; |
| } |
| return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes); |
| } |
| |
| /** |
| * Checks whether a given string is a valid host name according to |
| * RFC 3986 - not accepting IP addresses. |
| * |
| * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" |
| * @param name the hostname to validate |
| * @return true if the given name is a valid host name |
| */ |
| private static boolean isRFC3986HostName(final String name) { |
| final String[] parts = name.split("\\.", -1); |
| for (int i = 0; i < parts.length; i++) { |
| if (parts[i].isEmpty()) { |
| // trailing dot is legal, otherwise we've hit a .. sequence |
| return i == parts.length - 1; |
| } |
| if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Checks if the character is a separator. |
| * |
| * @param ch the character to check |
| * @return true if it is a separator character |
| */ |
| private static boolean isSeparator(final char ch) { |
| return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR; |
| } |
| |
| /** |
| * Determines if Windows file system is in use. |
| * |
| * @return true if the system is Windows |
| */ |
| static boolean isSystemWindows() { |
| return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR; |
| } |
| |
| /** |
| * Checks whether a given string is a valid host name according to |
| * RFC 3986. |
| * |
| * <p>Accepted are IP addresses (v4 and v6) as well as what the |
| * RFC calls a "reg-name". Percent encoded names don't seem to be |
| * valid names in UNC paths.</p> |
| * |
| * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" |
| * @param name the hostname to validate |
| * @return true if the given name is a valid host name |
| */ |
| private static boolean isValidHostName(final String name) { |
| return isIPv6Address(name) || isRFC3986HostName(name); |
| } |
| |
| /** |
| * Normalizes a path, removing double and single dot path steps. |
| * <p> |
| * This method normalizes a path to a standard format. |
| * The input may contain separators in either UNIX or Windows format. |
| * The output will contain separators in the format of the system. |
| * <p> |
| * A trailing slash will be retained. |
| * A double slash will be merged to a single slash (but UNC names are handled). |
| * A single dot path segment will be removed. |
| * A double dot will cause that path segment and the one before to be removed. |
| * If the double dot has no parent path segment, {@code null} is returned. |
| * <p> |
| * The output will be the same on both UNIX and Windows except |
| * for the separator character. |
| * <pre> |
| * /foo// --> /foo/ |
| * /foo/./ --> /foo/ |
| * /foo/../bar --> /bar |
| * /foo/../bar/ --> /bar/ |
| * /foo/../bar/../baz --> /baz |
| * //foo//./bar --> //foo/bar |
| * /../ --> null |
| * ../foo --> null |
| * foo/bar/.. --> foo/ |
| * foo/../../bar --> null |
| * foo/../bar --> bar |
| * //server/foo/../bar --> //server/bar |
| * //server/../bar --> null |
| * C:\foo\..\bar --> C:\bar |
| * C:\..\bar --> null |
| * ~/foo/../bar/ --> ~/bar/ |
| * ~/../bar --> null |
| * </pre> |
| * (Note the file separator will be correct for Windows/Unix.) |
| * |
| * @param fileName the file name to normalize, null returns null |
| * @return the normalized fileName, or null if invalid |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| public static String normalize(final String fileName) { |
| return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true); |
| } |
| |
| /** |
| * Normalizes a path, removing double and single dot path steps. |
| * <p> |
| * This method normalizes a path to a standard format. |
| * The input may contain separators in either UNIX or Windows format. |
| * The output will contain separators in the format specified. |
| * <p> |
| * A trailing slash will be retained. |
| * A double slash will be merged to a single slash (but UNC names are handled). |
| * A single dot path segment will be removed. |
| * A double dot will cause that path segment and the one before to be removed. |
| * If the double dot has no parent path segment to work with, {@code null} |
| * is returned. |
| * <p> |
| * The output will be the same on both UNIX and Windows except |
| * for the separator character. |
| * <pre> |
| * /foo// --> /foo/ |
| * /foo/./ --> /foo/ |
| * /foo/../bar --> /bar |
| * /foo/../bar/ --> /bar/ |
| * /foo/../bar/../baz --> /baz |
| * //foo//./bar --> /foo/bar |
| * /../ --> null |
| * ../foo --> null |
| * foo/bar/.. --> foo/ |
| * foo/../../bar --> null |
| * foo/../bar --> bar |
| * //server/foo/../bar --> //server/bar |
| * //server/../bar --> null |
| * C:\foo\..\bar --> C:\bar |
| * C:\..\bar --> null |
| * ~/foo/../bar/ --> ~/bar/ |
| * ~/../bar --> null |
| * </pre> |
| * The output will be the same on both UNIX and Windows including |
| * the separator character. |
| * |
| * @param fileName the file name to normalize, null returns null |
| * @param unixSeparator {@code true} if a UNIX separator should |
| * be used or {@code false} if a Windows separator should be used. |
| * @return the normalized fileName, or null if invalid |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| * @since 2.0 |
| */ |
| public static String normalize(final String fileName, final boolean unixSeparator) { |
| return doNormalize(fileName, toSeparator(unixSeparator), true); |
| } |
| |
| /** |
| * Normalizes a path, removing double and single dot path steps, |
| * and removing any final directory separator. |
| * <p> |
| * This method normalizes a path to a standard format. |
| * The input may contain separators in either UNIX or Windows format. |
| * The output will contain separators in the format of the system. |
| * <p> |
| * A trailing slash will be removed. |
| * A double slash will be merged to a single slash (but UNC names are handled). |
| * A single dot path segment will be removed. |
| * A double dot will cause that path segment and the one before to be removed. |
| * If the double dot has no parent path segment to work with, {@code null} |
| * is returned. |
| * <p> |
| * The output will be the same on both UNIX and Windows except |
| * for the separator character. |
| * <pre> |
| * /foo// --> /foo |
| * /foo/./ --> /foo |
| * /foo/../bar --> /bar |
| * /foo/../bar/ --> /bar |
| * /foo/../bar/../baz --> /baz |
| * //foo//./bar --> /foo/bar |
| * /../ --> null |
| * ../foo --> null |
| * foo/bar/.. --> foo |
| * foo/../../bar --> null |
| * foo/../bar --> bar |
| * //server/foo/../bar --> //server/bar |
| * //server/../bar --> null |
| * C:\foo\..\bar --> C:\bar |
| * C:\..\bar --> null |
| * ~/foo/../bar/ --> ~/bar |
| * ~/../bar --> null |
| * </pre> |
| * (Note the file separator returned will be correct for Windows/Unix) |
| * |
| * @param fileName the file name to normalize, null returns null |
| * @return the normalized fileName, or null if invalid |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| public static String normalizeNoEndSeparator(final String fileName) { |
| return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false); |
| } |
| |
| /** |
| * Normalizes a path, removing double and single dot path steps, |
| * and removing any final directory separator. |
| * <p> |
| * This method normalizes a path to a standard format. |
| * The input may contain separators in either UNIX or Windows format. |
| * The output will contain separators in the format specified. |
| * <p> |
| * A trailing slash will be removed. |
| * A double slash will be merged to a single slash (but UNC names are handled). |
| * A single dot path segment will be removed. |
| * A double dot will cause that path segment and the one before to be removed. |
| * If the double dot has no parent path segment to work with, {@code null} |
| * is returned. |
| * <p> |
| * The output will be the same on both UNIX and Windows including |
| * the separator character. |
| * <pre> |
| * /foo// --> /foo |
| * /foo/./ --> /foo |
| * /foo/../bar --> /bar |
| * /foo/../bar/ --> /bar |
| * /foo/../bar/../baz --> /baz |
| * //foo//./bar --> /foo/bar |
| * /../ --> null |
| * ../foo --> null |
| * foo/bar/.. --> foo |
| * foo/../../bar --> null |
| * foo/../bar --> bar |
| * //server/foo/../bar --> //server/bar |
| * //server/../bar --> null |
| * C:\foo\..\bar --> C:\bar |
| * C:\..\bar --> null |
| * ~/foo/../bar/ --> ~/bar |
| * ~/../bar --> null |
| * </pre> |
| * |
| * @param fileName the file name to normalize, null returns null |
| * @param unixSeparator {@code true} if a UNIX separator should |
| * be used or {@code false} if a Windows separator should be used. |
| * @return the normalized fileName, or null if invalid |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| * @since 2.0 |
| */ |
| public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) { |
| return doNormalize(fileName, toSeparator(unixSeparator), false); |
| } |
| |
| /** |
| * Removes the extension from a fileName. |
| * <p> |
| * This method returns the textual part of the file name before the last dot. |
| * There must be no directory separator after the dot. |
| * <pre> |
| * foo.txt --> foo |
| * .txt --> "" (empty string) |
| * a\b\c.jpg --> a\b\c |
| * /a/b/c.jpg --> /a/b/c |
| * a\b\c --> a\b\c |
| * a.b\c --> a.b\c |
| * </pre> |
| * <p> |
| * The output will be the same irrespective of the machine that the code is running on. |
| * |
| * @param fileName the file name, null returns null |
| * @return the file name minus the extension |
| * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) |
| */ |
| public static String removeExtension(final String fileName) { |
| if (fileName == null) { |
| return null; |
| } |
| requireNonNullChars(fileName); |
| |
| final int index = indexOfExtension(fileName); |
| if (index == NOT_FOUND) { |
| return fileName; |
| } |
| return fileName.substring(0, index); |
| } |
| |
| /** |
| * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions. |
| * |
| * This may be used to defend against poison byte attacks. |
| * |
| * @param path the path to check |
| * @return The input |
| * @throws IllegalArgumentException if path contains the null character ({@code U+0000}) |
| */ |
| private static String requireNonNullChars(final String path) { |
| if (path.indexOf(0) >= 0) { |
| throw new IllegalArgumentException( |
| "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it"); |
| } |
| return path; |
| } |
| |
| /** |
| * Converts all separators to the system separator. |
| * |
| * @param path the path to be changed, null ignored. |
| * @return the updated path. |
| */ |
| public static String separatorsToSystem(final String path) { |
| return FileSystem.getCurrent().normalizeSeparators(path); |
| } |
| |
| /** |
| * Converts all separators to the UNIX separator of forward slash. |
| * |
| * @param path the path to be changed, null ignored. |
| * @return the new path. |
| */ |
| public static String separatorsToUnix(final String path) { |
| return FileSystem.LINUX.normalizeSeparators(path); |
| } |
| |
| /** |
| * Converts all separators to the Windows separator of backslash. |
| * |
| * @param path the path to be changed, null ignored. |
| * @return the updated path. |
| */ |
| public static String separatorsToWindows(final String path) { |
| return FileSystem.WINDOWS.normalizeSeparators(path); |
| } |
| |
| /** |
| * Splits a string into a number of tokens. |
| * The text is split by '?' and '*'. |
| * Where multiple '*' occur consecutively they are collapsed into a single '*'. |
| * |
| * @param text the text to split |
| * @return the array of tokens, never null |
| */ |
| static String[] splitOnTokens(final String text) { |
| // used by wildcardMatch |
| // package level so a unit test may run on this |
| |
| if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { |
| return new String[] { text }; |
| } |
| |
| final char[] array = text.toCharArray(); |
| final ArrayList<String> list = new ArrayList<>(); |
| final StringBuilder buffer = new StringBuilder(); |
| char prevChar = 0; |
| for (final char ch : array) { |
| if (ch == '?' || ch == '*') { |
| if (buffer.length() != 0) { |
| list.add(buffer.toString()); |
| buffer.setLength(0); |
| } |
| if (ch == '?') { |
| list.add("?"); |
| } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' |
| list.add("*"); |
| } |
| } else { |
| buffer.append(ch); |
| } |
| prevChar = ch; |
| } |
| if (buffer.length() != 0) { |
| list.add(buffer.toString()); |
| } |
| |
| return list.toArray(EMPTY_STRING_ARRAY); |
| } |
| |
| /** |
| * Returns '/' if given true, '\\' otherwise. |
| * |
| * @param unixSeparator which separator to return. |
| * @return '/' if given true, '\\' otherwise. |
| */ |
| private static char toSeparator(final boolean unixSeparator) { |
| return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR; |
| } |
| |
| /** |
| * Checks a fileName to see if it matches the specified wildcard matcher, |
| * always testing case-sensitive. |
| * <p> |
| * The wildcard matcher uses the characters '?' and '*' to represent a |
| * single or multiple (zero or more) wildcard characters. |
| * This is the same as often found on DOS/Unix command lines. |
| * The check is case-sensitive always. |
| * <pre> |
| * wildcardMatch("c.txt", "*.txt") --> true |
| * wildcardMatch("c.txt", "*.jpg") --> false |
| * wildcardMatch("a/b/c.txt", "a/b/*") --> true |
| * wildcardMatch("c.txt", "*.???") --> true |
| * wildcardMatch("c.txt", "*.????") --> false |
| * </pre> |
| * N.B. the sequence "*?" does not work properly at present in match strings. |
| * |
| * @param fileName the file name to match on |
| * @param wildcardMatcher the wildcard string to match against |
| * @return true if the file name matches the wildcard string |
| * @see IOCase#SENSITIVE |
| */ |
| public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) { |
| return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE); |
| } |
| |
| /** |
| * Checks a fileName to see if it matches the specified wildcard matcher |
| * allowing control over case-sensitivity. |
| * <p> |
| * The wildcard matcher uses the characters '?' and '*' to represent a |
| * single or multiple (zero or more) wildcard characters. |
| * N.B. the sequence "*?" does not work properly at present in match strings. |
| * |
| * @param fileName the file name to match on |
| * @param wildcardMatcher the wildcard string to match against |
| * @param ioCase what case sensitivity rule to use, null means case-sensitive |
| * @return true if the file name matches the wildcard string |
| * @since 1.3 |
| */ |
| public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) { |
| if (fileName == null && wildcardMatcher == null) { |
| return true; |
| } |
| if (fileName == null || wildcardMatcher == null) { |
| return false; |
| } |
| ioCase = IOCase.value(ioCase, IOCase.SENSITIVE); |
| final String[] wcs = splitOnTokens(wildcardMatcher); |
| boolean anyChars = false; |
| int textIdx = 0; |
| int wcsIdx = 0; |
| final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length); |
| |
| // loop around a backtrack stack, to handle complex * matching |
| do { |
| if (!backtrack.isEmpty()) { |
| final int[] array = backtrack.pop(); |
| wcsIdx = array[0]; |
| textIdx = array[1]; |
| anyChars = true; |
| } |
| |
| // loop whilst tokens and text left to process |
| while (wcsIdx < wcs.length) { |
| |
| if (wcs[wcsIdx].equals("?")) { |
| // ? so move to next text char |
| textIdx++; |
| if (textIdx > fileName.length()) { |
| break; |
| } |
| anyChars = false; |
| |
| } else if (wcs[wcsIdx].equals("*")) { |
| // set any chars status |
| anyChars = true; |
| if (wcsIdx == wcs.length - 1) { |
| textIdx = fileName.length(); |
| } |
| |
| } else { |
| // matching text token |
| if (anyChars) { |
| // any chars then try to locate text token |
| textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]); |
| if (textIdx == NOT_FOUND) { |
| // token not found |
| break; |
| } |
| final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]); |
| if (repeat >= 0) { |
| backtrack.push(new int[] {wcsIdx, repeat}); |
| } |
| } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) { |
| // matching from current position |
| // couldn't match token |
| break; |
| } |
| |
| // matched text token, move text index to end of matched token |
| textIdx += wcs[wcsIdx].length(); |
| anyChars = false; |
| } |
| |
| wcsIdx++; |
| } |
| |
| // full match |
| if (wcsIdx == wcs.length && textIdx == fileName.length()) { |
| return true; |
| } |
| |
| } while (!backtrack.isEmpty()); |
| |
| return false; |
| } |
| |
| /** |
| * Checks a fileName to see if it matches the specified wildcard matcher |
| * using the case rules of the system. |
| * <p> |
| * The wildcard matcher uses the characters '?' and '*' to represent a |
| * single or multiple (zero or more) wildcard characters. |
| * This is the same as often found on DOS/Unix command lines. |
| * The check is case-sensitive on UNIX and case-insensitive on Windows. |
| * <pre> |
| * wildcardMatch("c.txt", "*.txt") --> true |
| * wildcardMatch("c.txt", "*.jpg") --> false |
| * wildcardMatch("a/b/c.txt", "a/b/*") --> true |
| * wildcardMatch("c.txt", "*.???") --> true |
| * wildcardMatch("c.txt", "*.????") --> false |
| * </pre> |
| * N.B. the sequence "*?" does not work properly at present in match strings. |
| * |
| * @param fileName the file name to match on |
| * @param wildcardMatcher the wildcard string to match against |
| * @return true if the file name matches the wildcard string |
| * @see IOCase#SYSTEM |
| */ |
| public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) { |
| return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM); |
| } |
| |
/**
 * Creates a new instance. Instances should NOT be constructed in standard programming;
 * the utility methods of this class are static and are meant to be called on the class itself.
 *
 * @deprecated TODO Make private in 3.0.
 */
@Deprecated
public FilenameUtils() {
    // empty
}
| } |