/*
 *
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
| |
| package flash.util; |
| |
| import java.util.StringTokenizer; |
| import java.io.File; |
| |
/**
 * Static string helpers: thin wrappers around the regular-expression methods
 * of {@link String}, backslash-escape aware searching, simple wildcard
 * matching, HTML/string-literal formatting and a hex dump.
 * <p>
 * Regular expressions are handled by the JDK itself (the {@code String}
 * methods backed by {@link java.util.regex.Pattern}).
 */
public class StringUtils
{
    /**
     * Splits the input around matches of the regular expression.
     *
     * @param input string to be split
     * @param regularExpression pattern used for matching
     * @return the result of {@code input.split(regularExpression)}
     */
    public static String[] split(String input, String regularExpression)
    {
        return input.split(regularExpression);
    }

    /**
     * Splits the input around matches of the regular expression, producing a
     * bounded number of pieces.
     *
     * @param input string to be split
     * @param regularExpression pattern used for matching
     * @param limit passed straight through to {@link String#split(String, int)}
     * @return the result of {@code input.split(regularExpression, limit)}
     */
    public static String[] split(String input, String regularExpression, int limit)
    {
        return input.split(regularExpression, limit);
    }

    /**
     * Replaces the first substring of {@code target} that matches the given
     * regular expression with the given replacement.
     *
     * @param target the string to operate on
     * @param regex the regular expression to which the string is to be matched
     * @param replacement the replacement text, interpreted as by
     *        {@link String#replaceFirst(String, String)}
     * @return the resulting {@code String}
     * @throws IllegalArgumentException if the regular expression's syntax is invalid
     */
    public static String replaceFirst(String target, String regex, String replacement)
    {
        return target.replaceFirst(regex, replacement);
    }

    /**
     * Splits a list of paths into its entries.
     * <p>
     * When the platform separator is {@code ";"} only that separator is used;
     * on every other platform both the platform separator and {@code ";"} are
     * accepted.
     *
     * @param paths the separator-delimited path list
     * @return the individual path entries
     */
    public static String[] splitPath(String paths)
    {
        // [paul] The natural thing for a Java/Linux guy is to
        // use ":" for ASCLASSPATH, but this makes using the
        // same web.xml file for Windows and Linux difficult,
        // so for Linux use File.pathSeparator and ";".
        if (File.pathSeparator.equals(";"))
        {
            return StringUtils.split(paths, File.pathSeparator);
        }
        else
        {
            // regex alternation: platform separator OR ';'
            return StringUtils.split(paths, File.pathSeparator + "|;");
        }
    }

    /**
     * Concatenates two string arrays into a newly allocated array.
     *
     * @param a the leading elements
     * @param b the trailing elements
     * @return a new array holding the elements of {@code a} followed by those of {@code b}
     */
    public static String[] concat(String[] a, String[] b)
    {
        String[] c = new String[a.length + b.length];
        System.arraycopy(a, 0, c, 0, a.length);
        System.arraycopy(b, 0, c, a.length, b.length);
        return c;
    }

    // FIXME: replaceAll is often called with "[^A-Za-z0-9]" regularExpression.  Compiling an expression is
    // very expensive.  We should save the compilation of this expression and create a special method in here to use
    // instead of replaceAll

    /**
     * Replaces every match of the regular expression in the input with the
     * replacement.
     *
     * @param input string to be examined
     * @param regularExpression the regular expression whose matches are replaced
     * @param replacement the replacement text, interpreted as by
     *        {@link String#replaceAll(String, String)}
     * @return the input with every match replaced
     */
    public static String replaceAll(String input, String regularExpression, String replacement)
    {
        return input.replaceAll(regularExpression, replacement);
    }

    /**
     * Replaces every literal (non-regex) occurrence of {@code from} in
     * {@code str} with {@code to}.
     *
     * @param str the string to examine; may be {@code null}
     * @param from the literal text to look for
     * @param to the literal text to insert instead
     * @return the substituted string, or {@code str} itself when {@code str}
     *         or {@code to} is {@code null}, {@code from} is {@code null} or
     *         empty, or {@code from} does not occur in {@code str}
     */
    public static String substitute(String str, String from, String to)
    {
        // a null str used to fall through to str.indexOf() and throw
        if (str == null || from == null || from.length() == 0 || to == null)
        {
            return str;
        }

        int index = str.indexOf(from);
        if (index == -1)
        {
            return str;
        }

        StringBuilder buf = new StringBuilder(str.length());
        int lastIndex = 0;

        while (index != -1)
        {
            buf.append(str, lastIndex, index);
            buf.append(to);
            lastIndex = index + from.length();
            index = str.indexOf(from, lastIndex);
        }

        // add in last chunk
        buf.append(str, lastIndex, str.length());

        return buf.toString();
    }

    /**
     * Finds the index of the n-th occurrence of a character that is not
     * immediately preceded by a backslash.
     *
     * @param charToFind the character you're searching for
     * @param n which unescaped occurrence to return, counting from 1
     * @param s the string containing the character
     * @return the index of that occurrence, -1 if not found
     */
    public static int findNthUnescaped(char charToFind, int n, String s)
    {
        char[] charArray = s.toCharArray();
        int count = 0;
        for (int i = 0; i < charArray.length; ++i)
        {
            // any character directly after a backslash counts as escaped
            if (i > 0 && charArray[i - 1] == '\\')
            {
                continue;
            }
            if (charArray[i] == charToFind && ++count == n)
            {
                return i;
            }
        }
        // if we get out of the loop we didn't find the character
        return -1;
    }

    /**
     * Finds the index of the next occurrence of a character that is not
     * immediately preceded by a backslash.
     *
     * @param charToFind the character you're searching for
     * @param startIdx the index to start searching from; negative values are
     *        treated as 0
     * @param s the string containing the character
     * @return the index of the character, -1 if not found
     */
    public static int findNextUnescaped(char charToFind, int startIdx, String s)
    {
        if (startIdx >= s.length())
        {
            return -1;
        }
        char[] charArray = s.toCharArray();
        // clamp so that a negative start does not index outside the array
        for (int i = Math.max(startIdx, 0); i < charArray.length; ++i)
        {
            if (i > 0 && charArray[i - 1] == '\\')
            {
                continue;
            }
            if (charArray[i] == charToFind)
            {
                return i;
            }
        }
        // if we get out of the loop we didn't find the character
        return -1;
    }

    /**
     * Removes each backslash that directly precedes the specified character,
     * leaving the character itself in place.
     *
     * @param escapedChar the character whose escaping backslashes are dropped
     * @param toClean the string to clean; may be {@code null}
     * @return the cleaned string, or {@code toClean} unchanged when it is
     *         {@code null} or contains no {@code escapedChar} or no backslash
     */
    public static String cleanupEscapedChar(char escapedChar, String toClean)
    {
        // if there's no char to begin with or no escape character we can just return the orig string
        if (toClean == null || toClean.indexOf(escapedChar) == -1 || toClean.indexOf('\\') == -1)
        {
            return toClean;
        }
        StringBuilder buf = new StringBuilder(toClean.length());
        char[] chars = toClean.toCharArray();
        for (int i = 0; i < chars.length - 1; ++i)
        {
            if (chars[i] != '\\' || chars[i + 1] != escapedChar)
            {
                buf.append(chars[i]);
            }
        }
        // the last character has no successor, so it is never an escaping backslash
        buf.append(chars[chars.length - 1]);
        return buf.toString();
    }

    /**
     * Replaces each backslash-escaped occurrence of the specified character
     * (the backslash and the character together) with an XML hexadecimal
     * character reference of the form {@code &#xHH;}.
     *
     * @param escapedChar the character whose escaped occurrences are rewritten
     * @param toClean the string to clean; may be {@code null}
     * @return the cleaned string, or {@code toClean} unchanged when it is
     *         {@code null} or contains no {@code escapedChar} or no backslash
     */
    public static String cleanupEscapedCharForXML(char escapedChar, String toClean)
    {
        // if there's no char to begin with or no escape character we can just return the orig string
        if (toClean == null || toClean.indexOf(escapedChar) == -1 || toClean.indexOf('\\') == -1)
        {
            return toClean;
        }
        StringBuilder buf = new StringBuilder(toClean.length());
        char[] chars = toClean.toCharArray();
        int i;
        for (i = 0; i < chars.length - 1; ++i)
        {
            if (chars[i] != '\\' || chars[i + 1] != escapedChar)
            {
                buf.append(chars[i]);
            }
            else
            {
                buf.append("&#x").append(Integer.toString(chars[i + 1], 16)).append(';');
                // skip the escaped character, it is now part of the reference
                i++;
            }
        }
        // the final character is still pending unless it was consumed as part of an escape
        if (i == chars.length - 1)
        {
            buf.append(chars[chars.length - 1]);
        }

        return buf.toString();
    }

    /**
     * Finds the close token that balances an open token, honouring nested
     * open/close pairs and skipping any character that directly follows a
     * backslash.
     *
     * @param openToken the character that opens a nested group
     * @param closeToken the character that closes a group
     * @param s the string to scan
     * @param startIdx the scan begins at {@code startIdx + 1}; presumably this
     *        is the index of the opening token (confirm against callers)
     * @return the index of the balancing close token, -1 if there is none
     */
    public static int findClosingToken(char openToken, char closeToken, String s, int startIdx)
    {
        int closeIdx = startIdx + 1;
        int subTokenCount = 0;
        while (closeIdx < s.length())
        {
            char c = s.charAt(closeIdx);
            if (s.charAt(closeIdx - 1) == '\\')
            {
                ++closeIdx;
                continue;
            }
            if (c == openToken)
                ++subTokenCount;
            else if (c == closeToken)
                --subTokenCount;

            // going negative means this close token balances the initial open
            if (subTokenCount < 0) break;
            ++closeIdx;
        }
        return (closeIdx < s.length()) ? closeIdx : -1;
    }

    /**
     * String convenience overload of
     * {@link #findMatchWithWildcard(char[], char[])}.
     *
     * @param str the text to test; may be {@code null}
     * @param matchStr the wildcard pattern; may be {@code null}
     * @return the result of the {@code char[]} overload ({@code false} when
     *         either argument is {@code null})
     */
    public static boolean findMatchWithWildcard(String str, String matchStr)
    {
        char[] src = str == null ? null : str.toCharArray();
        char[] pat = matchStr == null ? null : matchStr.toCharArray();
        return findMatchWithWildcard(src, pat);
    }

    /**
     * Sees if src matches pat, also allowing '*' and '?' as wildcards.
     * <p>
     * What the loop does, as written:
     * <ul>
     * <li>a {@code null} argument or an empty pattern never matches;</li>
     * <li>an empty source matches only the one-character patterns
     *     {@code "*"} and {@code "?"};</li>
     * <li>{@code '?'} accepts any single source character;</li>
     * <li>{@code '*'} as the last pattern character matches immediately;
     *     elsewhere it consumes one source character and then lets
     *     mismatching source characters be skipped until the next pattern
     *     character is found;</li>
     * <li>on a mismatch with no pending {@code '*'} (or when the pattern is
     *     exhausted early) the pattern restarts from its first character if
     *     more source characters remain than the pattern is long, otherwise
     *     the result is {@code false}.</li>
     * </ul>
     * NOTE(review): because of the restart rule this is not a plain anchored
     * glob match; confirm the intended semantics before changing the loop.
     *
     * @param src the characters to test; may be {@code null}
     * @param pat the wildcard pattern; may be {@code null}
     * @return {@code true} if the pattern was fully consumed with no
     *         {@code '*'} still waiting for its following character
     */
    public static boolean findMatchWithWildcard(char[] src, char[] pat)
    {
        if (src == null || pat == null)
            return false;

        // we consider an empty pattern to be a don't-match-anything pattern
        if (pat.length == 0)
            return false;

        if (src.length == 0)
            return (pat.length == 1 && (pat[0] == '*' || pat[0] == '?'));

        boolean star = false;

        int srcLen = src.length;
        int patLen = pat.length;
        int srcIdx = 0;
        int patIdx = 0;

        for ( ; srcIdx < srcLen; srcIdx++)
        {
            if (patIdx == patLen)
            {
                if (patLen < (srcLen - srcIdx))
                    patIdx = 0; //Start the search again
                else
                    return false;
            }

            char s = src[srcIdx];
            char m = pat[patIdx];

            switch (m)
            {
                case '*':
                    // star on the end
                    if (patIdx == pat.length - 1)
                        return true;
                    star = true;
                    ++patIdx;
                    break;

                case '?':
                    ++patIdx;
                    break;

                default:
                    if (s != m)
                    {
                        if (!star)
                        {
                            if (patLen < (srcLen - srcIdx))
                                patIdx = 0; //Start the search again
                            else
                                return false;
                        }
                    }
                    else
                    {
                        star = false;
                        ++patIdx;
                    }
                    break;
            }
        }

        if (patIdx < patLen)
            return false;

        return !star;
    }

    /**
     * Counts the line-feed ({@code '\n'}) characters in the buffer. Text
     * after the last line feed is not counted as an additional line.
     *
     * @param buffer the text to scan
     * @return the number of {@code '\n'} characters found
     */
    public static final int countLines(String buffer)
    {
        int count = 0;
        int index = buffer.indexOf('\n');

        while (index != -1)
        {
            count++;
            index = buffer.indexOf('\n', index + 1);
        }

        return count;
    }

    /**
     * Entitizes the given HTML buffer. The following characters are converted
     * into HTML entities:
     * <ul>
     * <li>{@code <} becomes {@code &lt;}</li>
     * <li>{@code >} becomes {@code &gt;}</li>
     * </ul>
     * All other characters, including {@code &}, are copied unchanged.
     *
     * @param buffer the HTML buffer; may be {@code null}
     * @return the converted buffer, or {@code null} if {@code buffer} is {@code null}
     */
    public static String entitizeHtml(String buffer)
    {
        if (buffer == null) return buffer;

        StringBuilder sb = new StringBuilder(buffer.length());
        for (int i = 0; i < buffer.length(); i++)
        {
            char c = buffer.charAt(i);
            switch (c)
            {
                case '>':
                    sb.append("&gt;");
                    break;
                case '<':
                    sb.append("&lt;");
                    break;
                default:
                    sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Converts every line feed in the buffer to an HTML {@code <br>} tag;
     * all other characters are copied unchanged.
     *
     * @param buffer the text to format; may be {@code null}
     * @return the formatted text, or {@code null} if {@code buffer} is {@code null}
     */
    public static String formatHtml(String buffer)
    {
        if (buffer == null) return buffer;

        StringBuilder sb = new StringBuilder(buffer.length());
        for (int i = 0; i < buffer.length(); i++)
        {
            char c = buffer.charAt(i);
            if (c == '\n')
            {
                sb.append("<br>");
            }
            else
            {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Wraps the string in double quotes and escapes its contents.
     * <p>
     * Double quotes, backspace, tab, form feed, carriage return and line feed
     * become their two-character backslash forms. A backslash is doubled,
     * except when it is followed by the letter {@code u}: it is then kept
     * single so that an existing Unicode escape sequence passes through as
     * is. Any other character below the space character is written as a
     * backslash, {@code x}, the decimal character code and {@code X}.
     *
     * @param s the string to format
     * @return the quoted, escaped string
     */
    public static String formatString(String s)
    {
        StringBuilder result = new StringBuilder(s.length() + 2);

        result.append('"');
        for (int i = 0; i < s.length(); i++)
        {
            char c = s.charAt(i);
            switch (c)
            {
                case '\\':
                    // Leave unicode characters as is.
                    if ((i + 1 < s.length()) && (s.charAt(i + 1) == 'u'))
                    {
                        result.append("\\");
                    }
                    else
                    {
                        result.append("\\\\");
                    }
                    break;
                case '"':
                    result.append("\\\"");
                    break;
                case '\b':
                    result.append("\\b");
                    break;
                case '\t':
                    result.append("\\t");
                    break;
                case '\f':
                    result.append("\\f");
                    break;
                case '\r':
                    result.append("\\r");
                    break;
                case '\n':
                    result.append("\\n");
                    break;
                default:
                    if (c < ' ')
                    {
                        result.append("\\x").append((int) c).append("X");
                    }
                    else
                    {
                        result.append(c);
                    }
            }
        }

        result.append('"');

        return result.toString();
    }

    /**
     * Decodes Unicode escape sequences: a backslash, the letter {@code u} and
     * four hexadecimal digits are replaced by the character they denote.
     * <p>
     * A backslash followed by {@code u} with fewer than four characters left
     * yields just the {@code u}; a backslash followed by any other character
     * is copied through together with that character. Surrounding quotes are
     * not removed.
     *
     * @param s a formatted String
     * @return an unformatted String
     * @throws IllegalArgumentException if one of the four characters after
     *         the {@code u} is not a hexadecimal digit
     */
    public static String unformatString(String s)
    {
        StringBuilder result = new StringBuilder(s.length());
        int i = 0;

        while (i < s.length())
        {
            char c = s.charAt(i++);

            if ((c == '\\') && (i < s.length()))
            {
                c = s.charAt(i++);

                if ((c == 'u') && (i + 3 < s.length()))
                {
                    // Read the xxxx
                    int value = 0;

                    for (int j = 0; j < 4; j++)
                    {
                        c = s.charAt(i++);
                        int digit = Character.digit(c, 16);

                        if (digit != -1)
                        {
                            value = (value << 4) + digit;
                        }
                        else
                        {
                            throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
                        }
                    }
                    result.append((char) value);
                }
                else if (c == 'u')
                {
                    result.append(c);
                }
                else
                {
                    result.append('\\');
                    result.append(c);
                }
            }
            else
            {
                result.append(c);
            }
        }

        return result.toString();
    }

    /**
     * Removes whitespace from the input string and returns a string that
     * contains exactly one {@code replacementChar} between each word.
     * <p>
     * Can be used to strip newlines, tabs, multiple spaces, etc. between
     * words and replace them with a single space. Leading and trailing
     * whitespace is dropped without emitting a replacement character, and the
     * result is finally passed through {@link String#trim()}.
     *
     * @param in input string
     * @param replacementChar character which replaces each whitespace run
     * @return the collapsed string
     */
    public static String collapseWhitespace(String in, char replacementChar)
    {
        StringBuilder sb = new StringBuilder(in.length());
        // set while a whitespace run after a word is waiting for the next word
        boolean pendingSeparator = false;

        for (int i = 0; i < in.length(); i++)
        {
            char c = in.charAt(i);
            if (Character.isWhitespace(c))
            {
                // leading whitespace (nothing emitted yet) never produces a separator
                pendingSeparator = sb.length() > 0;
            }
            else
            {
                // emit the separator only once another word actually follows,
                // so trailing whitespace does not leave a dangling replacementChar
                if (pendingSeparator)
                {
                    sb.append(replacementChar);
                    pendingSeparator = false;
                }
                sb.append(c);
            }
        }
        return sb.toString().trim();
    }

    /**
     * Tests whether {@code target} is one of the delimiter-separated tokens
     * of {@code str}; each token is trimmed before it is compared.
     *
     * @param str the delimited list
     * @param target the token to look for
     * @param delimiter the delimiter characters, as understood by
     *        {@link StringTokenizer}
     * @return {@code true} if a trimmed token equals {@code target}
     */
    public static boolean isEnumerationToken(String str, String target, String delimiter)
    {
        // C: indexOf() and startsWith() should be faster and create fewer objects...
        StringTokenizer t = new StringTokenizer(str, delimiter);
        while (t.hasMoreTokens())
        {
            if (t.nextToken().trim().equals(target))
            {
                return true;
            }
        }
        return false;
    }

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final char[] hexchars = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /**
     * Renders the bytes as a hex dump with 16 bytes per line.
     * <p>
     * Each line holds 16 two-digit hex columns ({@code "xx"} where the data
     * has run out), separated by spaces with a {@code '-'} after the eighth
     * column, followed by the same bytes as text: bytes from the space
     * character through {@code '~'} are shown as is, all others as
     * {@code '.'}. Every line ends with a line feed.
     *
     * @param data the bytes to dump
     * @return the formatted dump; empty if {@code data} is empty
     */
    public static String hexDump(byte[] data)
    {
        int cur = 0;

        StringBuilder buf = new StringBuilder(1024);
        while (cur < data.length)
        {
            // hex columns
            for (int i = 0; i < 16; ++i)
            {
                if (cur + i < data.length)
                {
                    buf.append(hexchars[(data[cur + i] >> 4) & 0xf]);
                    buf.append(hexchars[(data[cur + i] & 0xf)]);
                }
                else
                {
                    buf.append("xx");
                }

                buf.append((i == 7) ? '-' : ' ');
            }
            buf.append(" ");
            // text column
            for (int i = 0; i < 16; ++i)
            {
                if (cur + i < data.length)
                {
                    if ((data[cur + i] >= ' ') && (data[cur + i] <= '~'))
                        buf.append((char) data[cur + i]);
                    else
                        buf.append('.');
                }
            }
            buf.append('\n');
            cur += 16;
        }
        return buf.toString();
    }
}