// vault-core/src/main/java/org/apache/jackrabbit/vault/util/Text.java - jackrabbit-filevault - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.jackrabbit.vault.util;

 import java.io.ByteArrayOutputStream;
 import java.io.UnsupportedEncodingException;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.Properties;

 /**
  * Collection of static text helpers: message digests, splitting and joining
  * of strings, XML / URL / JCR-name escaping, path arithmetic and
  * <code>${...}</code> variable substitution.
  * <p>
  * The class only contains static members and cannot be instantiated.
  */
 public class Text {

     /**
      * Hidden constructor.
      */
     private Text() {
     }

     /**
      * Lowercase hexadecimal digits, indexed by nibble value. Used by the
      * digest and the escape methods of this class.
      */
     public static final char[] hexTable = "0123456789abcdef".toCharArray();

     /**
      * Calculates an MD5 hash of the given string.
      *
      * @param data the data to encode
      * @param enc  the character encoding used to turn {@code data} into bytes
      * @return a hex encoded string of the md5 digested input
      * @throws UnsupportedEncodingException if the encoding is not supported
      * @throws NullPointerException if {@code data} is {@code null}
      */
     public static String md5(String data, String enc)
             throws UnsupportedEncodingException {
         byte[] bytes = data.getBytes(enc);
         try {
             return digest("MD5", bytes);
         } catch (NoSuchAlgorithmException e) {
             throw internalError("MD5 digest not available???", e);
         }
     }

     /**
      * Calculates an MD5 hash of the given string using 'utf-8' encoding.
      *
      * @param data the data to encode
      * @return a hex encoded string of the md5 digested input
      * @throws NullPointerException if {@code data} is {@code null}
      */
     public static String md5(String data) {
         try {
             return md5(data, "utf-8");
         } catch (UnsupportedEncodingException e) {
             throw internalError("UTF8 digest not available???", e);
         }
     }

     /**
      * Digests the plain string using the given algorithm.
      *
      * @param algorithm The algorithm for the digest. This algorithm must be
      *                  supported by the MessageDigest class.
      * @param data      The plain text String to be digested.
      * @param enc       The character encoding to use
      * @return The digested plain text String represented as Hex digits.
      * @throws java.security.NoSuchAlgorithmException if the desired algorithm is not supported by
      *                                      the MessageDigest class.
      * @throws java.io.UnsupportedEncodingException if the encoding is not supported
      */
     public static String digest(String algorithm, String data, String enc)
             throws NoSuchAlgorithmException, UnsupportedEncodingException {
         byte[] bytes = data.getBytes(enc);
         return digest(algorithm, bytes);
     }

     /**
      * Digests the given bytes using the given algorithm.
      *
      * @param algorithm The algorithm for the digest. This algorithm must be
      *                  supported by the MessageDigest class.
      * @param data      the data to digest with the given algorithm
      * @return The digest represented as lowercase hex digits (two per byte).
      * @throws java.security.NoSuchAlgorithmException if the desired algorithm is not supported by
      *                                  the MessageDigest class.
      */
     public static String digest(String algorithm, byte[] data)
             throws NoSuchAlgorithmException {
         byte[] hash = MessageDigest.getInstance(algorithm).digest(data);
         StringBuilder hex = new StringBuilder(hash.length * 2);
         for (byte b : hash) {
             // high nibble first, then low nibble
             hex.append(hexTable[(b >> 4) & 0x0f]);
             hex.append(hexTable[b & 0x0f]);
         }
         return hex.toString();
     }

     /**
      * Returns an array of strings decomposed of the original string, split at
      * every occurrence of 'ch'. If 2 'ch' follow each other with no intermediate
      * characters, empty "" entries are avoided.
      *
      * @param str the string to decompose
      * @param ch  the character to use as split pattern
      * @return an array of strings; empty if {@code str} is {@code null} or empty
      */
     public static String[] explode(String str, int ch) {
         return explode(str, ch, false);
     }

     /**
      * Returns an array of strings decomposed of the original string, split at
      * every occurrence of 'ch'.
      *
      * @param str          the string to decompose
      * @param ch           the character to use as split pattern
      * @param respectEmpty if {@code true}, empty elements are generated
      * @return an array of strings; empty if {@code str} is {@code null} or empty
      */
     public static String[] explode(String str, int ch, boolean respectEmpty) {
         if (str == null || str.length() == 0) {
             return new String[0];
         }

         ArrayList<String> parts = new ArrayList<String>();
         int start = 0;
         int hit = str.indexOf(ch, start);

         // collect every segment that is terminated by a delimiter
         while (hit >= 0) {
             if (respectEmpty || hit > start) {
                 parts.add(str.substring(start, hit));
             }
             start = hit + 1;
             hit = str.indexOf(ch, start);
         }

         // collect the tail after the last delimiter
         if (start < str.length()) {
             parts.add(str.substring(start));
         } else if (respectEmpty && start == str.length()) {
             // the string ended with a delimiter: report the trailing empty element
             parts.add("");
         }

         return parts.toArray(new String[parts.size()]);
     }

     /**
      * Concatenates all strings in the string array using the specified delimiter.
      *
      * @param arr The String array
      * @param delim The delimiter
      * @return the concatenated string
      * @throws NullPointerException if {@code arr} is {@code null}
      */
     public static String implode(String[] arr, String delim) {
         StringBuilder joined = new StringBuilder();
         for (int i = 0; i < arr.length; i++) {
             if (i > 0) {
                 joined.append(delim);
             }
             joined.append(arr[i]);
         }
         return joined.toString();
     }

     /**
      * Replaces all occurrences of {@code oldString} in {@code text}
      * with {@code newString}.
      * <p>
      * An empty {@code oldString} matches nothing; {@code text} is returned
      * unchanged in that case.
      *
      * @param text The text to replace in
      * @param oldString old substring to be replaced with {@code newString}
      * @param newString new substring to replace occurrences of {@code oldString}
      * @return the resulting string; {@code text} itself if nothing was replaced
      * @throws IllegalArgumentException if any argument is {@code null}
      */
     public static String replace(String text, String oldString, String newString) {
         if (text == null || oldString == null || newString == null) {
             throw new IllegalArgumentException("null argument");
         }
         // An empty search string is found at every index without advancing the
         // scan position, which would loop forever. Treat it as "no match".
         if (oldString.length() == 0) {
             return text;
         }
         int pos = text.indexOf(oldString);
         if (pos == -1) {
             return text;
         }
         StringBuilder sb = new StringBuilder(text.length());
         int lastPos = 0;
         while (pos != -1) {
             sb.append(text, lastPos, pos).append(newString);
             lastPos = pos + oldString.length();
             pos = text.indexOf(oldString, lastPos);
         }
         sb.append(text, lastPos, text.length());
         return sb.toString();
     }

     /**
      * Replaces illegal XML characters in the given string by their corresponding
      * predefined entity references ({@code < > & " '}).
      *
      * @param text text to be escaped
      * @return the escaped string; {@code text} itself if nothing had to be escaped
      * @throws IllegalArgumentException if {@code text} is {@code null}
      */
     public static String encodeIllegalXMLCharacters(String text) {
         if (text == null) {
             throw new IllegalArgumentException("null argument");
         }
         // the buffer is created lazily so that clean input is returned as is
         StringBuilder buf = null;
         int length = text.length();
         int pos = 0;
         for (int i = 0; i < length; i++) {
             String entity = xmlEntity(text.charAt(i));
             if (entity == null) {
                 continue;
             }
             if (buf == null) {
                 buf = new StringBuilder();
             }
             // copy the clean run preceding the character, then the entity
             buf.append(text, pos, i).append(entity);
             pos = i + 1;
         }
         if (buf == null) {
             return text;
         }
         buf.append(text, pos, length);
         return buf.toString();
     }

     /**
      * Returns the predefined XML entity reference for the given character or
      * {@code null} if the character does not need to be escaped.
      */
     private static String xmlEntity(char ch) {
         switch (ch) {
             case '<':
                 return "&lt;";
             case '>':
                 return "&gt;";
             case '&':
                 return "&amp;";
             case '"':
                 return "&quot;";
             case '\'':
                 return "&apos;";
             default:
                 return null;
         }
     }

     /**
      * The set of characters that are not encoded by the {@code escape()}
      * methods. It contains the characters defined as 'unreserved' in
      * section 2.3 of the RFC 2396 'URI generic syntax':
      *
      * <pre>
      * unreserved  = alphanum | mark
      * mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
      * </pre>
      */
     public static BitSet URISave;

     /**
      * Same as {@link #URISave} but also contains the '/'
      */
     public static BitSet URISaveEx;

     static {
         URISave = new BitSet(256);
         int i;
         // alphanum
         for (i = 'a'; i <= 'z'; i++) {
             URISave.set(i);
         }
         for (i = 'A'; i <= 'Z'; i++) {
             URISave.set(i);
         }
         for (i = '0'; i <= '9'; i++) {
             URISave.set(i);
         }
         // mark
         URISave.set('-');
         URISave.set('_');
         URISave.set('.');
         URISave.set('!');
         URISave.set('~');
         URISave.set('*');
         URISave.set('\'');
         URISave.set('(');
         URISave.set(')');

         URISaveEx = (BitSet) URISave.clone();
         URISaveEx.set('/');
     }

     /**
      * Does an URL encoding of the {@code string} using the
      * {@code escape} character. The characters that don't need encoding
      * are those defined 'unreserved' in section 2.3 of the 'URI generic syntax'
      * RFC 2396, but without the escape character.
      *
      * @param string the string to encode.
      * @param escape the escape character.
      * @return the escaped string
      * @throws NullPointerException if {@code string} is {@code null}.
      */
     public static String escape(String string, char escape) {
         return escape(string, escape, false);
     }

     /**
      * Does an URL encoding of the {@code string} using the
      * {@code escape} character. The string is converted to its utf-8 bytes
      * and every byte that is not 'unreserved' (section 2.3 of the 'URI generic
      * syntax' RFC 2396), or that equals the escape character itself, is
      * written as the escape character followed by two lowercase hex digits.
      * If {@code isPath} is {@code true}, additionally the slash '/' is left
      * unencoded.
      *
      * @param string the string to encode.
      * @param escape the escape character.
      * @param isPath if {@code true}, the string is treated as path
      * @return the escaped string
      * @throws NullPointerException if {@code string} is {@code null}.
      */
     public static String escape(String string, char escape, boolean isPath) {
         try {
             BitSet validChars = isPath ? URISaveEx : URISave;
             byte[] bytes = string.getBytes("utf-8");
             StringBuilder out = new StringBuilder(bytes.length);
             for (byte aByte : bytes) {
                 int c = aByte & 0xff;
                 boolean keep = validChars.get(c) && c != escape;
                 if (keep) {
                     out.append((char) c);
                 } else {
                     out.append(escape);
                     out.append(hexTable[(c >> 4) & 0x0f]);
                     out.append(hexTable[c & 0x0f]);
                 }
             }
             return out.toString();
         } catch (UnsupportedEncodingException e) {
             throw internalError(e.toString(), e);
         }
     }

     /**
      * Does a URL encoding of the {@code string}. The characters that
      * don't need encoding are those defined 'unreserved' in section 2.3 of
      * the 'URI generic syntax' RFC 2396.
      *
      * @param string the string to encode
      * @return the escaped string
      * @throws NullPointerException if {@code string} is {@code null}.
      */
     public static String escape(String string) {
         return escape(string, '%');
     }

     /**
      * Does a URL encoding of the {@code path}. The characters that
      * don't need encoding are those defined 'unreserved' in section 2.3 of
      * the 'URI generic syntax' RFC 2396. In contrast to the
      * {@link #escape(String)} method, not the entire path string is escaped,
      * but every individual part (i.e. the slashes are not escaped).
      *
      * @param path the path to encode
      * @return the escaped path
      * @throws NullPointerException if {@code path} is {@code null}.
      */
     public static String escapePath(String path) {
         return escape(path, '%', true);
     }

     /**
      * Does a URL decoding of the {@code string} using the
      * {@code escape} character. Please note that in opposite to the
      * {@link java.net.URLDecoder} it does not transform the + into spaces.
      * <p>
      * The escape character is matched against the individual utf-8 bytes of
      * the input, so only an ASCII escape character can ever match.
      *
      * @param string the string to decode
      * @param escape the escape character
      * @return the decoded string
      * @throws NullPointerException           if {@code string} is {@code null}.
      * @throws IllegalArgumentException       if the 2 characters following the escape
      *                                        character do not represent a hex-number
      *                                        or if not enough characters follow an
      *                                        escape character
      */
     public static String unescape(String string, char escape)  {
         try {
             byte[] utf8 = string.getBytes("utf-8");
             int len = utf8.length;

             // An escape in one of the last two positions cannot be followed by
             // two hex digits; rejecting it up front keeps the loop below in bounds.
             boolean escapeAtLast = len >= 1 && utf8[len - 1] == escape;
             boolean escapeBeforeLast = len >= 2 && utf8[len - 2] == escape;
             if (escapeAtLast || escapeBeforeLast) {
                 throw new IllegalArgumentException("Premature end of escape sequence at end of input");
             }

             ByteArrayOutputStream out = new ByteArrayOutputStream(len);
             for (int k = 0; k < len; k++) {
                 byte b = utf8[k];
                 if (b == escape) {
                     int hi = decodeDigit(utf8[++k]);
                     int lo = decodeDigit(utf8[++k]);
                     out.write((hi << 4) + lo);
                 } else {
                     out.write(b);
                 }
             }

             return new String(out.toByteArray(), "utf-8");
         } catch (UnsupportedEncodingException e) {
             throw internalError(e.toString(), e);
         }
     }

     /**
      * Does a URL decoding of the {@code string}. Please note that in
      * opposite to the {@link java.net.URLDecoder} it does not transform the +
      * into spaces.
      *
      * @param string the string to decode
      * @return the decoded string
      * @throws NullPointerException           if {@code string} is {@code null}.
      * @throws IllegalArgumentException       if the 2 characters following the escape
      *                                        character do not represent a hex-number
      *                                        or if not enough characters follow an
      *                                        escape character
      */
     public static String unescape(String string) {
         return unescape(string, '%');
     }

     /**
      * Escapes all illegal JCR name characters of a string.
      * The encoding is loosely modeled after URI encoding, but only encodes
      * the characters it absolutely needs to in order to make the resulting
      * string a valid JCR name. Encoded characters are written as '%' followed
      * by two uppercase hex digits.
      * Use {@link #unescapeIllegalJcrChars(String)} for decoding.
      * <p>
      * QName EBNF:<br>
      * <pre>
      * simplename ::= onecharsimplename | twocharsimplename | threeormorecharname
      * onecharsimplename ::= (* Any Unicode character except: '.', '/', ':', '[', ']', '*', '|' or any whitespace character *)
      * twocharsimplename ::= '.' onecharsimplename | onecharsimplename '.' | onecharsimplename onecharsimplename
      * threeormorecharname ::= nonspace string nonspace
      * string ::= char | string char
      * char ::= nonspace | ' '
      * nonspace ::= (* Any Unicode character except: '/', ':', '[', ']', '*', '|' or any whitespace character *)
      * </pre>
      *
      * @param name the name to escape
      * @return the escaped name
      * @throws NullPointerException if {@code name} is {@code null}
      */
     public static String escapeIllegalJcrChars(String name) {
         int len = name.length();
         StringBuilder buffer = new StringBuilder(len * 2);
         for (int i = 0; i < len; i++) {
             char ch = name.charAt(i);
             if (ch == '%' || ch == '/' || ch == ':' || ch == '[' || ch == ']'
                 || ch == '*' || ch == '|'
                 // a dot is only escaped in names shorter than three characters
                 || (ch == '.' && len < 3)
                 // a space is only escaped at the very start or end of the name
                 || (ch == ' ' && (i == 0 || i == len - 1))
                 || ch == '\t' || ch == '\r' || ch == '\n') {
                 buffer.append('%');
                 buffer.append(Character.toUpperCase(Character.forDigit(ch / 16, 16)));
                 buffer.append(Character.toUpperCase(Character.forDigit(ch % 16, 16)));
             } else {
                 buffer.append(ch);
             }
         }
         return buffer.toString();
     }

     /**
      * Escapes an illegal XPath search character at the end of a string by
      * prefixing it with a backslash. Only the last character is inspected.
      * <p>Example:<br>
      * A search string like 'test?' will run into a ParseException
      * documented in http://issues.apache.org/jira/browse/JCR-1248
      *
      * @param s the string to encode
      * @return the escaped string; the empty string is returned unchanged
      * @throws NullPointerException if {@code s} is {@code null}
      */
     public static String escapeIllegalXpathSearchChars(String s) {
         // There is no last character to inspect in an empty string.
         if (s.length() == 0) {
             return s;
         }
         int last = s.length() - 1;
         char c = s.charAt(last);
         StringBuilder sb = new StringBuilder(s.length() + 1);
         sb.append(s, 0, last);
         // NOTE: the original note asks to keep this list in sync with
         // _ESCAPED_CHAR, which is not defined in this class.
         if (c == '!' || c == '(' || c == ':' || c == '^'
             || c == '[' || c == ']' || c == '{' || c == '}' || c == '?') {
             sb.append('\\');
         }
         sb.append(c);
         return sb.toString();
     }

     /**
      * Unescapes previously escaped jcr chars.
      * <p>
      * Please note, that this does not exactly the same as the url related
      * {@link #unescape(String)}, since it handles the byte-encoding
      * differently: every {@code %xx} sequence is turned into the single
      * character with that code, and a '%' that is not followed by two hex
      * digits is copied to the output unchanged.
      *
      * @param name the name to unescape
      * @return the unescaped name
      * @throws NullPointerException if {@code name} is {@code null}
      */
     public static String unescapeIllegalJcrChars(String name) {
         int len = name.length();
         StringBuilder buffer = new StringBuilder(len);
         int start = 0;
         int i = name.indexOf('%');
         // a '%' with fewer than two characters behind it is left for the tail copy
         while (i > -1 && i + 2 < len) {
             buffer.append(name, start, i);
             int a = Character.digit(name.charAt(i + 1), 16);
             int b = Character.digit(name.charAt(i + 2), 16);
             if (a > -1 && b > -1) {
                 buffer.append((char) (a * 16 + b));
                 start = i + 3;
             } else {
                 // not an escape sequence: keep the '%' and rescan right after it
                 buffer.append('%');
                 start = i + 1;
             }
             i = name.indexOf('%', start);
         }
         buffer.append(name, start, len);
         return buffer.toString();
     }

     /**
      * Returns the name part of the path. If the given path is already a name
      * (i.e. contains no slashes) it is returned.
      *
      * @param path the path
      * @return the name part or {@code null} if {@code path} is {@code null}.
      */
     public static String getName(String path) {
         return getName(path, '/');
     }

     /**
      * Returns the name part of the path, delimited by the given {@code delim}.
      * If the given path is already a name (i.e. contains no {@code delim}
      * characters) it is returned.
      *
      * @param path the path
      * @param delim the delimiter
      * @return the name part or {@code null} if {@code path} is {@code null}.
      */
     public static String getName(String path, char delim) {
         if (path == null) {
             return null;
         }
         // lastIndexOf yields -1 when there is no delimiter, i.e. substring(0)
         return path.substring(path.lastIndexOf(delim) + 1);
     }

     /**
      * Same as {@link #getName(String)} but adding the possibility
      * to pass paths that end with a trailing '/'
      *
      * @param path the path to get the name from
      * @param ignoreTrailingSlash {@code true} to ignore the trailing slash
      * @see #getName(String)
      * @return the name or {@code null} if {@code path} is {@code null}
      */
     public static String getName(String path, boolean ignoreTrailingSlash) {
         // the single-character path "/" is deliberately left untouched
         if (ignoreTrailingSlash && path != null && path.endsWith("/") && path.length() > 1) {
             path = path.substring(0, path.length() - 1);
         }
         return getName(path);
     }

     /**
      * Returns the namespace prefix of the given {@code qname}. If the
      * prefix is missing, an empty string is returned. Please note, that this
      * method does not validate the name or prefix.
      * <p>
      * the qname has the format: qname := [prefix ':'] local;
      *
      * @param qname a qualified name
      * @return the prefix of the name or "".
      *
      * @see #getLocalName(String)
      *
      * @throws NullPointerException if {@code qname} is {@code null}
      */
     public static String getNamespacePrefix(String qname) {
         int colon = qname.indexOf(':');
         if (colon < 0) {
             return "";
         }
         return qname.substring(0, colon);
     }

     /**
      * Returns the local name of the given {@code qname}. Please note, that
      * this method does not validate the name.
      * <p>
      * the qname has the format: qname := [prefix ':'] local;
      *
      * @param qname a qualified name
      * @return the localname
      *
      * @see #getNamespacePrefix(String)
      *
      * @throws NullPointerException if {@code qname} is {@code null}
      */
     public static String getLocalName(String qname) {
         int colon = qname.indexOf(':');
         if (colon < 0) {
             return qname;
         }
         return qname.substring(colon + 1);
     }

     /**
      * Determines, if two paths denote hierarchical siblings, i.e. both
      * contain a '/' and are identical up to their last '/'.
      *
      * @param p1 first path
      * @param p2 second path
      * @return true if on same level, false otherwise
      * @throws NullPointerException if one of the paths is {@code null}
      */
     public static boolean isSibling(String p1, String p2) {
         int pos1 = p1.lastIndexOf('/');
         int pos2 = p2.lastIndexOf('/');
         if (pos1 != pos2 || pos1 < 0) {
             return false;
         }
         return p1.regionMatches(0, p2, 0, pos1);
     }

     /**
      * Determines if the {@code descendant} path is hierarchical a
      * descendant of {@code path}.
      *
      * @param path     the current path
      * @param descendant the potential descendant
      * @return {@code true} if the {@code descendant} is a descendant;
      *         {@code false} otherwise.
      * @throws NullPointerException if one of the paths is {@code null}
      */
     public static boolean isDescendant(String path, String descendant) {
         // compare against "path/" so that "/ab" is not taken as a child of "/a"
         String pattern = path.endsWith("/") ? path : path + "/";
         if (pattern.equals(descendant)) {
             return false;
         }
         return descendant.startsWith(pattern);
     }

     /**
      * Determines if the {@code descendant} path is hierarchical a
      * descendant of {@code path} or equal to it.
      *
      * @param path       the path to check
      * @param descendant the potential descendant
      * @return {@code true} if the {@code descendant} is a descendant
      *         or equal; {@code false} otherwise.
      * @throws NullPointerException if one of the paths is {@code null}
      */
     public static boolean isDescendantOrEqual(String path, String descendant) {
         if (path.equals(descendant)) {
             return true;
         }
         String pattern = path.endsWith("/") ? path : path + "/";
         return descendant.startsWith(pattern);
     }

     /**
      * Returns the n<sup>th</sup> relative parent of the path, where n=level.
      * <p>Example:<br>
      * {@code Text.getRelativeParent("/foo/bar/test", 1) == "/foo/bar" }
      *
      * @param path the path of the page
      * @param level  the level of the parent
      * @return the path of the relative parent; the empty string if the path
      *         has fewer than {@code level} slashes
      */
     public static String getRelativeParent(String path, int level) {
         int idx = path.length();
         // walk backwards over one '/' per requested level
         for (int remaining = level; remaining > 0; remaining--) {
             idx = path.lastIndexOf('/', idx - 1);
             if (idx < 0) {
                 return "";
             }
         }
         return (idx == 0) ? "/" : path.substring(0, idx);
     }

     /**
      * Same as {@link #getRelativeParent(String, int)} but adding the possibility
      * to pass paths that end with a trailing '/'
      *
      * @param path The path.
      * @param level The level of the parent.
      * @param ignoreTrailingSlash {@code true} to ignore the trailing slash
      * @return the path of the parent
      * @see #getRelativeParent(String, int)
      */
     public static String getRelativeParent(String path, int level, boolean ignoreTrailingSlash) {
         // the single-character path "/" is deliberately left untouched
         if (ignoreTrailingSlash && path.endsWith("/") && path.length() > 1) {
             path = path.substring(0, path.length() - 1);
         }
         return getRelativeParent(path, level);
     }

     /**
      * Returns the n<sup>th</sup> absolute parent of the path, where n=level.
      * <p>Example:<br>
      * {@code Text.getAbsoluteParent("/foo/bar/test", 1) == "/foo/bar" }
      *
      * @param path the path of the page
      * @param level  the level of the parent
      * @return the absolute parent path; the empty string if the path has
      *         fewer than {@code level + 1} segments
      */
     public static String getAbsoluteParent(String path, int level) {
         int idx = 0;
         int len = path.length();
         // walk forward over one segment per level, starting with level 0
         while (level >= 0 && idx < len) {
             idx = path.indexOf('/', idx + 1);
             if (idx < 0) {
                 // the last segment ends at the end of the path
                 idx = len;
             }
             level--;
         }
         // a level that was not consumed means the path is not deep enough
         return level >= 0 ? "" : path.substring(0, idx);
     }

     /**
      * Performs variable replacement on the given string value.
      * Each {@code ${...}} sequence within the given value is replaced
      * with the value of the named parser variable. If a variable is not found
      * in the properties an IllegalArgumentException is thrown unless
      * {@code ignoreMissing} is {@code true}. In the latter case, the
      * missing variable is replaced by the empty string.
      * <p>
      * A <code>${</code> without a closing <code>}</code> is copied to the
      * result unchanged together with everything that follows it.
      *
      * @param variables the variables to replace
      * @param value         the original value
      * @param ignoreMissing if {@code true}, missing variables are replaced by the empty string.
      * @return value after variable replacements
      * @throws IllegalArgumentException if the replacement of a referenced variable is not found
      */
     public static String replaceVariables(Properties variables, String value,
                                           boolean ignoreMissing)
             throws IllegalArgumentException {
         StringBuilder result = new StringBuilder();

         // Value:
         // +--+-+--------+-+-----------------+
         // |  |p|-->     |q|-->              |
         // +--+-+--------+-+-----------------+
         int p = 0;
         int q = value.indexOf("${");                       // Find first ${
         while (q != -1) {
             result.append(value, p, q);                    // Text before ${
             p = q;
             q = value.indexOf("}", q + 2);                 // Find }
             if (q != -1) {
                 String variable = value.substring(p + 2, q);
                 String replacement = variables.getProperty(variable);
                 if (replacement == null) {
                     if (!ignoreMissing) {
                         throw new IllegalArgumentException(
                                 "Replacement not found for ${" + variable + "}.");
                     }
                     replacement = "";
                 }
                 result.append(replacement);
                 p = q + 1;
                 q = value.indexOf("${", p);                // Find next ${
             }
         }
         result.append(value, p, value.length());           // Trailing text

         return result.toString();
     }

     /**
      * Converts the ASCII code of a hex digit ('0'-'9', 'A'-'F', 'a'-'f')
      * into its numeric value.
      *
      * @throws IllegalArgumentException if {@code b} is not a hex digit
      */
     private static byte decodeDigit(byte b) {
         if (b >= '0' && b <= '9') {
             return (byte) (b - '0');
         }
         if (b >= 'A' && b <= 'F') {
             return (byte) (b - 'A' + 10);
         }
         if (b >= 'a' && b <= 'f') {
             return (byte) (b - 'a' + 10);
         }
         throw new IllegalArgumentException("Escape sequence is not hexadecimal: " + (char)b);
     }

     /**
      * Creates an {@link InternalError} with the given message that keeps the
      * triggering exception as its cause.
      */
     private static InternalError internalError(String message, Throwable cause) {
         InternalError error = new InternalError(message);
         error.initCause(cause);
         return error;
     }

 }