src/main/java/org/apache/commons/jexl3/parser/StringParser.java - commons-jexl - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.commons.jexl3.parser;

 /**
  * Common constant strings utilities.
  * <p>
  * The methods of this class read JEXL string literals and handle escaping through the
  * 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single
  * and double quotes), to read Unicode hexadecimal encoded characters and to express a few
  * control characters.
  * </p>
  * <p>
  * The escapable characters are the string delimiter (both the single and double quotes
  * - ''' and '"' - when no delimiter is in use), the backslash character - '\' - itself,
  * a Unicode sequence starting with 'u' followed by 4 hexadecimal digits, and the letters
  * 'b', 't', 'n', 'f' and 'r' which respectively produce a backspace, a horizontal tab,
  * a line feed, a form feed and a carriage return.
  * </p>
  * <p>
  * A sequence where '\' occurs before any non-escapable character or sequence has no effect, the
  * sequence output being the same as the input.
  * </p>
  */
 public class StringParser {
     /** The length of an escaped unicode sequence. */
     private static final int UCHAR_LEN = 4;
     /** Initial shift value for composing a Unicode char from 4 nibbles (16 - 4). */
     private static final int SHIFT = 12;
     /** The base 10 offset used to convert hexa characters to decimal. */
     private static final int BASE10 = 10;
     /** The last 7bits ascii character. */
     private static final char LAST_ASCII = 127;
     /** The first printable 7bits ascii character. */
     private static final char FIRST_ASCII = 32;

     /** Default constructor.  */
     public StringParser() {
     }

     /**
      * Builds a string, handles escaping through '\' syntax.
      * <p>
      * When {@code eatsep} is true, the first character of the sequence is taken as the
      * delimiter and both the first and the last characters are left out of the result.
      * </p>
      * @param str the string to build from
      * @param eatsep whether the separator, the first character, should be considered
      * @return the built string
      */
     public static String buildString(final CharSequence str, final boolean eatsep) {
         final StringBuilder strb = new StringBuilder(str.length());
         final char sep = eatsep ? str.charAt(0) : 0;
         final int end = str.length() - (eatsep ? 1 : 0);
         final int begin = (eatsep ? 1 : 0);
         read(strb, str, begin, end, sep);
         return strb.toString();
     }

     /**
      * Builds a regex pattern string, handles escaping '/' through '\/' syntax.
      * <p>
      * The first character of the sequence is skipped; the remainder is read as a string
      * whose delimiter is its own first character.
      * </p>
      * @param str the string to build from
      * @return the built string
      */
     public static String buildRegex(final CharSequence str) {
         return buildString(str.subSequence(1, str.length()), true);
     }

     /**
      * Read the remainder of a string till a given separator,
      * handles escaping through '\' syntax.
      * <p>
      * The un-escaped separator that ends the read, if any, is also copied into the buffer.
      * </p>
      * @param strb the destination buffer to copy characters into
      * @param str the origin
      * @param index the offset into the origin
      * @param sep the separator, single or double quote, marking end of string
      * @return the offset in origin
      */
     public static int readString(final StringBuilder strb, final CharSequence str, final int index, final char sep) {
         return read(strb, str, index, str.length(), sep);
     }

     /**
      * Read the remainder of a string till a given separator,
      * handles escaping through '\' syntax.
      * @param strb the destination buffer to copy characters into
      * @param str the origin
      * @param begin the relative offset in str to begin reading
      * @param end the relative offset in str to end reading
      * @param sep the separator, single or double quote, marking end of string
      * @return the last character offset handled in origin
      */
     private static int read(final StringBuilder strb, final CharSequence str, final int begin, final int end, final char sep) {
         boolean escape = false;
         int index = begin;
         for (; index < end; ++index) {
             final char c = str.charAt(index);
             if (escape) {
                 // the 4 digits following 'u' must all lie before 'end'
                 if (c == 'u' && (index + UCHAR_LEN) < end && readUnicodeChar(strb, str, index + 1) > 0) {
                     index += UCHAR_LEN;
                 } else {
                     // if c is not an escapable character, re-emit the backslash before it
                     final boolean notSeparator = sep == 0 ? c != '\'' && c != '"' : c != sep;
                     if (notSeparator && c != '\\') {
                         switch (c) {
                             // http://es5.github.io/x7.html#x7.8.4
                             case 'b': strb.append('\b'); break; // backspace (U+0008)
                             case 't': strb.append('\t'); break; // horizontal tab (U+0009)
                             case 'n': strb.append('\n'); break; // line feed (U+000A)
                             // Vertical tab is not supported; its Unicode escape (U+000B) should be used instead
                             case 'f': strb.append('\f'); break; // form feed (U+000C)
                             case 'r': strb.append('\r'); break; // carriage return (U+000D)
                             default: strb.append('\\').append(c);
                         }
                     } else {
                         // escaped separator or escaped backslash: keep the character only
                         strb.append(c);
                     }
                 }
                 escape = false;
                 continue;
             }
             if (c == '\\') {
                 escape = true;
                 continue;
             }
             strb.append(c);
             if (c == sep) {
                 break;
             }
         }
         return index;
     }

     /**
      * Reads a Unicode escape character.
      * <p>
      * Only the characters '0'-'9', 'a'-'f' and 'A'-'F' are accepted as hexadecimal digits;
      * nothing is written to the builder unless all 4 digits are valid.
      * </p>
      * @param strb the builder to write the character to
      * @param str the sequence
      * @param begin the begin offset in sequence (just after the 'u')
      * @return 0 if char could not be read, 4 otherwise
      */
     private static int readUnicodeChar(final StringBuilder strb, final CharSequence str, final int begin) {
         char xc = 0;
         int bits = SHIFT;
         for (int offset = 0; offset < UCHAR_LEN; ++offset) {
             final char c = str.charAt(begin + offset);
             final int value;
             if (c >= '0' && c <= '9') {
                 value = c - '0';
             } else if (c >= 'a' && c <= 'f') {
                 value = c - 'a' + BASE10;
             } else if (c >= 'A' && c <= 'F') {
                 value = c - 'A' + BASE10;
             } else {
                 // not a hexadecimal digit, the sequence is not a Unicode escape
                 return 0;
             }
             // most significant nibble first
             xc |= value << bits;
             bits -= UCHAR_LEN;
         }
         strb.append(xc);
         return UCHAR_LEN;
     }

     /**
      * Escapes a String representation, expand non-ASCII characters as Unicode escape sequence.
      * <p>
      * The result is surrounded by the delimiter. Backspace, tab, line feed, form feed,
      * carriage return, both quotes and the backslash are written as backslash sequences,
      * characters whose code is between 32 and 127 are copied as is and any other character
      * is written as a 4 hexadecimal digits Unicode escape. Characters whose code is 0 are
      * dropped.
      * </p>
      * @param str the string to escape
      * @param delim the delimiter character
      * @return the escaped representation, null if str is null
      */
     public static String escapeString(final String str, final char delim) {
         if (str == null) {
             return null;
         }
         final int length = str.length();
         final StringBuilder strb = new StringBuilder(length + 2);
         strb.append(delim);
         for (int i = 0; i < length; ++i) {
             final char c = str.charAt(i);
             switch (c) {
                 case 0:
                     // skip this character, proceed with the next one
                     continue;
                 case '\b':
                     strb.append("\\b");
                     break;
                 case '\t':
                     strb.append("\\t");
                     break;
                 case '\n':
                     strb.append("\\n");
                     break;
                 case '\f':
                     strb.append("\\f");
                     break;
                 case '\r':
                     strb.append("\\r");
                     break;
                 case '\"':
                     strb.append("\\\"");
                     break;
                 case '\'':
                     strb.append("\\\'");
                     break;
                 case '\\':
                     strb.append("\\\\");
                     break;
                 default:
                     if (c >= FIRST_ASCII && c <= LAST_ASCII) {
                         strb.append(c);
                     } else {
                         // convert to Unicode escape sequence, left-padded with zeros to 4 digits
                         strb.append('\\');
                         strb.append('u');
                         final String hex = Integer.toHexString(c);
                         for (int h = hex.length(); h < UCHAR_LEN; ++h) {
                             strb.append('0');
                         }
                         strb.append(hex);
                     }
             }
         }
         strb.append(delim);
         return strb.toString();
     }

     /**
      * Remove escape char ('\') from an identifier.
      * <p>
      * Every backslash is removed, including one that directly follows another backslash.
      * </p>
      * @param str the identifier escaped string, ie with a backslash before space, quote, double-quote and backslash
      * @return the string with no '\\' character, str itself if it contains none
      */
     public static String unescapeIdentifier(final String str) {
         StringBuilder strb = null;
         if (str != null) {
             final int last = str.length();
             for (int n = 0; n < last; ++n) {
                 final char c = str.charAt(n);
                 if (c == '\\') {
                     // allocate lazily, on the first backslash, and copy what precedes it
                     if (strb == null) {
                         strb = new StringBuilder(last);
                         strb.append(str, 0, n);
                     }
                 } else if (strb != null) {
                     strb.append(c);
                 }
             }
         }
         return strb == null ? str : strb.toString();
     }

     /**
      * Adds a escape char ('\') where needed in a string form of an identifier.
      * @param str the identifier un-escaped string
      * @return the string with added backslash character before space, quote, double-quote and backslash,
      * str itself if it contains none of these
      */
     public static String escapeIdentifier(final String str) {
         StringBuilder strb = null;
         if (str != null) {
             final int last = str.length();
             for (int n = 0; n < last; ++n) {
                 final char c = str.charAt(n);
                 switch (c) {
                     case ' ':
                     case '\'':
                     case '"':
                     case '\\': {
                         // allocate lazily, on the first escaped character, and copy what precedes it
                         if (strb == null) {
                             strb = new StringBuilder(last);
                             strb.append(str, 0, n);
                         }
                         strb.append('\\');
                         strb.append(c);
                         break;
                     }
                     default:
                         if (strb != null) {
                             strb.append(c);
                         }
                 }
             }
         }
         return strb == null ? str : strb.toString();
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.commons.jexl3.parser;

	/**
	 * Common constant strings utilities.
	 * <p>
	 * The methods of this class read JEXL string literals and handle escaping through the
	 * 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single
	 * and double quotes), to read Unicode hexadecimal encoded characters and to express a few
	 * control characters.
	 * </p>
	 * <p>
	 * The escapable characters are the string delimiter (both the single and double quotes
	 * - ''' and '"' - when no delimiter is in use), the backslash character - '\' - itself,
	 * a Unicode sequence starting with 'u' followed by 4 hexadecimal digits, and the letters
	 * 'b', 't', 'n', 'f' and 'r' which respectively produce a backspace, a horizontal tab,
	 * a line feed, a form feed and a carriage return.
	 * </p>
	 * <p>
	 * A sequence where '\' occurs before any non-escapable character or sequence has no effect, the
	 * sequence output being the same as the input.
	 * </p>
	 */
	public class StringParser {
	    /** The length of an escaped unicode sequence. */
	    private static final int UCHAR_LEN = 4;
	    /** Initial shift value for composing a Unicode char from 4 nibbles (16 - 4). */
	    private static final int SHIFT = 12;
	    /** The base 10 offset used to convert hexa characters to decimal. */
	    private static final int BASE10 = 10;
	    /** The last 7bits ascii character. */
	    private static final char LAST_ASCII = 127;
	    /** The first printable 7bits ascii character. */
	    private static final char FIRST_ASCII = 32;

	    /** Default constructor. */
	    public StringParser() {
	    }

	    /**
	     * Builds a string, handles escaping through '\' syntax.
	     * <p>
	     * When {@code eatsep} is true, the first character of the sequence is taken as the
	     * delimiter and both the first and the last characters are left out of the result.
	     * </p>
	     * @param str the string to build from
	     * @param eatsep whether the separator, the first character, should be considered
	     * @return the built string
	     */
	    public static String buildString(final CharSequence str, final boolean eatsep) {
	        final StringBuilder strb = new StringBuilder(str.length());
	        final char sep = eatsep ? str.charAt(0) : 0;
	        final int end = str.length() - (eatsep ? 1 : 0);
	        final int begin = (eatsep ? 1 : 0);
	        read(strb, str, begin, end, sep);
	        return strb.toString();
	    }

	    /**
	     * Builds a regex pattern string, handles escaping '/' through '\/' syntax.
	     * <p>
	     * The first character of the sequence is skipped; the remainder is read as a string
	     * whose delimiter is its own first character.
	     * </p>
	     * @param str the string to build from
	     * @return the built string
	     */
	    public static String buildRegex(final CharSequence str) {
	        return buildString(str.subSequence(1, str.length()), true);
	    }

	    /**
	     * Read the remainder of a string till a given separator,
	     * handles escaping through '\' syntax.
	     * <p>
	     * The un-escaped separator that ends the read, if any, is also copied into the buffer.
	     * </p>
	     * @param strb the destination buffer to copy characters into
	     * @param str the origin
	     * @param index the offset into the origin
	     * @param sep the separator, single or double quote, marking end of string
	     * @return the offset in origin
	     */
	    public static int readString(final StringBuilder strb, final CharSequence str, final int index, final char sep) {
	        return read(strb, str, index, str.length(), sep);
	    }

	    /**
	     * Read the remainder of a string till a given separator,
	     * handles escaping through '\' syntax.
	     * @param strb the destination buffer to copy characters into
	     * @param str the origin
	     * @param begin the relative offset in str to begin reading
	     * @param end the relative offset in str to end reading
	     * @param sep the separator, single or double quote, marking end of string
	     * @return the last character offset handled in origin
	     */
	    private static int read(final StringBuilder strb, final CharSequence str, final int begin, final int end, final char sep) {
	        boolean escape = false;
	        int index = begin;
	        for (; index < end; ++index) {
	            final char c = str.charAt(index);
	            if (escape) {
	                // the 4 digits following 'u' must all lie before 'end'
	                if (c == 'u' && (index + UCHAR_LEN) < end && readUnicodeChar(strb, str, index + 1) > 0) {
	                    index += UCHAR_LEN;
	                } else {
	                    // if c is not an escapable character, re-emit the backslash before it
	                    final boolean notSeparator = sep == 0 ? c != '\'' && c != '"' : c != sep;
	                    if (notSeparator && c != '\\') {
	                        switch (c) {
	                            // http://es5.github.io/x7.html#x7.8.4
	                            case 'b': strb.append('\b'); break; // backspace (U+0008)
	                            case 't': strb.append('\t'); break; // horizontal tab (U+0009)
	                            case 'n': strb.append('\n'); break; // line feed (U+000A)
	                            // Vertical tab is not supported; its Unicode escape (U+000B) should be used instead
	                            case 'f': strb.append('\f'); break; // form feed (U+000C)
	                            case 'r': strb.append('\r'); break; // carriage return (U+000D)
	                            default: strb.append('\\').append(c);
	                        }
	                    } else {
	                        // escaped separator or escaped backslash: keep the character only
	                        strb.append(c);
	                    }
	                }
	                escape = false;
	                continue;
	            }
	            if (c == '\\') {
	                escape = true;
	                continue;
	            }
	            strb.append(c);
	            if (c == sep) {
	                break;
	            }
	        }
	        return index;
	    }

	    /**
	     * Reads a Unicode escape character.
	     * <p>
	     * Only the characters '0'-'9', 'a'-'f' and 'A'-'F' are accepted as hexadecimal digits;
	     * nothing is written to the builder unless all 4 digits are valid.
	     * </p>
	     * @param strb the builder to write the character to
	     * @param str the sequence
	     * @param begin the begin offset in sequence (just after the 'u')
	     * @return 0 if char could not be read, 4 otherwise
	     */
	    private static int readUnicodeChar(final StringBuilder strb, final CharSequence str, final int begin) {
	        char xc = 0;
	        int bits = SHIFT;
	        for (int offset = 0; offset < UCHAR_LEN; ++offset) {
	            final char c = str.charAt(begin + offset);
	            final int value;
	            if (c >= '0' && c <= '9') {
	                value = c - '0';
	            } else if (c >= 'a' && c <= 'f') {
	                value = c - 'a' + BASE10;
	            } else if (c >= 'A' && c <= 'F') {
	                value = c - 'A' + BASE10;
	            } else {
	                // not a hexadecimal digit, the sequence is not a Unicode escape
	                return 0;
	            }
	            // most significant nibble first
	            xc |= value << bits;
	            bits -= UCHAR_LEN;
	        }
	        strb.append(xc);
	        return UCHAR_LEN;
	    }

	    /**
	     * Escapes a String representation, expand non-ASCII characters as Unicode escape sequence.
	     * <p>
	     * The result is surrounded by the delimiter. Backspace, tab, line feed, form feed,
	     * carriage return, both quotes and the backslash are written as backslash sequences,
	     * characters whose code is between 32 and 127 are copied as is and any other character
	     * is written as a 4 hexadecimal digits Unicode escape. Characters whose code is 0 are
	     * dropped.
	     * </p>
	     * @param str the string to escape
	     * @param delim the delimiter character
	     * @return the escaped representation, null if str is null
	     */
	    public static String escapeString(final String str, final char delim) {
	        if (str == null) {
	            return null;
	        }
	        final int length = str.length();
	        final StringBuilder strb = new StringBuilder(length + 2);
	        strb.append(delim);
	        for (int i = 0; i < length; ++i) {
	            final char c = str.charAt(i);
	            switch (c) {
	                case 0:
	                    // skip this character, proceed with the next one
	                    continue;
	                case '\b':
	                    strb.append("\\b");
	                    break;
	                case '\t':
	                    strb.append("\\t");
	                    break;
	                case '\n':
	                    strb.append("\\n");
	                    break;
	                case '\f':
	                    strb.append("\\f");
	                    break;
	                case '\r':
	                    strb.append("\\r");
	                    break;
	                case '\"':
	                    strb.append("\\\"");
	                    break;
	                case '\'':
	                    strb.append("\\\'");
	                    break;
	                case '\\':
	                    strb.append("\\\\");
	                    break;
	                default:
	                    if (c >= FIRST_ASCII && c <= LAST_ASCII) {
	                        strb.append(c);
	                    } else {
	                        // convert to Unicode escape sequence, left-padded with zeros to 4 digits
	                        strb.append('\\');
	                        strb.append('u');
	                        final String hex = Integer.toHexString(c);
	                        for (int h = hex.length(); h < UCHAR_LEN; ++h) {
	                            strb.append('0');
	                        }
	                        strb.append(hex);
	                    }
	            }
	        }
	        strb.append(delim);
	        return strb.toString();
	    }

	    /**
	     * Remove escape char ('\') from an identifier.
	     * <p>
	     * Every backslash is removed, including one that directly follows another backslash.
	     * </p>
	     * @param str the identifier escaped string, ie with a backslash before space, quote, double-quote and backslash
	     * @return the string with no '\\' character, str itself if it contains none
	     */
	    public static String unescapeIdentifier(final String str) {
	        StringBuilder strb = null;
	        if (str != null) {
	            final int last = str.length();
	            for (int n = 0; n < last; ++n) {
	                final char c = str.charAt(n);
	                if (c == '\\') {
	                    // allocate lazily, on the first backslash, and copy what precedes it
	                    if (strb == null) {
	                        strb = new StringBuilder(last);
	                        strb.append(str, 0, n);
	                    }
	                } else if (strb != null) {
	                    strb.append(c);
	                }
	            }
	        }
	        return strb == null ? str : strb.toString();
	    }

	    /**
	     * Adds a escape char ('\') where needed in a string form of an identifier.
	     * @param str the identifier un-escaped string
	     * @return the string with added backslash character before space, quote, double-quote and backslash,
	     * str itself if it contains none of these
	     */
	    public static String escapeIdentifier(final String str) {
	        StringBuilder strb = null;
	        if (str != null) {
	            final int last = str.length();
	            for (int n = 0; n < last; ++n) {
	                final char c = str.charAt(n);
	                switch (c) {
	                    case ' ':
	                    case '\'':
	                    case '"':
	                    case '\\': {
	                        // allocate lazily, on the first escaped character, and copy what precedes it
	                        if (strb == null) {
	                            strb = new StringBuilder(last);
	                            strb.append(str, 0, n);
	                        }
	                        strb.append('\\');
	                        strb.append(c);
	                        break;
	                    }
	                    default:
	                        if (strb != null) {
	                            strb.append(c);
	                        }
	                }
	            }
	        }
	        return strb == null ? str : strb.toString();
	    }
	}