blob: f86c8dd538fe31489abe0bd501637e4bbd3676cb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.jexl3.parser;
/**
* Common constant strings utilities.
* <p>
* This package methods read JEXL string literals and handle escaping through the
* 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single
* and double quotes) and read Unicode hexadecimal encoded characters.
* </p>
* <p>
* The only escapable characters are the single and double quotes - ''' and '"' -,
* a Unicode sequence starting with 'u' followed by 4 hexadecimals and
* the backslash character - '\' - itself.
* </p>
* <p>
* A sequence where '\' occurs before any non-escapable character or sequence has no effect, the
* sequence output being the same as the input.
* </p>
*/
public class StringParser {
/** Default constructor. */
public StringParser() {
}
/**
* Builds a string, handles escaping through '\' syntax.
* @param str the string to build from
* @param eatsep whether the separator, the first character, should be considered
* @return the built string
*/
public static String buildString(final CharSequence str, final boolean eatsep) {
final StringBuilder strb = new StringBuilder(str.length());
final char sep = eatsep ? str.charAt(0) : 0;
final int end = str.length() - (eatsep ? 1 : 0);
final int begin = (eatsep ? 1 : 0);
read(strb, str, begin, end, sep);
return strb.toString();
}
/**
* Builds a regex pattern string, handles escaping '/' through '\/' syntax.
* @param str the string to build from
* @return the built string
*/
public static String buildRegex(final CharSequence str) {
return buildString(str.subSequence(1, str.length()), true);
}
/**
* Read the remainder of a string till a given separator,
* handles escaping through '\' syntax.
* @param strb the destination buffer to copy characters into
* @param str the origin
* @param index the offset into the origin
* @param sep the separator, single or double quote, marking end of string
* @return the offset in origin
*/
public static int readString(final StringBuilder strb, final CharSequence str, final int index, final char sep) {
return read(strb, str, index, str.length(), sep);
}
/** The length of an escaped unicode sequence. */
private static final int UCHAR_LEN = 4;
/**
* Read the remainder of a string till a given separator,
* handles escaping through '\' syntax.
* @param strb the destination buffer to copy characters into
* @param str the origin
* @param begin the relative offset in str to begin reading
* @param end the relative offset in str to end reading
* @param sep the separator, single or double quote, marking end of string
* @return the last character offset handled in origin
*/
private static int read(final StringBuilder strb, final CharSequence str, final int begin, final int end, final char sep) {
boolean escape = false;
int index = begin;
for (; index < end; ++index) {
final char c = str.charAt(index);
if (escape) {
if (c == 'u' && (index + UCHAR_LEN) < end && readUnicodeChar(strb, str, index + 1) > 0) {
index += UCHAR_LEN;
} else {
// if c is not an escapable character, re-emmit the backslash before it
final boolean notSeparator = sep == 0 ? c != '\'' && c != '"' : c != sep;
if (notSeparator && c != '\\') {
switch (c) {
// http://es5.github.io/x7.html#x7.8.4
case 'b': strb.append('\b'); break; // backspace \u0008
case 't': strb.append('\t'); break; // horizontal tab \u0009
case 'n': strb.append('\n'); break; // line feed \u000A
// We don't support vertical tab. If needed, the unicode (\u000B) should be used instead
case 'f': strb.append('\f'); break; // form feed \u000C
case 'r': strb.append('\r'); break; // carriage return \u000D
default: strb.append('\\').append(c);
}
} else {
strb.append(c);
}
}
escape = false;
continue;
}
if (c == '\\') {
escape = true;
continue;
}
strb.append(c);
if (c == sep) {
break;
}
}
return index;
}
/** Initial shift value for composing a Unicode char from 4 nibbles (16 - 4). */
private static final int SHIFT = 12;
/** The base 10 offset used to convert hexa characters to decimal. */
private static final int BASE10 = 10;
/**
* Reads a Unicode escape character.
* @param strb the builder to write the character to
* @param str the sequence
* @param begin the begin offset in sequence (after the '\\u')
* @return 0 if char could not be read, 4 otherwise
*/
private static int readUnicodeChar(final StringBuilder strb, final CharSequence str, final int begin) {
char xc = 0;
int bits = SHIFT;
int value = 0;
for (int offset = 0; offset < UCHAR_LEN; ++offset) {
final char c = str.charAt(begin + offset);
if (c >= '0' && c <= '9') {
value = (c - '0');
} else if (c >= 'a' && c <= 'h') {
value = (c - 'a' + BASE10);
} else if (c >= 'A' && c <= 'H') {
value = (c - 'A' + BASE10);
} else {
return 0;
}
xc |= value << bits;
bits -= UCHAR_LEN;
}
strb.append(xc);
return UCHAR_LEN;
}
/** The last 7bits ascii character. */
private static final char LAST_ASCII = 127;
/** The first printable 7bits ascii character. */
private static final char FIRST_ASCII = 32;
/**
* Escapes a String representation, expand non-ASCII characters as Unicode escape sequence.
* @param delim the delimiter character
* @param str the string to escape
* @return the escaped representation
*/
public static String escapeString(final String str, final char delim) {
if (str == null) {
return null;
}
final int length = str.length();
final StringBuilder strb = new StringBuilder(length + 2);
strb.append(delim);
for (int i = 0; i < length; ++i) {
final char c = str.charAt(i);
switch (c) {
case 0:
continue;
case '\b':
strb.append("\\b");
break;
case '\t':
strb.append("\\t");
break;
case '\n':
strb.append("\\n");
break;
case '\f':
strb.append("\\f");
break;
case '\r':
strb.append("\\r");
break;
case '\"':
strb.append("\\\"");
break;
case '\'':
strb.append("\\\'");
break;
case '\\':
strb.append("\\\\");
break;
default:
if (c >= FIRST_ASCII && c <= LAST_ASCII) {
strb.append(c);
} else {
// convert to Unicode escape sequence
strb.append('\\');
strb.append('u');
final String hex = Integer.toHexString(c);
for (int h = hex.length(); h < UCHAR_LEN; ++h) {
strb.append('0');
}
strb.append(hex);
}
}
}
strb.append(delim);
return strb.toString();
}
/**
* Remove escape char ('\') from an identifier.
* @param str the identifier escaped string, ie with a backslash before space, quote, double-quote and backslash
* @return the string with no '\\' character
*/
public static String unescapeIdentifier(final String str) {
StringBuilder strb = null;
if (str != null) {
int n = 0;
final int last = str.length();
while (n < last) {
final char c = str.charAt(n);
if (c == '\\') {
if (strb == null) {
strb = new StringBuilder(last);
strb.append(str.substring(0, n));
}
} else if (strb != null) {
strb.append(c);
}
n += 1;
}
}
return strb == null ? str : strb.toString();
}
/**
* Adds a escape char ('\') where needed in a string form of an ide
* @param str the identifier un-escaped string
* @return the string with added backslash character before space, quote, double-quote and backslash
*/
public static String escapeIdentifier(final String str) {
StringBuilder strb = null;
if (str != null) {
int n = 0;
final int last = str.length();
while (n < last) {
final char c = str.charAt(n);
switch (c) {
case ' ':
case '\'':
case '"':
case '\\': {
if (strb == null) {
strb = new StringBuilder(last);
strb.append(str.substring(0, n));
}
strb.append('\\');
strb.append(c);
break;
}
default:
if (strb != null) {
strb.append(c);
}
}
n += 1;
}
}
return strb == null ? str : strb.toString();
}
}