blob: f11f429a98ccd5326aea659d5a171e662484bda8 [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.dubbo.admin.registry.common.util;
* <p>Escapes and unescapes <code>String</code>s for
* Java, JavaScript, HTML, XML, and SQL.</p>
* @version $Id: 181192 2012-06-21 05:05:47Z tony.chenl $
* @since 2.0
public class StringEscapeUtils {
// CSV field delimiter and quote character. Nothing in the visible part of this file reads these constants.
private static final char CSV_DELIMITER = ',';
private static final char CSV_QUOTE = '"';
// String form of CSV_QUOTE.
private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
// Delimiter, quote, CR and LF. Presumably the characters that force a CSV field to be quoted; verify against the CSV methods.
private static final char[] CSV_SEARCH_CHARS = new char[]{CSV_DELIMITER, CSV_QUOTE, '\r', '\n'};
* <p><code>StringEscapeUtils</code> instances should NOT be constructed in
* standard programming.</p>
* <p>
* <p>Instead, the class should be used as:
* <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
* <p>This constructor is public to permit tools that require a JavaBean
* instance to operate.</p>
public StringEscapeUtils() {
// No statements are visible in the constructor body in this copy of the file.
// Java and JavaScript
* <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
* <p>
* <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
* <p>
* <p>So a tab becomes the characters <code>'\\'</code> and
* <code>'t'</code>.</p>
* <p>
* <p>The only difference between Java strings and JavaScript strings
* is that in JavaScript, a single quote must be escaped.</p>
* <p>
* <p>Example:
* <pre>
* input string: He didn't say, "Stop!"
* output string: He didn't say, \"Stop!\"
* </pre>
* </p>
* @param str String to escape values in, may be null
* @return String with escaped values, <code>null</code> if null string input
public static String escapeJava(String str) {
// Delegate to the shared worker with single-quote escaping switched off.
return escapeJavaStyleString(str, false);
* <p>Escapes the characters in a <code>String</code> using Java String rules to
* a <code>Writer</code>.</p>
* <p>
* <p>A <code>null</code> string input has no effect.</p>
* @param out Writer to write escaped string into
* @param str String to escape values in, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
* @throws IOException if error occurs on underlying Writer
* @see #escapeJava(java.lang.String)
public static void escapeJava(Writer out, String str) throws IOException {
// Writer variant: hand off to the shared worker with single-quote escaping switched off.
escapeJavaStyleString(out, str, false);
* <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
* <p>Escapes any values it finds into their JavaScript String form.
* Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
* <p>
* <p>So a tab becomes the characters <code>'\\'</code> and
* <code>'t'</code>.</p>
* <p>
* <p>The only difference between Java strings and JavaScript strings
* is that in JavaScript, a single quote must be escaped.</p>
* <p>
* <p>Example:
* <pre>
* input string: He didn't say, "Stop!"
* output string: He didn\'t say, \"Stop!\"
* </pre>
* </p>
* @param str String to escape values in, may be null
* @return String with escaped values, <code>null</code> if null string input
public static String escapeJavaScript(String str) {
// Same worker as escapeJava, but with single-quote escaping switched on.
return escapeJavaStyleString(str, true);
* <p>Escapes the characters in a <code>String</code> using JavaScript String rules
* to a <code>Writer</code>.</p>
* <p>
* <p>A <code>null</code> string input has no effect.</p>
* @param out Writer to write escaped string into
* @param str String to escape values in, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
* @throws IOException if error occurs on underlying Writer
* @see #escapeJavaScript(java.lang.String)
public static void escapeJavaScript(Writer out, String str) throws IOException {
// Writer variant: same worker as escapeJava, with single-quote escaping switched on.
escapeJavaStyleString(out, str, true);
* <p>Worker method for the {@link #escapeJava(String)} and {@link #escapeJavaScript(String)} methods.</p>
* @param str String to escape values in, may be null
* @param escapeSingleQuotes escapes single quotes if <code>true</code>
* @return the escaped string
private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes) {
// Null in, null out.
if (str == null) {
return null;
try {
// The buffer starts at twice the input length.
StringWriter writer = new StringWriter(str.length() * 2);
// The Writer-based overload does the actual escaping.
escapeJavaStyleString(writer, str, escapeSingleQuotes);
return writer.toString();
} catch (IOException ioe) {
// this should never ever happen while writing to a StringWriter
// NOTE(review): as visible here the exception is dropped and null is returned; confirm no statement is missing from this handler.
return null;
* <p>Worker method for the {@link #escapeJava(Writer, String)} and {@link #escapeJavaScript(Writer, String)} methods.</p>
* @param out writer to receive the escaped string
* @param str String to escape values in, may be null
* @param escapeSingleQuote escapes single quotes if <code>true</code>
* @throws IOException if an IOException occurs
private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote) throws IOException {
// A null Writer is rejected up front.
if (out == null) {
throw new IllegalArgumentException("The Writer must not be null");
// NOTE(review): the statement guarded by this null check is not visible in this copy.
// The public callers document a null string as having no effect, so presumably an early return; confirm.
if (str == null) {
int sz;
sz = str.length();
// Walk the input one char (UTF-16 code unit) at a time.
for (int i = 0; i < sz; i++) {
char ch = str.charAt(i);
// handle unicode
// Anything above 0x7f is written as a backslash-u escape. hex(ch) does not pad,
// so the literal prefix supplies the leading zeros needed to reach four hex digits.
if (ch > 0xfff) {
out.write("\\u" + hex(ch));
} else if (ch > 0xff) {
out.write("\\u0" + hex(ch));
} else if (ch > 0x7f) {
out.write("\\u00" + hex(ch));
} else if (ch < 32) {
// Control characters: backspace, newline, tab, form feed and carriage return have dedicated cases.
// NOTE(review): the statements under these case labels (and any default label) are not visible in this copy.
switch (ch) {
case '\b':
case '\n':
case '\t':
case '\f':
case '\r':
// Zero-padded backslash-u escape; presumably the fallback for the remaining control characters. Confirm.
if (ch > 0xf) {
out.write("\\u00" + hex(ch));
} else {
out.write("\\u000" + hex(ch));
} else {
// Remaining ASCII range (32..0x7f): single quote, double quote, backslash and forward slash have dedicated cases.
// The single-quote case consults escapeSingleQuote.
// NOTE(review): the statements under these case labels are not visible in this copy.
switch (ch) {
case '\'':
if (escapeSingleQuote) {
case '"':
case '\\':
case '/':
* <p>Returns an upper case hexadecimal <code>String</code> for the given
* character.</p>
* @param ch The character to convert.
* @return An upper case hexadecimal <code>String</code>
private static String hex(char ch) {
// Integer.toHexString yields lower-case digits with no leading zeros; the callers add the zero padding themselves.
return Integer.toHexString(ch).toUpperCase();
* <p>Unescapes any Java literals found in the <code>String</code>.
* For example, it will turn a sequence of <code>'\'</code> and
* <code>'n'</code> into a newline character, unless the <code>'\'</code>
* is preceded by another <code>'\'</code>.</p>
* @param str the <code>String</code> to unescape, may be null
* @return a new unescaped <code>String</code>, <code>null</code> if null string input
public static String unescapeJava(String str) {
// Null in, null out.
if (str == null) {
return null;
try {
// Initial buffer capacity equals the input length.
StringWriter writer = new StringWriter(str.length());
// The Writer-based overload does the actual unescaping.
unescapeJava(writer, str);
return writer.toString();
} catch (IOException ioe) {
// this should never ever happen while writing to a StringWriter
// NOTE(review): as visible here the exception is dropped and null is returned; confirm no statement is missing from this handler.
return null;
* <p>Unescapes any Java literals found in the <code>String</code> to a
* <code>Writer</code>.</p>
* <p>
* <p>For example, it will turn a sequence of <code>'\'</code> and
* <code>'n'</code> into a newline character, unless the <code>'\'</code>
* is preceded by another <code>'\'</code>.</p>
* <p>
* <p>A <code>null</code> string input has no effect.</p>
* @param out the <code>Writer</code> used to output unescaped characters
* @param str the <code>String</code> to unescape, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
* @throws IOException if error occurs on underlying Writer
public static void unescapeJava(Writer out, String str) throws IOException {
// A null Writer is rejected up front.
if (out == null) {
throw new IllegalArgumentException("The Writer must not be null");
// NOTE(review): the statement guarded by this null check is not visible in this copy.
// The Javadoc says a null string has no effect, so presumably an early return; confirm.
if (str == null) {
int sz = str.length();
// Buffer for the hex digits of a unicode escape; it is parsed once it holds four characters.
StringBuffer unicode = new StringBuffer(4);
// hadSlash: set when a backslash is seen outside an escape, cleared when the following character is handled.
boolean hadSlash = false;
// inUnicode: set when 'u' follows a backslash, cleared after the four digits have been decoded.
boolean inUnicode = false;
for (int i = 0; i < sz; i++) {
char ch = str.charAt(i);
if (inUnicode) {
// if in unicode, then we're reading unicode
// values in somehow
// NOTE(review): the statement that appends ch to the digit buffer is not visible in this copy.
if (unicode.length() == 4) {
// unicode now contains the four hex digits
// which represents our unicode character
try {
int value = Integer.parseInt(unicode.toString(), 16);
out.write((char) value);
inUnicode = false;
hadSlash = false;
} catch (NumberFormatException nfe) {
// Non-hex digits surface as an unchecked exception that keeps the parse failure as its cause.
throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe);
if (hadSlash) {
// handle an escaped value
hadSlash = false;
// Escapes with dedicated cases: backslash, single quote, double quote, r, f, t, n, b and u.
// NOTE(review): the statements under these case labels are not visible in this copy.
switch (ch) {
case '\\':
case '\'':
case '\"':
case 'r':
case 'f':
case 't':
case 'n':
case 'b':
case 'u': {
// uh-oh, we're in unicode country....
inUnicode = true;
} else if (ch == '\\') {
// A backslash outside an escape starts a new escape sequence.
hadSlash = true;
if (hadSlash) {
// then we're in the weird case of a \ at the end of the
// string, let's output it anyway.
// NOTE(review): the write of the trailing backslash is not visible in this copy.
* <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
* <p>
* <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
* into a newline character, unless the <code>'\'</code> is preceded by another
* <code>'\'</code>.</p>
* @param str the <code>String</code> to unescape, may be null
* @return A new unescaped <code>String</code>, <code>null</code> if null string input
* @see #unescapeJava(String)
public static String unescapeJavaScript(String str) {
// Reuses the Java unescaper unchanged; no JavaScript-specific handling is applied here.
return unescapeJava(str);
* <p>Unescapes any JavaScript literals found in the <code>String</code> to a
* <code>Writer</code>.</p>
* <p>
* <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
* into a newline character, unless the <code>'\'</code> is preceded by another
* <code>'\'</code>.</p>
* <p>
* <p>A <code>null</code> string input has no effect.</p>
* @param out the <code>Writer</code> used to output unescaped characters
* @param str the <code>String</code> to unescape, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
* @throws IOException if error occurs on underlying Writer
* @see #unescapeJava(Writer, String)
public static void unescapeJavaScript(Writer out, String str) throws IOException {
// Writer variant: reuses the Java unescaper unchanged.
unescapeJava(out, str);
// HTML and XML
* <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
* <p>
* <p>
* For example:
* </p>
* <p><code>"bread" &amp; "butter"</code></p>
* becomes:
* <p>
* <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
* </p>
* <p>
* <p>Supports all known HTML 4.0 entities, including funky accents.
* Note that the commonly used apostrophe escape character (&amp;apos;)
* is not a legal entity and so is not supported.</p>
* @param str the <code>String</code> to escape, may be null
* @return a new escaped <code>String</code>, <code>null</code> if null string input
* @see #unescapeHtml(String)
* @see <a href="">ISO Entities</a>
* @see <a href="">HTML 3.2 Character Entities for ISO Latin-1</a>
* @see <a href="">HTML 4.0 Character entity references</a>
* @see <a href="">HTML 4.01 Character References</a>
* @see <a href="">HTML 4.01 Code positions</a>
public static String escapeHtml(String str) {
// Null in, null out.
if (str == null) {
return null;
try {
// The buffer starts at 1.5x the input length.
StringWriter writer = new StringWriter((int) (str.length() * 1.5));
// The Writer-based overload does the actual escaping.
escapeHtml(writer, str);
return writer.toString();
} catch (IOException e) {
//assert false;
//should be impossible
// As visible here, an IOException results in a null return value.
return null;
* <p>Escapes the characters in a <code>String</code> using HTML entities and writes
* them to a <code>Writer</code>.</p>
* <p>
* <p>
* For example:
* </p>
* <code>"bread" &amp; "butter"</code>
* <p>becomes:</p>
* <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
* <p>
* <p>Supports all known HTML 4.0 entities, including funky accents.
* Note that the commonly used apostrophe escape character (&amp;apos;)
* is not a legal entity and so is not supported.</p>
* @param writer the writer receiving the escaped string, not null
* @param string the <code>String</code> to escape, may be null
* @throws IllegalArgumentException if the writer is null
* @throws IOException when <code>Writer</code> passed throws the exception from
* calls to the {@link Writer#write(int)} methods.
* @see #escapeHtml(String)
* @see #unescapeHtml(String)
* @see <a href="">ISO Entities</a>
* @see <a href="">HTML 3.2 Character Entities for ISO Latin-1</a>
* @see <a href="">HTML 4.0 Character entity references</a>
* @see <a href="">HTML 4.01 Character References</a>
* @see <a href="">HTML 4.01 Code positions</a>
public static void escapeHtml(Writer writer, String string) throws IOException {
// A null Writer is rejected up front.
if (writer == null) {
throw new IllegalArgumentException("The Writer must not be null.");
// NOTE(review): the statement guarded by this null check is not visible in this copy; presumably an early return. Confirm.
if (string == null) {
// Escaping is delegated to Entities.HTML40, which is declared outside the visible part of this file.
Entities.HTML40.escape(writer, string);
* <p>Unescapes a string containing entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes. Supports HTML 4.0 entities.</p>
* <p>
* <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
* will become "&lt;Fran&ccedil;ais&gt;"</p>
* <p>
* <p>If an entity is unrecognized, it is left alone, and inserted
* verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
* become "&gt;&amp;zzzz;x".</p>
* @param str the <code>String</code> to unescape, may be null
* @return a new unescaped <code>String</code>, <code>null</code> if null string input
* @see #escapeHtml(Writer, String)
public static String unescapeHtml(String str) {
// Null in, null out.
if (str == null) {
return null;
try {
// The buffer starts at 1.5x the input length, the same sizing as escapeHtml(String).
StringWriter writer = new StringWriter((int) (str.length() * 1.5));
// The Writer-based overload does the actual unescaping.
unescapeHtml(writer, str);
return writer.toString();
} catch (IOException e) {
//assert false;
//should be impossible
// As visible here, an IOException results in a null return value.
return null;
* <p>Unescapes a string containing entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes. Supports HTML 4.0 entities.</p>
* <p>
* <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
* will become "&lt;Fran&ccedil;ais&gt;"</p>
* <p>
* <p>If an entity is unrecognized, it is left alone, and inserted
* verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
* become "&gt;&amp;zzzz;x".</p>
* @param writer the writer receiving the unescaped string, not null
* @param string the <code>String</code> to unescape, may be null
* @throws IllegalArgumentException if the writer is null
* @throws IOException if an IOException occurs
* @see #escapeHtml(String)
public static void unescapeHtml(Writer writer, String string) throws IOException {
// A null Writer is rejected up front.
if (writer == null) {
throw new IllegalArgumentException("The Writer must not be null.");
// NOTE(review): the statement guarded by this null check is not visible in this copy; presumably an early return. Confirm.
if (string == null) {
// Unescaping is delegated to Entities.HTML40, which is declared outside the visible part of this file.
Entities.HTML40.unescape(writer, string);
* <p>Escapes the characters in a <code>String</code> using XML entities.</p>
* <p>
* <p>For example: <tt>"bread" &amp; "butter"</tt> =&gt;
* <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
* </p>
* <p>
* <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
* Does not support DTDs or external entities.</p>
* <p>
* <p>Note that unicode characters greater than 0x7f are currently escaped to
* their numerical \\u equivalent. This may change in future releases. </p>
* @param writer the writer receiving the escaped string, not null
* @param str the <code>String</code> to escape, may be null
* @throws IllegalArgumentException if the writer is null
* @throws IOException if there is a problem writing
* @see #unescapeXml(java.lang.String)
public static void escapeXml(Writer writer, String str) throws IOException {
// A null Writer is rejected up front.
if (writer == null) {
throw new IllegalArgumentException("The Writer must not be null.");
// NOTE(review): the statement guarded by this null check is not visible in this copy; presumably an early return. Confirm.
if (str == null) {
// Escaping is delegated to Entities.XML, which is declared outside the visible part of this file.
Entities.XML.escape(writer, str);
* <p>Escapes the characters in a <code>String</code> using XML entities.</p>
* <p>
* <p>For example: <tt>"bread" &amp; "butter"</tt> =&gt;
* <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
* </p>
* <p>
* <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
* Does not support DTDs or external entities.</p>
* <p>
* <p>Note that unicode characters greater than 0x7f are currently escaped to
* their numerical \\u equivalent. This may change in future releases. </p>
* @param str the <code>String</code> to escape, may be null
* @return a new escaped <code>String</code>, <code>null</code> if null string input
* @see #unescapeXml(java.lang.String)
public static String escapeXml(String str) {
// Null in, null out.
if (str == null) {
return null;
// Unlike escapeHtml(String), this calls the String-returning overload of Entities.XML.escape directly
// instead of going through a StringWriter.
return Entities.XML.escape(str);
* <p>Unescapes a string containing XML entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes.</p>
* <p>
* <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
* Does not support DTDs or external entities.</p>
* <p>
* <p>Note that numerical \\u unicode codes are unescaped to their respective
* unicode characters. This may change in future releases. </p>
* @param writer the writer receiving the unescaped string, not null
* @param str the <code>String</code> to unescape, may be null
* @throws IllegalArgumentException if the writer is null
* @throws IOException if there is a problem writing
* @see #escapeXml(String)
public static void unescapeXml(Writer writer, String str) throws IOException {
// A null Writer is rejected up front.
if (writer == null) {
throw new IllegalArgumentException("The Writer must not be null.");
// NOTE(review): the statement guarded by this null check is not visible in this copy; presumably an early return. Confirm.
if (str == null) {
// Unescaping is delegated to Entities.XML, which is declared outside the visible part of this file.
Entities.XML.unescape(writer, str);
* <p>Unescapes a string containing XML entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes.</p>
* <p>
* <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
* Does not support DTDs or external entities.</p>
* <p>
* <p>Note that numerical \\u unicode codes are unescaped to their respective
* unicode characters. This may change in future releases. </p>
* @param str the <code>String</code> to unescape, may be null
* @return a new unescaped <code>String</code>, <code>null</code> if null string input
* @see #escapeXml(String)
public static String unescapeXml(String str) {
// Null in, null out.
if (str == null) {
return null;
// Calls the String-returning overload of Entities.XML.unescape directly; no StringWriter is involved.
return Entities.XML.unescape(str);