| /******************************************************************************* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| *******************************************************************************/ |
| package org.apache.ofbiz.htmlreport.util; |
| |
| import java.io.UnsupportedEncodingException; |
| import java.net.URLDecoder; |
| import java.net.URLEncoder; |
| import java.nio.CharBuffer; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetEncoder; |
| import java.util.HashMap; |
| import java.util.Map; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
/**
 * Provides static helper methods to encode and decode text for use in URLs, HTML/XML
 * documents and Java property files.<p>
 *
 * The URL related methods are null-safe substitutes for
 * <code>java.net.URLEncoder.encode()</code> and <code>java.net.URLDecoder.decode()</code>
 * that fall back to UTF-8 when the requested encoding is missing or unknown.<p>
 *
 * The de- and encoding uses the same coding mechanism as JavaScript, special characters are
 * replaced with <code>%hex</code> where hex is a two digit hex number.<p>
 *
 * <b>Note:</b> On the client side (browser) prefer the JavaScript functions
 * <code>encodeURIComponent</code> and <code>decodeURIComponent</code> over
 * <code>escape</code> and <code>unescape</code>, since only the former work properly
 * with unicode characters.<p>
 *
 * All methods are static. The only shared mutable state is the cache of resolved
 * encoding names, which is kept in a concurrent map.<p>
 */
public final class ReportEncoder {

    /** Constant for the standard <code>ISO-8859-1</code> encoding. */
    public static final String ENCODING_ISO_8859_1 = "ISO-8859-1";

    /** Constant for the standard <code>US-ASCII</code> encoding. */
    public static final String ENCODING_US_ASCII = "US-ASCII";

    /**
     * Constant for the standard <code>UTF-8</code> encoding.<p>
     *
     * Default encoding for JavaScript decodeUriComponent methods is <code>UTF-8</code> by w3c standard.
     */
    public static final String ENCODING_UTF_8 = "UTF-8";

    /** The regex pattern to match numeric (decimal) HTML entities. */
    private static final Pattern ENTITY_PATTERN = Pattern.compile("\\&#\\d+;");

    /**
     * Entity names (the text between <code>&amp;</code> and <code>;</code>) that
     * {@link #escapeHtml(String)} treats as already escaped. <code>quote</code> is not a
     * real HTML entity name, it is only accepted for backward compatibility.
     */
    private static final Pattern ESCAPED_ENTITY_NAME = Pattern.compile("#[0-9]+|lt|gt|amp|quot|quote");

    /** Entity names that {@link #escapeXml(String, boolean)} treats as already escaped. */
    private static final Pattern NUMERIC_ENTITY_NAME = Pattern.compile("#[0-9]+");

    /** The prefix for HTML entities. */
    private static final String ENTITY_PREFIX = "&#";

    /** The replacement for HTML entity prefix in parameters. */
    private static final String ENTITY_REPLACEMENT = "$$";

    /** A cache for encoding name lookup (alias to canonical name), safe for concurrent use. */
    private static final Map<String, String> ENCODING_CACHE =
        new java.util.concurrent.ConcurrentHashMap<String, String>(16);

    /** The plus entity. */
    private static final String PLUS_ENTITY = ENTITY_PREFIX + "043;";

    /**
     * Hidden constructor, this class only offers static methods.<p>
     */
    private ReportEncoder() {
        // empty
    }

    /**
     * Adjusts the given String by making sure all characters that can be displayed
     * in the given charset are contained as chars, whereas all other non-displayable
     * characters are converted to HTML entities.<p>
     *
     * Just calls {@link #decodeHtmlEntities(String, String)} first and feeds the result
     * to {@link #encodeHtmlEntities(String, String)}.<p>
     *
     * @param input the input to adjust the HTML encoding for (may be <code>null</code>)
     * @param encoding the charset to encode the result with
     *
     * @return the input with the decoded/encoded HTML entities, or <code>null</code> if the input was <code>null</code>
     */
    public static String adjustHtmlEncoding(String input, String encoding) {

        return encodeHtmlEntities(decodeHtmlEntities(input, encoding), encoding);
    }

    /**
     * Changes the encoding of a byte array that represents a String.<p>
     *
     * The input array itself is returned (not a copy) if it is <code>null</code>, if one of
     * the encodings is <code>null</code>, if both encoding names are equal (ignoring case and
     * surrounding white space) or if one of the encodings is not supported.<p>
     *
     * @param input the byte array to convert
     * @param oldEncoding the current encoding of the byte array
     * @param newEncoding the new encoding of the byte array
     *
     * @return the byte array encoded in the new encoding
     */
    public static byte[] changeEncoding(byte[] input, String oldEncoding, String newEncoding) {

        if ((input == null) || (oldEncoding == null) || (newEncoding == null)) {
            return input;
        }
        String from = oldEncoding.trim();
        String to = newEncoding.trim();
        if (from.equalsIgnoreCase(to)) {
            return input;
        }
        try {
            return new String(input, from).getBytes(to);
        } catch (UnsupportedEncodingException e) {
            // deliberate best effort: an unknown encoding leaves the bytes unchanged
            return input;
        }
    }

    /**
     * Creates a String out of a byte array with the specified encoding, falling back
     * to UTF-8 in case the encoding name is <code>null</code> or not valid.<p>
     *
     * Use this method as a replacement for <code>new String(byte[], encoding)</code>
     * to avoid possible encoding problems.<p>
     *
     * @param bytes the bytes to decode
     * @param encoding the encoding scheme to use for decoding the bytes
     *
     * @return the bytes decoded to a String, never <code>null</code>
     */
    public static String createString(byte[] bytes, String encoding) {

        // the resolved name is either a validated canonical charset name or UTF-8,
        // so Charset.forName() can not fail here
        String resolved = lookupEncoding(encoding, ENCODING_UTF_8);
        return new String(bytes, Charset.forName(resolved));
    }

    /**
     * Decodes a String using UTF-8 encoding, which is the standard for http data transmission
     * with GET and POST requests.<p>
     *
     * @param source the String to decode
     *
     * @return String the decoded source String
     */
    public static String decode(String source) {

        return decode(source, ENCODING_UTF_8);
    }

    /**
     * This method is a substitute for <code>URLDecoder.decode()</code>.<p>
     *
     * In case you don't know what encoding to use, set the value of
     * the <code>encoding</code> parameter to <code>null</code>.
     * This method will then default to UTF-8 encoding, which is probably the right one.<p>
     *
     * @param source The string to decode
     * @param encoding The encoding to use (if <code>null</code> or unknown, UTF-8 is used)
     *
     * @return The decoded source String, or <code>null</code> if the source was <code>null</code>
     */
    public static String decode(String source, String encoding) {

        if (source == null) {
            return null;
        }
        try {
            return URLDecoder.decode(source, lookupEncoding(encoding, ENCODING_UTF_8));
        } catch (UnsupportedEncodingException e) {
            // not expected: the name was validated by lookupEncoding() and UTF-8 always exists
            return source;
        }
    }

    /**
     * Decodes HTML entity references like <code>&amp;#8364;</code> that are contained in the
     * String to a regular character, but only if that character is contained in the given
     * encodings charset.<p>
     *
     * References that are not a valid unicode code point, or that the charset can not
     * encode, are left in the result unchanged.<p>
     *
     * @param input the input to decode the HTML entities in (may be <code>null</code>)
     * @param encoding the charset to decode the input for
     * @return the input with the decoded HTML entities, or <code>null</code> if the input was <code>null</code>
     *
     * @see #encodeHtmlEntities(String, String)
     */
    public static String decodeHtmlEntities(String input, String encoding) {

        if (input == null) {
            return null;
        }
        CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
        Matcher matcher = ENTITY_PATTERN.matcher(input);
        StringBuilder result = new StringBuilder(input.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            // the text is copied literally instead of using Matcher.appendReplacement(),
            // which would interpret a decoded '$' or '\' as replacement syntax
            result.append(input, copiedUpTo, matcher.start());
            result.append(decodeEntity(matcher.group(), encoder));
            copiedUpTo = matcher.end();
        }
        result.append(input, copiedUpTo, input.length());
        return result.toString();
    }

    /**
     * Decodes a string used as parameter in an uri in a way independent of other encodings/decodings applied before.<p>
     *
     * @param input the encoded parameter string (may be <code>null</code>)
     *
     * @return the decoded parameter string, or <code>null</code> if the input was <code>null</code>
     *
     * @see #encodeParameter(String)
     */
    public static String decodeParameter(String input) {

        if (input == null) {
            return null;
        }
        return decodeHtmlEntities(input.replace(ENTITY_REPLACEMENT, ENTITY_PREFIX), ENCODING_UTF_8);
    }

    /**
     * Encodes a String using UTF-8 encoding, which is the standard for http data transmission
     * with GET and POST requests.<p>
     *
     * @param source the String to encode
     *
     * @return String the encoded source String
     */
    public static String encode(String source) {

        return encode(source, ENCODING_UTF_8);
    }

    /**
     * This method is a substitute for <code>URLEncoder.encode()</code>.<p>
     *
     * In case you don't know what encoding to use, set the value of
     * the <code>encoding</code> parameter to <code>null</code>.
     * This method will then default to UTF-8 encoding, which is probably the right one.<p>
     *
     * @param source the String to encode
     * @param encoding the encoding to use (if <code>null</code> or unknown, UTF-8 is used)
     *
     * @return the encoded source String, or <code>null</code> if the source was <code>null</code>
     */
    public static String encode(String source, String encoding) {

        if (source == null) {
            return null;
        }
        try {
            return URLEncoder.encode(source, lookupEncoding(encoding, ENCODING_UTF_8));
        } catch (UnsupportedEncodingException e) {
            // not expected: the name was validated by lookupEncoding() and UTF-8 always exists
            return source;
        }
    }

    /**
     * Encodes all characters that are contained in the String which can not be displayed
     * in the given encodings charset with HTML entity references
     * like <code>&amp;#8364;</code>.<p>
     *
     * This is required since a Java String is
     * internally always stored as Unicode, meaning it can contain almost every character, but
     * the HTML charset used might not support all such characters.<p>
     *
     * The input is processed by unicode code point, so a character outside the basic
     * multilingual plane results in one entity reference, not in one per surrogate.<p>
     *
     * @param input the input to encode for HTML (may be <code>null</code>)
     * @param encoding the charset to encode the result with
     *
     * @return the input with the encoded HTML entities, or <code>null</code> if the input was <code>null</code>
     *
     * @see #decodeHtmlEntities(String, String)
     */
    public static String encodeHtmlEntities(String input, String encoding) {

        if (input == null) {
            return null;
        }
        CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
        int length = input.length();
        StringBuilder result = new StringBuilder(length * 2);
        int index = 0;
        while (index < length) {
            int codePoint = input.codePointAt(index);
            int next = index + Character.charCount(codePoint);
            if (isEncodable(encoder, codePoint)) {
                result.append(input, index, next);
            } else {
                appendEntity(result, codePoint);
            }
            index = next;
        }
        return result.toString();
    }

    /**
     * Encodes all characters that are contained in the String which can not be displayed
     * in the given encodings charset with Java escaping like <code>\u20ac</code>.<p>
     *
     * This can be used to escape values used in Java property files.<p>
     *
     * Java escapes always denote UTF-16 units, so a character outside the basic
     * multilingual plane that can not be encoded results in two escapes (one per surrogate).<p>
     *
     * @param input the input to encode for Java (may be <code>null</code>)
     * @param encoding the charset to encode the result with
     *
     * @return the input with the encoded Java entities, or <code>null</code> if the input was <code>null</code>
     */
    public static String encodeJavaEntities(String input, String encoding) {

        if (input == null) {
            return null;
        }
        CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
        int length = input.length();
        StringBuilder result = new StringBuilder(length * 2);
        int index = 0;
        while (index < length) {
            int codePoint = input.codePointAt(index);
            int next = index + Character.charCount(codePoint);
            if (isEncodable(encoder, codePoint)) {
                result.append(input, index, next);
            } else {
                for (int unit = index; unit < next; unit++) {
                    appendJavaEscape(result, input.charAt(unit));
                }
            }
            index = next;
        }
        return result.toString();
    }

    /**
     * Encodes a string used as parameter in an uri in a way independent of other encodings/decodings applied later.<p>
     *
     * Used to ensure that GET parameters are not wrecked by wrong or incompatible configuration settings.
     * In order to ensure this, the String is first encoded with html entities for any character that cannot be encoded
     * in US-ASCII; additionally, the plus sign is also encoded to avoid problems with the white-space replacer.
     * Finally, the entity prefix is replaced with characters not used as delimiters in urls.<p>
     *
     * @param input the parameter string (may be <code>null</code>)
     *
     * @return the encoded parameter string, or <code>null</code> if the input was <code>null</code>
     *
     * @see #decodeParameter(String)
     */
    public static String encodeParameter(String input) {

        String result = encodeHtmlEntities(input, ENCODING_US_ASCII);
        if (result == null) {
            return null;
        }
        // order matters: the plus sign must become an entity before the prefix is replaced
        return result.replace("+", PLUS_ENTITY).replace(ENTITY_PREFIX, ENTITY_REPLACEMENT);
    }

    /**
     * Encodes a String in a way that is compatible with the JavaScript escape function.
     *
     * @param source The text to be encoded
     * @param encoding the encoding type
     *
     * @return The JavaScript escaped string, or <code>null</code> if the source was <code>null</code>
     */
    public static String escape(String source, String encoding) {

        String encoded = encode(source, encoding);
        if (encoded == null) {
            return null;
        }
        // the blank is encoded into "+" not "%20" when using standard encode call
        return encoded.replace("+", "%20");
    }

    /**
     * Escapes special characters in a HTML-String with their number-based
     * entity representation, for example <code>&amp;</code> becomes <code>&amp;#38;</code>.<p>
     *
     * A character <code>ch</code> is replaced if<br>
     * <code>((ch != 32) &amp;&amp; ((ch &gt; 122) || (ch &lt; 48) || (ch == 60) || (ch == 62)))</code><p>
     *
     * Numeric entities and the named entities for <code>&lt;</code>, <code>&gt;</code>,
     * <code>&amp;</code> and <code>"</code> that are already contained in the source are
     * copied unchanged, so escaping an escaped String twice does no harm.<p>
     *
     * @param source the String to escape
     *
     * @return String the escaped String, or <code>null</code> if the source was <code>null</code>
     *
     * @see #escapeXml(String)
     */
    public static String escapeHtml(String source) {

        if (source == null) {
            return null;
        }
        int length = source.length();
        StringBuilder result = new StringBuilder(length * 2);
        int index = 0;
        while (index < length) {
            int codePoint = source.codePointAt(index);
            if (codePoint == '&') {
                // avoid escaping already escaped characters
                int terminatorIndex = findEntityEnd(source, index, ESCAPED_ENTITY_NAME);
                if (terminatorIndex >= 0) {
                    result.append(source, index, terminatorIndex + 1);
                    // skip remaining chars up to (and including) ";"
                    index = terminatorIndex + 1;
                    continue;
                }
            }
            if ((codePoint != 32)
                && ((codePoint > 122) || (codePoint < 48) || (codePoint == 60) || (codePoint == 62))) {
                appendEntity(result, codePoint);
            } else {
                // only blanks and chars in the range 48..122 get here, the cast is safe
                result.append((char)codePoint);
            }
            index += Character.charCount(codePoint);
        }
        return result.toString();
    }

    /**
     * Escapes all characters outside the ISO-8859-1 range in a HTML-String with their
     * number-based entity representation, for example the euro sign becomes
     * <code>&amp;#8364;</code>.<p>
     *
     * A character <code>ch</code> is replaced if<br>
     * <code>(ch &gt; 255)</code><p>
     *
     * @param source the String to escape
     *
     * @return String the escaped String, or <code>null</code> if the source was <code>null</code>
     *
     * @see #escapeXml(String)
     */
    public static String escapeNonAscii(String source) {

        if (source == null) {
            return null;
        }
        int length = source.length();
        StringBuilder result = new StringBuilder(length * 2);
        int index = 0;
        while (index < length) {
            int codePoint = source.codePointAt(index);
            if (codePoint > 255) {
                appendEntity(result, codePoint);
            } else {
                result.append((char)codePoint);
            }
            index += Character.charCount(codePoint);
        }
        return result.toString();
    }

    /**
     * Encodes a String in a way that is compatible with the JavaScript escape function.
     * Multiple blanks are encoded _multiply_ with <code>%20</code>.<p>
     *
     * This method produces the same result as {@link #escape(String, String)} and is
     * kept for compatibility.<p>
     *
     * @param source The text to be encoded
     * @param encoding the encoding type
     *
     * @return The JavaScript escaped string, or <code>null</code> if the source was <code>null</code>
     */
    public static String escapeWBlanks(String source, String encoding) {

        return escape(source, encoding);
    }

    /**
     * Escapes a String so it may be printed as text content or attribute
     * value in a HTML page or an XML file.<p>
     *
     * This method replaces the following characters in a String:
     * <ul>
     * <li><b>&lt;</b> with &amp;lt;
     * <li><b>&gt;</b> with &amp;gt;
     * <li><b>&amp;</b> with &amp;amp;
     * <li><b>&quot;</b> with &amp;quot;
     * </ul><p>
     *
     * Numeric entities that are already contained in the source are left untouched.<p>
     *
     * @param source the string to escape
     *
     * @return the escaped string, or <code>null</code> if the source was <code>null</code>
     *
     * @see #escapeHtml(String)
     */
    public static String escapeXml(String source) {

        return escapeXml(source, false);
    }

    /**
     * Escapes a String so it may be printed as text content or attribute
     * value in a HTML page or an XML file.<p>
     *
     * This method replaces the following characters in a String:
     * <ul>
     * <li><b>&lt;</b> with &amp;lt;
     * <li><b>&gt;</b> with &amp;gt;
     * <li><b>&amp;</b> with &amp;amp;
     * <li><b>&quot;</b> with &amp;quot;
     * </ul><p>
     *
     * @param source the string to escape
     * @param doubleEscape if <code>false</code>, the ampersand of a numeric entity like
     *      <code>&amp;#8364;</code> that is already contained in the source is left untouched;
     *      named entities are always escaped again
     *
     * @return the escaped string, or <code>null</code> if the source was <code>null</code>
     *
     * @see #escapeHtml(String)
     */
    public static String escapeXml(String source, boolean doubleEscape) {

        if (source == null) {
            return null;
        }
        int length = source.length();
        StringBuilder result = new StringBuilder(length * 2);
        for (int i = 0; i < length; i++) {
            char ch = source.charAt(i);
            switch (ch) {
                case '<':
                    result.append("&lt;");
                    break;
                case '>':
                    result.append("&gt;");
                    break;
                case '&':
                    // don't escape already escaped international and special characters
                    if (!doubleEscape && (findEntityEnd(source, i, NUMERIC_ENTITY_NAME) >= 0)) {
                        result.append(ch);
                    } else {
                        result.append("&amp;");
                    }
                    break;
                case '"':
                    result.append("&quot;");
                    break;
                default:
                    result.append(ch);
            }
        }
        return result.toString();
    }

    /**
     * Checks if a given encoding name is actually supported, and if so
     * resolves it to its canonical name, if not it returns the given fallback
     * value.<p>
     *
     * Charsets have a set of aliases. For example, valid aliases for "UTF-8"
     * are "UTF8", "utf-8" or "utf8". This method resolves any given valid charset name
     * to its "canonical" form, so that simple String comparison can be used
     * when checking charset names internally later.<p>
     *
     * Please see <a href="http://www.iana.org/assignments/character-sets">http://www.iana.org/assignments/character-sets</a>
     * for a list of valid charset alias names.<p>
     *
     * @param encoding the encoding to check and resolve (may be <code>null</code>)
     * @param fallback the fallback encoding scheme
     *
     * @return the resolved encoding name, or the fallback value
     */
    public static String lookupEncoding(String encoding, String fallback) {

        if (encoding == null) {
            // the concurrent cache does not accept null keys
            return fallback;
        }
        String cached = ENCODING_CACHE.get(encoding);
        if (cached != null) {
            return cached;
        }
        try {
            String canonical = Charset.forName(encoding).name();
            ENCODING_CACHE.put(encoding, canonical);
            return canonical;
        } catch (IllegalArgumentException e) {
            // illegal or unsupported charset name, use the fallback
            return fallback;
        }
    }

    /**
     * Decodes a String in a way that is compatible with the JavaScript
     * unescape function.<p>
     *
     * @param source The String to be decoded
     * @param encoding the encoding type
     *
     * @return The JavaScript unescaped String, or <code>null</code> if the source was <code>null</code>
     */
    public static String unescape(String source, String encoding) {

        if (source == null) {
            return null;
        }
        // to use standard decoder we need to replace '+' with "%20" (space)
        return decode(source.replace("+", "%20"), encoding);
    }

    /**
     * Appends the numeric HTML entity reference for the given code point.<p>
     *
     * @param target the buffer to append to
     * @param codePoint the unicode code point to reference
     */
    private static void appendEntity(StringBuilder target, int codePoint) {

        target.append(ENTITY_PREFIX).append(codePoint).append(';');
    }

    /**
     * Appends the Java unicode escape (backslash, "u", four lower case hex digits) for the given char.<p>
     *
     * @param target the buffer to append to
     * @param unit the UTF-16 unit to escape
     */
    private static void appendJavaEscape(StringBuilder target, char unit) {

        String hex = Integer.toHexString(unit);
        target.append("\\u");
        for (int pad = hex.length(); pad < 4; pad++) {
            target.append('0');
        }
        target.append(hex);
    }

    /**
     * Resolves one numeric entity reference to the character it denotes.<p>
     *
     * @param entity the complete reference, from the prefix up to and including the semicolon
     * @param encoder the encoder of the target charset
     *
     * @return the decoded character, or the unchanged reference if the number is not a valid
     *      code point or the charset can not encode it
     */
    private static String decodeEntity(String entity, CharsetEncoder encoder) {

        int codePoint;
        try {
            codePoint = Integer.parseInt(entity.substring(ENTITY_PREFIX.length(), entity.length() - 1));
        } catch (NumberFormatException e) {
            // more digits than an int can hold, certainly not a character
            return entity;
        }
        if (!Character.isValidCodePoint(codePoint) || !isEncodable(encoder, codePoint)) {
            return entity;
        }
        return new String(Character.toChars(codePoint));
    }

    /**
     * Returns the index of the semicolon that terminates the entity starting at the given
     * ampersand, provided the text in between matches the given name pattern.<p>
     *
     * @param source the String to inspect
     * @param ampersandIndex the index of the ampersand in the source
     * @param namePattern the pattern the entity name has to match completely
     *
     * @return the index of the terminating semicolon, or <code>-1</code> if no matching entity starts here
     */
    private static int findEntityEnd(String source, int ampersandIndex, Pattern namePattern) {

        int terminatorIndex = source.indexOf(';', ampersandIndex);
        if ((terminatorIndex > 0)
            && namePattern.matcher(source).region(ampersandIndex + 1, terminatorIndex).matches()) {
            return terminatorIndex;
        }
        return -1;
    }

    /**
     * Checks if the given code point can be represented in the charset of the encoder.<p>
     *
     * @param encoder the encoder of the target charset
     * @param codePoint the unicode code point to check
     *
     * @return <code>true</code> if the charset can encode the code point
     */
    private static boolean isEncodable(CharsetEncoder encoder, int codePoint) {

        if (codePoint < 128) {
            // first 128 chars are contained in almost every charset, this is intended as
            // performance improvement since the canEncode() operation appears quite CPU heavy
            return true;
        }
        if (Character.isSupplementaryCodePoint(codePoint)) {
            // needs a surrogate pair, which has to be checked as a whole
            return encoder.canEncode(new String(Character.toChars(codePoint)));
        }
        return encoder.canEncode((char)codePoint);
    }
}