| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to you under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.calcite.util; |
| |
| import com.google.common.collect.Lists; |
| |
| import java.io.PrintWriter; |
| import java.io.Writer; |
| import java.util.ArrayDeque; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.Deque; |
| |
| /** |
| * Streaming XML output. |
| * |
| * <p>Use this class to write XML to any streaming source. |
| * While the class itself is unstructured and doesn't enforce any DTD |
| * specification, use of the class |
| * does ensure that the output is syntactically valid XML.</p> |
| */ |
| public class XmlOutput { |
| |
| // This Writer is the underlying output stream to which all XML is |
| // written. |
| private final PrintWriter out; |
| |
| // The tagStack is maintained to check that tags are balanced. |
| private final Deque<String> tagStack = new ArrayDeque<>(); |
| |
| // The class maintains an indentation level to improve output quality. |
| private int indent; |
| |
| // The class also maintains the total number of tags written. This |
| // is used to monitor changes to the output |
| private int tagsWritten; |
| |
| /** Whehter output should be compacted. Compacted output is free of |
| * extraneous whitespace and is designed for easier transport. */ |
| private boolean compact; |
| |
| /** String to write for each indent level; see {@link #setIndentString}. */ |
| private String indentString = "\t"; |
| |
| /** Whether to detect that tags are empty; see {@link #setGlob}. */ |
| private boolean glob; |
| |
| /** |
| * Whether we have started but not finished a start tag. This only happens |
| * if <code>glob</code> is true. The start tag is automatically closed |
| * when we start a child node. If there are no child nodes, {@link #endTag} |
| * creates an empty tag. |
| */ |
| private boolean inTag; |
| |
| /** Whether to always quote CDATA segments (even if they don't contain |
| * special characters); see {@link #setAlwaysQuoteCData}. */ |
| private boolean alwaysQuoteCData; |
| |
| /** Whether to ignore unquoted text, such as whitespace; see |
| * {@link #setIgnorePcdata}. */ |
| private boolean ignorePcdata; |
| |
| /** |
| * Private helper function to display a degree of indentation. |
| * |
| * @param out the PrintWriter to which to display output. |
| * @param indent the degree of indentation. |
| */ |
| private void displayIndent(PrintWriter out, int indent) { |
| if (!compact) { |
| for (int i = 0; i < indent; i++) { |
| out.print(indentString); |
| } |
| } |
| } |
| |
| /** |
| * Constructs a new XmlOutput based on any {@link Writer}. |
| * |
| * @param out the writer to which this XmlOutput generates results. |
| */ |
| public XmlOutput(Writer out) { |
| this(new PrintWriter(out, true)); |
| } |
| |
| /** |
| * Constructs a new XmlOutput based on a {@link PrintWriter}. |
| * |
| * @param out the writer to which this XmlOutput generates results. |
| */ |
| public XmlOutput(PrintWriter out) { |
| this.out = out; |
| indent = 0; |
| tagsWritten = 0; |
| } |
| |
| /** |
| * Sets or unsets the compact mode. Compact mode causes the generated |
| * XML to be free of extraneous whitespace and other unnecessary |
| * characters. |
| * |
| * @param compact true to turn on compact mode, or false to turn it off. |
| */ |
| public void setCompact(boolean compact) { |
| this.compact = compact; |
| } |
| |
| public boolean getCompact() { |
| return compact; |
| } |
| |
| /** |
| * Sets the string to print for each level of indentation. The default is a |
| * tab. The value must not be <code>null</code>. Set this to the empty |
| * string to achieve no indentation (note that |
| * <code>{@link #setCompact}(true)</code> removes indentation <em>and</em> |
| * newlines). |
| */ |
| public void setIndentString(String indentString) { |
| this.indentString = indentString; |
| } |
| |
| /** |
| * Sets whether to detect that tags are empty. |
| */ |
| public void setGlob(boolean glob) { |
| this.glob = glob; |
| } |
| |
| /** |
| * Sets whether to always quote cdata segments (even if they don't contain |
| * special characters). |
| */ |
| public void setAlwaysQuoteCData(boolean alwaysQuoteCData) { |
| this.alwaysQuoteCData = alwaysQuoteCData; |
| } |
| |
| /** |
| * Sets whether to ignore unquoted text, such as whitespace. |
| */ |
| public void setIgnorePcdata(boolean ignorePcdata) { |
| this.ignorePcdata = ignorePcdata; |
| } |
| |
| public boolean getIgnorePcdata() { |
| return ignorePcdata; |
| } |
| |
| /** |
| * Sends a string directly to the output stream, without escaping any |
| * characters. Use with caution! |
| */ |
| public void print(String s) { |
| out.print(s); |
| } |
| |
| /** |
| * Starts writing a new tag to the stream. The tag's name must be given and |
| * its attributes should be specified by a fully constructed AttrVector |
| * object. |
| * |
| * @param tagName the name of the tag to write. |
| * @param attributes an XMLAttrVector containing the attributes to include |
| * in the tag. |
| */ |
| public void beginTag(String tagName, XMLAttrVector attributes) { |
| beginBeginTag(tagName); |
| if (attributes != null) { |
| attributes.display(out, indent); |
| } |
| endBeginTag(tagName); |
| } |
| |
| public void beginBeginTag(String tagName) { |
| if (inTag) { |
| // complete the parent's start tag |
| if (compact) { |
| out.print(">"); |
| } else { |
| out.println(">"); |
| } |
| inTag = false; |
| } |
| displayIndent(out, indent); |
| out.print("<"); |
| out.print(tagName); |
| } |
| |
| public void endBeginTag(String tagName) { |
| if (glob) { |
| inTag = true; |
| } else if (compact) { |
| out.print(">"); |
| } else { |
| out.println(">"); |
| } |
| out.flush(); |
| tagStack.push(tagName); |
| indent++; |
| tagsWritten++; |
| } |
| |
| /** |
| * Writes an attribute. |
| */ |
| public void attribute(String name, String value) { |
| printAtt(out, name, value); |
| } |
| |
| /** |
| * If we are currently inside the start tag, finishes it off. |
| */ |
| public void beginNode() { |
| if (inTag) { |
| // complete the parent's start tag |
| if (compact) { |
| out.print(">"); |
| } else { |
| out.println(">"); |
| } |
| inTag = false; |
| } |
| } |
| |
| /** |
| * Completes a tag. This outputs the end tag corresponding to the |
| * last exposed beginTag. The tag name must match the name of the |
| * corresponding beginTag. |
| * @param tagName the name of the end tag to write. |
| */ |
| public void endTag(String tagName) { |
| // Check that the end tag matches the corresponding start tag |
| String x = tagStack.pop(); |
| assert x.equals(tagName); |
| |
| // Lower the indent and display the end tag |
| indent--; |
| if (inTag) { |
| // we're still in the start tag -- this element had no children |
| if (compact) { |
| out.print("/>"); |
| } else { |
| out.println("/>"); |
| } |
| inTag = false; |
| } else { |
| displayIndent(out, indent); |
| out.print("</"); |
| out.print(tagName); |
| if (compact) { |
| out.print(">"); |
| } else { |
| out.println(">"); |
| } |
| } |
| out.flush(); |
| } |
| |
| /** |
| * Writes an empty tag to the stream. An empty tag is one with no |
| * tags inside it, although it may still have attributes. |
| * |
| * @param tagName the name of the empty tag. |
| * @param attributes an XMLAttrVector containing the attributes to |
| * include in the tag. |
| */ |
| public void emptyTag(String tagName, XMLAttrVector attributes) { |
| if (inTag) { |
| // complete the parent's start tag |
| if (compact) { |
| out.print(">"); |
| } else { |
| out.println(">"); |
| } |
| inTag = false; |
| } |
| displayIndent(out, indent); |
| out.print("<"); |
| out.print(tagName); |
| if (attributes != null) { |
| out.print(" "); |
| attributes.display(out, indent); |
| } |
| |
| if (compact) { |
| out.print("/>"); |
| } else { |
| out.println("/>"); |
| } |
| out.flush(); |
| tagsWritten++; |
| } |
| |
| /** |
| * Writes a CDATA section. Such sections always appear on their own line. |
| * The nature in which the CDATA section is written depends on the actual |
| * string content with respect to these special characters/sequences: |
| * <ul> |
| * <li><code>&</code> |
| * <li><code>"</code> |
| * <li><code>'</code> |
| * <li><code><</code> |
| * <li><code>></code> |
| * </ul> |
| * Additionally, the sequence <code>]]></code> is special. |
| * <ul> |
| * <li>Content containing no special characters will be left as-is. |
| * <li>Content containing one or more special characters but not the |
| * sequence <code>]]></code> will be enclosed in a CDATA section. |
| * <li>Content containing special characters AND at least one |
| * <code>]]></code> sequence will be left as-is but have all of its |
| * special characters encoded as entities. |
| * </ul> |
| * These special treatment rules are required to allow cdata sections |
| * to contain XML strings which may themselves contain cdata sections. |
| * Traditional CDATA sections <b>do not nest</b>. |
| */ |
| public void cdata(String data) { |
| cdata(data, false); |
| } |
| |
| /** |
| * Writes a CDATA section (as {@link #cdata(String)}). |
| * |
| * @param data string to write |
| * @param quote if true, quote in a <code><![CDATA[</code> |
| * ... <code>]]></code> regardless of the content of |
| * <code>data</code>; if false, quote only if the content needs it |
| */ |
| public void cdata(String data, boolean quote) { |
| if (inTag) { |
| // complete the parent's start tag |
| if (compact) { |
| out.print(">"); |
| } else { |
| out.println(">"); |
| } |
| inTag = false; |
| } |
| if (data == null) { |
| data = ""; |
| } |
| boolean specials = false; |
| boolean cdataEnd = false; |
| |
| // Scan the string for special characters |
| // If special characters are found, scan the string for ']]>' |
| if (stringHasXMLSpecials(data)) { |
| specials = true; |
| if (data.contains("]]>")) { |
| cdataEnd = true; |
| } |
| } |
| |
| // Display the result |
| displayIndent(out, indent); |
| if (quote || alwaysQuoteCData) { |
| out.print("<![CDATA["); |
| out.print(data); |
| out.println("]]>"); |
| } else if (!specials) { |
| out.println(data); |
| } else { |
| stringEncodeXML(data, out); |
| out.println(); |
| } |
| out.flush(); |
| tagsWritten++; |
| } |
| |
| /** |
| * Writes a String tag; a tag containing nothing but a CDATA section. |
| */ |
| public void stringTag(String name, String data) { |
| beginTag(name, null); |
| cdata(data); |
| endTag(name); |
| } |
| |
| /** |
| * Writes content. |
| */ |
| public void content(String content) { |
| // This method previously used a LineNumberReader, but that class is |
| // susceptible to a form of DoS attack. It uses lots of memory and CPU if a |
| // malicious client gives it input with very long lines. |
| if (content != null) { |
| indent++; |
| final char[] chars = content.toCharArray(); |
| int prev = 0; |
| for (int i = 0; i < chars.length; i++) { |
| if (chars[i] == '\n' |
| || chars[i] == '\r' |
| && i + 1 < chars.length |
| && chars[i + 1] == '\n') { |
| displayIndent(out, indent); |
| out.println(content.substring(prev, i)); |
| if (chars[i] == '\r') { |
| ++i; |
| } |
| prev = i + 1; |
| } |
| } |
| displayIndent(out, indent); |
| out.println(content.substring(prev, chars.length)); |
| indent--; |
| out.flush(); |
| } |
| tagsWritten++; |
| } |
| |
| /** |
| * Write header. Use default version 1.0. |
| */ |
| public void header() { |
| out.println("<?xml version=\"1.0\" ?>"); |
| out.flush(); |
| tagsWritten++; |
| } |
| |
| /** |
| * Write header, take version as input. |
| */ |
| public void header(String version) { |
| out.print("<?xml version=\""); |
| out.print(version); |
| out.println("\" ?>"); |
| out.flush(); |
| tagsWritten++; |
| } |
| |
| /** |
| * Returns the total number of tags written. |
| * |
| * @return the total number of tags written to the XML stream. |
| */ |
| public int numTagsWritten() { |
| return tagsWritten; |
| } |
| |
| /** Prints an XML attribute name and value for string {@code val}. */ |
| private static void printAtt(PrintWriter pw, String name, String val) { |
| if (val != null /* && !val.equals("") */) { |
| pw.print(" "); |
| pw.print(name); |
| pw.print("=\""); |
| pw.print(escapeForQuoting(val)); |
| pw.print("\""); |
| } |
| } |
| |
| /** |
| * Encode a String for XML output, displaying it to a PrintWriter. |
| * The String to be encoded is displayed, except that |
| * special characters are converted into entities. |
| * @param input a String to convert. |
| * @param out a PrintWriter to which to write the results. |
| */ |
| private static void stringEncodeXML(String input, PrintWriter out) { |
| for (int i = 0; i < input.length(); i++) { |
| char c = input.charAt(i); |
| switch (c) { |
| case '<': |
| case '>': |
| case '"': |
| case '\'': |
| case '&': |
| case '\t': |
| case '\n': |
| case '\r': |
| out.print("&#" + (int) c + ";"); |
| break; |
| default: |
| out.print(c); |
| } |
| } |
| } |
| |
| private static String escapeForQuoting(String val) { |
| return StringEscaper.XML_NUMERIC_ESCAPER.escapeString(val); |
| } |
| |
| /** |
| * Returns whether a string contains any XML special characters. |
| * |
| * <p>If this function returns true, the string will need to be |
| * encoded either using the stringEncodeXML function above or using a |
| * CDATA section. Note that MSXML has a nasty bug whereby whitespace |
| * characters outside of a CDATA section are lost when parsing. To |
| * avoid hitting this bug, this method treats many whitespace characters |
| * as "special".</p> |
| * |
| * @param input the String to scan for XML special characters. |
| * @return true if the String contains any such characters. |
| */ |
| private static boolean stringHasXMLSpecials(String input) { |
| for (int i = 0; i < input.length(); i++) { |
| char c = input.charAt(i); |
| switch (c) { |
| case '<': |
| case '>': |
| case '"': |
| case '\'': |
| case '&': |
| case '\t': |
| case '\n': |
| case '\r': |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Utility for replacing special characters |
| * with escape sequences in strings. |
| * |
| * <p>A StringEscaper starts out as an identity transform in the "mutable" |
| * state. Call {@link #defineEscape} as many times as necessary to set up |
| * mappings, and then call {@link #makeImmutable} before |
| * actually applying the defined transform. Or, |
| * use one of the global mappings pre-defined here.</p> |
| */ |
| static class StringEscaper implements Cloneable { |
| private ArrayList<String> translationVector; |
| private String [] translationTable; |
| |
| public static final StringEscaper XML_ESCAPER; |
| public static final StringEscaper XML_NUMERIC_ESCAPER; |
| public static final StringEscaper HTML_ESCAPER; |
| public static final StringEscaper URL_ARG_ESCAPER; |
| public static final StringEscaper URL_ESCAPER; |
| |
| /** Identity transform. */ |
| StringEscaper() { |
| translationVector = new ArrayList<>(); |
| } |
| |
| /** |
| * Map character "from" to escape sequence "to". |
| */ |
| public void defineEscape(char from, String to) { |
| int i = (int) from; |
| if (i >= translationVector.size()) { |
| // Extend list by adding the requisite number of nulls. |
| final int count = i + 1 - translationVector.size(); |
| translationVector.addAll(Collections.nCopies(count, null)); |
| } |
| translationVector.set(i, to); |
| } |
| |
| /** |
| * Call this before attempting to escape strings; after this, |
| * defineEscape may not be called again. |
| */ |
| public void makeImmutable() { |
| translationTable = |
| translationVector.toArray(new String[0]); |
| translationVector = null; |
| } |
| |
| /** |
| * Apply an immutable transformation to the given string. |
| */ |
| public String escapeString(String s) { |
| StringBuilder sb = null; |
| int n = s.length(); |
| for (int i = 0; i < n; i++) { |
| char c = s.charAt(i); |
| String escape; |
| // codes >= 128 (e.g. Euro sign) are always escaped |
| if (c > 127) { |
| escape = "&#" + Integer.toString(c) + ";"; |
| } else if (c >= translationTable.length) { |
| escape = null; |
| } else { |
| escape = translationTable[c]; |
| } |
| if (escape == null) { |
| if (sb != null) { |
| sb.append(c); |
| } |
| } else { |
| if (sb == null) { |
| sb = new StringBuilder(n * 2); |
| sb.append(s, 0, i); |
| } |
| sb.append(escape); |
| } |
| } |
| |
| if (sb == null) { |
| return s; |
| } else { |
| return sb.toString(); |
| } |
| } |
| |
| protected StringEscaper clone() { |
| StringEscaper clone = new StringEscaper(); |
| if (translationVector != null) { |
| clone.translationVector = new ArrayList<>(translationVector); |
| } |
| if (translationTable != null) { |
| clone.translationTable = translationTable.clone(); |
| } |
| return clone; |
| } |
| |
| /** |
| * Create a mutable escaper from an existing escaper, which may |
| * already be immutable. |
| */ |
| public StringEscaper getMutableClone() { |
| StringEscaper clone = clone(); |
| if (clone.translationVector == null) { |
| clone.translationVector = Lists.newArrayList(clone.translationTable); |
| clone.translationTable = null; |
| } |
| return clone; |
| } |
| |
| static { |
| HTML_ESCAPER = new StringEscaper(); |
| HTML_ESCAPER.defineEscape('&', "&"); |
| HTML_ESCAPER.defineEscape('"', """); |
| // htmlEscaper.defineEscape('\'',"'"); |
| HTML_ESCAPER.defineEscape('\'', "'"); |
| HTML_ESCAPER.defineEscape('<', "<"); |
| HTML_ESCAPER.defineEscape('>', ">"); |
| |
| XML_NUMERIC_ESCAPER = new StringEscaper(); |
| XML_NUMERIC_ESCAPER.defineEscape('&', "&"); |
| XML_NUMERIC_ESCAPER.defineEscape('"', """); |
| XML_NUMERIC_ESCAPER.defineEscape('\'', "'"); |
| XML_NUMERIC_ESCAPER.defineEscape('<', "<"); |
| XML_NUMERIC_ESCAPER.defineEscape('>', ">"); |
| |
| URL_ARG_ESCAPER = new StringEscaper(); |
| URL_ARG_ESCAPER.defineEscape('?', "%3f"); |
| URL_ARG_ESCAPER.defineEscape('&', "%26"); |
| URL_ESCAPER = URL_ARG_ESCAPER.getMutableClone(); |
| URL_ESCAPER.defineEscape('%', "%%"); |
| URL_ESCAPER.defineEscape('"', "%22"); |
| URL_ESCAPER.defineEscape('\r', "+"); |
| URL_ESCAPER.defineEscape('\n', "+"); |
| URL_ESCAPER.defineEscape(' ', "+"); |
| URL_ESCAPER.defineEscape('#', "%23"); |
| |
| HTML_ESCAPER.makeImmutable(); |
| XML_ESCAPER = HTML_ESCAPER; |
| XML_NUMERIC_ESCAPER.makeImmutable(); |
| URL_ARG_ESCAPER.makeImmutable(); |
| URL_ESCAPER.makeImmutable(); |
| } |
| } |
| |
| /** List of attribute names and values. */ |
| static class XMLAttrVector { |
| public void display(PrintWriter out, int indent) { |
| throw new UnsupportedOperationException(); |
| } |
| } |
| } |