src/main/java/org/apache/sling/scripting/javascript/io/EspReader.java - sling-org-apache-sling-scripting-javascript - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.sling.scripting.javascript.io;

 import java.io.FilterReader;
 import java.io.IOException;
 import java.io.PushbackReader;
 import java.io.Reader;
 import java.util.Stack;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 /**
  * The <code>EspReader</code> is a <code>FilterReader</code> which takes
  * JSP like input and produces plain ECMA script output. The filtering
  * modifications done on the input comprise the following :
  * <ul>
  * <li>Template text (HTML) is wrapped by out.write(). At most one line of
  * text is wrapped into a single write() call. Double quote and backslash
  * characters in the template text (e.g. for HTML tag attribute values) are
  * escaped.
  * <li>ECMA code is written to the output as is.
  * <li>ECMA slash star (/*) comments are also written as is.
  * <li>ECMA slash slash (//) comments are written as is.
  * <li>JSP style template comments (&lt;%-- --%&gt;) are removed from the
  * stream. Line endings (LFs and CRLFs) are written, though.
  * <li>HTML comments (&lt;!-- --&gt;) are not treated specially. Rather they are
  * handled as plain template text written to the output wrapped in
  * out.write(). The consequence of this behaviour is, that as in JSP ECMA
  * expressions may be included within the comments.
  * </ul>
  * <p>
  * The nice thing about this reader is, that the line numbers of the resulting
  * stream match the line numbers of the matching contents of the input stream.
  * Due to the insertion of write() calls, column numbers will not necessarily
  * match, though. This is especially true if you mix ECMA code tags (&lt;% %&gt;)
  * with template text on the same line.
  * <p>
  * For maximum performance it is advisable to not create the EspReader with a
  * plain FileReader or InputStreamReader but rather with a BufferedReader based
  * on one of the simpler Readers. The reason for this is, that we call the base
  * reader character by character, which performs poorly if the base reader
  * does not buffer its input.
  */
 public class EspReader extends FilterReader {

     /** default log */
     private static final Logger log = LoggerFactory.getLogger(EspReader.class);

     /**
      * Capacity (in characters) of the pushback buffer used for look ahead and
      * for injecting generated script text.
      */
     private static final int PUSHBACK_BUFFER_SIZE = 100;

     /**
      * Default parser state. This is the state the parser starts running in. In
      * this state all text is treated as template text, which should be wrapped
      * by out.write() line by line.
      */
     private static final byte PARSE_STATE_ESP = 1;

     /**
      * ECMA script reading state. When in this state everything up to the next
      * <code>%&gt;</code> is written to the output verbatim with three
      * exceptions : ECMA slash star comments are handed over to be handled by
      * the {@link #PARSE_STATE_ECMA_COMMENT} state, quoted strings are handled
      * in the {@link #PARSE_STATE_QUOTE} state and ECMA slash slash comments
      * are handled in the {@link #PARSE_STATE_ECMA_COMMENTL} state.
      */
     private static final byte PARSE_STATE_ECMA = 2;

     /**
      * ECMA script expression reading state. This state works exactly the same
      * as the {@link #PARSE_STATE_ECMA} state with one exception: The whole
      * code enclosed in the <code>&lt;%=</code> ... <code>%&gt;</code> tags
      * is itself wrapped with a <code>out.write()</code> statement
      * verbatim.
      */
     private static final byte PARSE_STATE_ECMA_EXPR = 3;

     /**
      * Compact ESP expression syntax similar to JSP Expression Language
      * notation (<code>${ ... }</code>). The expression ends at the first
      * closing brace encountered.
      */
     private static final byte PARSE_STATE_ECMA_EXPR_COMPACT = 4;

     /**
      * JSP comment reading state. When in this state everything up to the
      * closing <code>--%&gt;</code> tag is removed from the stream, except
      * for line endings.
      */
     private static final byte PARSE_STATE_JSP_COMMENT = 5;

     /**
      * ECMA quoted string reading state. When in this state everything is
      * written exactly as in the input stream up to the closing quote, which
      * matches the opening quote.
      */
     private static final byte PARSE_STATE_QUOTE = 6;

     /**
      * Verbatim copy state. When in this state as many as verbatimChars
      * characters are returned unchecked. As soon as this number of characters
      * is returned, the last state is popped from the stack. This state is
      * mainly used to (re-)inject static text into the output without further
      * processing.
      */
     private static final byte PARSE_STATE_VERBATIM = 7;

     /**
      * ECMA Comment reading state. When in this state, an ECMA slash star
      * comment is read (and completely returned).
      */
     private static final byte PARSE_STATE_ECMA_COMMENT = 8;

     /**
      * ECMA Comment reading state. When in this state, an ECMA slash slash
      * comment is read (and completely returned).
      */
     private static final byte PARSE_STATE_ECMA_COMMENTL = 9;

     /**
      * To work with lookahead and character insertion, we use a PushbackReader.
      * Set to <code>null</code> once the reader has been closed.
      */
     private PushbackReader input;

     /**
      * Current parse state. This field contains one of the
      * <code>PARSE_STATE</code> constants.
      */
     private byte state;

     /**
      * Stack of states. Whenever we enter a new state, the old state is pushed
      * onto the stack. When a state is left, the previous one is popped from the
      * stack.
      *
      * @see #pushState(byte)
      * @see #popState()
      * @see #state
      */
     private final Stack<Byte> stateStack;

     /**
      * This value is set to true, if the parser is expected to insert a
      * out.write() call into the input stream when in state
      * {@link #PARSE_STATE_ESP}. When this field is true, it is not
      * necessarily the case, that we are at the start of a real text line.
      */
     private boolean lineStart;

     /**
      * Number of pending verbatim characters minus one; <code>-1</code> means
      * that no injected characters are pending. Incremented by
      * {@link #doVerbatim(String)} and decremented for each character returned
      * in the {@link #PARSE_STATE_VERBATIM} state.
      */
     private int verbatimChars;

     /**
      * During String matching this is the character used for string quoting.
      */
     private char quoteChar;

     /**
      * Set to true if an escape character (\) has been encountered within a
      * quoted string.
      */
     private boolean escape;

     /**
      * Whether the definition of the out variable has already been written or not.
      * The initial value is <code>true</code> indicating it has still to be
      * defined.
      *
      * @see #startWrite(String)
      */
     private boolean outUndefined = true;

     /**
      * Javascript statement that sets the "out" variable that's used
      * to output data. Automatically inserted by the reader in code,
      * where needed.
      */
     public static final String DEFAULT_OUT_INIT_STATEMENT = "out=response.writer;";
     private String outInitStatement = DEFAULT_OUT_INIT_STATEMENT;

     /**
      * Create an EspReader on top of the given <code>baseReader</code>. The
      * constructor wraps the input reader with a <code>PushbackReader</code>,
      * so that input stream modifications may be handled transparently by our
      * {@link #doRead()} method.
      *
      * @param baseReader the wrapped reader
      */
     public EspReader(Reader baseReader) {
         super(baseReader);
         this.input = new PushbackReader(baseReader, PUSHBACK_BUFFER_SIZE);
         this.stateStack = new Stack<Byte>();
         this.lineStart = true;
         this.verbatimChars = -1;
         this.quoteChar = 0;
         this.escape = false;

         // Start in ESP (template text) state
         pushState(PARSE_STATE_ESP);
     }

     /**
      * Set the code fragment used to initialize the "out" variable.
      * <p>
      * The statement is injected into the output through the pushback buffer
      * (see {@link #startWrite(String)}), so it has to fit into that buffer
      * together with the other text injected at the same time; otherwise the
      * read operation injecting it fails with an <code>IOException</code>.
      *
      * @param statement the statement used for initialization, must not be
      *            <code>null</code>
      */
     public void setOutInitStatement(String statement) {
         outInitStatement = statement;
     }

     /**
      * Check whether we may block at the next read() operation. We may be ready
      * if and only if our input reader is ready. But this does not guarantee
      * that we won't block, as due to filtering there may be more than one
      * character needed from the input to return one.
      *
      * @return <code>true</code> if a character is available on the
      *         <code>PushbackReader</code>.
      * @throws IOException if the reader is not open
      */
     @Override
     public boolean ready() throws IOException {
         ensureOpen();
         return input.ready();
     }

     /**
      * Return the next filtered character. This need not be the next character
      * of the input stream. It may be a character from the input reader, after
      * having skipped filtered characters or it may be a character injected due
      * to translation of template text to ECMA code.
      *
      * @return the next character after filtering or -1 at the end of the input
      *         reader
      * @throws IOException if the reader is not open
      */
     @Override
     public int read() throws IOException {
         ensureOpen();
         return doRead();
     }

     /**
      * Fill the given buffer with filtered or injected characters. This need not
      * be the next characters of the input stream. It may be characters from the
      * input reader, after having skipped filtered characters or it may be
      * characters injected due to translation of template text to ECMA code.
      * This method is exactly the same as
      * <code>read(cbuf, 0, cbuf.length)</code>.
      *
      * @param cbuf The character buffer to fill with (filtered) characters
      * @return the number of characters filled in the buffer or -1 at the end of
      *         the input reader.
      * @throws IOException if the reader is not open
      */
     @Override
     public int read(char[] cbuf) throws IOException {
         return read(cbuf, 0, cbuf.length);
     }

     /**
      * Fill the buffer from the offset with the number of characters given. This
      * need not be the next characters of the input stream. It may be characters
      * from the input reader, after having skipped filtered characters or it may
      * be characters injected due to translation of template text to ECMA
      * code.
      *
      * @param cbuf The character buffer to fill with (filtered) characters
      * @param off Offset from where to start in the buffer
      * @param len The number of characters to fill into the buffer
      * @return the number of characters filled in the buffer or -1 at the end of
      *         the input reader.
      * @throws IOException if the reader is not open
      * @throws IndexOutOfBoundsException if len is negative, off is negative or
      *             higher than the buffer length or off+len is negative or
      *             beyond the buffer size.
      */
     @Override
     public int read(char[] cbuf, int off, int len) throws java.io.IOException {
         ensureOpen();

         // Range checks; (off + len) < 0 catches integer overflow of the sum
         if ((off < 0) || (off > cbuf.length) || (len < 0)
             || ((off + len) > cbuf.length) || ((off + len) < 0)) {
             throw new IndexOutOfBoundsException();
         } else if (len == 0) {
             return 0;
         }

         int count = 0;
         while (count < len) {
             int c = doRead();
             if (c < 0) {
                 break;
             }
             cbuf[off + count] = (char) c;
             count++;
         }

         // return EOF (-1) if none have been read, else return the number read
         return (count == 0) ? -1 : count;
     }

     /**
      * Skip the number of filtered characters. The skip method is the same as
      * calling read() repeatedly for the given number of characters and throwing
      * away the result. If the end of input reader is reached before having
      * skipped the number of characters, the method returns the number of
      * characters skipped so far.
      *
      * @param n the number of (filtered) characters to skip
      * @return the number of (filtered) characters actually skipped
      * @throws IllegalArgumentException if n is negative
      * @throws IOException if the reader is not open or if reading the
      *             characters throws
      */
     @Override
     public long skip(long n) throws IOException {
         if (n < 0L) {
             throw new IllegalArgumentException("skip value is negative");
         }

         // fail with an IOException (not a NullPointerException) once closed
         ensureOpen();

         long skipped = 0;
         while (skipped < n && doRead() >= 0) {
             skipped++;
         }
         return skipped;
     }

     /**
      * Close the EspReader. Closes the pushback reader (and thereby the
      * wrapped reader); any further read, skip or ready call throws an
      * <code>IOException</code>. Calling this method more than once is
      * allowed.
      */
     @Override
     public void close() throws java.io.IOException {
         if (input != null) {
             input.close();
             input = null;
         }

         // let the FilterReader close the wrapped reader as well
         super.close();
     }

     /**
      * Mark the present position in the stream. The <code>mark</code> method
      * of class <code>EspReader</code> always throws an exception.
      *
      * @param readAheadLimit The number of characters to read ahead
      * @exception IOException Always, since mark is not supported
      */
     @Override
     public void mark(int readAheadLimit) throws IOException {
         throw new IOException("mark() not supported");
     }

     /**
      * Tell whether this stream supports the mark() operation, which it does
      * not.
      *
      * @return false Always, since mark is not supported
      */
     @Override
     public boolean markSupported() {
         return false;
     }

     /**
      * Reset the stream. The <code>reset</code> method of
      * <code>EspReader</code> always throws an exception.
      *
      * @exception IOException Always, since reset is not supported
      */
     @Override
     public void reset() throws IOException {
         throw new IOException("reset() not supported");
     }

     /**
      * Internal routine doing all the footwork of reading one character at a
      * time from the <code>PushbackReader</code> and acting according to the
      * current state.
      * <p>
      * This filter is implemented using a finite state machine using the states
      * defined above with the <code>PARSE_STATE</code> constants. Each state
      * may do a look ahead in certain situations to decide on further steps.
      * Characters looked ahead may or may not be inserted back into the input
      * stream depending on the concrete state. Look ahead results signalling
      * the end of the input are never pushed back.
      *
      * @return the next character from the input stream according to the current
      *         state or -1 to indicate end of file.
      * @throws IOException if the input <code>PushbackReader</code> throws it
      */
     private int doRead() throws IOException {

         // we return out of the loop, if we find a character passing the filter
         for (;;) {

             // Get a character from the input, which may well have been
             // injected using the unread() method
             int c = input.read();

             // catch EOF
             if (c < 0) {

                 // if a template text line is still incomplete, inject
                 // proper line ending and continue until this has been returned
                 if (!lineStart && state == PARSE_STATE_ESP) {
                     doVerbatim("\");"); // line ending injection
                     lineStart = true; // mark the line having ended
                     continue; // let's start reading the injection
                 }

                 return c; // return the marker, we're done
             }

             // Do the finite state machine
             switch (state) {

                 // NOTE :
                 // - continue means ignore current character, read next
                 // - break means return current character

                 // Template text state - text is wrapped in out.write()
                 case PARSE_STATE_ESP:
                     if (c == '$') { // might start EL-like ECMA expr
                         int c2 = input.read();
                         if (c2 == '{') {
                             // ECMA expression ${ ... }
                             pushState(PARSE_STATE_ECMA_EXPR_COMPACT);
                             startWrite(null);
                             if (!lineStart) {
                                 // close the template text line still open
                                 doVerbatim("\");");
                             }
                             continue;
                         }

                         // plain dollar sign, push back read ahead
                         unreadLookahead(c2);

                     } else if (c == '<') { // might start ECMA code/expr or JSP comment
                         int c2 = input.read();
                         int c3 = input.read();

                         if (c2 == '%') {
                             // ECMA or JSP comment

                             if (c3 == '=') {

                                 // ECMA expression <%= ... %>
                                 pushState(PARSE_STATE_ECMA_EXPR);
                                 startWrite(null);
                                 if (!lineStart) {
                                     doVerbatim("\");");
                                 }
                                 continue;

                             } else if (c3 == '-') {

                                 // (Possible) JSP Comment <%-- ... --%>
                                 int c4 = input.read();
                                 if (c4 == '-') {
                                     pushState(PARSE_STATE_JSP_COMMENT);
                                     continue;
                                 }
                                 unreadLookahead(c4);

                             }

                             // We only get here if we are sure about ECMA

                             // ECMA code <% ... %>
                             unreadLookahead(c3);
                             pushState(PARSE_STATE_ECMA);
                             if (!lineStart) {
                                 doVerbatim("\");");
                             }
                             continue;

                         }

                         // Nothing special, push back read ahead (in reverse
                         // order, so that c2 is read again before c3)
                         unreadLookahead(c3);
                         unreadLookahead(c2);

                     // End of template text line
                     } else if (c == '\r' || c == '\n') {
                         String lineEnd; // escaped form, will be injected

                         // Check for real CRLF
                         if (c == '\r') {
                             int c2 = input.read();
                             if (c2 != '\n') {
                                 unreadLookahead(c2);
                                 lineEnd = "\\r";
                             } else {
                                 lineEnd = "\\r\\n";
                             }
                         } else {
                             lineEnd = "\\n";
                         }

                         // Injections are prepended, hence the reverse order:
                         // the escaped line end is returned first, followed
                         // by the end of the write call and a real LF.
                         doVerbatim("\");\n");
                         doVerbatim(lineEnd);

                         if (lineStart) {
                             // empty line: no write call is open yet, so
                             // open one just for the line ending
                             startWrite("\"");
                         } else {
                             // the open template line is now terminated
                             lineStart = true;
                         }

                         continue;

                     // template text is wrapped with double quotes, which
                     // when occurring in the text must be escaped.
                     // We also escape the escape character..
                     } else if (c == '"' || c == '\\') {

                         // return a backslash now and the character itself
                         // verbatim afterwards
                         doVerbatim(String.valueOf((char) c));
                         c = '\\';

                     }

                     // If in template text at the beginning of a line
                     if (lineStart) {
                         lineStart = false;
                         startWrite("\"" + (char) c);
                         continue;
                     }

                     break;

                 // Reading ECMA code or an ECMA expression
                 case PARSE_STATE_ECMA_EXPR:
                 case PARSE_STATE_ECMA:

                     if (c == '%') {

                         // might return to PARSE_STATE_ESP
                         int c2 = input.read();
                         if (c2 == '>') {

                             // An expression is wrapped in out.write()
                             if (popState() == PARSE_STATE_ECMA_EXPR) {
                                 doVerbatim(");");
                             }

                             // next ESP needs out.write(
                             lineStart = true;

                             continue;

                         }

                         // false alert, push back
                         unreadLookahead(c2);

                     } else if (c == '/') {

                         // might be ECMA Comment
                         int c2 = input.read();
                         if (c2 == '/') {
                             // single line comment, the second slash is
                             // returned as part of the comment
                             pushState(PARSE_STATE_ECMA_COMMENTL);
                             unreadLookahead(c2);
                         } else if (c2 == '*') {
                             // multiline comment; the star is returned
                             // verbatim such that it cannot be mistaken for
                             // the start of the closing star slash
                             pushState(PARSE_STATE_ECMA_COMMENT);
                             doVerbatim("*");
                         } else {
                             // false alert, push back
                             unreadLookahead(c2);
                         }

                     } else if (c == '\'' || c == '"') {

                         // an ECMA string
                         escape = false; // start unescaped
                         quoteChar = (char) c; // to recognize the end
                         pushState(PARSE_STATE_QUOTE);

                     }
                     break;

                 // reading compact (EL-like) ECMA Expression
                 case PARSE_STATE_ECMA_EXPR_COMPACT:
                     if (c == '}') { // end of the compact expression
                         // An expression is wrapped in out.write()
                         popState();
                         doVerbatim(");");

                         // next ESP needs out.write(
                         lineStart = true;

                         continue;

                     }
                     break;

                 // Reading a JSP comment, only returning line endings
                 case PARSE_STATE_JSP_COMMENT:

                     // JSP comments end complexly with --%>
                     if (c == '-') {
                         int c2 = input.read();
                         if (c2 == '-') {
                             int c3 = input.read();
                             if (c3 == '%') {
                                 int c4 = input.read();
                                 if (c4 == '>') {

                                     // we really reached the end ...
                                     popState();
                                     continue;

                                 }
                                 unreadLookahead(c4);
                             }
                             unreadLookahead(c3);
                         }
                         unreadLookahead(c2);

                     } else if (c == '\r' || c == '\n') {

                         // terminate an open template line
                         if (!lineStart) {
                             input.unread(c); // push back the character
                             doVerbatim("\");"); // insert ");
                             lineStart = true; // mark the line start
                             continue; // Force read of the "
                         }

                         break;
                     }

                     // continue reading another character in the comment
                     continue;

                 // Read an ECMA string up to the ending quote character
                 case PARSE_STATE_QUOTE:

                     // if unescaped quote character
                     if (c == quoteChar && !escape) {
                         popState();
                     } else {
                         // mark escape - only if not already escaped (bug 7079)
                         escape = c == '\\' && !escape;
                     }

                     break;

                 // Return characters unfiltered
                 case PARSE_STATE_VERBATIM:

                     // Go back to previous state if all characters read
                     if (--verbatimChars < 0) {
                         popState();
                     }

                     break;

                 // Return an ECMA multiline comment, ending with star slash
                 case PARSE_STATE_ECMA_COMMENT:

                     // Might be the end of the comment
                     if (c == '*') {
                         int c2 = input.read();
                         if (c2 == '/') {
                             popState(); // back to previous
                             doVerbatim("/"); // return slash verbatim
                         } else {
                             unreadLookahead(c2);
                         }
                     }

                     break;

                 // Return an ECMA single line comment, ending with end of line
                 case PARSE_STATE_ECMA_COMMENTL:

                     // CR, LF and CRLF all terminate the comment. For CRLF
                     // the LF is simply returned by the previous state.
                     if (c == '\r' || c == '\n') {
                         popState();
                     }

                     break;

                 // What ???!!!
                 default:

                     // we warn and go back to default state
                     log.warn("doRead(): unknown state {}", state);
                     state = PARSE_STATE_ESP;

                     break;

             } // switch

             // Exiting the switch normally we return the current character
             return c;

         } // for(;;)

     }

     /**
      * Pushes a looked ahead character back into the input unless it is the
      * end of file marker. Pushing back the marker would inject it as the
      * character 0xFFFF into the filtered output.
      *
      * @param c the result of a look ahead <code>input.read()</code> call
      * @throws IOException if the 'unreading' throws
      */
     private void unreadLookahead(int c) throws IOException {
         if (c >= 0) {
             input.unread(c);
         }
     }

     /**
      * Throw an IOException if the reader is not open
      *
      * @throws IOException if the reader is (already) closed
      */
     private void ensureOpen() throws IOException {
         if (input == null) {
             throw new IOException("Reader is closed");
         }
     }

     /**
      * Injects the call to write template text and checks whether the global
      * <em>out</em> variable has also to be defined such that the writer is
      * acquired on demand.
      * <p>
      * As each injection is prepended to the input, the parts are injected in
      * reverse order of their appearance in the output: the out initialization
      * statement (if still needed) is returned first, followed by
      * <code>out.write(</code> and finally the <code>startString</code>.
      *
      * @param startString Additional data to be injected as initial argument
      *      to the <em>out.write</em> call written. If <code>null</code> just
      *      the method call is injected.
      *
      * @throws IOException if the 'unreading' throws
      */
     private void startWrite(String startString) throws IOException {

         // inject the out.write( part and the initial string
         if (startString != null && startString.length() > 0) {
             doVerbatim(startString);
         }
         doVerbatim("out.write(");

         // if out is not set yet, we also acquire it now setting it
         // globally
         if (outUndefined) {
             doVerbatim(outInitStatement);
             outUndefined = false;
         }
     }

     /**
      * Injects a string into the input stream, sets the number of characters to
      * return verbatim and change state. The state change only happens if we are
      * not in verbatim state already. Else the current string is simply
      * prepended to the previous injection. This is simply a convenience method
      * ;-)
      *
      * @param verbatimString The string to inject into the input stream
      * @throws IOException if the 'unreading' throws
      */
     private void doVerbatim(String verbatimString) throws IOException {

         // Push 'back' into PushbackReader
         input.unread(verbatimString.toCharArray());

         // Set the number of characters to return verbatim
         verbatimChars += verbatimString.length();

         // Change state if not already in verbatim state
         if (state != PARSE_STATE_VERBATIM) {
             pushState(PARSE_STATE_VERBATIM);
         }
     }

     /**
      * Push the current state on stack and set to <code>newState</code>. This
      * new state is also returned.
      *
      * @param newState the new state to set
      * @return the new state set according to <code>newState</code>
      */
     private byte pushState(byte newState) {
         stateStack.push(state);
         return state = newState;
     }

     /**
      * Sets the current state to the state stored at the top of the stack. If
      * the stack is empty prior to this call, the default template text state is
      * set. The method returns the state prior to setting to the new state.
      *
      * @return the state prior to calling this method
      */
     private byte popState() {
         byte oldState = state;
         state = stateStack.isEmpty() ? PARSE_STATE_ESP : stateStack.pop();
         return oldState;
     }

 }