java/org/apache/jasper/compiler/AttributeParser.java - tomcat80 - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.jasper.compiler;

 /**
  * Converts a JSP attribute value into the unquoted equivalent. The attribute
  * may contain EL expressions, in which case care needs to be taken to avoid any
  * ambiguities. For example, consider the attribute values "${1+1}" and
  * "\${1+1}". After unquoting, both appear as "${1+1}" but the first should
  * evaluate to "2" and the second to "${1+1}". Literal \, $ and # need special
  * treatment to ensure there is no ambiguity. The JSP attribute unquoting
  * covers \\, \", \', \$, \#, %\&gt;, &lt;\%, &amp;apos; and &amp;quot;
  */
 public class AttributeParser {

     /**
      * Parses the provided input String as a JSP attribute and returns an
      * unquoted value.
      *
      * @param input         The input. Must not be <code>null</code>; the
      *                      parser reads its length on construction.
      * @param quote         The quote character for the attribute or 0 for
      *                      scripting expressions.
      * @param isELIgnored   Is expression language being ignored on the page
      *                      where the JSP attribute is defined.
      * @param isDeferredSyntaxAllowedAsLiteral
      *                      Are deferred expressions treated as literals?
      * @param strict        Should the rules of JSP.1.6 for escaping of quotes
      *                      be strictly applied?
      * @param quoteAttributeEL Should the rules of JSP.1.6 for escaping in
      *                      attributes be applied to EL in attribute values?
      * @return              An unquoted JSP attribute that, if it contains
      *                      expression language can be safely passed to the EL
      *                      processor without fear of ambiguity.
      * @throws IllegalArgumentException if <code>strict</code> is set and an
      *                      unescaped occurrence of <code>quote</code> is found
      *                      where attribute unquoting is applied.
      */
     public static String getUnquoted(String input, char quote,
             boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral,
             boolean strict, boolean quoteAttributeEL) {
         return (new AttributeParser(input, quote, isELIgnored,
                 isDeferredSyntaxAllowedAsLiteral, strict, quoteAttributeEL)).getUnquoted();
     }

     /* The quoted input string. */
     private final String input;

     /* The quote used for the attribute - (char) 0 for scripting expressions. */
     private final char quote;

     /* Is expression language being ignored - affects unquoting. \$ and \# are
      * treated as literals rather than quoted values. */
     private final boolean isELIgnored;

     /* Are deferred expressions treated as literals */
     private final boolean isDeferredSyntaxAllowedAsLiteral;

     /* If a quote appears that matches quote, must it always be escaped? See
      * JSP.1.6.
      */
     private final boolean strict;

     /* Should JSP attribute unquoting also be applied to characters read
      * inside an EL expression? */
     private final boolean quoteAttributeEL;

     /* The type ($ or #) of expression. Literals have a type of (char) 0. */
     private final char type;

     /* The length of the quoted input string. */
     private final int size;

     /* Tracks the current position of the parser in the input String. */
     private int i = 0;

     /* Indicates if the last character returned by nextChar() was escaped. */
     private boolean lastChEscaped = false;

     /* The unquoted result. */
     private final StringBuilder result;


     /**
      * Creates a single-use parser for one attribute value. Instances are only
      * created by the static <code>getUnquoted</code> entry point.
      *
      * @param input         The quoted attribute value.
      * @param quote         The quote character for the attribute or 0 for
      *                      scripting expressions.
      * @param isELIgnored   Is expression language being ignored?
      * @param isDeferredSyntaxAllowedAsLiteral
      *                      Are deferred expressions treated as literals?
      * @param strict        Must a quote matching <code>quote</code> always
      *                      be escaped?
      * @param quoteAttributeEL Is attribute unquoting applied inside EL?
      */
     private AttributeParser(String input, char quote,
             boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral,
             boolean strict, boolean quoteAttributeEL) {
         this.input = input;
         this.quote = quote;
         this.isELIgnored = isELIgnored;
         this.isDeferredSyntaxAllowedAsLiteral =
             isDeferredSyntaxAllowedAsLiteral;
         this.strict = strict;
         this.quoteAttributeEL = quoteAttributeEL;
         // getType() reads isELIgnored and isDeferredSyntaxAllowedAsLiteral,
         // so it must run after those fields have been assigned.
         this.type = getType(input);
         this.size = input.length();
         result = new StringBuilder(size);
     }

     /*
      * Work through input looking for literals and expressions until the input
      * has all been read.
      */
     private String getUnquoted() {
         while (i < size) {
             parseLiteral();
             parseEL();
         }
         return result.toString();
     }

     /*
      * This method gets the next unquoted character and looks for
      * - literals that need to be converted for EL processing
      *   \ -> type{'\\'}
      *   $ -> type{'$'}
      *   # -> type{'#'}
      * - start of EL
      *   ${
      *   #{
      * Note all the examples above *do not* include the escaping required to use
      * the values in Java code.
      */
     private void parseLiteral() {
         boolean foundEL = false;
         while (i < size && !foundEL) {
             char ch = nextChar();
             if (!isELIgnored && ch == '\\') {
                 if (type == 0) {
                     result.append("\\");
                 } else {
                     result.append(type);
                     result.append("{'\\\\'}");
                 }
             } else if (!isELIgnored && ch == '$' && lastChEscaped) {
                 if (type == 0) {
                     result.append("\\$");
                 } else {
                     result.append(type);
                     result.append("{'$'}");
                 }
             } else if (!isELIgnored && ch == '#' && lastChEscaped) {
                 // Note if isDeferredSyntaxAllowedAsLiteral==true, \# will
                 // not be treated as an escape
                 if (type == 0) {
                     result.append("\\#");
                 } else {
                     result.append(type);
                     result.append("{'#'}");
                 }
             } else if (ch == type && i < size && input.charAt(i) == '{') {
                 // Start of EL: move back so parseEL() sees the leading $ or #
                 foundEL = true;
                 i--;
             } else {
                 result.append(ch);
             }
         }
     }

     /*
      * Once inside EL, no need to unquote or convert anything. The EL is
      * terminated by '}'. The only other valid location for '}' is inside a
      * StringLiteral. The literals are delimited by '\'' or '\"'. The only other
      * valid location for '\'' or '\"' is also inside a StringLiteral. A quote
      * character inside a StringLiteral must be escaped if the same quote
      * character is used to delimit the StringLiteral.
      */
     private void parseEL() {
         boolean endEL = false;
         boolean insideLiteral = false;
         char literalQuote = 0;
         while (i < size && !endEL) {
             char ch = nextELChar();
             if (ch == '\'' || ch == '\"') {
                 if (insideLiteral) {
                     if (literalQuote == ch) {
                         insideLiteral = false;
                     }
                 } else {
                     insideLiteral = true;
                     literalQuote = ch;
                 }
                 result.append(ch);
             } else if (ch == '\\') {
                 result.append(ch);
                 // Inside a StringLiteral the backslash escapes the character
                 // that follows it. Copy that character through here so an
                 // escaped quote does not terminate the literal (and a '}'
                 // after it does not terminate the EL). The bounds test was
                 // previously inverted (size < i), which made this dead code.
                 if (insideLiteral && i < size) {
                     char escaped = nextELChar();
                     result.append(escaped);
                 }
             } else if (ch == '}') {
                 if (!insideLiteral) {
                     endEL = true;
                 }
                 result.append(ch);
             } else {
                 result.append(ch);
             }
         }
     }

     /*
      * Reads the next character while inside EL. Attribute unquoting is only
      * applied when quoteAttributeEL is set, otherwise the raw character is
      * returned. Callers must ensure i < size.
      */
     private char nextELChar() {
         if (quoteAttributeEL) {
             return nextChar();
         }
         return input.charAt(i++);
     }

     /*
      * Returns the next unquoted character and sets the lastChEscaped flag to
      * indicate if it was quoted/escaped or not.
      * &apos; is always unquoted to '
      * &quot; is always unquoted to "
      * \" is always unquoted to "
      * \' is always unquoted to '
      * \\ is always unquoted to \
      * \$ is unquoted to $ if EL is not being ignored
      * \# is unquoted to # if EL is not being ignored
      * <\% is always unquoted to <%
      * %\> is always unquoted to %>
      */
     private char nextChar() {
         lastChEscaped = false;
         char ch = input.charAt(i);

         if (ch == '&') {
             if (input.startsWith("&apos;", i)) {
                 ch = '\'';
                 i += 6;
             } else if (input.startsWith("&quot;", i)) {
                 ch = '\"';
                 i += 6;
             } else {
                 ++i;
             }
         } else if (ch == '\\' && i + 1 < size) {
             ch = input.charAt(i + 1);
             if (ch == '\\' || ch == '\"' || ch == '\'' ||
                     (!isELIgnored &&
                             (ch == '$' ||
                                     (!isDeferredSyntaxAllowedAsLiteral &&
                                             ch == '#')))) {
                 i += 2;
                 lastChEscaped = true;
             } else {
                 // Not a recognised escape: return the backslash itself and
                 // leave the following character for the next call.
                 ch = '\\';
                 ++i;
             }
         } else if (input.startsWith("<\\%", i)) {
             // Note this is a hack since nextChar only returns a single char
             // It is safe since <% does not require special treatment for EL
             // or for literals
             result.append('<');
             i += 3;
             return '%';
         } else if (input.startsWith("%\\>", i)) {
             // Note this is a hack since nextChar only returns a single char
             // It is safe since %> does not require special treatment for EL
             // or for literals
             result.append('%');
             i += 3;
             return '>';
         } else if (ch == quote && strict) {
             String msg = Localizer.getMessage("jsp.error.attribute.noescape",
                     input, ""+ quote);
             throw new IllegalArgumentException(msg);
         } else {
             ++i;
         }

         return ch;
     }

     /*
      * Determines the type of expression by looking for the first unquoted ${
      * or #{. Returns (char) 0 when the value is null, when EL is ignored or
      * when no such sequence is found. #{ is not considered when deferred
      * syntax is allowed as a literal.
      */
     private char getType(String value) {
         if (value == null) {
             return 0;
         }

         if (isELIgnored) {
             return 0;
         }

         int j = 0;
         int len = value.length();
         char current;

         while (j < len) {
             current = value.charAt(j);
             if (current == '\\') {
                 // Escape character - skip a character
                 j++;
             } else if (current == '#' && !isDeferredSyntaxAllowedAsLiteral) {
                 if (j < (len - 1) && value.charAt(j + 1) == '{') {
                     return '#';
                 }
             } else if (current == '$') {
                 if (j < (len - 1) && value.charAt(j + 1) == '{') {
                     return '$';
                 }
             }
             j++;
         }
         return 0;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.jasper.compiler;

	/**
	* Converts a JSP attribute value into the unquoted equivalent. The attribute
	* may contain EL expressions, in which case care needs to be taken to avoid any
	* ambiguities. For example, consider the attribute values "${1+1}" and
	* "\${1+1}". After unquoting, both appear as "${1+1}" but the first should
	* evaluate to "2" and the second to "${1+1}". Literal \, $ and # need special
	* treatment to ensure there is no ambiguity. The JSP attribute unquoting
	* covers \\, \", \', \$, \#, %\>, <\%, &apos; and &quot;
	*/
	public class AttributeParser {

	/**
	* Parses the provided input String as a JSP attribute and returns an
	* unquoted value.
	*
	* @param input The input.
	* @param quote The quote character for the attribute or 0 for
	* scripting expressions.
	* @param isELIgnored Is expression language being ignored on the page
	* where the JSP attribute is defined.
	* @param isDeferredSyntaxAllowedAsLiteral
	* Are deferred expressions treated as literals?
	* @param strict Should the rules of JSP.1.6 for escaping of quotes
	* be strictly applied?
	* @param quoteAttributeEL Should the rules of JSP.1.6 for escaping in
	* attributes be applied to EL in attribute values?
	* @return An unquoted JSP attribute that, if it contains
	* expression language can be safely passed to the EL
	* processor without fear of ambiguity.
	*/
	public static String getUnquoted(String input, char quote,
	boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral,
	boolean strict, boolean quoteAttributeEL) {
	return (new AttributeParser(input, quote, isELIgnored,
	isDeferredSyntaxAllowedAsLiteral, strict, quoteAttributeEL)).getUnquoted();
	}

	/* The quoted input string. */
	private final String input;

	/* The quote used for the attribute - null for scripting expressions. */
	private final char quote;

	/* Is expression language being ignored - affects unquoting. \$ and \# are
	* treated as literals rather than quoted values. */
	private final boolean isELIgnored;

	/* Are deferred expression treated as literals */
	private final boolean isDeferredSyntaxAllowedAsLiteral;

	/* If a quote appears that matches quote, must it always be escaped? See
	* JSP.1.6.
	*/
	private final boolean strict;

	private final boolean quoteAttributeEL;

	/* The type ($ or #) of expression. Literals have a type of null. */
	private final char type;

	/* The length of the quoted input string. */
	private final int size;

	/* Tracks the current position of the parser in the input String. */
	private int i = 0;

	/* Indicates if the last character returned by nextChar() was escaped. */
	private boolean lastChEscaped = false;

	/* The unquoted result. */
	private final StringBuilder result;


	/**
	* For test purposes.
	* @param input
	* @param quote
	* @param strict
	*/
	private AttributeParser(String input, char quote,
	boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral,
	boolean strict, boolean quoteAttributeEL) {
	this.input = input;
	this.quote = quote;
	this.isELIgnored = isELIgnored;
	this.isDeferredSyntaxAllowedAsLiteral =
	isDeferredSyntaxAllowedAsLiteral;
	this.strict = strict;
	this.quoteAttributeEL = quoteAttributeEL;
	this.type = getType(input);
	this.size = input.length();
	result = new StringBuilder(size);
	}

	/*
	* Work through input looking for literals and expressions until the input
	* has all been read.
	*/
	private String getUnquoted() {
	while (i < size) {
	parseLiteral();
	parseEL();
	}
	return result.toString();
	}

	/*
	* This method gets the next unquoted character and looks for
	* - literals that need to be converted for EL processing
	* \ -> type{'\\'}
	* $ -> type{'$'}
	* # -> type{'#'}
	* - start of EL
	* ${
	* #{
	* Note all the examples above do not include the escaping required to use
	* the values in Java code.
	*/
	private void parseLiteral() {
	boolean foundEL = false;
	while (i < size && !foundEL) {
	char ch = nextChar();
	if (!isELIgnored && ch == '\\') {
	if (type == 0) {
	result.append("\\");
	} else {
	result.append(type);
	result.append("{'\\\\'}");
	}
	} else if (!isELIgnored && ch == '$' && lastChEscaped){
	if (type == 0) {
	result.append("\\$");
	} else {
	result.append(type);
	result.append("{'$'}");
	}
	} else if (!isELIgnored && ch == '#' && lastChEscaped){
	// Note if isDeferredSyntaxAllowedAsLiteral==true, \# will
	// not be treated as an escape
	if (type == 0) {
	result.append("\\#");
	} else {
	result.append(type);
	result.append("{'#'}");
	}
	} else if (ch == type){
	if (i < size) {
	char next = input.charAt(i);
	if (next == '{') {
	foundEL = true;
	// Move back to start of EL
	i--;
	} else {
	result.append(ch);
	}
	} else {
	result.append(ch);
	}
	} else {
	result.append(ch);
	}
	}
	}

	/*
	* Once inside EL, no need to unquote or convert anything. The EL is
	* terminated by '}'. The only other valid location for '}' is inside a
	* StringLiteral. The literals are delimited by '\'' or '\"'. The only other
	* valid location for '\'' or '\"' is also inside a StringLiteral. A quote
	* character inside a StringLiteral must be escaped if the same quote
	* character is used to delimit the StringLiteral.
	*/
	private void parseEL() {
	boolean endEL = false;
	boolean insideLiteral = false;
	char literalQuote = 0;
	while (i < size && !endEL) {
	char ch;
	if (quoteAttributeEL) {
	ch = nextChar();
	} else {
	ch = input.charAt(i++);
	}
	if (ch == '\'' \|\| ch == '\"') {
	if (insideLiteral) {
	if (literalQuote == ch) {
	insideLiteral = false;
	}
	} else {
	insideLiteral = true;
	literalQuote = ch;
	}
	result.append(ch);
	} else if (ch == '\\') {
	result.append(ch);
	if (insideLiteral && size < i) {
	if (quoteAttributeEL) {
	ch = nextChar();
	} else {
	ch = input.charAt(i++);
	}
	result.append(ch);
	}
	} else if (ch == '}') {
	if (!insideLiteral) {
	endEL = true;
	}
	result.append(ch);
	} else {
	result.append(ch);
	}
	}
	}

	/*
	* Returns the next unquoted character and sets the lastChEscaped flag to
	* indicate if it was quoted/escaped or not.
	* ' is always unquoted to '
	* " is always unquoted to "
	* \" is always unquoted to "
	* \' is always unquoted to '
	* \\ is always unquoted to \
	* \$ is unquoted to $ if EL is not being ignored
	* \# is unquoted to # if EL is not being ignored
	* <\% is always unquoted to <%
	* %\> is always unquoted to %>
	*/
	private char nextChar() {
	lastChEscaped = false;
	char ch = input.charAt(i);

	if (ch == '&') {
	if (i + 5 < size && input.charAt(i + 1) == 'a' &&
	input.charAt(i + 2) == 'p' && input.charAt(i + 3) == 'o' &&
	input.charAt(i + 4) == 's' && input.charAt(i + 5) == ';') {
	ch = '\'';
	i += 6;
	} else if (i + 5 < size && input.charAt(i + 1) == 'q' &&
	input.charAt(i + 2) == 'u' && input.charAt(i + 3) == 'o' &&
	input.charAt(i + 4) == 't' && input.charAt(i + 5) == ';') {
	ch = '\"';
	i += 6;
	} else {
	++i;
	}
	} else if (ch == '\\' && i + 1 < size) {
	ch = input.charAt(i + 1);
	if (ch == '\\' \|\| ch == '\"' \|\| ch == '\'' \|\|
	(!isELIgnored &&
	(ch == '$' \|\|
	(!isDeferredSyntaxAllowedAsLiteral &&
	ch == '#')))) {
	i += 2;
	lastChEscaped = true;
	} else {
	ch = '\\';
	++i;
	}
	} else if (ch == '<' && (i + 2 < size) && input.charAt(i + 1) == '\\' &&
	input.charAt(i + 2) == '%') {
	// Note this is a hack since nextChar only returns a single char
	// It is safe since <% does not require special treatment for EL
	// or for literals
	result.append('<');
	i+=3;
	return '%';
	} else if (ch == '%' && i + 2 < size && input.charAt(i + 1) == '\\' &&
	input.charAt(i + 2) == '>') {
	// Note this is a hack since nextChar only returns a single char
	// It is safe since %> does not require special treatment for EL
	// or for literals
	result.append('%');
	i+=3;
	return '>';
	} else if (ch == quote && strict) {
	String msg = Localizer.getMessage("jsp.error.attribute.noescape",
	input, ""+ quote);
	throw new IllegalArgumentException(msg);
	} else {
	++i;
	}

	return ch;
	}

	/*
	* Determines the type of expression by looking for the first unquoted ${
	* or #{.
	*/
	private char getType(String value) {
	if (value == null) {
	return 0;
	}

	if (isELIgnored) {
	return 0;
	}

	int j = 0;
	int len = value.length();
	char current;

	while (j < len) {
	current = value.charAt(j);
	if (current == '\\') {
	// Escape character - skip a character
	j++;
	} else if (current == '#' && !isDeferredSyntaxAllowedAsLiteral) {
	if (j < (len -1) && value.charAt(j + 1) == '{') {
	return '#';
	}
	} else if (current == '$') {
	if (j < (len - 1) && value.charAt(j + 1) == '{') {
	return '$';
	}
	}
	j++;
	}
	return 0;
	}
	}