Backport SetCharacterEncodingFilter


git-svn-id: https://svn.apache.org/repos/asf/tomcat/tc5.5.x/trunk@1228181 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/STATUS.txt b/STATUS.txt
index 58ebe08..963cdee 100644
--- a/STATUS.txt
+++ b/STATUS.txt
@@ -24,19 +24,6 @@
 PATCHES ACCEPTED TO BACKPORT FROM TRUNK/OTHER:
   [ start all new proposals below, under PATCHES PROPOSED. ]
 
-* Backport SetCharacterEncodingFilter
-  Justification:
-    FailedRequestFilter is easier to configure when this one is
-    available as well.
-
-  1) patch
-   http://people.apache.org/~kkolinko/patches/2011-12-22_tc55_SetCharacterEncodingFilter.patch
-  2)
-   svn propset svn:eol-style native container/catalina/src/share/org/apache/catalina/filters/Constants.java
-   svn propset svn:eol-style native container/catalina/src/share/org/apache/catalina/filters/LocalStrings.properties
-   svn propset svn:eol-style native container/catalina/src/share/org/apache/catalina/filters/SetCharacterEncodingFilter.java
-  +1: kkolinko, rjung, jim
-  -1:
 
 PATCHES PROPOSED TO BACKPORT:
   [ New proposals should be added at the end of the list ]
diff --git a/container/catalina/src/share/org/apache/catalina/filters/Constants.java b/container/catalina/src/share/org/apache/catalina/filters/Constants.java
new file mode 100644
index 0000000..443e00d
--- /dev/null
+++ b/container/catalina/src/share/org/apache/catalina/filters/Constants.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.catalina.filters;
+
+
+/**
+ * Constants for this Java package.
+ */
+
+public final class Constants {
+    /** Package name handed to StringManager.getManager(). */
+    public static final String Package = "org.apache.catalina.filters";
+}
diff --git a/container/catalina/src/share/org/apache/catalina/filters/LocalStrings.properties b/container/catalina/src/share/org/apache/catalina/filters/LocalStrings.properties
new file mode 100644
index 0000000..7e8a595
--- /dev/null
+++ b/container/catalina/src/share/org/apache/catalina/filters/LocalStrings.properties
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filterbase.noSuchProperty=The property "{0}" is not defined for filters of type "{1}"
diff --git a/container/catalina/src/share/org/apache/catalina/filters/SetCharacterEncodingFilter.java b/container/catalina/src/share/org/apache/catalina/filters/SetCharacterEncodingFilter.java
new file mode 100644
index 0000000..ea8643c
--- /dev/null
+++ b/container/catalina/src/share/org/apache/catalina/filters/SetCharacterEncodingFilter.java
@@ -0,0 +1,154 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.catalina.filters;
+
+import java.io.IOException;
+import java.util.Enumeration;
+
+import javax.servlet.FilterChain;
+import javax.servlet.FilterConfig;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.tomcat.util.res.StringManager;
+
+
+/**
+ * <p>Example filter that sets the character encoding to be used in parsing the
+ * incoming request, either unconditionally or only if the client did not
+ * specify a character encoding.  Configuration of this filter is based on
+ * the following initialization parameters:</p>
+ * <ul>
+ * <li><strong>encoding</strong> - The character encoding to be configured
+ *     for this request, either conditionally or unconditionally based on
+ *     the <code>ignore</code> initialization parameter.  This parameter
+ *     is required, so there is no default.</li>
+ * <li><strong>ignore</strong> - If set to "true", any character encoding
+ *     specified by the client is ignored, and the value returned by the
+ *     <code>selectEncoding()</code> method is set.  If set to "false",
+ *     <code>selectEncoding()</code> is called <strong>only</strong> if the
+ *     client has not already specified an encoding.  By default, this
+ *     parameter is set to "false".</li>
+ * </ul>
+ *
+ * <p>Although this filter can be used unchanged, it is also easy to
+ * subclass it and make the <code>selectEncoding()</code> method more
+ * intelligent about what encoding to choose, based on characteristics of
+ * the incoming request (such as the values of the <code>Accept-Language</code>
+ * and <code>User-Agent</code> headers, or a value stashed in the current
+ * user's session).</p>
+ */
+public class SetCharacterEncodingFilter implements javax.servlet.Filter {
+
+    private static final Log log =
+        LogFactory.getLog(SetCharacterEncodingFilter.class);
+
+    protected static final StringManager sm =
+            StringManager.getManager(Constants.Package);
+
+    // ----------------------------------------------------- Instance Variables
+
+    /**
+     * The default character encoding to set for requests that pass through
+     * this filter.
+     */
+    private String encoding = null;
+    public void setEncoding(String encoding) { this.encoding = encoding; }
+    public String getEncoding() { return encoding; }
+
+    /**
+     * Should a character encoding specified by the client be ignored?
+     */
+    private boolean ignore = false;
+    public void setIgnore(boolean ignore) { this.ignore = ignore; }
+    public boolean isIgnore() { return ignore; }
+
+    // --------------------------------------------------------- Public Methods
+
+    /**
+     * Configure this filter from its initialization parameters. Unrecognised
+     * parameter names are logged as warnings and otherwise ignored.
+     *
+     * @param filterConfig The filter configuration to read parameters from
+     */
+    public void init(FilterConfig filterConfig) throws ServletException {
+        Enumeration paramNames = filterConfig.getInitParameterNames();
+        while (paramNames.hasMoreElements()) {
+            String paramName = (String) paramNames.nextElement();
+            if ("encoding".equals(paramName)) {
+                setEncoding(filterConfig.getInitParameter(paramName));
+            } else if ("ignore".equals(paramName)) {
+                setIgnore(Boolean.valueOf(
+                        filterConfig.getInitParameter(paramName))
+                        .booleanValue());
+            } else {
+                log.warn(sm.getString("filterbase.noSuchProperty",
+                        paramName, this.getClass().getName()));
+            }
+        }
+    }
+
+    /** Nothing to release; present to satisfy the Filter contract. */
+    public void destroy() { }
+
+    /**
+     * Select and set (if specified) the character encoding to be used to
+     * interpret request parameters for this request.
+     *
+     * @param request The servlet request we are processing
+     * @param response The servlet response we are creating
+     * @param chain The filter chain we are processing
+     *
+     * @exception IOException if an input/output error occurs
+     * @exception ServletException if a servlet error occurs
+     */
+    public void doFilter(ServletRequest request, ServletResponse response,
+                         FilterChain chain)
+        throws IOException, ServletException {
+        // Conditionally select and set the character encoding to be used
+        if (ignore || (request.getCharacterEncoding() == null)) {
+            String characterEncoding = selectEncoding(request);
+            if (characterEncoding != null) {
+                request.setCharacterEncoding(characterEncoding);
+            }
+        }
+
+        // Pass control on to the next filter
+        chain.doFilter(request, response);
+    }
+
+    // ------------------------------------------------------ Protected Methods
+
+    /**
+     * Select an appropriate character encoding to be used, based on the
+     * characteristics of the current request and/or filter initialization
+     * parameters.  If no character encoding should be set, return
+     * <code>null</code>.
+     * <p>
+     * The default implementation unconditionally returns the value configured
+     * by the <strong>encoding</strong> initialization parameter for this
+     * filter.
+     *
+     * @param request The servlet request we are processing
+     */
+    protected String selectEncoding(ServletRequest request) {
+        return this.encoding;
+    }
+}
diff --git a/container/webapps/docs/config/filter.xml b/container/webapps/docs/config/filter.xml
index ba040ae..56ed3bb 100644
--- a/container/webapps/docs/config/filter.xml
+++ b/container/webapps/docs/config/filter.xml
@@ -51,6 +51,58 @@
 </section>
 
 
+<section name="Set Character Encoding Filter">
+
+  <subsection name="Introduction">
+
+    <p>User agents don&apos;t always include character encoding information in
+    requests. Depending on how the request is processed, usually the
+    default encoding of ISO-8859-1 is used. This is not always
+    desirable. This filter provides options for setting that encoding or
+    forcing it to a particular value. Essentially this filter calls
+    <code>ServletRequest.setCharacterEncoding()</code> method.</p>
+
+    <p>Effectively the value set by this filter is used when parsing parameters
+    in a POST request, if parameter parsing occurs later than this filter. Thus
+    the order of filter mappings is important. Note that the encoding for GET
+    requests is not set here, but on a <strong>Connector</strong>. See the
+    CharacterEncoding page in the FAQ for details.</p>
+
+  </subsection>
+
+  <subsection name="Filter Class Name">
+
+    <p>The filter class name for the Set Character Encoding Filter is
+    <strong><code>org.apache.catalina.filters.SetCharacterEncodingFilter</code></strong>.</p>
+
+  </subsection>
+
+  <subsection name="Initialisation parameters">
+
+    <p>The Set Character Encoding Filter supports the following initialization
+    parameters:</p>
+
+    <attributes>
+
+      <attribute name="encoding" required="true">
+        <p>Name of the character encoding which should be set.</p>
+      </attribute>
+
+      <attribute name="ignore" required="false">
+        <p>Determines if any character encoding specified by the user agent is
+        ignored. If this attribute is <code>true</code>, any value provided by
+        the user agent is ignored. If <code>false</code>, the encoding is only
+        set if the user agent did not specify an encoding. The default value
+        is <code>false</code>.</p>
+      </attribute>
+
+    </attributes>
+
+  </subsection>
+
+</section>
+
+
 <section name="Failed Request Filter">
 
   <subsection name="Introduction">