| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.tomcat.util.buf; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.CharConversionException; |
| import java.io.IOException; |
| import java.io.OutputStreamWriter; |
| import java.nio.charset.Charset; |
| import java.nio.charset.StandardCharsets; |
| |
| import org.apache.tomcat.util.res.StringManager; |
| |
| /** |
| * All URL decoding happens here. This way we can reuse, review, optimize without adding complexity to the buffers. The |
| * conversion will modify the original buffer. |
| * |
| * @author Costin Manolache |
| */ |
| public final class UDecoder { |
| |
| private static final StringManager sm = StringManager.getManager(UDecoder.class); |
| |
| private static class DecodeException extends CharConversionException { |
| private static final long serialVersionUID = 1L; |
| |
| DecodeException(String s) { |
| super(s); |
| } |
| |
| @Override |
| public synchronized Throwable fillInStackTrace() { |
| // This class does not provide a stack trace |
| return this; |
| } |
| } |
| |
| /** Unexpected end of data. */ |
| private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof")); |
| |
| /** %xx with not-hex digit */ |
| private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException(sm.getString("uDecoder.isHexDigit")); |
| |
| /** %-encoded slash is forbidden in resource path */ |
| private static final IOException EXCEPTION_SLASH = new DecodeException(sm.getString("uDecoder.noSlash")); |
| |
| |
| /** |
| * URLDecode, will modify the source. Assumes source bytes are encoded using a superset of US-ASCII as per RFC 7230. |
| * "%2f" will be rejected unless the input is a query string. |
| * |
| * @param mb The URL encoded bytes |
| * @param query {@code true} if this is a query string. For a query string '+' will be decoded to ' ' |
| * |
| * @throws IOException Invalid %xx URL encoding |
| */ |
| public void convert(ByteChunk mb, boolean query) throws IOException { |
| if (query) { |
| convert(mb, true, EncodedSolidusHandling.DECODE); |
| } else { |
| convert(mb, false, EncodedSolidusHandling.REJECT); |
| } |
| } |
| |
| |
| /** |
| * URLDecode, will modify the source. Assumes source bytes are encoded using a superset of US-ASCII as per RFC 7230. |
| * |
| * @param mb The URL encoded bytes |
| * @param encodedSolidusHandling How should the %2f sequence handled by the decoder? For query strings this |
| * parameter will be ignored and the %2f sequence will be decoded |
| * |
| * @throws IOException Invalid %xx URL encoding |
| */ |
| public void convert(ByteChunk mb, EncodedSolidusHandling encodedSolidusHandling) throws IOException { |
| convert(mb, false, encodedSolidusHandling); |
| } |
| |
| |
| private void convert(ByteChunk mb, boolean query, EncodedSolidusHandling encodedSolidusHandling) |
| throws IOException { |
| |
| int start = mb.getOffset(); |
| byte buff[] = mb.getBytes(); |
| int end = mb.getEnd(); |
| |
| int idx = ByteChunk.findByte(buff, start, end, (byte) '%'); |
| int idx2 = -1; |
| if (query) { |
| idx2 = ByteChunk.findByte(buff, start, (idx >= 0 ? idx : end), (byte) '+'); |
| } |
| if (idx < 0 && idx2 < 0) { |
| return; |
| } |
| |
| // idx will be the smallest positive index ( first % or + ) |
| if ((idx2 >= 0 && idx2 < idx) || idx < 0) { |
| idx = idx2; |
| } |
| |
| for (int j = idx; j < end; j++, idx++) { |
| if (buff[j] == '+' && query) { |
| buff[idx] = (byte) ' '; |
| } else if (buff[j] != '%') { |
| buff[idx] = buff[j]; |
| } else { |
| // read next 2 digits |
| if (j + 2 >= end) { |
| throw EXCEPTION_EOF; |
| } |
| byte b1 = buff[j + 1]; |
| byte b2 = buff[j + 2]; |
| if (!isHexDigit(b1) || !isHexDigit(b2)) { |
| throw EXCEPTION_NOT_HEX_DIGIT; |
| } |
| |
| j += 2; |
| int res = x2c(b1, b2); |
| if (res == '/') { |
| switch (encodedSolidusHandling) { |
| case DECODE: { |
| buff[idx] = (byte) res; |
| break; |
| } |
| case REJECT: { |
| throw EXCEPTION_SLASH; |
| } |
| case PASS_THROUGH: { |
| buff[idx++] = buff[j - 2]; |
| buff[idx++] = buff[j - 1]; |
| buff[idx] = buff[j]; |
| } |
| } |
| } else { |
| buff[idx] = (byte) res; |
| } |
| } |
| } |
| |
| mb.setEnd(idx); |
| } |
| |
| // -------------------- Additional methods -------------------- |
| |
| /** |
| * Decode and return the specified URL-encoded String. It is assumed the string is not a query string. |
| * |
| * @param str The url-encoded string |
| * @param charset The character encoding to use; if null, UTF-8 is used. |
| * |
| * @return the decoded string |
| * |
| * @exception IllegalArgumentException if a '%' character is not followed by a valid 2-digit hexadecimal number |
| */ |
| public static String URLDecode(String str, Charset charset) { |
| if (str == null) { |
| return null; |
| } |
| |
| if (str.indexOf('%') == -1) { |
| // No %nn sequences, so return string unchanged |
| return str; |
| } |
| |
| if (charset == null) { |
| charset = StandardCharsets.UTF_8; |
| } |
| |
| /* |
| * Decoding is required. |
| * |
| * Potential complications: |
| * |
| * - The source String may be partially decoded so it is not valid to assume that the source String is ASCII. |
| * |
| * - Have to process as characters since there is no guarantee that the byte sequence for '%' is going to be the |
| * same in all character sets. |
| * |
| * - We don't know how many '%nn' sequences are required for a single character. It varies between character |
| * sets and some use a variable length. |
| */ |
| |
| // This isn't perfect but it is a reasonable guess for the size of the |
| // array required |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2); |
| |
| OutputStreamWriter osw = new OutputStreamWriter(baos, charset); |
| |
| char[] sourceChars = str.toCharArray(); |
| int len = sourceChars.length; |
| int ix = 0; |
| |
| try { |
| while (ix < len) { |
| char c = sourceChars[ix++]; |
| if (c == '%') { |
| osw.flush(); |
| if (ix + 2 > len) { |
| throw new IllegalArgumentException(sm.getString("uDecoder.urlDecode.missingDigit", str)); |
| } |
| char c1 = sourceChars[ix++]; |
| char c2 = sourceChars[ix++]; |
| if (isHexDigit(c1) && isHexDigit(c2)) { |
| baos.write(x2c(c1, c2)); |
| } else { |
| throw new IllegalArgumentException(sm.getString("uDecoder.urlDecode.missingDigit", str)); |
| } |
| } else { |
| osw.append(c); |
| } |
| } |
| osw.flush(); |
| |
| return baos.toString(charset.name()); |
| } catch (IOException ioe) { |
| throw new IllegalArgumentException(sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()), |
| ioe); |
| } |
| } |
| |
| |
| private static boolean isHexDigit(int c) { |
| return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); |
| } |
| |
| |
| private static int x2c(byte b1, byte b2) { |
| int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0'); |
| digit *= 16; |
| digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0'); |
| return digit; |
| } |
| |
| |
| private static int x2c(char b1, char b2) { |
| int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0'); |
| digit *= 16; |
| digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0'); |
| return digit; |
| } |
| } |