// Source blob: 21a0c58ae09aef5e5226d88f4ac3b9bfd4a37f68
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.vfs2.util;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.vfs2.provider.GenericURLFileName;
/**
 * Utility for escaping and character-encoding URI components.
 * <p>
 * This was forked from some needed methods such as {@code #encodePath(...)} in
 * {@code org.apache.commons.httpclient.util.URIUtil}, in order to not be dependent on HttpClient v3 API,
 * when generating and handling {@link GenericURLFileName}s, but it should work with any different HTTP
 * backend provider implementations.
 * </p>
 */
public class URIUtils {

    /**
     * Internal helpers for converting between strings and byte arrays.
     * <p>
     * This was forked from some needed methods such as {@code #getBytes(...)} and {@code #getAsciiString(...)}
     * in {@code org.apache.commons.httpclient.util.EncodingUtil}, in order to not be dependent on
     * HttpClient v3 API, when generating and handling {@link GenericURLFileName}s, but it should work with
     * any different HTTP backend provider implementations.
     * </p>
     */
    private static class EncodingUtils {

        /**
         * Builds a string from a slice of a byte array, decoding the bytes as US-ASCII.
         *
         * @param data the byte array to decode
         * @param offset the index of the first byte to decode
         * @param length the number of bytes to decode
         * @return the decoded string
         */
        static String getAsciiString(final byte[] data, final int offset, final int length) {
            final String ascii = new String(data, offset, length, StandardCharsets.US_ASCII);
            return ascii;
        }

        /**
         * Encodes a string into bytes using the named charset. When the charset name is not supported,
         * a warning is logged (if enabled) and the JVM default charset is used instead.
         *
         * @param data the string to encode
         * @param charsetName the name of the desired character encoding
         * @return the encoded bytes
         * @throws IllegalArgumentException if {@code data} is {@code null}, or if {@code charsetName} is
         *         {@code null} or empty
         */
        static byte[] getBytes(final String data, final String charsetName) {
            if (data == null) {
                throw new IllegalArgumentException("data may not be null");
            }
            if (StringUtils.isEmpty(charsetName)) {
                throw new IllegalArgumentException("charset may not be null or empty");
            }
            byte[] encoded;
            try {
                encoded = data.getBytes(charsetName);
            } catch (final UnsupportedEncodingException unsupported) {
                // Best effort: report the problem and fall back to the default charset.
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Unsupported encoding: " + charsetName + ". System encoding used.");
                }
                encoded = data.getBytes(Charset.defaultCharset());
            }
            return encoded;
        }

        private EncodingUtils() {
        }
    }

    /**
     * Internal URL codec helpers.
     * <p>
     * This was forked from some needed methods such as {@code #encodeUrl(...)} and {@code #hexDigit(int)}
     * in {@code org.apache.commons.codec.net.URLCodec}, as commons-codec library cannot be pulled in
     * transitively via Http Client v3 library any more.
     * </p>
     */
    private static class URLCodecUtils {

        /** Byte that introduces an escaped octet. */
        private static final byte ESCAPE_CHAR = '%';

        /** Bytes that are written without escaping when the caller supplies no safe set. */
        private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256);

        static {
            // Letters and digits; the upper bound of BitSet.set(from, to) is exclusive.
            WWW_FORM_URL_SAFE.set('a', 'z' + 1);
            WWW_FORM_URL_SAFE.set('A', 'Z' + 1);
            WWW_FORM_URL_SAFE.set('0', '9' + 1);
            // Punctuation marks.
            for (final char mark : new char[] {'-', '_', '.', '*'}) {
                WWW_FORM_URL_SAFE.set(mark);
            }
            // The blank is "safe" here only so that encodeUrl can rewrite it as '+'.
            WWW_FORM_URL_SAFE.set(' ');
        }

        /**
         * Radix used in encoding and decoding.
         */
        private static final int RADIX = 16;

        /**
         * Escapes every byte that is not a member of the given safe set.
         * <p>
         * A safe byte is copied as is, except that a blank is written as {@code '+'}. Any other byte is
         * written as {@code '%'} followed by its two upper-case hexadecimal digits.
         * </p>
         *
         * @param urlsafe the set of byte values that need no escaping; when {@code null} the built-in
         *        www-form-url safe set is used
         * @param bytes the bytes to escape, may be {@code null}
         * @return the escaped bytes, or {@code null} if {@code bytes} is {@code null}
         */
        static final byte[] encodeUrl(final BitSet urlsafe, final byte[] bytes) {
            if (bytes == null) {
                return null;
            }
            final BitSet safeSet = urlsafe != null ? urlsafe : WWW_FORM_URL_SAFE;
            final ByteArrayOutputStream out = new ByteArrayOutputStream();
            for (int i = 0; i < bytes.length; i++) {
                // View the signed byte as an unsigned value in the range 0..255.
                final int octet = bytes[i] & 0xFF;
                if (!safeSet.get(octet)) {
                    out.write(ESCAPE_CHAR);
                    out.write(hexDigit(octet >> 4));
                    out.write(hexDigit(octet));
                    continue;
                }
                out.write(octet == ' ' ? '+' : octet);
            }
            return out.toByteArray();
        }

        /**
         * Returns the upper-case hexadecimal digit for the low four bits of the given value.
         *
         * @param b the value whose low nibble is converted
         * @return the hexadecimal digit character
         */
        private static char hexDigit(final int b) {
            final char digit = Character.forDigit(b & 0xF, RADIX);
            return Character.toUpperCase(digit);
        }

        private URLCodecUtils() {
        }
    }

    private static final Log LOG = LogFactory.getLog(URIUtils.class);

    /**
     * The default charset of the protocol. RFC 2277, 2396
     */
    private static final String DEFAULT_PROTOCOL_CHARSET = "UTF-8";

    /**
     * Converts the string to bytes in the given charset, escapes every byte outside the allowed set,
     * and returns the escaped bytes as an ASCII string.
     *
     * @param unescaped the string to escape
     * @param allowed the set of byte values that are left unescaped
     * @param charset the name of the charset used to convert the string to bytes
     * @return the escaped string
     */
    private static String encode(final String unescaped, final BitSet allowed, final String charset) {
        final byte[] source = EncodingUtils.getBytes(unescaped, charset);
        final byte[] escaped = URLCodecUtils.encodeUrl(allowed, source);
        return EncodingUtils.getAsciiString(escaped, 0, escaped.length);
    }

    /**
     * Escapes and encodes a string regarded as the path component of a URI, using the default
     * protocol charset.
     *
     * @param unescaped an unescaped string
     * @return the escaped string
     * @throws IllegalArgumentException if {@code unescaped} is {@code null}
     */
    public static String encodePath(final String unescaped) {
        return encodePath(unescaped, DEFAULT_PROTOCOL_CHARSET);
    }

    /**
     * Escapes and encodes a string regarded as the path component of a URI, using the given charset.
     *
     * @param unescaped an unescaped string
     * @param charset the name of the charset
     * @return the escaped string
     * @throws IllegalArgumentException if {@code unescaped} is {@code null}, or if {@code charset} is
     *         {@code null} or empty
     */
    public static String encodePath(final String unescaped, final String charset) {
        if (unescaped != null) {
            return encode(unescaped, URIBitSets.allowed_abs_path, charset);
        }
        throw new IllegalArgumentException("The string to encode may not be null.");
    }

    private URIUtils() {
    }
}