| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| * Contributors: |
| * Original contributors from geronimo-javamail_1.4_spec-1.7.1 |
| * Florent Guillaume |
| */ |
| package org.apache.chemistry.opencmis.commons.impl; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.UnsupportedEncodingException; |
| import java.util.HashMap; |
| import java.util.Locale; |
| import java.util.Map; |
| |
| import org.apache.chemistry.opencmis.commons.exceptions.CmisRuntimeException; |
| |
| /** |
| * MIME helper class. |
| */ |
| public final class MimeHelper { |
| |
/** Name of the {@code Content-Disposition} header. */
public static final String CONTENT_DISPOSITION = "Content-Disposition";

/** Disposition type {@code attachment}. */
public static final String DISPOSITION_ATTACHMENT = "attachment";

/** Disposition type {@code inline}. */
public static final String DISPOSITION_INLINE = "inline";

/** Name of the {@code filename} disposition parameter. */
public static final String DISPOSITION_FILENAME = "filename";

/** Name of the {@code name} disposition parameter. */
public static final String DISPOSITION_NAME = "name";

/** Complete disposition value {@code form-data; name="content"}. */
public static final String DISPOSITION_FORM_DATA_CONTENT = "form-data; " + DISPOSITION_NAME + "=\"content\"";

/** Special characters per RFC 2045, plus tab and space. */
private static final String MIME_SPECIALS = "()<>@,;:\\\"/[]?=\t ";

/** Characters that must be percent-encoded in an RFC 2231 value. */
private static final String RFC2231_SPECIALS = "*'%" + MIME_SPECIALS;

/** Characters treated as white space by the header tokenizer. */
private static final String WHITE = " \t\n\r";

/** Upper case hexadecimal digits, indexed by their numeric value. */
private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D',
        'E', 'F' };

/**
 * Reverse lookup table mapping an ASCII hexadecimal digit character
 * (either case) to its numeric value. All other slots stay 0.
 */
private static final byte[] HEX_DECODE = new byte[128];
static {
    for (int digit = 0; digit < HEX_DIGITS.length; digit++) {
        final char upper = HEX_DIGITS[digit];
        final char lower = Character.toLowerCase(upper);
        HEX_DECODE[upper] = (byte) digit;
        HEX_DECODE[lower] = (byte) digit;
    }
}

/**
 * Static helper class, not meant to be instantiated.
 */
private MimeHelper() {
}
| |
| /** |
| * Encodes a value per RFC 2231. |
| * <p> |
| * This is used to pass non-ASCII parameters to MIME parameter lists. |
| * <p> |
| * This implementation always uses UTF-8 and no language. |
| * <p> |
| * See <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a> for |
| * details. |
| * |
| * @param value |
| * the value to encode |
| * @param buf |
| * the buffer to fill |
| * @return {@code true} if an encoding was needed, or {@code false} if no |
| * encoding was actually needed |
| */ |
| protected static boolean encodeRFC2231value(String value, StringBuilder buf) { |
| assert value != null; |
| assert buf != null; |
| |
| String charset = IOUtils.UTF8; |
| buf.append(charset); |
| buf.append("''"); // no language |
| byte[] bytes = IOUtils.toUTF8Bytes(value); |
| |
| boolean encoded = false; |
| for (int i = 0; i < bytes.length; i++) { |
| int ch = bytes[i] & 0xff; |
| if (ch <= 32 || ch >= 127 || RFC2231_SPECIALS.indexOf(ch) != -1) { |
| buf.append('%'); |
| buf.append(HEX_DIGITS[ch >> 4]); |
| buf.append(HEX_DIGITS[ch & 0xf]); |
| encoded = true; |
| } else { |
| buf.append((char) ch); |
| } |
| } |
| return encoded; |
| } |
| |
| /** |
| * Encodes a MIME parameter per RFC 2231. |
| * <p> |
| * This implementation always uses UTF-8 and no language. |
| * <p> |
| * See <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a> for |
| * details. |
| * |
| * @param value |
| * the string to encode |
| * @return the encoded string |
| */ |
| protected static String encodeRFC2231(String key, String value) { |
| StringBuilder buf = new StringBuilder(32); |
| boolean encoded = encodeRFC2231value(value, buf); |
| if (encoded) { |
| return "; " + key + "*=" + buf.toString(); |
| } else { |
| return "; " + key + "=" + value; |
| } |
| } |
| |
| /** |
| * Encodes the Content-Disposition header value according to RFC 2183 and |
| * RFC 2231. |
| * <p> |
| * See <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a> for |
| * details. |
| * |
| * @param disposition |
| * the disposition |
| * @param filename |
| * the file name |
| * @return the encoded header value |
| */ |
| public static String encodeContentDisposition(String disposition, String filename) { |
| if (disposition == null) { |
| disposition = DISPOSITION_ATTACHMENT; |
| } |
| return disposition + encodeRFC2231(DISPOSITION_FILENAME, filename); |
| } |
| |
| /** |
| * Decodes a filename from the Content-Disposition header value according to |
| * RFC 2183 and RFC 2231. |
| * <p> |
| * See <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a> for |
| * details. |
| * |
| * @param value |
| * the header value to decode |
| * @return the filename |
| */ |
| public static String decodeContentDispositionFilename(String value) { |
| Map<String, String> params = new HashMap<String, String>(); |
| decodeContentDisposition(value, params); |
| return params.get(DISPOSITION_FILENAME); |
| } |
| |
| /** |
| * Decodes the Content-Disposition header value according to RFC 2183 and |
| * RFC 2231. |
| * <p> |
| * Does not deal with continuation lines. |
| * <p> |
| * See <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a> for |
| * details. |
| * |
| * @param value |
| * the header value to decode |
| * @param params |
| * the map of parameters to fill |
| * @return the disposition |
| * |
| */ |
| public static String decodeContentDisposition(String value, Map<String, String> params) { |
| try { |
| HeaderTokenizer tokenizer = new HeaderTokenizer(value); |
| // get the first token, which must be an ATOM |
| Token token = tokenizer.next(); |
| if (token.getType() != Token.ATOM) { |
| return null; |
| } |
| String disposition = token.getValue(); |
| // value ignored in this method |
| |
| // the remainder is the parameters |
| String remainder = tokenizer.getRemainder(); |
| if (remainder != null) { |
| getParameters(remainder, params); |
| } |
| return disposition; |
| } catch (ParseException e) { |
| return null; |
| } |
| } |
| |
| /** |
| * Gets charset from a content type header. |
| * |
| * @param value |
| * the header value to decode |
| * @return the charset or <code>null</code> if no valid boundary available |
| */ |
| public static String getCharsetFromContentType(String value) { |
| try { |
| HeaderTokenizer tokenizer = new HeaderTokenizer(value, ";", true); |
| |
| // get the first token, which must be an ATOM |
| Token token = tokenizer.next(); |
| if (token.getType() != Token.ATOM) { |
| return null; |
| } |
| |
| // the remainder is the parameters |
| String remainder = tokenizer.getRemainder(); |
| Map<String, String> params; |
| if (remainder != null) { |
| params = new HashMap<String, String>(); |
| getParameters(remainder, params); |
| |
| return params.get("charset"); |
| } |
| } catch (ParseException e) { |
| return null; |
| } |
| |
| return null; |
| } |
| |
| /** |
| * Parses a WWW-Authenticate header value. |
| * |
| * @param value |
| * the header value to parse |
| * |
| * @return a map with the (lower case) challenge name as key and as the |
| * value a sub-map with parameters of the challenge |
| */ |
| public static Map<String, Map<String, String>> getChallengesFromAuthenticateHeader(String value) { |
| if (value == null || value.length() == 0) { |
| return null; |
| } |
| |
| final String trimValue = value.trim(); |
| |
| Map<String, Map<String, String>> result = new HashMap<String, Map<String, String>>(); |
| |
| boolean inQuotes = false; |
| boolean inName = true; |
| String challenge = null; |
| String paramName = ""; |
| StringBuilder sb = new StringBuilder(64); |
| for (int i = 0; i < trimValue.length(); i++) { |
| char c = trimValue.charAt(i); |
| |
| if (c == '\\') { |
| if (!inQuotes) { |
| return null; |
| } |
| if (trimValue.length() > i && trimValue.charAt(i + 1) == '\\') { |
| sb.append('\\'); |
| i++; |
| } else if (trimValue.length() > i && trimValue.charAt(i + 1) == '"') { |
| sb.append('"'); |
| i++; |
| } else { |
| return null; |
| } |
| } else if (c == '"') { |
| if (inName) { |
| return null; |
| } |
| if (inQuotes) { |
| Map<String, String> authMap = result.get(challenge); |
| if (authMap == null) { |
| return null; |
| } |
| authMap.put(paramName, sb.toString()); |
| } |
| sb.setLength(0); |
| inQuotes = !inQuotes; |
| } else if (c == '=') { |
| if (inName) { |
| paramName = sb.toString().trim(); |
| |
| int spcIdx = paramName.indexOf(' '); |
| if (spcIdx > -1) { |
| challenge = paramName.substring(0, spcIdx).toLowerCase(Locale.ENGLISH); |
| result.put(challenge, new HashMap<String, String>()); |
| paramName = paramName.substring(spcIdx).trim(); |
| } |
| |
| sb.setLength(0); |
| inName = false; |
| } else if (!inQuotes) { |
| return null; |
| } |
| } else if (c == ',') { |
| if (inName) { |
| challenge = sb.toString().trim().toLowerCase(Locale.ENGLISH); |
| result.put(challenge, new HashMap<String, String>()); |
| sb.setLength(0); |
| } else { |
| if (inQuotes) { |
| sb.append(c); |
| } else { |
| Map<String, String> authMap = result.get(challenge); |
| if (authMap == null) { |
| return null; |
| } |
| if (!authMap.containsKey(paramName)) { |
| authMap.put(paramName, sb.toString().trim()); |
| } |
| sb.setLength(0); |
| inName = true; |
| } |
| } |
| } else { |
| sb.append(c); |
| } |
| } |
| if (inQuotes) { |
| return null; |
| } |
| if (inName) { |
| challenge = sb.toString().trim().toLowerCase(Locale.ENGLISH); |
| result.put(challenge, new HashMap<String, String>()); |
| } else { |
| Map<String, String> authMap = result.get(challenge); |
| if (authMap == null) { |
| return null; |
| } |
| if (!authMap.containsKey(paramName)) { |
| authMap.put(paramName, sb.toString().trim()); |
| } |
| } |
| |
| return result; |
| } |
| |
| /** |
| * Gets the boundary from a <code>multipart/formdata</code> content type |
| * header. |
| * |
| * @param value |
| * the header value to decode |
| * @return the boundary as a byte array or <code>null</code> if no valid |
| * boundary available |
| */ |
| public static byte[] getBoundaryFromMultiPart(String value) { |
| try { |
| HeaderTokenizer tokenizer = new HeaderTokenizer(value, ";", true); |
| |
| // get the first token, which must be an ATOM |
| Token token = tokenizer.next(); |
| if (token.getType() != Token.ATOM) { |
| return null; |
| } |
| |
| // check content type |
| String multipartContentType = token.getValue(); |
| if (multipartContentType == null |
| || !(multipartContentType.equalsIgnoreCase("multipart/form-data") || multipartContentType |
| .equalsIgnoreCase("multipart/related"))) { |
| return null; |
| } |
| |
| // the remainder is the parameters |
| String remainder = tokenizer.getRemainder(); |
| if (remainder != null) { |
| Map<String, String> params = new HashMap<String, String>(); |
| getParameters(remainder, params); |
| |
| String boundaryStr = params.get("boundary"); |
| if (boundaryStr != null && boundaryStr.length() > 0) { |
| try { |
| return boundaryStr.getBytes(IOUtils.ISO_8859_1); |
| } catch (UnsupportedEncodingException e) { |
| // shouldn't happen... |
| throw new CmisRuntimeException("Unsupported encoding 'ISO-8859-1'", e); |
| } |
| } |
| } |
| } catch (ParseException e) { |
| return null; |
| } |
| |
| return null; |
| } |
| |
| protected static class ParseException extends Exception { |
| private static final long serialVersionUID = 1L; |
| |
| public ParseException() { |
| super(); |
| } |
| |
| public ParseException(String message) { |
| super(message); |
| } |
| } |
| |
| /* |
| * From geronimo-javamail_1.4_spec-1.7.1. Token |
| */ |
| protected static class Token { |
| // Constant values from J2SE 1.4 API Docs (Constant values) |
| public static final int ATOM = -1; |
| |
| public static final int COMMENT = -3; |
| |
| public static final int EOF = -4; |
| |
| public static final int QUOTEDSTRING = -2; |
| |
| private final int type; |
| |
| private final String value; |
| |
| public Token(int type, String value) { |
| this.type = type; |
| this.value = value; |
| } |
| |
| public int getType() { |
| return type; |
| } |
| |
| public String getValue() { |
| return value; |
| } |
| } |
| |
| /* |
| * Tweaked from geronimo-javamail_1.4_spec-1.7.1. HeaderTokenizer |
| */ |
| protected static class HeaderTokenizer { |
| |
| private static final Token EOF = new Token(Token.EOF, null); |
| |
| private final String header; |
| |
| private final String delimiters; |
| |
| private final boolean skipComments; |
| |
| private int pos; |
| |
| public HeaderTokenizer(String header) { |
| this(header, MIME_SPECIALS, true); |
| } |
| |
| protected HeaderTokenizer(String header, String delimiters, boolean skipComments) { |
| this.header = header; |
| this.delimiters = delimiters; |
| this.skipComments = skipComments; |
| } |
| |
| public String getRemainder() { |
| return header.substring(pos); |
| } |
| |
| public Token next() throws ParseException { |
| return readToken(); |
| } |
| |
| /** |
| * Read an ATOM token from the parsed header. |
| * |
| * @return A token containing the value of the atom token. |
| */ |
| private Token readAtomicToken() { |
| // skip to next delimiter |
| int start = pos; |
| while (++pos < header.length()) { |
| // break on the first non-atom character. |
| char ch = header.charAt(pos); |
| if (delimiters.indexOf(header.charAt(pos)) != -1 || ch < 32 || ch >= 127) { |
| break; |
| } |
| } |
| return new Token(Token.ATOM, header.substring(start, pos)); |
| } |
| |
| /** |
| * Read the next token from the header. |
| * |
| * @return The next token from the header. White space is skipped, and |
| * comment tokens are also skipped if indicated. |
| */ |
| private Token readToken() throws ParseException { |
| if (pos >= header.length()) { |
| return EOF; |
| } else { |
| char c = header.charAt(pos); |
| // comment token...read and skip over this |
| if (c == '(') { |
| Token comment = readComment(); |
| if (skipComments) { |
| return readToken(); |
| } else { |
| return comment; |
| } |
| // quoted literal |
| } else if (c == '\"') { |
| return readQuotedString(); |
| // white space, eat this and find a real token. |
| } else if (WHITE.indexOf(c) != -1) { |
| eatWhiteSpace(); |
| return readToken(); |
| // either a CTL or special. These characters have a |
| // self-defining token type. |
| } else if (c < 32 || c >= 127 || delimiters.indexOf(c) != -1) { |
| pos++; |
| return new Token(c, String.valueOf(c)); |
| } else { |
| // start of an atom, parse it off. |
| return readAtomicToken(); |
| } |
| } |
| } |
| |
| /** |
| * Extract a substring from the header string and apply any |
| * escaping/folding rules to the string. |
| * |
| * @param start |
| * The starting offset in the header. |
| * @param end |
| * The header end offset + 1. |
| * @return The processed string value. |
| */ |
| private String getEscapedValue(int start, int end) throws ParseException { |
| StringBuilder value = new StringBuilder(32); |
| for (int i = start; i < end; i++) { |
| char ch = header.charAt(i); |
| // is this an escape character? |
| if (ch == '\\') { |
| i++; |
| if (i == end) { |
| throw new ParseException("Invalid escape character"); |
| } |
| value.append(header.charAt(i)); |
| } else if (ch == '\r') { |
| // line breaks are ignored, except for naked '\n' |
| // characters, which are consider parts of linear |
| // whitespace. |
| // see if this is a CRLF sequence, and skip the second if it |
| // is. |
| if (i < end - 1 && header.charAt(i + 1) == '\n') { |
| i++; |
| } |
| } else { |
| // just append the ch value. |
| value.append(ch); |
| } |
| } |
| return value.toString(); |
| } |
| |
| /** |
| * Read a comment from the header, applying nesting and escape rules to |
| * the content. |
| * |
| * @return A comment token with the token value. |
| */ |
| private Token readComment() throws ParseException { |
| int start = pos + 1; |
| int nesting = 1; |
| boolean requiresEscaping = false; |
| // skip to end of comment/string |
| while (++pos < header.length()) { |
| char ch = header.charAt(pos); |
| if (ch == ')') { |
| nesting--; |
| if (nesting == 0) { |
| break; |
| } |
| } else if (ch == '(') { |
| nesting++; |
| } else if (ch == '\\') { |
| pos++; |
| requiresEscaping = true; |
| } else if (ch == '\r') { |
| // we need to process line breaks also |
| requiresEscaping = true; |
| } |
| } |
| if (nesting != 0) { |
| throw new ParseException("Unbalanced comments"); |
| } |
| String value; |
| if (requiresEscaping) { |
| value = getEscapedValue(start, pos); |
| } else { |
| value = header.substring(start, pos++); |
| } |
| return new Token(Token.COMMENT, value); |
| } |
| |
| /** |
| * Parse out a quoted string from the header, applying escaping rules to |
| * the value. |
| * |
| * @return The QUOTEDSTRING token with the value. |
| * @exception ParseException |
| */ |
| private Token readQuotedString() throws ParseException { |
| int start = pos + 1; |
| boolean requiresEscaping = false; |
| // skip to end of comment/string |
| while (++pos < header.length()) { |
| char ch = header.charAt(pos); |
| if (ch == '"') { |
| String value; |
| if (requiresEscaping) { |
| value = getEscapedValue(start, pos++); |
| } else { |
| value = header.substring(start, pos++); |
| } |
| return new Token(Token.QUOTEDSTRING, value); |
| } else if (ch == '\\') { |
| pos++; |
| requiresEscaping = true; |
| } else if (ch == '\r') { |
| // we need to process line breaks also |
| requiresEscaping = true; |
| } |
| } |
| throw new ParseException("Missing '\"'"); |
| } |
| |
| /** |
| * Skip white space in the token string. |
| */ |
| private void eatWhiteSpace() { |
| // skip to end of whitespace |
| while (++pos < header.length() && WHITE.indexOf(header.charAt(pos)) != -1) { |
| // just read |
| } |
| } |
| } |
| |
| /* |
| * Tweaked from geronimo-javamail_1.4_spec-1.7.1. ParameterList |
| */ |
| protected static Map<String, String> getParameters(String list, Map<String, String> params) throws ParseException { |
| HeaderTokenizer tokenizer = new HeaderTokenizer(list); |
| while (true) { |
| Token token = tokenizer.next(); |
| switch (token.getType()) { |
| case Token.EOF: |
| // the EOF token terminates parsing. |
| return params; |
| |
| case ';': |
| // each new parameter is separated by a semicolon, including |
| // the first, which separates |
| // the parameters from the main part of the header. |
| // the next token needs to be a parameter name |
| token = tokenizer.next(); |
| // allow a trailing semicolon on the parameters. |
| if (token.getType() == Token.EOF) { |
| return params; |
| } |
| |
| if (token.getType() != Token.ATOM) { |
| throw new ParseException("Invalid parameter name: " + token.getValue()); |
| } |
| |
| // get the parameter name as a lower case version for better |
| // mapping. |
| String name = token.getValue().toLowerCase(Locale.ENGLISH); |
| |
| token = tokenizer.next(); |
| |
| // parameters are name=value, so we must have the "=" here. |
| if (token.getType() != '=') { |
| throw new ParseException("Missing '='"); |
| } |
| |
| // now the value, which may be an atom or a literal |
| token = tokenizer.next(); |
| |
| if (token.getType() != Token.ATOM && token.getType() != Token.QUOTEDSTRING) { |
| throw new ParseException("Invalid parameter value: " + token.getValue()); |
| } |
| |
| String value = token.getValue(); |
| |
| // we might have to do some additional decoding. A name that |
| // ends with "*" is marked as being encoded, so if requested, we |
| // decode the value. |
| if (name.endsWith("*")) { |
| name = name.substring(0, name.length() - 1); |
| value = decodeRFC2231value(value); |
| } |
| params.put(name, value); |
| break; |
| default: |
| throw new ParseException("Missing ';'"); |
| } |
| } |
| } |
| |
| protected static String decodeRFC2231value(String value) { |
| int q1 = value.indexOf('\''); |
| if (q1 == -1) { |
| // missing charset |
| return value; |
| } |
| String mimeCharset = value.substring(0, q1); |
| int q2 = value.indexOf('\'', q1 + 1); |
| if (q2 == -1) { |
| // missing language |
| return value; |
| } |
| byte[] bytes = fromHex(value.substring(q2 + 1)); |
| try { |
| return new String(bytes, getJavaCharset(mimeCharset)); |
| } catch (UnsupportedEncodingException e) { |
| // incorrect encoding |
| return value; |
| } |
| } |
| |
| protected static byte[] fromHex(String data) { |
| ByteArrayOutputStream out = new ByteArrayOutputStream(data.length()); |
| for (int i = 0; i < data.length();) { |
| char c = data.charAt(i++); |
| if (c == '%') { |
| if (i > data.length() - 2) { |
| break; // unterminated sequence |
| } |
| byte b1 = HEX_DECODE[data.charAt(i++) & 0x7f]; |
| byte b2 = HEX_DECODE[data.charAt(i++) & 0x7f]; |
| out.write((b1 << 4) | b2); |
| } else { |
| out.write((byte) c); |
| } |
| } |
| return out.toByteArray(); |
| } |
| |
| protected static String getJavaCharset(String mimeCharset) { |
| // good enough for standard values |
| return mimeCharset; |
| } |
| |
| } |