blob: b607c7b36baa2f39bf7f53ec9c4c39a76fe6447d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.olingo.server.core.deserializer.batch;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.olingo.commons.api.format.ContentType;
import org.apache.olingo.server.api.deserializer.batch.BatchDeserializerException;
/**
 * Static helper methods used while parsing batch requests: extracting the multipart boundary,
 * validating content types, splitting a message at its boundary lines, consuming header lines
 * and blank lines, and converting lists of {@link Line}s into strings or input streams.
 */
public class BatchParserCommon {

  // Multipart boundaries are defined in RFC 2046:
  // boundary := 0*69<bchars> bcharsnospace
  // bchars := bcharsnospace / " "
  // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," / "-" / "." / "/" / ":" / "=" / "?"
  // The first alternative is for the case that only characters are used that don't need quoting.
  private static final Pattern PATTERN_BOUNDARY = Pattern.compile(
      "((?:\\w|[-.'+]){1,70})|"
          + "\"((?:\\w|[-.'+(),/:=?]|\\s){0,69}(?:\\w|[-.'+(),/:=?]))\"");

  // Captures everything before the final CRLF of a text; whitespace after that CRLF is allowed.
  private static final Pattern PATTERN_LAST_CRLF = Pattern.compile("(.*)\\r\\n\\s*", Pattern.DOTALL);

  // HTTP header fields are defined in RFC 7230:
  // header-field = field-name ":" OWS field-value OWS
  // field-name = token
  // field-value = *( field-content / obs-fold )
  // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  // field-vchar = VCHAR / obs-text
  // obs-fold = CRLF 1*( SP / HTAB )
  // token = 1*tchar
  // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
  // / DIGIT / ALPHA
  // For the field-name the specification is followed strictly,
  // but for the field-value the pattern currently accepts more than specified.
  private static final Pattern PATTERN_HEADER_LINE = Pattern.compile("((?:\\w|[!#$%\\&'*+\\-.^`|~])+):\\s?(.*)\\s*");

  // A line consisting only of whitespace that contains at least one line feed.
  // Compiled once here instead of on every consumeBlankLine() call.
  private static final Pattern PATTERN_BLANK_LINE = Pattern.compile("\\s*\r?\n\\s*");

  public static final String CONTENT_TRANSFER_ENCODING = "Content-Transfer-Encoding";
  protected static final String BOUNDARY = "boundary";
  public static final String BINARY_ENCODING = "binary";

  private BatchParserCommon() { /* private constructor for helper class */ }

  /**
   * Extracts the multipart boundary from a content-type header value.
   * The content type must be compatible with <code>multipart/mixed</code> and must carry a
   * <code>boundary</code> parameter whose value is either an unquoted or a quoted boundary.
   *
   * @param contentType content type string which is parsed
   * @param line line number reported in exceptions
   * @return the boundary; for a quoted boundary the surrounding quotes are not included
   * @throws BatchDeserializerException if the content type is missing, invalid or not compatible,
   * if the boundary parameter is missing, or if the boundary has an invalid format
   */
  public static String getBoundary(final String contentType, final int line) throws BatchDeserializerException {
    final ContentType type = parseContentType(contentType, ContentType.MULTIPART_MIXED, line);
    final String boundary = type.getParameter(BOUNDARY);
    if (boundary == null) {
      throw new BatchDeserializerException("Missing boundary.",
          BatchDeserializerException.MessageKeys.MISSING_BOUNDARY_DELIMITER, Integer.toString(line));
    }

    final Matcher matcher = PATTERN_BOUNDARY.matcher(boundary);
    if (!matcher.matches()) {
      throw new BatchDeserializerException("Invalid boundary format",
          BatchDeserializerException.MessageKeys.INVALID_BOUNDARY, Integer.toString(line));
    }
    // Group 1 is the unquoted alternative, group 2 the content of the quoted alternative.
    final String unquoted = matcher.group(1);
    return unquoted == null ? matcher.group(2) : unquoted;
  }

  /**
   * Get the content type based on <code>contentType</code> parameter.
   * If this content type is not compatible to the expected ContentType a
   * BatchDeserializerException is thrown.
   *
   * @param contentType content type string which is parsed
   * @param expected content type to which the parsed must be compatible
   * @param line parsed line
   * @return the parsed content type or if not compatible or parseable an exception is thrown (never returns null)
   * @throws BatchDeserializerException if the content type is <code>null</code>, cannot be parsed,
   * or is not compatible to the expected content type
   */
  public static ContentType parseContentType(final String contentType, final ContentType expected, final int line)
      throws BatchDeserializerException {
    if (contentType == null) {
      throw new BatchDeserializerException("Missing content type",
          BatchDeserializerException.MessageKeys.MISSING_CONTENT_TYPE, Integer.toString(line));
    }

    final ContentType type;
    try {
      type = ContentType.create(contentType);
    } catch (final IllegalArgumentException e) {
      throw new BatchDeserializerException("Invalid content type.", e,
          BatchDeserializerException.MessageKeys.INVALID_CONTENT_TYPE, Integer.toString(line));
    }

    if (!type.isCompatible(expected)) {
      throw new BatchDeserializerException("Content type is not the expected content type",
          BatchDeserializerException.MessageKeys.UNEXPECTED_CONTENT_TYPE,
          Integer.toString(line), expected.toContentTypeString(), type.toContentTypeString());
    }
    return type;
  }

  /**
   * Splits a message into the parts that are separated by boundary lines.
   * Everything before the first boundary line (the preamble) is dropped, lines after the
   * close delimiter are ignored, and the trailing CRLF of the last line of each part is removed.
   *
   * @param message lines of the message
   * @param boundary boundary without the leading <code>--</code>
   * @return the parts found between the boundary lines, in order of appearance
   * @throws BatchDeserializerException if the message contains no close delimiter
   * (<code>--boundary--</code>); the reported line number is that of the first message line,
   * or 0 for an empty message
   */
  public static List<List<Line>> splitMessageByBoundary(final List<Line> message, final String boundary)
      throws BatchDeserializerException {
    final String quotedBoundary = Pattern.quote(boundary);
    final Pattern boundaryDelimiterPattern = Pattern.compile("--" + quotedBoundary + "--\\s*");
    final Pattern boundaryPattern = Pattern.compile("--" + quotedBoundary + "\\s*");

    final List<List<Line>> messageParts = new LinkedList<>();
    List<Line> currentPart = new LinkedList<>();
    boolean isEndReached = false;

    for (Line currentLine : message) {
      final String text = currentLine.toString();
      if (boundaryDelimiterPattern.matcher(text).matches()) {
        // Close delimiter: finish the current part and ignore everything that follows.
        removeEndingCRLFFromList(currentPart);
        messageParts.add(currentPart);
        isEndReached = true;
        break;
      } else if (boundaryPattern.matcher(text).matches()) {
        removeEndingCRLFFromList(currentPart);
        messageParts.add(currentPart);
        currentPart = new LinkedList<>();
      } else {
        currentPart.add(currentLine);
      }
    }

    // Remove preamble
    if (!messageParts.isEmpty()) {
      messageParts.remove(0);
    }

    if (!isEndReached) {
      final int lineNumber = message.isEmpty() ? 0 : message.get(0).getLineNumber();
      throw new BatchDeserializerException("Missing close boundary delimiter",
          BatchDeserializerException.MessageKeys.MISSING_CLOSE_DELIMITER, Integer.toString(lineNumber));
    }
    return messageParts;
  }

  /** Replaces the last line of a non-empty list by the result of {@link #removeEndingCRLF(Line)}. */
  private static void removeEndingCRLFFromList(final List<Line> list) {
    if (!list.isEmpty()) {
      final Line lastLine = list.remove(list.size() - 1);
      list.add(removeEndingCRLF(lastLine));
    }
  }

  /**
   * Removes the final CRLF of a line together with any whitespace that follows it.
   *
   * @param line line to be shortened
   * @return a new line with the same line number and the shortened text,
   * or the given line itself if its text does not end that way
   */
  public static Line removeEndingCRLF(final Line line) {
    final Matcher matcher = PATTERN_LAST_CRLF.matcher(line.toString());
    if (matcher.matches()) {
      return new Line(matcher.group(1), line.getLineNumber());
    }
    return line;
  }

  /**
   * Removes the leading header lines from the given list and collects them.
   * Consumption stops at the first line that is not a header line; that line and all
   * following lines stay in the list. Header names and values are trimmed and each value
   * is split with {@link Header#splitValuesByComma(String)}.
   *
   * @param remainingMessage lines to consume from; header lines are removed from this list
   * @return the collected headers, created with the line number of the first line
   * (0 if the list is empty)
   */
  public static Header consumeHeaders(final List<Line> remainingMessage) {
    final int headerLineNumber = remainingMessage.isEmpty() ? 0 : remainingMessage.get(0).getLineNumber();
    final Header headers = new Header(headerLineNumber);

    final Iterator<Line> iter = remainingMessage.iterator();
    while (iter.hasNext()) {
      final Line currentLine = iter.next();
      // The pattern always has exactly two capturing groups (name and value),
      // so a successful match is the only condition that needs checking.
      final Matcher headerMatcher = PATTERN_HEADER_LINE.matcher(currentLine.toString());
      if (!headerMatcher.matches()) {
        break;
      }
      iter.remove();
      final String headerName = headerMatcher.group(1).trim();
      final String headerValue = headerMatcher.group(2).trim();
      headers.addHeader(headerName, Header.splitValuesByComma(headerValue), currentLine.getLineNumber());
    }
    return headers;
  }

  /**
   * Removes the first line of the list if it is a blank line, i.e., if it consists only of
   * whitespace and contains a line feed.
   *
   * @param remainingMessage lines to consume from
   * @param isStrict whether a missing blank line is an error; if <code>false</code>,
   * a missing blank line is silently accepted and the list is left unchanged
   * @throws BatchDeserializerException if <code>isStrict</code> is set and the list is empty
   * or does not start with a blank line
   */
  public static void consumeBlankLine(final List<Line> remainingMessage, final boolean isStrict)
      throws BatchDeserializerException {
    if (!remainingMessage.isEmpty()
        && PATTERN_BLANK_LINE.matcher(remainingMessage.get(0).toString()).matches()) {
      remainingMessage.remove(0);
    } else if (isStrict) {
      final int lineNumber = remainingMessage.isEmpty() ? 0 : remainingMessage.get(0).getLineNumber();
      throw new BatchDeserializerException("Missing blank line",
          BatchDeserializerException.MessageKeys.MISSING_BLANK_LINE, "[None]", Integer.toString(lineNumber));
    }
  }

  /**
   * Concatenates all lines and provides the result as a stream of bytes in the given charset.
   *
   * @param messageList lines to concatenate
   * @param charset charset used to encode the concatenated text
   * @return input stream over the encoded text
   */
  public static InputStream convertLineListToInputStream(final List<Line> messageList, final Charset charset) {
    final String message = lineListToString(messageList);
    return new ByteArrayInputStream(message.getBytes(charset));
  }

  /** Concatenates the text of all lines in list order, without adding any separator. */
  private static String lineListToString(final List<Line> messageList) {
    final StringBuilder builder = new StringBuilder();
    for (Line currentLine : messageList) {
      builder.append(currentLine.toString());
    }
    return builder.toString();
  }

  /**
   * Concatenates the lines and returns at most the first <code>length</code> characters.
   * Lines are only appended until enough characters have been collected.
   *
   * @param list lines to concatenate
   * @param length maximum number of characters (not bytes) in the result
   * @return the possibly truncated text; the empty string if <code>length</code> is not positive
   */
  public static String trimLineListToLength(final List<Line> list, final int length) {
    if (length <= 0) {
      return "";
    }
    final StringBuilder builder = new StringBuilder();
    for (Line currentLine : list) {
      builder.append(currentLine.toString());
      if (builder.length() >= length) {
        // Enough characters collected; the remaining lines cannot contribute to the result.
        break;
      }
    }
    if (builder.length() > length) {
      builder.setLength(length);
    }
    return builder.toString();
  }

  /**
   * Concatenates the lines, truncates the text to at most <code>length</code> characters and
   * provides the result as a stream of bytes in the given charset.
   * NOTE(review): the limit is applied to characters before encoding; if callers pass a byte
   * count (e.g., from a Content-Length header), multi-byte content could be cut differently
   * than intended - to be confirmed against the callers.
   *
   * @param list lines to concatenate
   * @param charset charset used to encode the truncated text
   * @param length maximum number of characters taken from the concatenated text
   * @return input stream over the encoded text
   */
  public static InputStream convertLineListToInputStream(final List<Line> list, final Charset charset,
      final int length) {
    final String message = trimLineListToLength(list, length);
    return new ByteArrayInputStream(message.getBytes(charset));
  }
}