blob: a02b54e24d44fa297d619224913a6dc08e095e5e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.generation;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import javax.servlet.http.HttpServletRequest;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.avalon.framework.activity.Initializable;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.ResourceNotFoundException;
import org.apache.cocoon.environment.ObjectModelHelper;
import org.apache.cocoon.environment.Request;
import org.apache.cocoon.environment.http.HttpEnvironment;
import org.apache.cocoon.servlet.multipart.Part;
import org.apache.cocoon.util.PostInputStream;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
/**
* @cocoon.sitemap.component.documentation
* The <code>StreamGenerator</code> is a class that reads XML from a
* request InputStream and generates SAX Events.
*
* @cocoon.sitemap.component.name stream
* @cocoon.sitemap.component.label content
* @cocoon.sitemap.component.logger sitemap.generator.stream
*
* @cocoon.sitemap.component.pooling.max 16
*
* For the POST requests with a mimetype of application/x-www-form-urlencoded,
* or multipart/form-data the xml data is expected to be associated with the
* sitemap parameter 'form-name'.
*
* For the POST requests with mimetypes: text/plain, text/xml,
* application/xhtml+xml, application/xml the xml data is in the body of the POST request and
* its length is specified by the value returned by getContentLength()
* method. The StreamGenerator uses the helper class
* org.apache.cocoon.util.PostInputStream for reading the InputStream.
* While the parser is reading data from the InputStream it has no
* knowledge of how much data remains to be read. The only way to
* signal to the parser that all data has been read is to control the
* read operation through PostInputStream, which returns '-1' to the
* caller once the number of bytes read equals the getContentLength()
* value.
*
* @author <a href="mailto:Kinga_Dziembowski@hp.com">Kinga Dziembowski</a>
* @version CVS $Id$
*/
public class StreamGenerator extends ServiceableGenerator implements Initializable
{
    /** The parameter holding the name associated with the xml data **/
    public static final String FORM_NAME = "form-name";

    /** Prefix of the Content-Type parameter that carries the character encoding. */
    private static final String CHARSET_PARAM = "charset=";

    /** The input source */
    private InputSource inputSource;

    // don't use Excalibur's SAXParser to prevent XXE injection
    private SAXParserFactory factory;

    /**
     * Recycle this component.
     * The input source is reset to <code>null</code>; the parser factory
     * created in {@link #initialize()} is kept.
     */
    public void recycle() {
        super.recycle();
        this.inputSource = null;
    }

    /**
     * Creates the namespace-aware SAX parser factory used by
     * {@link #generate()}, with XInclude processing and external
     * general/parameter entities switched off.
     *
     * @throws Exception if the factory cannot be created or rejects one of
     *         the features.
     */
    public void initialize() throws Exception {
        factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setXIncludeAware(false);
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    }

    /**
     * Generate XML data out of request InputStream.
     *
     * <p>The content type is taken from the request, falling back to the
     * <code>defaultContentType</code> sitemap parameter. Form content types
     * read the XML from the request object named by the
     * <code>form-name</code> sitemap parameter; text/xml style content types
     * read it from the request body.</p>
     *
     * @throws IOException declared for callers; I/O failures raised while
     *         generating are reported as {@link ResourceNotFoundException}.
     * @throws SAXException if the parser reports an error.
     * @throws ProcessingException if the request cannot be handled (missing
     *         <code>form-name</code> parameter, unknown request object,
     *         non-http environment) or any other failure occurs.
     */
    public void generate()
    throws IOException, SAXException, ProcessingException {
        int len = 0;
        String contentType = null;
        // Stream of an uploaded part; opened here, so closed here as well.
        InputStream partStream = null;
        Request request = ObjectModelHelper.getRequest(this.objectModel);
        try {
            contentType = request.getContentType();
            if (contentType == null) {
                contentType = parameters.getParameter("defaultContentType", null);
                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("no Content-Type header - using contentType parameter: " + contentType);
                }
                if (contentType == null) {
                    throw new IOException("both Content-Type header and defaultContentType parameter are not set");
                }
            }

            if (contentType.startsWith("application/x-www-form-urlencoded") ||
                    contentType.startsWith("multipart/form-data")) {
                String parameter = parameters.getParameter(FORM_NAME, null);
                if (parameter == null) {
                    throw new ProcessingException(
                        "StreamGenerator expects a sitemap parameter called '" +
                        FORM_NAME + "' for handling form data"
                    );
                }
                Object xmlObject = request.get(parameter);
                if (xmlObject instanceof String) {
                    inputSource = new InputSource(new StringReader((String) xmlObject));
                } else if (xmlObject instanceof Part) {
                    // Hand the raw bytes to the parser instead of decoding them
                    // with the platform default charset: this lets the parser
                    // honour the BOM / XML declaration and setEncoding() below.
                    partStream = ((Part) xmlObject).getInputStream();
                    inputSource = new InputSource(partStream);
                } else {
                    throw new ProcessingException("Unknown request object encountered named " +
                                                  parameter + " : " + xmlObject);
                }
            } else if (contentType.startsWith("text/plain") ||
                    contentType.startsWith("text/xml") ||
                    contentType.startsWith("application/xhtml+xml") ||
                    contentType.startsWith("application/xml")) {
                HttpServletRequest httpRequest = (HttpServletRequest) objectModel.get(HttpEnvironment.HTTP_REQUEST_OBJECT);
                if ( httpRequest == null ) {
                    throw new ProcessingException("This feature is only available in an http environment.");
                }
                len = request.getContentLength();
                if (len > 0) {
                    // PostInputStream signals end-of-stream after 'len' bytes.
                    PostInputStream anStream = new PostInputStream(httpRequest.getInputStream(), len);
                    inputSource = new InputSource(anStream);
                } else {
                    // Reached for zero as well as negative content lengths.
                    throw new IOException("getContentLen() == 0");
                }
            } else {
                throw new IOException("Unexpected getContentType(): " + request.getContentType());
            }

            if (getLogger().isDebugEnabled()) {
                getLogger().debug("processing stream ContentType=" + contentType + " ContentLen=" + len);
            }
            String charset = getCharacterEncoding(request, contentType);
            if (charset != null) {
                this.inputSource.setEncoding(charset);
            }

            SAXParser parser = factory.newSAXParser();
            XMLReader xmlReader = parser.getXMLReader();
            xmlReader.setContentHandler(super.xmlConsumer);
            xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", super.xmlConsumer);
            xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);
            xmlReader.parse(this.inputSource);
        } catch (IOException e) {
            getLogger().error("StreamGenerator.generate()", e);
            throw new ResourceNotFoundException("StreamGenerator could not find resource", e);
        } catch (SAXException e) {
            getLogger().error("StreamGenerator.generate()", e);
            throw e;
        } catch (ProcessingException e) {
            // Already carries a precise message; do not re-wrap it or log it
            // as a parser set-up failure.
            getLogger().error("StreamGenerator.generate()", e);
            throw e;
        } catch (Exception e) {
            getLogger().error("Could not get parser", e);
            throw new ProcessingException("Exception in StreamGenerator.generate()", e);
        } finally {
            if (partStream != null) {
                try {
                    partStream.close();
                } catch (IOException ignored) {
                    // Best effort: a failure to close must not mask the outcome of generate().
                    if (getLogger().isDebugEnabled()) {
                        getLogger().debug("Could not close uploaded part stream", ignored);
                    }
                }
            }
        }
    }

    /**
     * Determines the character encoding announced for the request content.
     *
     * <p>The Content-Type HTTP header can contain character encoding info,
     * for example <code>Content-Type: text/xml; charset=UTF-8</code>.
     * If the container follows servlet spec 2.3 or higher, the servlet API
     * can be used to retrieve the character encoding part of the header.
     * Some containers may choose not to unpack the charset info - the spec
     * is not strict about it - so as a last resort the value is extracted
     * from <code>contentType</code> directly.</p>
     *
     * <p>It is a very common mistake to send
     * <code>Content-Type: text/xml; charset="UTF-8"</code>. Some containers
     * do not filter this mistake and processing results in an exception;
     * this method compensates by removing the surrounding quotes.</p>
     *
     * @param req the request asked for its character encoding.
     * @param contentType value associated with Content-Type HTTP header.
     * @return the character encoding, or <code>null</code> if
     *         <code>contentType</code> is <code>null</code> or contains no
     *         <code>charset=</code> parameter (matched case-insensitively).
     */
    public String getCharacterEncoding(Request req, String contentType) {
        if (contentType == null) {
            return null;
        }
        int idx = indexOfCharsetParam(contentType);
        if (idx == -1) {
            return null;
        }
        try {
            String charencoding = req.getCharacterEncoding();
            if (charencoding != null) {
                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("charset from container: " + charencoding);
                }
                charencoding = stripQuotes(charencoding.trim());
                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("charset from container clean: " + charencoding);
                }
                return charencoding;
            } else {
                return extractCharset(contentType, idx);
            }
        } catch (Throwable e) {
            // We will be there if the container do not implement getCharacterEncoding() method.
            // Throwable is caught on purpose: a missing method surfaces as an Error.
            return extractCharset(contentType, idx);
        }
    }

    /**
     * Extracts the charset value from a Content-Type header value.
     *
     * @param contentType value associated with Content-Type HTTP header.
     * @param idx index in <code>contentType</code> at which the
     *        <code>charset=</code> parameter starts.
     * @return the text following <code>charset=</code> up to the next
     *         <code>;</code> (or the end of the string), trimmed and with
     *         one pair of surrounding double quotes removed.
     */
    protected String extractCharset(String contentType, int idx) {
        getLogger().debug("charset from extractCharset");
        String charencoding = contentType.substring(idx + CHARSET_PARAM.length());
        int idxEnd = charencoding.indexOf(";");
        if (idxEnd != -1) {
            charencoding = charencoding.substring(0, idxEnd);
        }
        charencoding = stripQuotes(charencoding.trim());
        if (getLogger().isDebugEnabled()) {
            getLogger().debug("charset from extractCharset: " + charencoding);
        }
        return charencoding.trim();
    }

    /**
     * Finds the <code>charset=</code> parameter ignoring case. The returned
     * index refers to the unmodified string, or is -1 if there is no match.
     */
    private static int indexOfCharsetParam(String contentType) {
        final int paramLength = CHARSET_PARAM.length();
        final int lastStart = contentType.length() - paramLength;
        for (int i = 0; i <= lastStart; i++) {
            if (contentType.regionMatches(true, i, CHARSET_PARAM, 0, paramLength)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Removes one pair of enclosing double quotes from a value longer than
     * two characters; any other value is returned unchanged.
     */
    private static String stripQuotes(String value) {
        if ((value.length() > 2) && value.startsWith("\"") && value.endsWith("\"")) {
            return value.substring(1, value.length() - 1);
        }
        return value;
    }
}