blob: 700e4771cc432d3423b1854d7e06c5797c72510d [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "apr.h"
#if APU_USE_XMLLITE
#include "apr_arch_utf8.h"
#include "apr_xml.h"
typedef struct xmllite_parser_s* XML_Parser;
typedef int XML_Error;
#include "apr_xml_internal.h"
#define CINTERFACE
#define COBJMACROS
#define interface struct
typedef void * LPMSG;
#include <xmllite.h>
#include "apr_xml_internal.h"
/*
 * Read-only ISequentialStream implementation used as the input of the
 * XmlLite reader. It exposes the buffer handed to the current parse call.
 */
typedef struct xml_stream_t {
/* COM interface part. The stream methods recover the enclosing object from
 * a pointer to this member with CONTAINING_RECORD(). */
ISequentialStream sequental_stream;
/* COM reference count; the object is free()d when it drops to zero. */
ULONG refcount;
/* Next unread byte of the current input chunk. The stream never frees it. */
const char *data;
/* Number of unread bytes left at 'data'. */
apr_size_t remaining;
/* Non-zero when the current chunk is the last one. On a short read this
 * selects S_FALSE (final) instead of E_PENDING (more data will follow). */
int is_final;
} xml_stream_t;
/*
 * Back-end specific parser state, reachable as apr_xml_parser::xp.
 */
struct xmllite_parser_s
{
/* XmlLite reader; released and reset to NULL by cleanup_parser(). */
IXmlReader *xml_reader;
/* Stream installed as the reader's input; refilled on every do_parse(). */
xml_stream_t *input_stream;
/* Subpool that do_parse() clears before each state-function call. */
apr_pool_t *iterpool;
/* State function that do_parse() calls repeatedly; it is either
 * read_state() or cdata_state(), which switch between each other. */
apr_status_t (*current_state)(apr_xml_parser *parser, apr_pool_t *scratch_pool);
/* Event callbacks. They are invoked with the apr_xml_parser as userdata. */
void (*start_func)(void *userdata, const char *name, const char **attrs);
void (*end_func)(void *userdata, const char *name);
void (*cdata_func)(void *userdata, const char *data, int len);
};
/*
 * IUnknown::QueryInterface for the input stream.
 *
 * Only IUnknown and ISequentialStream are supported. For those the reference
 * count is incremented, *ppvObject receives the interface pointer and S_OK is
 * returned. Any other IID sets *ppvObject to NULL and yields E_NOINTERFACE.
 */
static HRESULT STDMETHODCALLTYPE
stream_QueryInterface(ISequentialStream * This, REFIID riid, void **ppvObject)
{
    xml_stream_t *stream = CONTAINING_RECORD(This, xml_stream_t,
                                             sequental_stream);
    int supported = IsEqualIID(riid, &IID_IUnknown)
                    || IsEqualIID(riid, &IID_ISequentialStream);

    if (!supported) {
        *ppvObject = NULL;
        return E_NOINTERFACE;
    }

    /* Handing out an interface pointer counts as a new reference. */
    InterlockedIncrement(&stream->refcount);
    *ppvObject = &stream->sequental_stream;
    return S_OK;
}
/*
 * IUnknown::AddRef for the input stream: atomically increments the
 * reference count and returns the new value.
 */
static ULONG STDMETHODCALLTYPE
stream_AddRef(ISequentialStream * This)
{
    xml_stream_t *stream = CONTAINING_RECORD(This, xml_stream_t,
                                             sequental_stream);
    ULONG new_count = InterlockedIncrement(&stream->refcount);

    return new_count;
}
/*
 * IUnknown::Release for the input stream: atomically decrements the
 * reference count, frees the object once the count reaches zero, and
 * returns the new count.
 */
static ULONG STDMETHODCALLTYPE
stream_Release(ISequentialStream * This)
{
    xml_stream_t *stream = CONTAINING_RECORD(This, xml_stream_t,
                                             sequental_stream);
    ULONG left = InterlockedDecrement(&stream->refcount);

    if (left != 0) {
        return left;
    }

    /* Last reference is gone; the object was allocated on the C heap. */
    free(stream);
    return 0;
}
/*
 * ISequentialStream::Read for the input stream.
 *
 * Copies up to cb bytes of the current input chunk into pv and advances the
 * chunk position.
 *
 * pv       destination buffer of at least cb bytes.
 * cb       number of bytes requested.
 * pcbRead  optional; when not NULL it receives the number of bytes copied.
 *
 * Returns S_OK when the request was fully satisfied. On a short read it
 * returns E_PENDING if more input chunks are expected (is_final not set),
 * and S_FALSE if the current chunk is the final one.
 */
static HRESULT STDMETHODCALLTYPE
stream_Read(ISequentialStream * This,
            void *pv,
            ULONG cb,
            ULONG *pcbRead)
{
    xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t, sequental_stream);
    ULONG read = cb;

    if (read > obj->remaining) {
        read = (ULONG) obj->remaining;
    }

    /* Skip the copy for empty reads: obj->data is NULL until the first
     * chunk is supplied, and memcpy()/pointer arithmetic on a NULL pointer
     * is undefined even for a zero length. */
    if (read > 0) {
        memcpy(pv, obj->data, read);
        obj->data += read;
        obj->remaining -= read;
    }

    /* The byte-count out parameter is optional for COM callers. */
    if (pcbRead != NULL) {
        *pcbRead = read;
    }

    if (read == cb) {
        return S_OK;
    }

    /* Short read: tell the reader whether more data may arrive later. */
    if (!obj->is_final) {
        return E_PENDING;
    }

    return S_FALSE;
}
/*
 * ISequentialStream::Write for the input stream.
 *
 * The stream is read-only, so every write request is rejected with
 * E_NOTIMPL and none of the arguments are examined.
 */
static HRESULT STDMETHODCALLTYPE
stream_Write(ISequentialStream * This,
             const void *pv,
             ULONG cb,
             ULONG *pcbWritten)
{
    /* Parameters are required by the vtable signature only. */
    (void) This;
    (void) pv;
    (void) cb;
    (void) pcbWritten;

    return E_NOTIMPL;
}
/*
 * Method table shared by all xml_stream_t objects. The initializer is
 * positional, so the entries have to follow the member order of
 * ISequentialStreamVtbl.
 */
static ISequentialStreamVtbl stream_vtable =
{
stream_QueryInterface,
stream_AddRef,
stream_Release,
stream_Read,
stream_Write
};
static apr_status_t cleanup_xml_stream(void *ctx)
{
xml_stream_t *xml_stream = ctx;
ISequentialStream_Release(&xml_stream->sequental_stream);
return APR_SUCCESS;
}
/*
 * Allocates an empty input stream on the C heap with a reference count of
 * one, and registers a cleanup on POOL that releases that reference.
 *
 * Returns the new stream, or NULL when the allocation fails (in which case
 * no cleanup is registered).
 */
static xml_stream_t *create_xml_stream(apr_pool_t *pool)
{
    /* Zero-initialized: no data, nothing remaining, not final. */
    xml_stream_t *stream = calloc(1, sizeof(*stream));

    if (stream != NULL) {
        stream->sequental_stream.lpVtbl = &stream_vtable;
        stream->refcount = 1;

        apr_pool_cleanup_register(pool, stream, cleanup_xml_stream,
                                  apr_pool_cleanup_null);
    }

    return stream;
}
static apr_status_t cleanup_parser(void *ctx)
{
apr_xml_parser *parser = ctx;
if (parser->xp->xml_reader) {
IXmlReader_Release(parser->xp->xml_reader);
parser->xp->xml_reader = NULL;
}
return APR_SUCCESS;
}
/*
 * Converts WLEN UTF-16 code units starting at WSTR into a NUL-terminated
 * UTF-8 string.
 *
 * utf_p      receives the converted string. It is allocated from POOL,
 *            except for empty input, where a static "" is returned.
 * utf_len_p  optional; receives the length in bytes, excluding the NUL.
 *
 * Returns APR_SUCCESS, APR_ENOMEM when the allocation yields NULL, or the
 * error reported by apr_conv_utf16_to_utf8().
 */
static apr_status_t wstr2utf(const char **utf_p, apr_size_t *utf_len_p,
                             LPCWSTR wstr, apr_size_t wlen,
                             apr_pool_t *pool)
{
    char *utf = "";
    apr_size_t utf_len = 0;

    if (wlen > 0) {
        /* Reserve three output bytes per input code unit. */
        apr_size_t capacity = wlen * 3;
        apr_size_t unused = capacity;
        apr_status_t rv;

        utf = apr_palloc(pool, capacity + 1);
        if (utf == NULL) {
            return APR_ENOMEM;
        }

        rv = apr_conv_utf16_to_utf8(wstr, &wlen, utf, &unused);
        if (rv != APR_SUCCESS) {
            return rv;
        }

        /* 'unused' now holds the space left over in the output buffer. */
        utf_len = capacity - unused;
        utf[utf_len] = 0;
    }

    *utf_p = utf;
    if (utf_len_p != NULL) {
        *utf_len_p = utf_len;
    }

    return APR_SUCCESS;
}
/*
 * Maps an XmlLite error code to a static English description.
 *
 * Returns an empty string for codes that have no entry in the table.
 */
static char * get_xmllite_errmsg(HRESULT hr)
{
    static const struct {
        HRESULT code;
        char *text;
    } messages[] = {
        /* Stream and encoding errors. */
        { MX_E_INPUTEND, "unexpected end of input" },
        { MX_E_ENCODING, "unrecognized encoding" },
        { MX_E_ENCODINGSWITCH, "unable to switch the encoding" },
        { MX_E_ENCODINGSIGNATURE, "unrecognized input signature" },

        /* Well-formedness errors. */
        { WC_E_WHITESPACE, "whitespace expected" },
        { WC_E_SEMICOLON, "semicolon expected" },
        { WC_E_GREATERTHAN, "'>' expected" },
        { WC_E_QUOTE, "quote expected" },
        { WC_E_EQUAL, "equal expected" },
        { WC_E_LESSTHAN,
          "well-formedness constraint: no '<' in attribute value" },
        { WC_E_HEXDIGIT, "hexadecimal digit expected" },
        { WC_E_DIGIT, "decimal digit expected" },
        { WC_E_LEFTBRACKET, "'[' expected" },
        { WC_E_LEFTPAREN, "'(' expected" },
        { WC_E_XMLCHARACTER, "illegal xml character" },
        { WC_E_NAMECHARACTER, "illegal name character" },
        { WC_E_SYNTAX, "incorrect document syntax" },
        { WC_E_CDSECT, "incorrect CDATA section syntax" },
        { WC_E_COMMENT, "incorrect comment syntax" },
        { WC_E_CONDSECT, "incorrect conditional section syntax" },
        { WC_E_DECLATTLIST, "incorrect ATTLIST declaration syntax" },
        { WC_E_DECLDOCTYPE, "incorrect DOCTYPE declaration syntax" },
        { WC_E_DECLELEMENT, "incorrect ELEMENT declaration syntax" },
        { WC_E_DECLENTITY, "incorrect ENTITY declaration syntax" },
        { WC_E_DECLNOTATION, "incorrect NOTATION declaration syntax" },
        { WC_E_NDATA, "NDATA expected" },
        { WC_E_PUBLIC, "PUBLIC expected" },
        { WC_E_SYSTEM, "SYSTEM expected" },
        { WC_E_NAME, "name expected" },
        { WC_E_ROOTELEMENT, "one root element" },
        { WC_E_ELEMENTMATCH,
          "well-formedness constraint: element type match" },
        { WC_E_UNIQUEATTRIBUTE,
          "well-formedness constraint: unique attribute spec" },
        { WC_E_TEXTXMLDECL, "text/xmldecl not at the beginning of input" },
        { WC_E_LEADINGXML, "leading \"xml\"" },
        { WC_E_TEXTDECL, "incorrect text declaration syntax" },
        { WC_E_XMLDECL, "incorrect xml declaration syntax" },
        { WC_E_ENCNAME, "incorrect encoding name syntax" },
        { WC_E_PUBLICID, "incorrect public identifier syntax" },
        { WC_E_PESINTERNALSUBSET,
          "well-formedness constraint: pes in internal subset" },
        { WC_E_PESBETWEENDECLS,
          "well-formedness constraint: pes between declarations" },
        { WC_E_NORECURSION, "well-formedness constraint: no recursion" },
        { WC_E_ENTITYCONTENT, "entity content not well formed" },
        { WC_E_UNDECLAREDENTITY,
          "well-formedness constraint: undeclared entity" },
        { WC_E_PARSEDENTITY, "well-formedness constraint: parsed entity" },
        { WC_E_NOEXTERNALENTITYREF,
          "well-formedness constraint: no external entity references" },
        { WC_E_PI, "incorrect processing instruction syntax" },
        { WC_E_SYSTEMID, "incorrect system identifier syntax" },
        { WC_E_QUESTIONMARK, "'?' expected" },
        { WC_E_CDSECTEND, "no ']]>' in element content" },
        { WC_E_MOREDATA, "not all chunks of value have been read" },
        { WC_E_DTDPROHIBITED, "DTD was found but is prohibited" },
        { WC_E_INVALIDXMLSPACE, "xml:space attribute with invalid value" },

        /* Namespace errors. */
        { NC_E_QNAMECHARACTER, "illegal qualified name character" },
        { NC_E_QNAMECOLON, "multiple colons in qualified name" },
        { NC_E_NAMECOLON, "colon in name" },
        { NC_E_DECLAREDPREFIX, "declared prefix" },
        { NC_E_UNDECLAREDPREFIX, "undeclared prefix" },
        { NC_E_EMPTYURI, "non default namespace with empty uri" },
        { NC_E_XMLPREFIXRESERVED,
          "\"xml\" prefix is reserved and must have the "
          "http://www.w3.org/XML/1998/namespace URI" },
        { NC_E_XMLNSPREFIXRESERVED,
          "\"xmlns\" prefix is reserved for use by XML" },
        { NC_E_XMLURIRESERVED,
          "xml namespace URI (http://www.w3.org/XML/1998/namespace) must "
          "be assigned only to prefix \"xml\"" },
        { NC_E_XMLNSURIRESERVED,
          "xmlns namespace URI (http://www.w3.org/2000/xmlns/) is "
          "reserved and must not be used" },

        /* Limit violations. */
        { SC_E_MAXELEMENTDEPTH, "element depth exceeds limit" },
        { SC_E_MAXENTITYEXPANSION, "entity expansion exceeds limit" },

        /* Character entity errors. */
        { XML_E_INVALID_DECIMAL,
          "character in character entity is not a decimal digit "
          "as was expected." },
        { XML_E_INVALID_HEXIDECIMAL,
          "character in character entity is not a hexadecimal "
          "digit as was expected." },
        { XML_E_INVALID_UNICODE,
          "character entity has invalid Unicode value." }
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(messages) / sizeof(messages[0]); idx++) {
        if (messages[idx].code == hr) {
            return messages[idx].text;
        }
    }

    /* Unknown code: no description available. */
    return "";
}
/*
 * Records a failed XmlLite call in PARSER: stores the HRESULT in xp_err and
 * its description in xp_msg, and flags the parser as being in error.
 *
 * Always returns APR_EGENERAL so callers can return the result directly.
 */
static apr_status_t handle_xmllite_err(apr_xml_parser *parser, HRESULT hr)
{
    /* APR_XML_ERROR_EXPAT is a misnomer here; the value is used as a test
       for (any) parser error. */
    parser->error = APR_XML_ERROR_EXPAT;

    parser->xp_err = hr;
    parser->xp_msg = get_xmllite_errmsg(hr);

    return APR_EGENERAL;
}
/* Forward declaration: read_state() installs cdata_state() as the current
 * state before cdata_state() is defined. */
static apr_status_t
cdata_state(apr_xml_parser *parser,
apr_pool_t *scratch_pool);
/*
 * State function that advances the reader by one node and dispatches on the
 * node type:
 *
 *  - Element:     converts the qualified name and all attributes to UTF-8
 *                 and calls start_func; for an empty element (<a/>) it calls
 *                 end_func right away.
 *  - EndElement:  calls end_func with the qualified name.
 *  - Text/CDATA:  switches to cdata_state() to deliver the value in chunks.
 *  - Whitespace:  handled like text, except at depth 0 where it is ignored.
 *  - None:        end of input.
 *
 * SCRATCH_POOL holds every temporary string and the attribute array; it is
 * only required to stay valid until this function returns.
 *
 * Returns APR_SUCCESS when a node was processed, APR_EAGAIN when the reader
 * needs more input (E_PENDING), APR_EOF at the end of the document,
 * APR_EGENERAL (via handle_xmllite_err()) on a reader failure, or the error
 * from the UTF-8 conversion.
 */
static apr_status_t
read_state(apr_xml_parser *parser,
           apr_pool_t *scratch_pool)
{
    IXmlReader *reader = parser->xp->xml_reader;
    XmlNodeType node_type;
    apr_status_t status;
    HRESULT hr;

    hr = IXmlReader_Read(reader, &node_type);
    if (hr == E_PENDING) {
        return APR_EAGAIN;
    }
    else if (FAILED(hr)) {
        return handle_xmllite_err(parser, hr);
    }

    if (node_type == XmlNodeType_Element) {
        /* Terminator-only list used for elements without attributes. */
        static const char *no_attrs[] = { NULL, NULL };
        LPCWSTR wname;
        UINT wname_len;
        UINT attr_count;
        const char **attrs;
        const char *elem_name;

        hr = IXmlReader_GetQualifiedName(reader, &wname, &wname_len);
        if (FAILED(hr)) {
            return handle_xmllite_err(parser, hr);
        }

        status = wstr2utf(&elem_name, NULL, wname, wname_len, scratch_pool);
        if (status) {
            return status;
        }

        hr = IXmlReader_GetAttributeCount(reader, &attr_count);
        if (FAILED(hr)) {
            return handle_xmllite_err(parser, hr);
        }

        if (attr_count > 0) {
            UINT i;

            /* Name/value pairs followed by a NULL/NULL terminator. The
             * strings it points to live in scratch_pool, so the array is
             * allocated there too, not in the long-lived parser pool. */
            attrs = apr_palloc(scratch_pool,
                               sizeof(*attrs) * 2
                               * ((apr_size_t) attr_count + 1));
            if (attrs == NULL) {
                return APR_ENOMEM;
            }

            hr = IXmlReader_MoveToFirstAttribute(reader);
            if (FAILED(hr)) {
                return handle_xmllite_err(parser, hr);
            }

            for (i = 0; i < attr_count; i++) {
                hr = IXmlReader_GetQualifiedName(reader, &wname, &wname_len);
                if (FAILED(hr)) {
                    return handle_xmllite_err(parser, hr);
                }

                status = wstr2utf(&attrs[i * 2], NULL, wname, wname_len,
                                  scratch_pool);
                if (status) {
                    return status;
                }

                hr = IXmlReader_GetValue(reader, &wname, &wname_len);
                if (FAILED(hr)) {
                    return handle_xmllite_err(parser, hr);
                }

                status = wstr2utf(&attrs[i * 2 + 1], NULL, wname, wname_len,
                                  scratch_pool);
                if (status) {
                    return status;
                }

                hr = IXmlReader_MoveToNextAttribute(reader);
                if (FAILED(hr)) {
                    return handle_xmllite_err(parser, hr);
                }
            }

            attrs[i * 2] = NULL;
            attrs[i * 2 + 1] = NULL;

            /* Go back to the owning element so that the empty-element
             * query below refers to it and not to the last attribute. */
            hr = IXmlReader_MoveToElement(reader);
            if (FAILED(hr)) {
                return handle_xmllite_err(parser, hr);
            }
        }
        else {
            attrs = no_attrs;
        }

        parser->xp->start_func(parser, elem_name, attrs);

        /* An empty element produces no EndElement node of its own. */
        if (IXmlReader_IsEmptyElement(reader)) {
            parser->xp->end_func(parser, elem_name);
        }
    }
    else if (node_type == XmlNodeType_EndElement) {
        LPCWSTR wname;
        UINT wname_len;
        const char *elem_name;

        hr = IXmlReader_GetQualifiedName(reader, &wname, &wname_len);
        if (FAILED(hr)) {
            return handle_xmllite_err(parser, hr);
        }

        status = wstr2utf(&elem_name, NULL, wname, wname_len, scratch_pool);
        if (status) {
            return status;
        }

        parser->xp->end_func(parser, elem_name);
    }
    else if (node_type == XmlNodeType_CDATA ||
             node_type == XmlNodeType_Text) {
        parser->xp->current_state = cdata_state;
    }
    else if (node_type == XmlNodeType_Whitespace) {
        UINT depth;

        hr = IXmlReader_GetDepth(reader, &depth);
        if (FAILED(hr)) {
            return handle_xmllite_err(parser, hr);
        }

        /* Report whitespaces as cdata (the same as Expat does), but
           ignore them when depth == 0. */
        if (depth > 0) {
            parser->xp->current_state = cdata_state;
        }
    }
    else if (node_type == XmlNodeType_None) {
        return APR_EOF;
    }

    return APR_SUCCESS;
}
/*
 * State function that delivers the value of the current text node.
 *
 * Reads one chunk of at most 4000 UTF-16 code units, converts it to UTF-8 in
 * SCRATCH_POOL and passes it to cdata_func. When the reader reports S_FALSE
 * the parser switches back to read_state().
 *
 * Returns APR_SUCCESS, APR_EAGAIN when the reader needs more input
 * (E_PENDING), APR_EGENERAL on a reader failure, or the conversion error.
 */
static apr_status_t
cdata_state(apr_xml_parser *parser,
            apr_pool_t *scratch_pool)
{
    WCHAR chunk[4000];
    UINT chunk_len;
    HRESULT hr;

    hr = IXmlReader_ReadValueChunk(parser->xp->xml_reader, chunk,
                                   sizeof(chunk) / sizeof(chunk[0]),
                                   &chunk_len);
    if (hr == E_PENDING) {
        return APR_EAGAIN;
    }
    if (FAILED(hr)) {
        return handle_xmllite_err(parser, hr);
    }

    if (chunk_len > 0) {
        const char *text;
        apr_size_t text_len;
        apr_status_t rv;

        rv = wstr2utf(&text, &text_len, chunk, chunk_len, scratch_pool);
        if (rv) {
            return rv;
        }

        parser->xp->cdata_func(parser, text, (int) text_len);
    }

    /* S_FALSE: resume reading nodes on the next iteration. */
    if (hr == S_FALSE) {
        parser->xp->current_state = read_state;
    }

    return APR_SUCCESS;
}
/*
 * Feeds LEN bytes at DATA to the parser and runs the state machine until a
 * state function returns something other than APR_SUCCESS.
 *
 * IS_FINAL is stored in the input stream so that a short read can be
 * reported as end of input instead of "more data pending".
 *
 * APR_EAGAIN (input exhausted) and APR_EOF (document finished) are both
 * reported to the caller as APR_SUCCESS; any other status is returned as is.
 */
static apr_status_t do_parse(apr_xml_parser *parser,
                             const char *data, apr_size_t len,
                             int is_final)
{
    struct xmllite_parser_s *xp = parser->xp;
    xml_stream_t *input = xp->input_stream;
    apr_pool_t *iterpool = xp->iterpool;
    apr_status_t rv;

    /* Expose the new chunk to the reader through the input stream. */
    input->data = data;
    input->remaining = len;
    input->is_final = is_final;

    do {
        /* Each step gets an empty scratch pool. */
        apr_pool_clear(iterpool);
        rv = xp->current_state(parser, iterpool);
    } while (rv == APR_SUCCESS);

    if (rv == APR_EAGAIN || rv == APR_EOF) {
        return APR_SUCCESS;
    }

    return rv;
}
/* XmlLite back-end entry points, in the positional order of XMLParserImpl:
 * the parse function followed by the cleanup function. */
static XMLParserImpl xml_parser_xmllite = {
do_parse,
cleanup_parser
};
/* Returns the XmlLite implementation of the parser back-end interface. */
XMLParserImpl* apr_xml_get_parser_impl(void)
{
    XMLParserImpl *impl = &xml_parser_xmllite;

    return impl;
}
/* The namespace URI "DAV:" written as explicit ASCII byte values
 * (0x44 'D', 0x41 'A', 0x56 'V', 0x3A ':'), NUL-terminated.
 * NOTE(review): presumably spelled this way to stay ASCII regardless of the
 * execution character set - confirm. */
static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
/*
 * Creates an XML parser backed by XmlLite.
 *
 * pool        pool that owns the parser, its document and all cleanups.
 * start_func  callback of type
 *             void (*)(void *userdata, const char *name, const char **attrs)
 * end_func    callback of type void (*)(void *userdata, const char *name)
 * cdata_func  callback of type
 *             void (*)(void *userdata, const char *data, int len)
 *
 * The callbacks are passed as void * and stored as function pointers; they
 * are later invoked with the parser itself as userdata.
 *
 * Returns the new parser, or NULL when the scratch pool, the input stream
 * or the XmlLite reader cannot be set up. If the input stream cannot be
 * allocated, the pool's abort function (when one is installed) is called
 * with APR_ENOMEM first.
 */
apr_xml_parser* apr_xml_parser_create_internal(apr_pool_t *pool,
                                               void *start_func,
                                               void *end_func,
                                               void *cdata_func)
{
    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
    HRESULT hr;

    parser->impl = apr_xml_get_parser_impl();

    parser->p = pool;
    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));

    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));

    /* ### is there a way to avoid hard-coding this? */
    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);

    parser->xp = apr_pcalloc(pool, sizeof(struct xmllite_parser_s));
    parser->xp->current_state = read_state;
    parser->xp->start_func = start_func;
    parser->xp->end_func = end_func;
    parser->xp->cdata_func = cdata_func;

    /* do_parse() clears this pool unconditionally, so it must exist. */
    if (apr_pool_create(&parser->xp->iterpool, pool) != APR_SUCCESS) {
        return NULL;
    }

    parser->xp->input_stream = create_xml_stream(pool);
    if (parser->xp->input_stream == NULL) {
        /* A pool is not required to have an abort function installed. */
        apr_abortfunc_t abort_fn = apr_pool_abort_get(pool);

        if (abort_fn != NULL) {
            abort_fn(APR_ENOMEM);
        }
        return NULL;
    }

    hr = CreateXmlReader(&IID_IXmlReader,
                         (void **) &parser->xp->xml_reader, NULL);
    if (FAILED(hr)) {
        return NULL;
    }

    /* Registered after the stream cleanup, so on pool cleanup the reader is
     * released before the pool's reference to its input stream. */
    apr_pool_cleanup_register(pool, parser, cleanup_parser,
                              apr_pool_cleanup_null);

    hr = IXmlReader_SetInput(parser->xp->xml_reader,
                             (IUnknown*) &parser->xp->input_stream->sequental_stream);
    if (FAILED(hr)) {
        return NULL;
    }

    return parser;
}
#endif