blob: b5f8299a67eb999caa1000ffd536326025cb0407 [file] [log] [blame]
/*
* Copyright 2003-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @author Dasarath Weerathunga (dasarath@opensource.lk)
* @author Dinesh Premalal (xydinesh@gmail.com
premalal@opensource.lk)
*
*@author Susantha Kumara ( susantha@opensource.lk )
*/
#include "XmlPullParser.hpp"
/*! Applies the same displacement to every token recorded so far by calling
 *  relocate() on each of them.  read() uses this after the buffered bytes
 *  have been moved (shift) or the buffer storage has changed (grow).
 *
 *  \param offset displacement handed unchanged to token_t::relocate(). */
void
XmlPullParser::relocateTokens (int offset)
{
  const int total = token->count ();
  int idx = 0;
  while (idx < total)
    {
      token->get (idx)->relocate (offset);
      ++idx;
    }
}
/*! Drops the already consumed front part of the buffer: the bytes in
 *  [offset, _next) are moved to the start of the buffer and the _next and
 *  last cursors are rebased accordingly, after which offset is 0. */
void
XmlPullParser::shift ()
{
  char *const base = buffer->get ();
  memmove (base, base + offset, _next - offset);
  last -= offset;
  _next -= offset;
  offset = 0;
}
/*! Pulls more bytes from the input stream into the buffer, starting at
 *  position _next.
 *
 *  When _next has reached the end of the buffer, room is made first: if a
 *  consumed prefix exists (offset > 0) the live bytes are slid to the front,
 *  otherwise the buffer is grown.  In both cases the recorded tokens are
 *  relocated by the distance their bytes moved.
 *
 *  \return non-zero when the stream delivered no bytes, 0 otherwise. */
int
XmlPullParser::read ()
{
  const bool full = (_next == buffer->getSize ());
  if (full && offset > 0)
    {
      relocateTokens (offset);
      shift ();
    }
  else if (full)
    {
      /* Remember the address before grow() so that the tokens can be
         relocated by the difference between old and new storage. */
      char *before = buffer->get ();
      buffer->grow ();
      relocateTokens (before - buffer->get ());
    }

  const int got = s->read (buffer->get (), _next, buffer->getSize () - _next);
  last += got;
  return !got;
}
/*! check with BOM
*This may or may not be presant with utf8 documents
*But UTF-16 document must have BOM 0xff 0xfe in any order,
* for LE = 0xff 0xfe,
* for BE = 0xfe 0xff,
* for UTF-8 = 0xef 0xbb 0xbf ( not necessary) */
int
XmlPullParser::isUtf8 ()
{
UTF8_char utf[3] = { 0, 0, 0 };
for (int i = 0; i < 3; i++)
utf[i] = (UTF8_char) buffer->get ()[_next++];
if (0xef == utf[0] && 0xbb == utf[1] && 0xbf == utf[2])
{
unicodeState = None;
return _next;
}
else
return _next = 0;
}
/*! check for BOM */
int
XmlPullParser::isUtf16 ()
{
if (_next == 0)
{
UTF8_char utf[2];
int i;
for (i = 0; i < 2; i++)
utf[i] = (UTF8_char) buffer->get ()[_next++];
if (0xff == utf[0] && 0xfe == utf[1])
return LE;
else if (0xfe == utf[0] && 0xff == utf[1])
return BE;
else
{
_next = 0;
return None;
}
}
else
return None;
}
/*! Returns the next significant byte of a UTF-16 encoded document.
 * The parser is only interested in special characters such as "<", "/",
 * ">", "?", "!" and the quote characters, and nextCh() is supposed to
 * return those values.  They are all below 128, so no conversion is
 * needed here: zero bytes are skipped and the first non-zero byte is
 * returned.
 *
 * \param eof when non-zero, running out of input sets term and returns -1;
 *            when zero, running out of input throws a heap-allocated
 *            XmlPullParserException (by pointer).
 * \return the byte read, or -1 at end of input (only when \a eof is set). */
int
XmlPullParser::fromUtf16 (int eof)
{
/* One byte is consumed before the refill check below is made. */
/* NOTE(review): the buffer holds plain char (Buffer < char >), so where char
   is signed a byte >= 0x80 comes back negative and 0xFF equals the -1
   end-of-input marker - confirm whether that is acceptable here. */
int c = buffer->get ()[_next++];
/* The else below pairs with "if (eof)", not with the outer if. */
if (_next > last && read ())
if (eof)
{
term = 1;
return -1;
}
else
throw new XmlPullParserException ();
/* Disabled alternative end-of-buffer test, kept for reference. */
/*UTF16_char d = *((UTF16_char *) & buffer->get ()[_next]);
if (!d)
{
term = 1;
return -1;
}*/
/* Skip zero bytes until a non-zero one is found. */
/* NOTE(review): this loop advances _next without comparing it to last or
   calling read() - confirm it cannot run past the buffered data. */
while (!c)
c = buffer->get ()[_next++];
return c;
}
/*! Returns the next character of the document, refilling the buffer from
 *  the input stream once every buffered byte has been consumed.
 *
 *  At buffer position 0 the byte-order mark is probed: a UTF-8 BOM is
 *  skipped by isUtf8(); otherwise unicodeState is taken from isUtf16().
 *  When unicodeState is not None the character is fetched via fromUtf16().
 *
 *  \param eof when non-zero, end of input sets term and returns -1; when
 *             zero, end of input throws a heap-allocated
 *             XmlPullParserException (by pointer).
 *  \return the character as a non-negative value, or -1 at end of input
 *          (only when \a eof is set). */
int
XmlPullParser::nextCh (int eof)
{
  if (_next > last && read ())
    {
      if (!eof)
        throw new XmlPullParserException ();
      term = 1;
      return -1;
    }

  /* NOTE(review): shift() can bring _next back to 0 in the middle of a
     document (when reset() left offset == _next at the end of the buffer),
     which repeats this BOM probe on ordinary data - confirm and guard with
     a dedicated flag if so. */
  if (_next == 0 && !isUtf8 ())
    unicodeState = isUtf16 ();

  if (unicodeState != None)
    return fromUtf16 (eof);

  /* Widen through unsigned char: with a signed plain char a 0xFF byte would
     otherwise sign-extend to -1 and be mistaken for the end-of-input marker
     by callers such as tokenize() and parseCharData(). */
  return static_cast < unsigned char >(buffer->get ()[_next++]);
}
/*! Returns a pointer to the character most recently handed out.
 *
 *  When term is set (buffer termination) this is simply the byte before
 *  _next.  Otherwise zero bytes immediately before _next are stepped over
 *  backwards and the address of the first non-zero byte found is returned. */
char *
XmlPullParser::lastCh ()
{
  int back = 1;
  char ch = buffer->get ()[_next - back];

  /* If term has been set then this is a buffer termination. */
  if (term)
    return buffer->get () + _next - 1;

  for (; ch == 0; ch = buffer->get ()[_next - back])
    ++back;
  return buffer->get () + _next - back;
}
/*! Prepares the parser for the next event: everything consumed so far is
 *  marked reclaimable (offset = _next), the current name / prefix /
 *  namespace are forgotten, the termination flag is cleared and the token
 *  and attribute lists are emptied.
 *
 *  \param del when zero and the previous event was an ETag or an
 *             EmptyElemTag, that element is closed via closeElement();
 *             any non-zero value skips this step. */
void
XmlPullParser::reset (int del)
{
  offset = _next;
  term = 0;
  namesp = NULL;
  prefix = NULL;
  name = NULL;
  token->trunc (0);
  attr->trunc (0);

  if (del)
    return;
  if (event == ETag || event == EmptyElemTag)
    closeElement ();
}
void
XmlPullParser::openToken ()
{
token_t *t = token->append ();
t->type = token_t::Unknown;
t->start = lastCh ();
}
void
XmlPullParser::closeToken (int type)
{
token_t *t = token->last ();
t->type = type;
t->end = lastCh () - 1;
}
/*! Creates a parser that reads its document from \a s.
 *
 *  The tokenizer starts in state S_1 with an empty buffer (last == -1,
 *  _next == 0, offset == 0).  Initial capacities are 1024 bytes for the
 *  character buffer, 64 entries for tokens, namespaces and attributes, and
 *  16 entries for the element depth stack.
 *
 *  \param s input stream supplying the document; stored as given. */
XmlPullParser::XmlPullParser (InputStream * s)
{
  this->s = s;
  state = S_1;
  offset = 0;
  last = -1;
  _next = 0;

  /* The first next() call runs reset(), which reads event before
     tokenize() has assigned it.  Start from a value that is neither ETag
     nor EmptyElemTag so that reset() does not call closeElement() on an
     empty depth stack, and give the remaining state defined values too. */
  event = Content;
  term = 0;
  unicodeState = None;
  name = NULL;
  prefix = NULL;
  namesp = NULL;

  buffer = new Buffer < char >(1024);
  token = new Array < token_t > (64);
  namespa = new Array < namespace_t > (64);
  depth = new Array < depth_t > (16);
  attr = new Array < attr_t > (64);
}
/*! Releases everything the parser owns.
 *
 *  reset() is called with a non-zero argument so that no element is closed
 *  during destruction.  Namespace prefix/uri strings are normally released
 *  by closeElement(); entries still on the namespace stack (document
 *  abandoned part-way, or elements never closed) are freed here so they do
 *  not leak. */
XmlPullParser::~XmlPullParser ()
{
  reset (-1);

  const int remaining = namespa->count ();
  for (int i = 0; i < remaining; i++)
    {
      namespace_t *ns = namespa->get (i);
      free (ns->prefix);        /* free(NULL) is a no-op */
      free (ns->uri);
    }

  delete buffer;
  delete token;
  delete namespa;
  delete depth;
  delete attr;
}
/*! Tells whether \a c is one of the four white space characters tested
 *  here: tab (0x9), line feed (0xA), carriage return (0xD) or space (0x20).
 *
 *  \return 1 for white space, 0 otherwise. */
int
XmlPullParser::isS (int c)
{
  switch (c)
    {
    case 0x9:
    case 0xA:
    case 0xD:
    case 0x20:
      return 1;
    default:
      return 0;
    }
}
/*! Skips white space starting with \a c.
 *
 *  \param c character already read.
 *  \return the first character that is not white space (which is \a c
 *          itself when it is not white space). */
int
XmlPullParser::parseS (int c)
{
  for (; isS (c); c = nextCh ())
    ;
  return c;
}
/*! Parses an '=' that may be surrounded by white space.
 *
 *  \param c character already read.
 *  \return the first non white space character after the '='.
 *  Throws a heap-allocated XmlPullParserException (by pointer) when the
 *  first non white space character is not '='. */
int
XmlPullParser::parseEq (int c)
{
  const int afterSpace = parseS (c);
  if (afterSpace != '=')
    throw new XmlPullParserException ();
  return parseS (nextCh ());
}
/*! Parses a (possibly prefixed) name beginning at \a c.
 *
 *  Characters are consumed until white space, '/', '?', '=' or '>' is met.
 *  A ':' closes the token collected so far as a Prefix token and opens a
 *  new one after it; the token open at the end is closed as a Name token.
 *
 *  \param c first character of the name (already read).
 *  \return the delimiter that ended the name. */
int
XmlPullParser::parseNameToken (int c)
{
  openToken ();
  for (;;)
    {
      const bool delimiter = isS (c) || c == '/' || c == '?' || c == '='
        || c == '>';
      if (delimiter)
        break;

      if (c != ':')
        {
          c = nextCh ();
          continue;
        }

      /* Prefix ends at the ':'; the local name starts with the character
         read next. */
      closeToken (token_t::Prefix);
      c = nextCh ();
      openToken ();
    }
  closeToken (token_t::Name);
  return c;
}
/*! Parses a quoted attribute value and records it as an AttValue token.
 *
 *  \param quote opening quote character; must be ' or ".
 *  \return the character following the closing quote.
 *  Throws a heap-allocated XmlPullParserException (by pointer) when
 *  \a quote is not a quote character or when '&' or '<' is met before the
 *  closing quote. */
int
XmlPullParser::parseAttValueToken (int quote)
{
  if (quote != '\'' && quote != '"')
    throw new XmlPullParserException ();

  int c = nextCh ();
  openToken ();
  for (; c != '&' && c != '<'; c = nextCh ())
    {
      if (c != quote)
        continue;
      closeToken (token_t::AttValue);
      return nextCh ();
    }
  throw new XmlPullParserException ();
}
/*! Parses one attribute: name, '=', quoted value and trailing white space.
 *
 *  \param c first character of the attribute name (already read).
 *  \return the first non white space character after the value. */
int
XmlPullParser::parseAttribute (int c)
{
  const int afterName = parseNameToken (c);
  const int quote = parseEq (afterName);
  const int afterValue = parseAttValueToken (quote);
  return parseS (afterValue);
}
/*! Parses the remainder of an XML declaration; it is called once "<?" has
 *  been read.
 *
 *  Expects "xml", one attribute, then optionally an attribute starting with
 *  'e' and one starting with 's', followed by "?>".  On success event is set
 *  to XMLDecl.  Any deviation throws a heap-allocated
 *  XmlPullParserException (by pointer). */
void
XmlPullParser::parseXMLDecl ()
{
  if (nextCh () != 'x' || nextCh () != 'm' || nextCh () != 'l')
    throw new XmlPullParserException ();

  int c = parseAttribute (parseS (nextCh ()));
  if ('e' == c)
    c = parseAttribute (c);
  if ('s' == c)
    c = parseAttribute (c);

  if (c != '?' || nextCh () != '>')
    throw new XmlPullParserException ();
  event = XMLDecl;
}
/*! Parses a start tag or an empty-element tag, the '<' having been read.
 *
 *  After the element name, attributes are parsed until '>' or '/' is met.
 *  event is STag for "...>" and EmptyElemTag for ".../>".
 *
 *  \param c first character of the element name (already read).
 *  Throws a heap-allocated XmlPullParserException (by pointer) when '/' is
 *  not immediately followed by '>'. */
void
XmlPullParser::parseSTagOrEmptyElemTag (int c)
{
  c = parseS (parseNameToken (c));
  event = STag;

  while (c != '/' && c != '>')
    c = parseAttribute (c);

  if (c == '>')
    return;

  /* c is '/': the tag must end right here with '>'. */
  event = EmptyElemTag;
  if (nextCh () != '>')
    throw new XmlPullParserException ();
}
/*! Parses an end tag, "</" having been read: a name, optional white space
 *  and '>'.  On success event is set to ETag; otherwise a heap-allocated
 *  XmlPullParserException is thrown (by pointer). */
void
XmlPullParser::parseETag ()
{
  const int closing = parseS (parseNameToken (nextCh ()));
  if (closing != '>')
    throw new XmlPullParserException ();
  event = ETag;
}
/*! Collects character data up to the next '<' or the end of input and
 *  records it as a CharData token; event is set to Content.
 *
 *  \return '<' when markup follows, 0 when the input ended first. */
int
XmlPullParser::parseCharData ()
{
  event = Content;
  openToken ();

  int c;
  do
    c = nextCh (-1);
  while (c != -1 && c != '<');

  closeToken (token_t::CharData);
  return (c == -1) ? 0 : c;
}
/*! Runs the tokenizer state machine until one complete event has been
 * recognised.
 *
 * States as used below:
 *   S_1 - start of the document, a '<' is required;
 *   S_2 - right after that first '<': XML declaration or first start tag;
 *   S_3 - between markup: character data or the '<' of the next tag;
 *   S_4 - right after a '<' met in state S_3;
 *   S_0 - error, a heap-allocated XmlPullParserException is thrown.
 *
 * \return the event recognised (the value of event), or -1 when nextCh()
 *         reports the end of input. */
int
XmlPullParser::tokenize ()
{
do
{
/* Non-zero eof argument: end of input yields -1 instead of an exception. */
int c = nextCh (-1);
if (-1 == c)
return -1;
switch (state)
{
case S_1:
/* The document has to begin with '<'. */
if ('<' == c)
state = S_2;
else
state = S_0;
break;
case S_2:
/* "<?" introduces the XML declaration, anything else a start tag. */
if ('?' == c)
parseXMLDecl ();
else
parseSTagOrEmptyElemTag (c);
state = S_3;
break;
case S_3:
if ('<' == c)
state = S_4;
else
{
/* parseCharData() returns '<' when markup follows the text and 0 when the
   input ended first. */
c = parseCharData ();
if ('<' == c)
{
/* The '<' has already been consumed, so resume right after it next time. */
state = S_4;
return event;
}
else if ('\0' == c)
state = S_3;
else
state = S_0;
}
break;
case S_4:
if ('/' == c)
{
parseETag ();
state = S_3;
}
/* "<!" and "<?" at this point are rejected through the error state. */
else if ('!' == c)
state = S_0;
else if ('?' == c)
state = S_0;
else
{
parseSTagOrEmptyElemTag (c);
state = S_3;
}
}
if (state == S_0)
throw new XmlPullParserException ();
}
/* Reaching S_3 means a complete event has been parsed. */
while (state != S_3);
return event;
}
/*! Pushes a default namespace declaration (xmlns="...") onto the namespace
 *  stack.  The entry has no prefix; the URI text is obtained from the token
 *  via toString() and is released later with free().
 *
 *  \param uri token holding the namespace URI.
 *  \return the number of entries on the namespace stack after the addition
 *          (the function previously ended without returning a value, which
 *          is undefined behaviour for a function declared int). */
int
XmlPullParser::addNamespace (token_t * uri)
{
  namespace_t *r = namespa->append ();
  r->prefix = NULL;
  r->length = 0;
  r->uri = uri->toString (unicodeState);
  /* Do not hand a NULL pointer to strlen(). */
  r->lengthUri = r->uri ? strlen (r->uri) : 0;
  return namespa->count ();
}
/*! Pushes a prefixed namespace declaration (xmlns:prefix="...") onto the
 *  namespace stack.  Prefix and URI texts are obtained from the tokens via
 *  toString() and are released later with free().
 *
 *  \param prefix token holding the declared prefix.
 *  \param uri    token holding the namespace URI.
 *  \return the number of entries on the namespace stack after the addition
 *          (the function previously ended without returning a value, which
 *          is undefined behaviour for a function declared int). */
int
XmlPullParser::addNamespace (token_t * prefix, token_t * uri)
{
  namespace_t *r = namespa->append ();
  r->prefix = prefix->toString (unicodeState);
  /* Do not hand a NULL pointer to strlen(). */
  r->length = r->prefix ? strlen (r->prefix) : 0;
  r->uri = uri->toString (unicodeState);
  r->lengthUri = r->uri ? strlen (r->uri) : 0;
  return namespa->count ();
}
/*! Records an attribute of the current tag.  Only the token pointers are
 *  stored; the namespace is left unresolved (NULL) until it is asked for.
 *
 *  \param prefix token of the attribute prefix, or NULL when there is none.
 *  \param name   token of the attribute name.
 *  \param value  token of the attribute value.
 *  \return the number of attributes recorded after the addition (the
 *          function previously ended without returning a value, which is
 *          undefined behaviour for a function declared int). */
int
XmlPullParser::addAttribute (token_t * prefix, token_t * name,
                             token_t * value)
{
  attr_t *r = attr->append ();
  r->prefix = prefix;
  r->name = name;
  r->value = value;
  r->namesp = NULL;
  return attr->count ();
}
void
XmlPullParser::openElement ()
{
if (depth->count ())
{
depth_t *l = depth->last ();
depth_t *m = depth->append ();
m->first = l->first + l->count;
m->total = namespa->count ();
m->count = m->total - l->total;
}
else
{
depth_t *m = depth->append ();
m->first = 0;
m->total = namespa->count ();
m->count = m->total;
}
}
void
XmlPullParser::closeElement ()
{
depth_t *d = depth->last ();
if (d->count)
{
namespace_t *p = namespa->get (d->first);
for (int i = 0; i < d->count; i++, p++)
{
if (p->prefix)
free (p->prefix);
if(p->uri)
free(p->uri);
}
namespa->trunc (d->first);
}
depth->chip ();
}
/*! Advances to the next parsing event.
 *
 *  The state left by the previous event is reset, one event is tokenized,
 *  and for tags the collected tokens are interpreted:
 *   - XMLDecl / STag / EmptyElemTag: the tokens are walked backwards from
 *     the last one; every (prefix?, name, value) group becomes either a
 *     namespace declaration (prefix or unprefixed name equal to "xmlns") or
 *     an attribute, then a new element level is opened;
 *   - these and ETag: the first token(s) give the element prefix and name.
 *
 *  \return the event type, or -1 at the end of input. */
int
XmlPullParser::next ()
{
  token_t *p;
  reset ();
  switch (tokenize ())
    {
    case -1:
      return -1;
    case XMLDecl:
    case STag:
    case EmptyElemTag:
      p = token->last ();
      while (p->isOfType (token_t::AttValue))
        {
          token_t *value = p--;
          token_t *name = p--;
          if (p->isOfType (token_t::Prefix))
            {
              /* compare() yields 0 on a match (see the "!" below). */
              if (!p->compare ("xmlns", 5))
                addNamespace (name, value);
              else
                addAttribute (p, name, value);
              p--;
            }
          else if (!name->compare ("xmlns", 5))
            addNamespace (value);
          else
            addAttribute (NULL, name, value);
        }
      openElement ();
      /* fall through: tags also get their prefix / name set below */
    case ETag:
      p = token->get ();
      if (p->isOfType (token_t::Prefix))
        prefix = p++;
      name = p;
      /* fall through */
    case Content:
    default:
      /* This used to be spelled "defualt:", which is an ordinary goto label;
         the switch therefore had no default and an unexpected tokenize()
         result would run off the end of the function. */
      return event;
    }
}
/*! Looks up the namespace bound to \a prefix, searching the namespace stack
 *  from the most recent declaration backwards.
 *
 *  \param prefix prefix token to resolve, or NULL to look for the default
 *                (unprefixed) namespace.
 *  \return the matching namespace entry.
 *  Throws a heap-allocated XmlPullParserException (by pointer) when there
 *  is no matching declaration, including the case where no namespace has
 *  been declared at all. */
namespace_t *
XmlPullParser::getNamespace (token_t * prefix)
{
  /* With an empty stack there is no valid entry to inspect. */
  if (!namespa->count ())
    throw new XmlPullParserException ();

  namespace_t *p = namespa->last (), *q = namespa->get ();
  for (;; p--)
    {
      if (prefix == NULL)
        {
          if (p->prefix == NULL)
            return p;
        }
      else if (p->prefix && !prefix->compare (p->prefix, p->length))
        return p;

      if (p == q)
        throw new XmlPullParserException ();
    }
}
/*! Returns the name of the current element as produced by the name
 *  token's toString().
 *  NOTE(review): the string is presumably heap-allocated and owned by the
 *  caller - confirm against token_t::toString.
 *
 *  Throws a heap-allocated XmlPullParserException (by pointer) when the
 *  current event has no name. */
char *
XmlPullParser::getName ()
{
  if (!name)
    throw new XmlPullParserException ();
  return name->toString (unicodeState);
}
/*! Returns the prefix of the current element as produced by the prefix
 *  token's toString(), or NULL when the element has no prefix. */
char *
XmlPullParser::getPrefix ()
{
  return prefix ? prefix->toString (unicodeState) : NULL;
}
char*
XmlPullParser::getNamespaceUri ()
{
if (!namesp)
namesp = getNamespace (prefix);
return strndup (namesp->uri, namesp->lengthUri);
}
char*
XmlPullParser::getValue ()
{
return token->get ()->toString (unicodeState);
}
/*! Returns the number of levels currently on the element depth stack. */
int
XmlPullParser::getDepth ()
{
  const int levels = depth->count ();
  return levels;
}
/*! Returns the number of attributes recorded for the current event. */
int
XmlPullParser::getAttributeCount ()
{
  const int attributes = attr->count ();
  return attributes;
}
char*
XmlPullParser::getAttributeName (int i)
{
return attr->get (i)->name->toString (unicodeState);
}
char*
XmlPullParser::getAttributeValue (int i)
{
return attr->get (i)->value->toString (unicodeState);
}
char*
XmlPullParser::getAttributePrefix (int i)
{
attr_t *p = attr->get (i);
if (p->prefix)
return p->prefix->toString (unicodeState);
return NULL;
}
char*
XmlPullParser::getAttributeNamespaceUri (int i)
{
attr_t *p = attr->get (i);
if (!p->namesp)
p->namesp = getNamespace (p->prefix);
return strndup(p->namesp->uri, p->namesp->lengthUri);
}
int
XmlPullParser::getNamespaceCount (int i)
{
return depth->get (i - 1)->total;
}
char *
XmlPullParser::getNamespacePrefix (int i)
{
namespace_t *p = namespa->get (i);
if (p->prefix)
return strndup(p->prefix, p->length);
return NULL;
}
char *
XmlPullParser::getNamespaceUri (int i)
{
return strndup(namespa->get(i)->uri, namespa->get(i)->lengthUri);
}
/*! Returns a newly allocated copy (strndup) of the URI bound to \a prefix,
 *  searching the namespace stack from the most recent declaration
 *  backwards.
 *
 *  The stored prefix must equal \a prefix exactly.  The former test,
 *  strncmp() limited to the stored prefix length, also accepted any query
 *  that merely started with a declared prefix (e.g. "ab" matched "a").
 *
 *  \param prefix NUL-terminated prefix, or NULL for the default namespace.
 *  Throws a heap-allocated XmlPullParserException (by pointer) when there
 *  is no matching declaration, including the case where no namespace has
 *  been declared at all. */
char *
XmlPullParser::getNamespaceForPrefix (char *prefix)
{
  /* With an empty stack there is no valid entry to inspect. */
  if (!namespa->count ())
    throw new XmlPullParserException ();

  namespace_t *p = namespa->last (), *q = namespa->get ();
  for (;; p--)
    {
      const bool hit = (prefix == NULL)
        ? (p->prefix == NULL)
        : (p->prefix != NULL && strcmp (p->prefix, prefix) == 0);
      if (hit)
        return strndup (p->uri, p->lengthUri);

      if (p == q)
        throw new XmlPullParserException ();
    }
}
/*! Returns the prefix bound to namespace URI \a ns, searching the namespace
 *  stack from the most recent declaration backwards.
 *
 *  The stored URI must equal \a ns exactly.  The former test, strncmp()
 *  limited to the stored URI length, also accepted any query that merely
 *  started with a declared URI.
 *
 *  \param ns NUL-terminated namespace URI; must not be NULL.
 *  \return a newly allocated copy (strndup) of the prefix, or NULL when the
 *          URI is bound as the default (unprefixed) namespace.
 *  Throws a heap-allocated XmlPullParserException (by pointer) when \a ns
 *  is NULL (no namespace exists with a NULL URI) or when no declaration
 *  matches, including the case where no namespace has been declared. */
char *
XmlPullParser::getPrefixForNamespace (char *ns)
{
  /* No namespace exists with a NULL URI. */
  if (ns == NULL)
    throw new XmlPullParserException ();
  /* With an empty stack there is no valid entry to inspect. */
  if (!namespa->count ())
    throw new XmlPullParserException ();

  namespace_t *p = namespa->last (), *q = namespa->get ();
  for (;; p--)
    {
      if (p->uri && strcmp (p->uri, ns) == 0)
        {
          /* A default namespace declaration has no prefix to copy. */
          return p->prefix ? strndup (p->prefix, p->length) : NULL;
        }

      if (p == q)
        throw new XmlPullParserException ();
    }
}