blob: 9774743d62a0992d1c8982f1d2d65530a6f116ce [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
* $Id$
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/XMLURL.hpp>
#include <xercesc/util/XMLUri.hpp>
#include <xercesc/util/XMLChar.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
// ---------------------------------------------------------------------------
// XMLUri: static data
// ---------------------------------------------------------------------------
// Amended by RFC2732
// reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
// "$" | "," | "[" | "]"
// NOTE(review): for each character table in this section only the
// chNull-terminated initializer list is visible here; the array declaration
// lines are not part of this view.
chSemiColon, chForwardSlash, chQuestion, chColon, chAt,
chAmpersand, chEqual, chPlus, chDollarSign, chComma, chOpenSquare,
chCloseSquare, chNull
// mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
// "(" | ")"
chDash, chUnderscore, chPeriod, chBang, chTilde,
chAsterisk, chSingleQuote, chOpenParen, chCloseParen, chNull
// combination of MARK and RESERVED
// (the mark characters are listed first, followed by the reserved characters)
chDash, chUnderscore, chPeriod, chBang, chTilde,
chAsterisk, chSingleQuote, chOpenParen, chCloseParen,
chSemiColon, chForwardSlash, chQuestion, chColon, chAt,
chAmpersand, chEqual, chPlus, chDollarSign, chComma, chOpenSquare,
chCloseSquare, chNull
// scheme = alpha *( alpha | digit | "+" | "-" | "." )
// Only the punctuation is listed; presumably this is the SCHEME_CHARACTERS
// table that isConformantSchemeName() consults after its isAlphaNum test.
chPlus, chDash, chPeriod, chNull
// userinfo = *( unreserved | escaped |
// ";" | ":" | "&" | "=" | "+" | "$" | "," )
// Extra punctuation permitted in userinfo beyond the unreserved set
// (presumably the USERINFO_CHARACTERS table that the userinfo validators
// search with XMLString::indexOf).
// chComma is part of the production above but was absent from the list, so
// a userinfo containing "," was rejected. chPeriod is kept so that no
// character accepted before this change becomes invalid.
chSemiColon, chColon, chAmpersand, chEqual, chPlus,
chDollarSign, chComma, chPeriod, chNull
// reg_name = 1*( unreserved | escaped | "$" | "," |
// ";" | ":" | "@" | "&" | "=" | "+" )
// Lists only the punctuation of the production; unreserved and escaped
// characters are not part of the table.
chDollarSign, chComma, chSemiColon, chColon, chAt,
chAmpersand, chEqual, chPlus, chNull
// pchar plus ';' and '/'.
// pchar = unreserved | escaped |
// ":" | "@" | "&" | "=" | "+" | "$" | ","
// The list below is the pchar punctuation with chSemiColon and
// chForwardSlash placed in front.
chSemiColon, chForwardSlash, chColon, chAt, chAmpersand,
chEqual, chPlus, chDollarSign, chComma, chNull
// ---------------------------------------------------------------------------
// Local methods and data
// ---------------------------------------------------------------------------
// Number of characters setPort() lets XMLString::binToText write into its
// stack buffers (which are declared with BUF_LEN+1 elements).
static const int BUF_LEN = 64;
// Component names used as message arguments. The arrays below spell,
// in order:
// "scheme"
// "parameters"
// "userinfo"
// "host"
// "port"
// "RegName"
// "path"
// "query"
// "fragment"
// NOTE(review): only the initializer lists are visible here; the enclosing
// braces are not part of this view.
static const XMLCh errMsg_SCHEME[] =
chLatin_s, chLatin_c, chLatin_h, chLatin_e,
chLatin_m, chLatin_e, chNull
static const XMLCh errMsg_PARAMS[] =
chLatin_p, chLatin_a, chLatin_r, chLatin_a, chLatin_m,
chLatin_e, chLatin_t, chLatin_e, chLatin_r, chLatin_s, chNull
static const XMLCh errMsg_USERINFO[] =
chLatin_u, chLatin_s, chLatin_e, chLatin_r,
chLatin_i, chLatin_n, chLatin_f, chLatin_o, chNull
static const XMLCh errMsg_HOST[] =
chLatin_h, chLatin_o, chLatin_s, chLatin_t, chNull
static const XMLCh errMsg_PORT[] =
chLatin_p, chLatin_o, chLatin_r, chLatin_t, chNull
static const XMLCh errMsg_REGNAME[] =
chLatin_R, chLatin_e, chLatin_g,
chLatin_N, chLatin_a, chLatin_m, chLatin_e, chNull
static const XMLCh errMsg_PATH[] =
chLatin_p, chLatin_a, chLatin_t, chLatin_h, chNull
static const XMLCh errMsg_QUERY[] =
chLatin_q, chLatin_u, chLatin_e, chLatin_r, chLatin_y, chNull
static const XMLCh errMsg_FRAGMENT[] =
chLatin_f, chLatin_r, chLatin_a, chLatin_g,
chLatin_m, chLatin_e, chLatin_n, chLatin_t, chNull
// Path patterns used while resolving a relative path against a base path.
// The arrays below spell, in declaration order:
// "//"
// "/"
// "/./"
// "/."
// "/../"
// "/.."
static const XMLCh DOUBLE_SLASH[] =
chForwardSlash, chForwardSlash, chNull
static const XMLCh SINGLE_SLASH[] =
chForwardSlash, chNull
static const XMLCh SLASH_DOT_SLASH[] =
chForwardSlash, chPeriod, chForwardSlash, chNull
static const XMLCh SLASH_DOT[] =
chForwardSlash, chPeriod, chNull
static const XMLCh SLASH_DOTDOT_SLASH[] =
chForwardSlash, chPeriod, chPeriod, chForwardSlash, chNull
static const XMLCh SLASH_DOTDOT[] =
chForwardSlash, chPeriod, chPeriod, chNull
// ":/?#"
// Characters initializeScheme() searches for to find the end of the scheme.
// REVISIT: why?
static const XMLCh SCHEME_SEPARATORS[] =
chColon, chForwardSlash, chQuestion, chPound, chNull
// ---------------------------------------------------------------------------
// XMLUri: Constructors and Helper methods
// ---------------------------------------------------------------------------
// ctor# 2
// Builds a URI from uriSpec alone, with no base URI.
//   uriSpec - the URI text to parse.
//   manager - memory manager stored in fMemoryManager.
// Every component pointer starts at 0 and fPort at -1; parsing is delegated
// to initialize() with a null base. CleanupType is a JanitorMemFunCall bound
// to cleanUp() on this object.
// NOTE(review): the body of the OutOfMemoryException handler and any release
// of the janitor are not visible in this view.
typedef JanitorMemFunCall<XMLUri> CleanupType;
XMLUri::XMLUri(const XMLCh* const uriSpec,
MemoryManager* const manager)
: fPort(-1)
, fScheme(0)
, fUserInfo(0)
, fHost(0)
, fRegAuth(0)
, fPath(0)
, fQueryString(0)
, fFragment(0)
, fURIText(0)
, fMemoryManager(manager)
CleanupType cleanup(this, &XMLUri::cleanUp);
try {
initialize((XMLUri *)0, uriSpec);
catch(const OutOfMemoryException&)
// ctor# 7 relative ctor
// Builds a URI from uriSpec, passing baseURI on to initialize() so that a
// relative spec can be resolved against it.
//   baseURI - base URI forwarded to initialize().
//   uriSpec - the URI text to parse.
//   manager - memory manager stored in fMemoryManager.
// Members start in the same empty state as in the single-spec constructor
// (pointers 0, fPort -1) and a CleanupType janitor is bound to cleanUp().
// NOTE(review): the body of the OutOfMemoryException handler is not visible
// in this view.
XMLUri::XMLUri(const XMLUri* const baseURI
, const XMLCh* const uriSpec
, MemoryManager* const manager)
: fPort(-1)
, fScheme(0)
, fUserInfo(0)
, fHost(0)
, fRegAuth(0)
, fPath(0)
, fQueryString(0)
, fFragment(0)
, fURIText(0)
, fMemoryManager(manager)
CleanupType cleanup(this, &XMLUri::cleanUp);
try {
initialize(baseURI, uriSpec);
catch(const OutOfMemoryException&)
//Copy constructor
// Copy-constructs the XSerializable and XMemory bases from toCopy, starts
// every component in the empty state (pointers 0, fPort -1) and adopts
// toCopy's memory manager. A CleanupType janitor is bound to cleanUp().
// NOTE(review): the statements inside the try block and the handler body are
// not visible in this view; presumably the components are copied via
// initialize(toCopy) - confirm against the full file.
XMLUri::XMLUri(const XMLUri& toCopy)
: XSerializable(toCopy)
, XMemory(toCopy)
, fPort(-1)
, fScheme(0)
, fUserInfo(0)
, fHost(0)
, fRegAuth(0)
, fPath(0)
, fQueryString(0)
, fFragment(0)
, fURIText(0)
, fMemoryManager(toCopy.fMemoryManager)
CleanupType cleanup(this, &XMLUri::cleanUp);
try {
catch(const OutOfMemoryException&)
// Copy assignment. Binds a CleanupType janitor to cleanUp() on this object
// and returns *this.
// NOTE(review): the statements inside the try block and the
// OutOfMemoryException handler are not visible in this view, so how the
// existing components are released and re-copied cannot be confirmed here.
XMLUri& XMLUri::operator=(const XMLUri& toAssign)
CleanupType cleanup(this, &XMLUri::cleanUp);
try {
catch(const OutOfMemoryException&)
return *this;
// Releases the component strings owned by this object through
// XMLString::release, using fMemoryManager. Each component is released only
// when its pointer is non-null; fURIText is passed to release() without a
// visible null check. fPort is not touched here.
void XMLUri::cleanUp()
if (fScheme)
XMLString::release(&fScheme, fMemoryManager);//delete[] fScheme;
if (fUserInfo)
XMLString::release(&fUserInfo, fMemoryManager);//delete[] fUserInfo;
if (fHost)
XMLString::release(&fHost, fMemoryManager);//delete[] fHost;
if (fRegAuth)
XMLString::release(&fRegAuth, fMemoryManager);//delete[] fRegAuth;
if (fPath)
XMLString::release(&fPath, fMemoryManager);//delete[] fPath;
if (fQueryString)
XMLString::release(&fQueryString, fMemoryManager);//delete[] fQueryString;
if (fFragment)
XMLString::release(&fFragment, fMemoryManager);//delete[] fFragment;
XMLString::release(&fURIText, fMemoryManager);//delete[] fURIText;
// Copies every URI component from toCopy into this object.
// The memory manager is taken from toCopy first, and each string component
// is duplicated with XMLString::replicate using that manager; fPort is
// copied by value. fURIText is not copied by the lines visible here.
// The existing members are overwritten without being released in this
// function.
void XMLUri::initialize(const XMLUri& toCopy)
// assuming that all fields from the toCopy are valid,
// therefore need NOT to go through various setXXX() methods
fMemoryManager = toCopy.fMemoryManager;
fScheme = XMLString::replicate(toCopy.fScheme, fMemoryManager);
fUserInfo = XMLString::replicate(toCopy.fUserInfo, fMemoryManager);
fHost = XMLString::replicate(toCopy.fHost, fMemoryManager);
fPort = toCopy.fPort;
fRegAuth = XMLString::replicate(toCopy.fRegAuth, fMemoryManager);
fPath = XMLString::replicate(toCopy.fPath, fMemoryManager);
fQueryString = XMLString::replicate(toCopy.fQueryString, fMemoryManager);
fFragment = XMLString::replicate(toCopy.fFragment, fMemoryManager);
// Parses uriSpec into this object's components and, when baseURI is
// non-null, resolves the result against the base following RFC 2396
// section 5.2.
//   baseURI - base URI, or null when uriSpec must stand on its own.
//   uriSpec - the URI text; a private copy is made and parsed.
// Raises MalformedURLException with XMLNUM_URI_No_Scheme when the spec starts
// with ':' or when there is no base and the scheme-less spec does not start
// with '#'. The XMLNUM_URI_Component_Empty argument lists below belong to
// throw macros whose first lines are not visible in this view.
// NOTE(review): braces, else-branches, loop exits and several calls
// (presumably initializeScheme/initializeAuthority/initializePath) are not
// visible in this view; the comments below describe only the visible lines.
void XMLUri::initialize(const XMLUri* const baseURI
, const XMLCh* const uriSpec)
// get a trimmed version of uriSpec
// uriSpec will NO LONGER be used in this function.
// NOTE(review): only the replicate call is visible here; the trimming step
// itself is not part of this view.
XMLCh* trimmedUriSpec = XMLString::replicate(uriSpec, fMemoryManager);
// janName owns the copy for the rest of the function.
ArrayJanitor<XMLCh> janName(trimmedUriSpec, fMemoryManager);
XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec);
// An empty spec with no base is reported as an empty component.
if ( !baseURI &&
(!trimmedUriSpec || trimmedUriSpecLen == 0))
, XMLExcepts::XMLNUM_URI_Component_Empty
, errMsg_PARAMS
, fMemoryManager);
// just make a copy of the base if spec is empty
if (!trimmedUriSpec || trimmedUriSpecLen == 0)
XMLSize_t index = 0;
bool foundScheme = false;
// Check for scheme, which must be before `/', '?' or '#'.
int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon);
int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash);
int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion);
int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound);
// No usable scheme: no ':' at a positive index, or the first ':' comes after
// the first '/', '?' or '#'.
if ((colonIdx <= 0) ||
(colonIdx > slashIdx && slashIdx != -1) ||
(colonIdx > queryIdx && queryIdx != -1) ||
(colonIdx > fragmentIdx && fragmentIdx != -1))
// A standalone base is a valid URI according to spec
if ( colonIdx == 0 || (!baseURI && fragmentIdx != 0) )
ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme, fMemoryManager);
foundScheme = true;
// Continue parsing just past the scheme and its ':' separator.
index = XMLString::stringLen(fScheme)+1;
// It's an error if we stop here
if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound)))
, XMLExcepts::XMLNUM_URI_Component_Empty
, errMsg_PATH
, fMemoryManager);
// two slashes means generic URI syntax, so we get the authority
// authUriSpec is a scratch buffer sized for the whole spec; it first holds
// the remainder after the scheme and later the authority substring.
XMLCh* authUriSpec = (XMLCh*) fMemoryManager->allocate
(trimmedUriSpecLen+1) * sizeof(XMLCh)
);//new XMLCh[trimmedUriSpecLen+1];
ArrayJanitor<XMLCh> authName(authUriSpec, fMemoryManager);
XMLString::subString(authUriSpec, trimmedUriSpec, index, trimmedUriSpecLen, fMemoryManager);
if (((index+1) < trimmedUriSpecLen) &&
XMLString::startsWith(authUriSpec, DOUBLE_SLASH))
// Skip the "//" that introduces the authority.
index += 2;
XMLSize_t startPos = index;
// get authority - everything up to path, query or fragment
XMLCh testChar;
while (index < trimmedUriSpecLen)
testChar = trimmedUriSpec[index];
if (testChar == chForwardSlash ||
testChar == chQuestion ||
testChar == chPound )
// if we found authority, parse it out, otherwise we set the
// host to empty string
if (index > startPos)
XMLString::subString(authUriSpec, trimmedUriSpec, startPos, index, fMemoryManager);
//fHost = 0;
// we need to check whether index has exceeded the length or not
if (index >= trimmedUriSpecLen)
// pathUriSpec receives everything from index to the end of the spec.
XMLCh* pathUriSpec = (XMLCh*) fMemoryManager->allocate
(trimmedUriSpecLen+1) * sizeof(XMLCh)
);//new XMLCh[trimmedUriSpecLen+1];
ArrayJanitor<XMLCh> pathUriSpecName(pathUriSpec, fMemoryManager);
XMLString::subString(pathUriSpec, trimmedUriSpec, index, trimmedUriSpecLen, fMemoryManager);
// Resolve relative URI to base URI - see RFC 2396 Section 5.2
// In some cases, it might make more sense to throw an exception
// (when scheme is specified is the string spec and the base URI
// is also specified, for example), but we're just following the
// RFC specifications
if ( baseURI )
// check to see if this is the current doc - RFC 2396 5.2 #2
// note that this is slightly different from the RFC spec in that
// we don't include the check for query string being null
// - this handles cases where the urispec is just a query
// string or a fragment (e.g. "?y" or "#s") -
// (the original comment cited a reference here whose URL is missing) which
// identified this as a bug in the RFC
if ((!fPath || !*fPath) &&
fScheme == 0 &&
fHost == 0 && fRegAuth == 0)
// Inherit scheme, authority and path from the base; the query string is
// inherited only when the spec supplied none.
fScheme = XMLString::replicate(baseURI->getScheme(), fMemoryManager);
fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo;
fUserInfo = XMLString::replicate(baseURI->getUserInfo(), fMemoryManager);
fHost = XMLString::replicate(baseURI->getHost(), fMemoryManager);
fPort = baseURI->getPort();
fRegAuth = XMLString::replicate(baseURI->getRegBasedAuthority(), fMemoryManager);
fMemoryManager->deallocate(fPath);//delete [] fPath;
fPath = XMLString::replicate(baseURI->getPath(), fMemoryManager);
if ( !fQueryString )
fQueryString = XMLString::replicate(baseURI->getQueryString(), fMemoryManager);
// check for scheme - RFC 2396 5.2 #3
// if we found a scheme, it means absolute URI, so we're done
if (fScheme == 0)
fScheme = XMLString::replicate(baseURI->getScheme(), fMemoryManager);
// check for authority - RFC 2396 5.2 #4
// if we found a host, then we've got a network path, so we're done
if (fHost == 0 && fRegAuth == 0)
fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo;
fUserInfo = XMLString::replicate(baseURI->getUserInfo(), fMemoryManager);
fHost = XMLString::replicate(baseURI->getHost(), fMemoryManager);
fPort = baseURI->getPort();
fRegAuth = XMLString::replicate(baseURI->getRegBasedAuthority(), fMemoryManager);
// check for absolute path - RFC 2396 5.2 #5
if ((fPath && *fPath) &&
XMLString::startsWith(fPath, SINGLE_SLASH))
// if we get to this point, we need to resolve relative path
// RFC 2396 5.2 #6
XMLCh* basePath = XMLString::replicate(baseURI->getPath(), fMemoryManager);
ArrayJanitor<XMLCh> basePathName(basePath, fMemoryManager);
// All three work buffers share one size: spec length + relative path length
// + base path length + 1.
XMLSize_t bufLen = trimmedUriSpecLen+XMLString::stringLen(fPath)+XMLString::stringLen(basePath)+1;
XMLCh* path = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
ArrayJanitor<XMLCh> pathName(path, fMemoryManager);
path[0] = 0;
XMLCh* tmp1 = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
ArrayJanitor<XMLCh> tmp1Name(tmp1, fMemoryManager);
XMLCh* tmp2 = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
ArrayJanitor<XMLCh> tmp2Name(tmp2, fMemoryManager);
// 6a - get all but the last segment of the base URI path
if (basePath)
int lastSlash = XMLString::lastIndexOf(basePath, chForwardSlash);
if (lastSlash != -1)
XMLString::subString(path, basePath, 0, lastSlash+1, fMemoryManager);
// 6b - append the relative URI path
XMLString::catString(path, fPath);
// 6c - remove all "./" where "." is a complete path segment
int iIndex = -1;
while ((iIndex = XMLString::patternMatch(path, SLASH_DOT_SLASH)) != -1)
// Rebuild path as the text before the match plus the text starting at the
// match's second '/', which drops the "/." pair.
XMLString::subString(tmp1, path, 0, iIndex, fMemoryManager);
XMLString::subString(tmp2, path, iIndex+2, XMLString::stringLen(path), fMemoryManager);
path[0] = 0;
XMLString::catString(path, tmp1);
XMLString::catString(path, tmp2);
// 6d - remove "." if path ends with "." as a complete path segment
if (XMLString::endsWith(path, SLASH_DOT))
path[XMLString::stringLen(path) - 1] = chNull;
// 6e - remove all "<segment>/../" where "<segment>" is a complete
// path segment not equal to ".."
iIndex = -1;
int segIndex = -1;
int offset = 1;
while ((iIndex = XMLString::patternMatch(&(path[offset]), SLASH_DOTDOT_SLASH)) != -1)
// Undo offset
iIndex += offset;
// Find start of <segment> within substring ending at found point.
XMLString::subString(tmp1, path, 0, iIndex-1, fMemoryManager);
segIndex = XMLString::lastIndexOf(tmp1, chForwardSlash);
// Ensure <segment> exists and != ".."
if (segIndex != -1 &&
(path[segIndex+1] != chPeriod ||
path[segIndex+2] != chPeriod ||
segIndex + 3 != iIndex))
XMLString::subString(tmp1, path, 0, segIndex, fMemoryManager);
XMLString::subString(tmp2, path, iIndex+3, XMLString::stringLen(path), fMemoryManager);
path[0] = 0;
XMLString::catString(path, tmp1);
XMLString::catString(path, tmp2);
offset = (segIndex == 0 ? 1 : segIndex);
offset += 4;
}// while
// 6f - remove ending "<segment>/.." where "<segment>" is a
// complete path segment
if (XMLString::endsWith(path, SLASH_DOTDOT))
// Find start of <segment> within substring ending at found point.
index = XMLString::stringLen(path) - 3;
XMLString::subString(tmp1, path, 0, index-1, fMemoryManager);
segIndex = XMLString::lastIndexOf(tmp1, chForwardSlash);
if (segIndex != -1 &&
(path[segIndex+1] != chPeriod ||
path[segIndex+2] != chPeriod ||
segIndex + 3 != (int)index))
// Truncate right after the '/' that precedes <segment>.
path[segIndex+1] = chNull;
// Replace the stored path with a copy of the resolved one.
if (getPath())
fMemoryManager->deallocate(fPath);//delete [] fPath;
fPath = XMLString::replicate(path, fMemoryManager);
// ---------------------------------------------------------------------------
// Components initialization
// ---------------------------------------------------------------------------
// authority = server | reg_name
// server = [ [ userinfo "@" ] hostport ]
// hostport = host [ ":" port ]
// reg_name = 1*( unreserved | escaped | "$" | "," |
// ";" | ":" | "@" | "&" | "=" | "+" )
// userinfo = *( unreserved | escaped |
// ";" | ":" | "&" | "=" | "+" | "$" | "," )
//
// Splits the authority text uriSpec into userinfo, host and port, and stores
// them in fUserInfo, fHost and fPort when isValidServerBasedAuthority()
// accepts them. Each piece is cut into a scratch buffer of end+1 characters
// that an ArrayJanitor releases.
// NOTE(review): braces, else-branches and the registry-based fallback at the
// end are not visible in this view; the comments describe visible lines only.
void XMLUri::initializeAuthority(const XMLCh* const uriSpec)
int index = 0;
XMLSize_t start = 0;
const XMLSize_t end = XMLString::stringLen(uriSpec);
// server = [ [ userinfo "@" ] hostport ]
// userinfo is everything up @,
XMLCh* userinfo = (XMLCh*) fMemoryManager->allocate
(end+1) * sizeof(XMLCh)
);//new XMLCh[end+1];
ArrayJanitor<XMLCh> userName(userinfo, fMemoryManager);
index = XMLString::indexOf(&(uriSpec[start]), chAt);
if ( index != -1)
XMLString::subString(userinfo, &(uriSpec[start]), 0, index, fMemoryManager);
index++; // skip the @
start += index;
// The buffer remains owned by userName; only the local pointer is cleared.
userinfo = 0;
// hostport = host [ ":" port ]
// host is everything up to ':', or up to
// and including ']' if followed by ':'.
XMLCh* host = (XMLCh*) fMemoryManager->allocate
(end+1) * sizeof(XMLCh)
);//new XMLCh[end+1];
ArrayJanitor<XMLCh> hostName(host, fMemoryManager);
// Search for port boundary.
// A host starting with '[' is scanned up to the matching ']'.
if (start < end && uriSpec[start] == chOpenSquare)
index = XMLString::indexOf(&(uriSpec[start]), chCloseSquare);
if (index != -1)
// skip the ']'
// index becomes the offset of the ':' right after ']', or -1 when no ':'
// follows.
index = ((start + index + 1) < end
&& uriSpec[start + index + 1] == chColon) ? index+1 : -1;
index = XMLString::indexOf(&(uriSpec[start]), chColon);
if ( index != -1 )
XMLString::subString(host, &(uriSpec[start]), 0, index, fMemoryManager);
index++; // skip the :
start +=index;
XMLString::subString(host, &(uriSpec[start]), 0, end-start, fMemoryManager);
start = end;
// port is everything after ":"
XMLCh* portStr = (XMLCh*) fMemoryManager->allocate
(end+1) * sizeof(XMLCh)
);//new XMLCh[end+1];
ArrayJanitor<XMLCh> portName(portStr, fMemoryManager);
// -1 is kept when no port text is parsed.
int port = -1;
if ((host && *host) && // non empty host
(index != -1) && // ":" found
(start < end) ) // ":" is not the last
XMLString::subString(portStr, &(uriSpec[start]), 0, end-start, fMemoryManager);
if (portStr && *portStr)
port = XMLString::parseInt(portStr, fMemoryManager);
} // if > 0
// Check if we have server based authority.
if (isValidServerBasedAuthority(host, port, userinfo, fMemoryManager))
// Release the previous values before storing copies of the new ones.
if (fHost)
fMemoryManager->deallocate(fHost);//delete [] fHost;
if (fUserInfo)
fMemoryManager->deallocate(fUserInfo);//delete[] fUserInfo;
fHost = XMLString::replicate(host, fMemoryManager);
fPort = port;
fUserInfo = XMLString::replicate(userinfo, fMemoryManager);
// This must be registry based authority or the URI is malformed.
// scheme = alpha *( alpha | digit | "+" | "-" | "." )
// Extracts the scheme: the text of uriSpec before the first character found
// in SCHEME_SEPARATORS (":/?#").
// Raises MalformedURLException with XMLNUM_URI_No_Scheme when uriSpec
// contains none of those characters.
// NOTE(review): what is done with the extracted text after subString is not
// visible in this view (presumably it is handed to setScheme - confirm).
void XMLUri::initializeScheme(const XMLCh* const uriSpec)
const XMLCh* tmpPtr = XMLString::findAny(uriSpec, SCHEME_SEPARATORS);
if ( !tmpPtr )
ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme, fMemoryManager);
// Scratch buffer sized for the whole spec, released by tmpName.
XMLCh* scheme = (XMLCh*) fMemoryManager->allocate
(XMLString::stringLen(uriSpec) + 1) * sizeof(XMLCh)
);//new XMLCh[XMLString::stringLen(uriSpec)+1];
ArrayJanitor<XMLCh> tmpName(scheme, fMemoryManager);
// (tmpPtr - uriSpec) is the offset of the separator, i.e. the scheme length.
XMLString::subString(scheme, uriSpec, 0, (tmpPtr - uriSpec), fMemoryManager);
// Splits uriSpec into path, query string and fragment, checking characters
// and "%"-escape sequences as it scans, and stores the pieces in fPath,
// fQueryString and fFragment.
//   uriSpec - text starting at the path; a null pointer is reported with
//             XMLNUM_URI_Component_Empty.
// Problems are reported with XMLNUM_URI_Component_Invalid_EscapeSequence or
// XMLNUM_URI_Component_Invalid_Char; the offending text is copied into a
// small stack buffer (value1) to be passed as a message argument.
// NOTE(review): braces, break/else lines, index increments and the first
// lines of the throw macros are not visible in this view; the comments below
// describe only the visible lines.
void XMLUri::initializePath(const XMLCh* const uriSpec)
if ( !uriSpec )
, XMLExcepts::XMLNUM_URI_Component_Empty
, errMsg_PATH
, fMemoryManager);
XMLSize_t index = 0;
XMLSize_t start = 0;
XMLSize_t end = XMLString::stringLen(uriSpec);
XMLCh testChar = 0;
// path - everything up to query string or fragment
if (start < end)
// RFC 2732 only allows '[' and ']' to appear in the opaque part.
// A spec with no scheme, or one starting with '/', is scanned as a
// hierarchical path.
if (!getScheme() || uriSpec[start] == chForwardSlash)
// Scan path.
// abs_path = "/" path_segments
// rel_path = rel_segment [ abs_path ]
while (index < end)
testChar = uriSpec[index];
if (testChar == chQuestion || testChar == chPound)
// check for valid escape sequence
if (testChar == chPercent)
// Fewer than two characters follow the '%': report the truncated escape.
if (index + 2 >= end)
XMLCh value1[3];
value1[1] = chNull;
value1[2] = chNull;
XMLString::moveChars(value1, &(uriSpec[index]), (index + 1 >= end ? 1 : 2));
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, errMsg_PATH
, value1
, fMemoryManager);
else if (!XMLString::isHex(uriSpec[index+1]) || !XMLString::isHex(uriSpec[index+2]))
XMLCh value1[4];
XMLString::moveChars(value1, &(uriSpec[index]), 3);
value1[3] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, errMsg_PATH
, value1
, fMemoryManager);
// NOTE(review): the second half of this condition is not visible here.
else if (!isUnreservedCharacter(testChar) &&
XMLCh value1[2];
value1[0] = testChar;
value1[1] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_Char
, errMsg_PATH
, value1
, fMemoryManager);
}//while (index < end)
// Scan opaque part.
// opaque_part = uric_no_slash *uric
while (index < end)
testChar = uriSpec[index];
if (testChar == chQuestion || testChar == chPound)
// check for valid escape sequence
if (testChar == chPercent)
if (index + 2 >= end)
XMLCh value1[3];
value1[1] = chNull;
value1[2] = chNull;
XMLString::moveChars(value1, &(uriSpec[index]), (index + 1 >= end ? 1 : 2));
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, errMsg_PATH
, value1
, fMemoryManager);
else if (!XMLString::isHex(uriSpec[index+1]) || !XMLString::isHex(uriSpec[index+2]))
XMLCh value1[4];
XMLString::moveChars(value1, &(uriSpec[index]), 3);
value1[3] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, errMsg_PATH
, value1
, fMemoryManager);
// If the scheme specific part is opaque, it can contain '['
// and ']'. uric_no_slash wasn't modified by RFC 2732, which
// I've interpreted as an error in the spec, since the
// production should be equivalent to (uric - '/'), and uric
// contains '[' and ']'.
else if (!isReservedOrUnreservedCharacter(testChar))
XMLCh value1[2];
value1[0] = testChar;
value1[1] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_Char
, errMsg_PATH
, value1
, fMemoryManager);
}//while (index < end)
} //if (start < end)
// Store the path: release any previous value, then copy uriSpec[start, index).
if (getPath())
fMemoryManager->deallocate(fPath);//delete [] fPath;
fPath = (XMLCh*) fMemoryManager->allocate((index+1) * sizeof(XMLCh));//new XMLCh[index+1];
XMLString::subString(fPath, uriSpec, start, index, fMemoryManager);
// query - starts with ? and up to fragment or end
if (testChar == chQuestion)
start = index;
while (index < end)
testChar = uriSpec[index];
if (testChar == chPound)
if (testChar == chPercent)
if (index + 2 >= end)
XMLCh value1[3];
value1[1] = chNull;
value1[2] = chNull;
XMLString::moveChars(value1, &(uriSpec[index]), (index + 1 >= end ? 1 : 2));
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, errMsg_QUERY
, value1
, fMemoryManager);
if (!XMLString::isHex(uriSpec[index+1]) || !XMLString::isHex(uriSpec[index+2]))
XMLCh value1[4];
XMLString::moveChars(value1, &(uriSpec[index]), 3);
value1[3] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, errMsg_QUERY
, value1
, fMemoryManager);
else if (!isReservedOrUnreservedCharacter(testChar))
XMLCh value1[2];
value1[0] = testChar;
value1[1] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_Char
, errMsg_QUERY
, value1
, fMemoryManager);
// Store the query string copied from uriSpec[start, index).
if (getQueryString())
fMemoryManager->deallocate(fQueryString);//delete [] fQueryString;
fQueryString = (XMLCh*) fMemoryManager->allocate
(index - start + 1) * sizeof(XMLCh)
);//new XMLCh[index - start + 1];
XMLString::subString(fQueryString, uriSpec, start, index, fMemoryManager);
// fragment - starts with #
if (testChar == chPound)
start = index;
while (index < end)
testChar = uriSpec[index];
if (testChar == chPercent)
if (index + 2 >= end)
XMLCh value1[3];
value1[1] = chNull;
value1[2] = chNull;
XMLString::moveChars(value1, &(uriSpec[index]), (index + 1 >= end ? 1 : 2));
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, value1
, fMemoryManager);
if (!XMLString::isHex(uriSpec[index+1]) || !XMLString::isHex(uriSpec[index+2]))
XMLCh value1[4];
XMLString::moveChars(value1, &(uriSpec[index]), 3);
value1[3] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, value1
, fMemoryManager);
else if (!isReservedOrUnreservedCharacter(testChar))
XMLCh value1[2];
value1[0] = testChar;
value1[1] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_Char
, value1
, fMemoryManager);
if (getFragment())
fMemoryManager->deallocate(fFragment);//delete [] fFragment;
//make sure that there is something following the '#'
if (index > start)
fFragment = (XMLCh*) fMemoryManager->allocate
(index - start + 1) * sizeof(XMLCh)
);//new XMLCh[index - start + 1];
XMLString::subString(fFragment, uriSpec, start, index, fMemoryManager);
// RFC 2396, 4.0. URI Reference
// URI-reference = [absoluteURI | relativeURI] [# fragment]
// RFC 2396, 4.1. Fragment Identifier
// fragment = *uric
// empty fragment is valid
fFragment = 0;
// ---------------------------------------------------------------------------
// Setter
// ---------------------------------------------------------------------------
// Replaces the scheme with a copy of newScheme.
// A null newScheme is reported with XMLNUM_URI_Component_Set_Null and a name
// rejected by isConformantSchemeName() with
// XMLNUM_URI_Component_Not_Conformant; the first lines of those throw macros
// are not visible in this view.
// NOTE(review): presumably the stored copy is also normalised (e.g. case) on
// a line not visible here - confirm against the full file.
void XMLUri::setScheme(const XMLCh* const newScheme)
if ( !newScheme )
, XMLExcepts::XMLNUM_URI_Component_Set_Null
, errMsg_SCHEME
, fMemoryManager);
if (!isConformantSchemeName(newScheme))
, XMLExcepts::XMLNUM_URI_Component_Not_Conformant
, errMsg_SCHEME
, newScheme
, fMemoryManager);
// Release the old scheme before storing the copy.
if (getScheme())
fMemoryManager->deallocate(fScheme);//delete [] fScheme;
fScheme = XMLString::replicate(newScheme, fMemoryManager);
// server = [ [ userinfo "@" ] hostport ]
// hostport = host [":" port]
// setUserInfo(), setHost() and setPort() are closely related
// three methods, in a word, userinfo and port has dependency
// on host.
// if host is not present, userinfo must be null and port = -1
//
// Replaces the userinfo component.
// A non-null newUserInfo while no host is set is reported with
// XMLNUM_URI_NullHost (the first line of that throw macro is not visible in
// this view). The value is then checked by isConformantUserInfo(), the old
// value released, and a non-empty string stored as a copy; null or empty
// input leaves fUserInfo as 0.
void XMLUri::setUserInfo(const XMLCh* const newUserInfo)
if ( newUserInfo &&
!getHost() )
, XMLExcepts::XMLNUM_URI_NullHost
, newUserInfo
, fMemoryManager);
isConformantUserInfo(newUserInfo, fMemoryManager);
if (getUserInfo())
fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo;
//sometimes we get passed a empty string rather than a null.
//Other procedures rely on it being null
if(newUserInfo && *newUserInfo) {
fUserInfo = XMLString::replicate(newUserInfo, fMemoryManager);
fUserInfo = 0;
// Replaces the host component.
// A null newHost releases the current host and sets fHost to 0. A non-empty
// newHost rejected by isWellFormedAddress() is reported with
// XMLNUM_URI_Component_Not_Conformant (the first line of that throw macro is
// not visible in this view); an empty string skips that check. Otherwise the
// old host is released and a copy of newHost stored.
// NOTE(review): any reset of userinfo/port and the return after the null
// branch are not visible in this view.
void XMLUri::setHost(const XMLCh* const newHost)
if ( !newHost )
if (getHost())
fMemoryManager->deallocate(fHost);//delete [] fHost;
fHost = 0;
if ( *newHost && !isWellFormedAddress(newHost, fMemoryManager))
, XMLExcepts::XMLNUM_URI_Component_Not_Conformant
, errMsg_HOST
, newHost
, fMemoryManager);
if (getHost())
fMemoryManager->deallocate(fHost);//delete [] fHost;
fHost = XMLString::replicate(newHost, fMemoryManager);
// Sets the port number.
// A value in 0..65535 requires a host to be present; otherwise the problem
// is reported with XMLNUM_URI_NullHost. Any other value except -1 is
// reported with XMLNUM_URI_PortNo_Invalid. In both cases the port is
// rendered in base 10 into a BUF_LEN+1 stack buffer for the message (the
// first lines of the throw macros are not visible in this view).
// -1 is accepted without a host check.
void XMLUri::setPort(int newPort)
if (newPort >= 0 && newPort <= 65535)
if (!getHost())
XMLCh value1[BUF_LEN+1];
XMLString::binToText(newPort, value1, BUF_LEN, 10, fMemoryManager);
, XMLExcepts::XMLNUM_URI_NullHost
, errMsg_PORT
, value1
, fMemoryManager);
else if (newPort != -1)
XMLCh value1[BUF_LEN+1];
XMLString::binToText(newPort, value1, BUF_LEN, 10, fMemoryManager);
, XMLExcepts::XMLNUM_URI_PortNo_Invalid
, value1
, fMemoryManager);
fPort = newPort;
// Replaces the registry-based authority component.
// A null newRegAuth releases the current value and sets fRegAuth to 0.
// An empty string, or one rejected by isValidRegistryBasedAuthority(), is
// reported with XMLNUM_URI_Component_Not_Conformant (the first line of that
// throw macro is not visible in this view). Otherwise the old value is
// released and a copy of newRegAuth stored.
// NOTE(review): the return after the null branch and any reset of the
// server-based fields are not visible in this view.
void XMLUri::setRegBasedAuthority(const XMLCh* const newRegAuth)
if ( !newRegAuth )
if (getRegBasedAuthority())
fMemoryManager->deallocate(fRegAuth);//delete [] fRegAuth;
fRegAuth = 0;
// reg_name = 1*( unreserved | escaped | "$" | "," |
// ";" | ":" | "@" | "&" | "=" | "+" )
else if ( !*newRegAuth || !isValidRegistryBasedAuthority(newRegAuth) )
, XMLExcepts::XMLNUM_URI_Component_Not_Conformant
, errMsg_REGNAME
, newRegAuth
, fMemoryManager);
if (getRegBasedAuthority())
fMemoryManager->deallocate(fRegAuth);//delete [] fRegAuth;
fRegAuth = XMLString::replicate(newRegAuth, fMemoryManager);
// setPath(), setQueryString() and setFragment() are closely
// related three methods as well.
//
// Replaces the path component. A null newPath releases the current path and
// sets fPath to 0.
// NOTE(review): the handling of a non-null newPath (and any reset of the
// query string and fragment) is not visible in this view.
void XMLUri::setPath(const XMLCh* const newPath)
if (!newPath)
if (getPath())
fMemoryManager->deallocate(fPath);//delete [] fPath;
fPath = 0;
// fragment = *uric
// Replaces the fragment component.
// A null newFragment releases the current fragment and sets fFragment to 0.
// Otherwise the checks run in this order, each reported through a throw
// macro whose first line is not visible in this view:
//   - the URI is not generic (isGenericURI() false):
//     XMLNUM_URI_Component_for_GenURI_Only
//   - no path is set: XMLNUM_URI_NullPath
//   - newFragment is rejected by isURIString():
//     XMLNUM_URI_Component_Invalid_Char
// When all checks pass, the old fragment is released and a copy stored.
void XMLUri::setFragment(const XMLCh* const newFragment)
if ( !newFragment )
if (getFragment())
fMemoryManager->deallocate(fFragment);//delete [] fFragment;
fFragment = 0;
else if (!isGenericURI())
, XMLExcepts::XMLNUM_URI_Component_for_GenURI_Only
, newFragment
, fMemoryManager);
else if ( !getPath() )
, XMLExcepts::XMLNUM_URI_NullPath
, newFragment
, fMemoryManager);
else if (!isURIString(newFragment))
, XMLExcepts::XMLNUM_URI_Component_Invalid_Char
, fMemoryManager);
if (getFragment())
fMemoryManager->deallocate(fFragment);//delete [] fFragment;
fFragment = XMLString::replicate(newFragment, fMemoryManager);
// query = *uric
// Replaces the query string component.
// A null newQueryString releases the current value and sets fQueryString
// to 0. Otherwise the checks run in this order, each reported through a
// throw macro whose first line is not visible in this view, with
// errMsg_QUERY and the rejected text as message arguments:
//   - the URI is not generic (isGenericURI() false):
//     XMLNUM_URI_Component_for_GenURI_Only
//   - no path is set: XMLNUM_URI_NullPath
//   - newQueryString is rejected by isURIString():
//     XMLNUM_URI_Component_Invalid_Char
// When all checks pass, the old value is released and a copy stored.
void XMLUri::setQueryString(const XMLCh* const newQueryString)
if ( !newQueryString )
if (getQueryString())
fMemoryManager->deallocate(fQueryString);//delete [] fQueryString;
fQueryString = 0;
else if (!isGenericURI())
, XMLExcepts::XMLNUM_URI_Component_for_GenURI_Only
, errMsg_QUERY
, newQueryString
, fMemoryManager);
else if ( !getPath() )
, XMLExcepts::XMLNUM_URI_NullPath
, errMsg_QUERY
, newQueryString
, fMemoryManager);
else if (!isURIString(newQueryString))
, XMLExcepts::XMLNUM_URI_Component_Invalid_Char
, errMsg_QUERY
, newQueryString
, fMemoryManager);
if (getQueryString())
fMemoryManager->deallocate(fQueryString);//delete [] fQueryString;
fQueryString = XMLString::replicate(newQueryString, fMemoryManager);
// ---------------------------------------------------------------------------
// XMLUri: Public, static methods
// ---------------------------------------------------------------------------
// scheme = alpha *( alpha | digit | "+" | "-" | "." )
// alphanum = alpha | digit
//
// Returns true when scheme matches the production above: the first
// character must satisfy XMLString::isAlpha and every character tested in
// the loop must be alphanumeric or listed in SCHEME_CHARACTERS.
// A null pointer yields false.
// NOTE(review): the pointer increments are not visible in this view.
bool XMLUri::isConformantSchemeName(const XMLCh* const scheme)
if ( !scheme )
return false;
const XMLCh* tmpStr = scheme;
if (!XMLString::isAlpha(*tmpStr)) // first: alpha
return false;
// second onwards: ( alpha | digit | "+" | "-" | "." )
while (*tmpStr)
if ( !XMLString::isAlphaNum(*tmpStr) &&
(XMLString::indexOf(SCHEME_CHARACTERS, *tmpStr) == -1))
return false;
return true;
// userinfo = *( unreserved | escaped |
// ";" | ":" | "&" | "=" | "+" | "$" | "," )
//
// Validates userInfo against the production above and reports problems by
// exception rather than by return value (the function returns void).
//   userInfo - text to check; the null case is tested first.
//   manager  - memory manager passed along with the exception arguments.
// Characters that are unreserved or listed in USERINFO_CHARACTERS are
// accepted. A '%' is checked for two following hex digits; the visible
// arguments show XMLNUM_URI_Component_Invalid_EscapeSequence with the
// three-character sequence, and XMLNUM_URI_Component_Invalid_Char with the
// whole userInfo string.
// NOTE(review): braces, else-branches, pointer increments and the first
// lines of the throw macros are not visible in this view.
void XMLUri::isConformantUserInfo(const XMLCh* const userInfo
, MemoryManager* const manager)
if ( !userInfo )
const XMLCh* tmpStr = userInfo;
while (*tmpStr)
if ( isUnreservedCharacter(*tmpStr) ||
(XMLString::indexOf(USERINFO_CHARACTERS, *tmpStr) != -1))
else if (*tmpStr == chPercent) // '%'
if (XMLString::stringLen(tmpStr) >= 3
&& XMLString::isHex(*(tmpStr+1)) // 1st hex
&& XMLString::isHex(*(tmpStr+2)) ) // 2nd hex
XMLCh value1[4];
value1[0] = chPercent;
value1[1] = *(tmpStr+1);
value1[2] = *(tmpStr+2);
value1[3] = chNull;
, XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
, value1
, manager);
, XMLExcepts::XMLNUM_URI_Component_Invalid_Char
, userInfo
, manager);
} //while
// Length-based check of a server-based authority; returns a bool instead of
// throwing.
//   host, hostLen     - host text and its length, checked by
//                       isWellFormedAddress().
//   port              - must be -1 or no greater than 65535; other negative
//                       values are rejected.
//   userinfo, userLen - userinfo text and its length; each character must be
//                       unreserved, listed in USERINFO_CHARACTERS, or start
//                       a "%" + two-hex-digit escape that fits within
//                       userLen.
// NOTE(review): braces, else-branches and the single-character index
// increment are not visible in this view.
bool XMLUri::isValidServerBasedAuthority(const XMLCh* const host,
const XMLSize_t hostLen,
const int port,
const XMLCh* const userinfo,
const XMLSize_t userLen)
// The order is important, do not change
if (!isWellFormedAddress(host, hostLen))
return false;
// check port number
if ((port > 65535) || (port < 0 && port != -1))
return false;
// check userinfo
XMLSize_t index = 0;
while (index < userLen)
if (isUnreservedCharacter(userinfo[index]) ||
(XMLString::indexOf(USERINFO_CHARACTERS, userinfo[index]) != -1))
else if (userinfo[index] == chPercent) // '%'
if (index + 2 < userLen
&& XMLString::isHex(userinfo[index+1]) // 1st hex
&& XMLString::isHex(userinfo[index+2]) ) // 2nd hex
// Step over the whole escape sequence.
index +=3;
return false;
return false;
} //while
return true;
// Null-terminated-string variant of the server-based authority check;
// returns a bool instead of throwing.
//   host     - checked by isWellFormedAddress() using manager.
//   port     - must be -1 or no greater than 65535; other negative values
//              are rejected.
//   userinfo - may be null, in which case the authority is accepted once
//              host and port pass; otherwise each character must be
//              unreserved, listed in USERINFO_CHARACTERS, or start a "%"
//              escape followed by two hex digits.
//   manager  - memory manager forwarded to isWellFormedAddress().
// NOTE(review): braces, else-branches and pointer increments are not visible
// in this view.
bool XMLUri::isValidServerBasedAuthority(const XMLCh* const host
, const int port
, const XMLCh* const userinfo
, MemoryManager* const manager)
// The order is important, do not change
if (!isWellFormedAddress(host, manager))
return false;
// check port number
if ((port > 65535) || (port < 0 && port != -1))
return false;
// check userinfo
if (!userinfo)
return true;
const XMLCh* tmpStr = userinfo;
while (*tmpStr)
if ( isUnreservedCharacter(*tmpStr) ||
(XMLString::indexOf(USERINFO_CHARACTERS, *tmpStr) != -1))
else if (*tmpStr == chPercent) // '%'
if (XMLString::stringLen(tmpStr) >= 3
&& XMLString::isHex(*(tmpStr+1)) // 1st hex
&& XMLString::isHex(*(tmpStr+2)) ) // 2nd hex
return false;
return false;
} //while
return true;
// Length-based check of a registry-based authority (reg_name).
//   authority, authLen - text and its length; each character must be
//                        unreserved, listed in REG_NAME_CHARACTERS, or start
//                        a "%" + two-hex-digit escape that fits within
//                        authLen.
// Returns true when the loop finishes without rejecting a character, which
// includes the case authLen == 0.
// NOTE(review): braces, else-branches and the single-character index
// increment are not visible in this view.
bool XMLUri::isValidRegistryBasedAuthority(const XMLCh* const authority,
const XMLSize_t authLen)
// check authority
XMLSize_t index = 0;
while (index < authLen)
if (isUnreservedCharacter(authority[index]) ||
(XMLString::indexOf(REG_NAME_CHARACTERS, authority[index]) != -1))
else if (authority[index] == chPercent) // '%'
if (index + 2 < authLen
&& XMLString::isHex(authority[index+1]) // 1st hex
&& XMLString::isHex(authority[index+2]) ) // 2nd hex
// Step over the whole escape sequence.
index +=3;
return false;
return false;
} //while
return true;
bool XMLUri::isValidRegistryBasedAuthority(const XMLCh* const authority)
// check authority
if (!authority)
return false;
const XMLCh* tmpStr = authority;
while (*tmpStr)
if (isUnreservedCharacter(*tmpStr) ||
(XMLString::indexOf(REG_NAME_CHARACTERS, *tmpStr) != -1))
else if (*tmpStr == chPercent) // '%'
if (XMLString::stringLen(tmpStr) >= 3
&& XMLString::isHex(*(tmpStr + 1)) // 1st hex
&& XMLString::isHex(*(tmpStr + 2))) // 2nd hex
return false;
return false;
} //while
return true;
// uric = reserved | unreserved | escaped
// escaped = "%" hex hex
// hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
// "a" | "b" | "c" | "d" | "e" | "f"
bool XMLUri::isURIString(const XMLCh* const uricString)
if (!uricString || !*uricString)
return false;
const XMLCh* tmpStr = uricString;
while (*tmpStr)
if (isReservedOrUnreservedCharacter(*tmpStr))
else if (*tmpStr == chPercent) // '%'
if (XMLString::stringLen(tmpStr) >=3
&& XMLString::isHex(*(tmpStr+1)) // 1st hex
&& XMLString::isHex(*(tmpStr+2)) ) // 2nd hex
return false;
return false;
return true;
// host = hostname | IPv4address
// hostname = *( domainlabel "." ) toplabel [ "." ]
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
// IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
//
// Checks a NUL-terminated host string.  A null or empty string is rejected;
// a string starting with '[' is checked as an IPv6 reference; anything else
// is checked as an IPv4 address or a hostname.
//
//   addrString - host text to check
//   manager    - no longer needed: the second-last '.' is now located with
//                a bounded search instead of a temporary heap copy
//
// All rules live in the length-delimited overload; this overload only
// measures the string and forwards, so the two cannot diverge.
bool XMLUri::isWellFormedAddress(const XMLCh* const addrString
                               , MemoryManager* const /*manager*/)
{
    // Check that we have a non-zero length string.
    if (!addrString || !*addrString)
        return false;

    return isWellFormedAddress(addrString, XMLString::stringLen(addrString));
}
// RFC 2732 amended RFC 2396 by replacing the definition
// of IPv4address with the one defined by RFC 2373.
// IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
//
// Checks addr[0, length) against the production above in a single pass.
// Returns true only if exactly three dots were seen and no rule listed
// below was violated.
bool XMLUri::isWellFormedIPv4Address(const XMLCh* const addr, const XMLSize_t length)
// dots seen so far
int numDots = 0;
// digits seen in the current segment
int numDigits = 0;
// IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
// make sure that
// 1) we see only digits and dot separators,
// 2) that any dot separator is preceded and followed by a digit
// 3) that we find 3 dots
// 4) that each segment contains 1 to 3 digits.
// 5) that each segment is not greater than 255.
for (XMLSize_t i = 0; i < length; ++i)
if (addr[i] == chPeriod)
// a dot may be neither the first nor the last character
if ((i == 0) ||
(i+1 == length) ||
return false;
// a dot starts a new segment
numDigits = 0;
if (++numDots > 3)
return false;
else if (!XMLString::isDigit(addr[i]))
return false;
// Check that there are no more than three digits
// in this segment.
else if (++numDigits > 3)
return false;
// Check that this segment is not greater than 255.
else if (numDigits == 3)
// the three digits are compared as characters against "255"
XMLCh first = addr[i-2];
XMLCh second = addr[i-1];
XMLCh last = addr[i];
if (!(first < chDigit_2 ||
(first == chDigit_2 &&
(second < chDigit_5 ||
(second == chDigit_5 && last <= chDigit_5)))))
return false;
} //for
return (numDots == 3);
// IPv6reference = "[" IPv6address "]"
//
// Checks whether addr[0, length) is a bracketed IPv6 literal.  The text
// between the brackets is scanned in up to three steps: a leading hex
// sequence, an optional '::' (or an embedded IPv4 address after exactly six
// groups), and a trailing hex sequence that may itself end in an IPv4
// address.  'counter' tracks how many 16-bit groups have been accounted for.
bool XMLUri::isWellFormedIPv6Reference(const XMLCh* const addr, const XMLSize_t length)
// Index of the last character, which must be the closing ']'.
// NOTE(review): for length == 0 this wraps if XMLSize_t is unsigned; the
// length > 2 test below is evaluated first, so addr[end] is not read then.
XMLSize_t end = length-1;
// Check if string is a potential match for IPv6reference.
if (!(length > 2 && addr[0] == chOpenSquare && addr[end] == chCloseSquare))
return false;
// Counter for the number of 16-bit sections read in the address.
int counter = 0;
// Scan hex sequence before possible '::' or IPv4 address.
int iIndex = scanHexSequence(addr, 1, end, counter);
if (iIndex == -1)
return false;
XMLSize_t index=(XMLSize_t)iIndex;
// Address must contain 128-bits of information.
if (index == end)
return (counter == 8);
if (index+1 < end && addr[index] == chColon)
if (addr[index+1] == chColon)
// '::' represents at least one 16-bit group of zeros.
if (++counter > 8)
return false;
index += 2;
// Trailing zeros will fill out the rest of the address.
if (index == end)
return true;
// If the second character wasn't ':', in order to be valid,
// the remainder of the string must match IPv4Address,
// and we must have read exactly 6 16-bit groups.
if (counter == 6)
return isWellFormedIPv4Address(addr+index+1, end-index-1);
return false;
return false;
// 3. Scan hex sequence after '::'.
// prevCount lets the code below tell whether this scan consumed any group.
int prevCount = counter;
iIndex = scanHexSequence(addr, index, end, counter);
if (iIndex == -1)
return false;
// NOTE(review): no statement refreshing 'index' from 'iIndex' is visible
// between the scan above and the comparison below - confirm that 'index'
// reflects the result of the second scan at this point.
// If this is the end of the address then
// we've got 128-bits of information.
if (index == end)
return true;
// The address ends in an IPv4 address, or it is invalid.
// scanHexSequence has already made sure that we have the right number of bits.
// When the second scan consumed at least one group, skip one extra character
// before the IPv4 part.
XMLSize_t shiftCount = (counter > prevCount) ? index+1 : index;
return isWellFormedIPv4Address(addr + shiftCount, end - shiftCount);
// For use with isWellFormedIPv6Reference only.
int XMLUri::scanHexSequence (const XMLCh* const addr, XMLSize_t index, XMLSize_t end, int& counter)
XMLCh testChar = chNull;
int numDigits = 0;
XMLSize_t start = index;
// Trying to match the following productions:
// hexseq = hex4 *( ":" hex4)
// hex4 = 1*4HEXDIG
for (; index < end; ++index)
testChar = addr[index];
if (testChar == chColon)
// IPv6 addresses are 128-bit, so there can be at most eight sections.
if (numDigits > 0 && ++counter > 8)
return -1;
// This could be '::'.
if (numDigits == 0 || ((index+1 < end) && addr[index+1] == chColon))
return (int)index;
numDigits = 0;
// This might be invalid or an IPv4address. If it's potentially an IPv4address,
// backup to just after the last valid character that matches hexseq.
else if (!XMLString::isHex(testChar))
if (testChar == chPeriod && numDigits < 4 && numDigits > 0 && counter <= 6)
int back = (int)index - numDigits - 1;
return (back >= (int)start) ? back : (int)start;
return -1;
// There can be at most 4 hex digits per group.
else if (++numDigits > 4)
return -1;
return (numDigits > 0 && ++counter <= 8) ? (int)end : -1;
bool XMLUri::isGenericURI()
return (getHost() != 0);
// This method will take the broken out parts of the URI and build up the
// full text. We don't do this unless someone asks us to, since its often
// never required.
void XMLUri::buildFullText()
// Calculate the worst case size of the buffer required
XMLSize_t bufSize = XMLString::stringLen(fScheme) + 1
+ XMLString::stringLen(fFragment) + 1
+ XMLString::stringLen(fHost ? fHost : fRegAuth) + 2
+ XMLString::stringLen(fPath)
+ XMLString::stringLen(fQueryString) + 1
+ XMLString::stringLen(fUserInfo) + 1
+ 32;
// Clean up the existing buffer and allocate another
fMemoryManager->deallocate(fURIText);//delete [] fURIText;
fURIText = (XMLCh*) fMemoryManager->allocate(bufSize * sizeof(XMLCh));//new XMLCh[bufSize];
*fURIText = 0;
XMLCh* outPtr = fURIText;
if (fScheme != 0)
XMLString::catString(fURIText, getScheme());
outPtr += XMLString::stringLen(fURIText);
*outPtr++ = chColon;
// Authority
if (fHost || fRegAuth)
*outPtr++ = chForwardSlash;
*outPtr++ = chForwardSlash;
// Server based authority.
if (fHost)
if (fUserInfo)
XMLString::copyString(outPtr, fUserInfo);
outPtr += XMLString::stringLen(fUserInfo);
*outPtr++ = chAt;
XMLString::copyString(outPtr, fHost);
outPtr += XMLString::stringLen(fHost);
// If the port is -1, then we don't put it in. Else we need
// to because it was explicitly provided.
if (fPort != -1)
*outPtr++ = chColon;
XMLCh tmpBuf[17];
XMLString::binToText(fPort, tmpBuf, 16, 10, fMemoryManager);
XMLString::copyString(outPtr, tmpBuf);
outPtr += XMLString::stringLen(tmpBuf);
// Registry based authority.
else {
XMLString::copyString(outPtr, fRegAuth);
outPtr += XMLString::stringLen(fRegAuth);
if (fPath)
XMLString::copyString(outPtr, fPath);
outPtr += XMLString::stringLen(fPath);
if (fQueryString)
*outPtr++ = chQuestion;
XMLString::copyString(outPtr, fQueryString);
outPtr += XMLString::stringLen(fQueryString);
if (fFragment)
*outPtr++ = chPound;
XMLString::copyString(outPtr, fFragment);
outPtr += XMLString::stringLen(fFragment);
// Cap it off in case the last op was not a string copy
*outPtr = 0;
// NOTE: no check for NULL value of uriStr (caller responsiblilty)
bool XMLUri::isValidURI(const XMLUri* const baseURI
, const XMLCh* const uriStr
, bool bAllowSpaces/*=false*/)
// get a trimmed version of uriStr
// uriStr will NO LONGER be used in this function.
const XMLCh* trimmedUriSpec = uriStr;
while (XMLChar1_0::isWhitespace(*trimmedUriSpec))
XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec);
while (trimmedUriSpecLen) {
if (XMLChar1_0::isWhitespace(trimmedUriSpec[trimmedUriSpecLen-1]))
if (trimmedUriSpecLen == 0)
if (!baseURI)
return false;
return true;
XMLSize_t index = 0;
bool foundScheme = false;
// Check for scheme, which must be before `/', '?' or '#'.
int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon);
int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash);
int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion);
int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound);
if ((colonIdx <= 0) ||
(colonIdx > slashIdx && slashIdx != -1) ||
(colonIdx > queryIdx && queryIdx != -1) ||
(colonIdx > fragmentIdx && fragmentIdx != -1))
// A standalone base is a valid URI according to spec
if (colonIdx == 0 || (!baseURI && fragmentIdx != 0))
return false;
if (!processScheme(trimmedUriSpec, index))
return false;
foundScheme = true;
// It's an error if we stop here
if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound)))
return false;
// two slashes means generic URI syntax, so we get the authority
const XMLCh* authUriSpec = trimmedUriSpec + index;
if (((index+1) < trimmedUriSpecLen) &&
XMLString::startsWith(authUriSpec, DOUBLE_SLASH))
index += 2;
XMLSize_t startPos = index;
// get authority - everything up to path, query or fragment
XMLCh testChar;
while (index < trimmedUriSpecLen)
testChar = trimmedUriSpec[index];
if (testChar == chForwardSlash ||
testChar == chQuestion ||
testChar == chPound )
// if we found authority, parse it out, otherwise we set the
// host to empty string
if (index > startPos)
if (!processAuthority(trimmedUriSpec + startPos, index - startPos))
return false;
// we need to check if index has exceed the lenght or not
if (index < trimmedUriSpecLen)
if (!processPath(trimmedUriSpec + index, trimmedUriSpecLen - index, foundScheme, bAllowSpaces))
return false;
return true;
// NOTE: no check for NULL value of uriStr (caller responsibility)
// NOTE: this routine applies the same checks as the overload taking an
// XMLUri base, but it uses a flag to indicate the existence of a baseURI
// rather than an XMLUri.
//
//   haveBaseURI  - true when a base URI is available
//   uriStr       - NUL-terminated candidate string
//   bAllowSpaces - passed through to processPath()
//
// The string is checked in order: optional scheme, optional "//" authority,
// then path/query/fragment.  An empty string (after trimming) is valid only
// when a base URI is available.
bool XMLUri::isValidURI(bool haveBaseURI, const XMLCh* const uriStr, bool bAllowSpaces/*=false*/)
// get a trimmed version of uriStr
// uriStr will NO LONGER be used in this function.
const XMLCh* trimmedUriSpec = uriStr;
// leading XML 1.0 whitespace
while (XMLChar1_0::isWhitespace(*trimmedUriSpec))
XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec);
// trailing XML 1.0 whitespace
while (trimmedUriSpecLen) {
if (XMLChar1_0::isWhitespace(trimmedUriSpec[trimmedUriSpecLen-1]))
// nothing left: acceptable only as a reference relative to a base URI
if (trimmedUriSpecLen == 0)
if (!haveBaseURI)
return false;
return true;
XMLSize_t index = 0;
bool foundScheme = false;
// Check for scheme, which must be before `/', '?' or '#'.
int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon);
int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash);
int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion);
int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound);
// No scheme if ':' is absent, is the first character, or comes after the
// first '/', '?' or '#'.
if ((colonIdx <= 0) ||
(colonIdx > slashIdx && slashIdx != -1) ||
(colonIdx > queryIdx && queryIdx != -1) ||
(colonIdx > fragmentIdx && fragmentIdx != -1))
// A standalone base is a valid URI according to spec
if (colonIdx == 0 || (!haveBaseURI && fragmentIdx != 0))
return false;
// otherwise the text before the separator must be a conformant scheme name
if (!processScheme(trimmedUriSpec, index))
return false;
foundScheme = true;
// It's an error if we stop here
if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound)))
return false;
// two slashes means generic URI syntax, so we get the authority
const XMLCh* authUriSpec = trimmedUriSpec + index;
if (((index+1) < trimmedUriSpecLen) &&
XMLString::startsWith(authUriSpec, DOUBLE_SLASH))
index += 2;
XMLSize_t startPos = index;
// get authority - everything up to path, query or fragment
XMLCh testChar;
while (index < trimmedUriSpecLen)
testChar = trimmedUriSpec[index];
if (testChar == chForwardSlash ||
testChar == chQuestion ||
testChar == chPound )
// if we found authority, parse it out, otherwise we set the
// host to empty string
if (index > startPos)
if (!processAuthority(trimmedUriSpec + startPos, index - startPos))
return false;
// we need to check if index has exceeded the length or not
if (index < trimmedUriSpecLen)
if (!processPath(trimmedUriSpec + index, trimmedUriSpecLen - index, foundScheme, bAllowSpaces))
return false;
return true;
// Checks a host given as the length-delimited buffer
// addrString[0, addrStrLen).  An empty buffer is rejected; a buffer
// starting with '[' is checked as an IPv6 reference; a buffer whose
// rightmost label starts with a digit is checked as an IPv4 address;
// anything else is checked as a hostname (at most 255 characters in total,
// labels of at most 63 characters made of alphanumerics and '-').
bool XMLUri::isWellFormedAddress(const XMLCh* const addrString,
const XMLSize_t addrStrLen)
// Check that we have a non-zero length string.
if (addrStrLen == 0)
return false;
// Check if the host is a valid IPv6reference.
if (*addrString == chOpenSquare)
return isWellFormedIPv6Reference(addrString, addrStrLen);
// Cannot start with a '.', '-', or end with a '-'.
if (*addrString == chPeriod ||
*addrString == chDash ||
addrString[addrStrLen-1] == chDash)
return false;
// rightmost domain label starting with digit indicates IP address
// since top level domain label can only start with an alpha
// see RFC 2396 Section 3.2.2
int lastPeriodPos = XMLString::lastIndexOf(chPeriod, addrString, addrStrLen);
// if the string ends with "."
// get the second last "."
if (XMLSize_t(lastPeriodPos + 1) == addrStrLen)
// search only the characters in front of the trailing '.'
lastPeriodPos = XMLString::lastIndexOf(chPeriod, addrString, lastPeriodPos);
// the label in front of a trailing '.' may not start with a digit
if ( XMLString::isDigit(addrString[lastPeriodPos + 1]))
return false;
// When no '.' was found lastPeriodPos is -1, so this looks at the first
// character of the buffer.
if (XMLString::isDigit(addrString[lastPeriodPos + 1]))
return isWellFormedIPv4Address(addrString, addrStrLen);
} // end of IPv4address
// hostname = *( domainlabel "." ) toplabel [ "." ]
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
// RFC 2396 states that hostnames take the form described in
// RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According
// to RFC 1034, hostnames are limited to 255 characters.
if (addrStrLen > 255) {
return false;
// number of characters seen in the current label
unsigned int labelCharCount = 0;
// domain labels can contain alphanumerics and '-'
// but must start and end with an alphanumeric
for (XMLSize_t i = 0; i < addrStrLen; i++)
if (addrString[i] == chPeriod)
// the characters on both sides of a '.' (where present) must be
// alphanumeric
if (((i > 0) &&
(!XMLString::isAlphaNum(addrString[i-1]))) ||
((i + 1 < addrStrLen) &&
(!XMLString::isAlphaNum(addrString[i+1]))) )
return false;
labelCharCount = 0;
else if (!XMLString::isAlphaNum(addrString[i]) &&
addrString[i] != chDash)
return false;
// RFC 1034: Labels must be 63 characters or less.
else if (++labelCharCount > 63) {
return false;
} //for
return true;
bool XMLUri::processScheme(const XMLCh* const schemeStr, XMLSize_t& index)
const XMLCh* tmpPtr = XMLString::findAny(schemeStr, SCHEME_SEPARATORS);
if (tmpPtr) {
index = tmpPtr - schemeStr;
return isConformantSchemeName(schemeStr, index);
else {
return false;
bool XMLUri::isConformantSchemeName( const XMLCh* const scheme
, const XMLSize_t schemeLen)
if (!XMLString::isAlpha(*scheme)) // first: alpha
return false;
// second onwards: ( alpha | digit | "+" | "-" | "." )
for (XMLSize_t i=1; i<schemeLen; i++)
if ( !XMLString::isAlphaNum(scheme[i]) &&
(XMLString::indexOf(SCHEME_CHARACTERS, scheme[i]) == -1))
return false;
return true;
// Splits the authority authSpec[0, authLen) into userinfo, host and port
// and validates it.  The result is true when the pieces form a valid
// server-based authority or, failing that, when the whole authority is a
// valid registry-based one.
//
// authSpec is searched with NUL-terminated string functions that do not
// stop at authLen, so every position they return is checked against
// authLen before it is used.
bool XMLUri::processAuthority( const XMLCh* const authSpec
, const XMLSize_t authLen)
int index = XMLString::indexOf(authSpec, chAt);
// offset of the next component still to be parsed
XMLSize_t start = 0;
// server = [ [ userinfo "@" ] hostport ]
// userinfo is everything up to '@'
const XMLCh* userinfo;
int userInfoLen = 0;
// an '@' counts only if it lies inside the authority
if ((index != -1) && (XMLSize_t(index) < authLen))
userinfo = authSpec;
userInfoLen = index;
start = index + 1;
// without a usable '@' the userinfo is the empty string
userinfo = XMLUni::fgZeroLenString;
// hostport = host [ ":" port ]
// host is everything up to ':', or up to
// and including ']' if followed by ':'.
// Search for port boundary.
const XMLCh* host;
XMLSize_t hostLen = 0;
// a host starting with '[' is an IPv6 reference; its own ':' characters
// are not port separators, so look for the closing ']' first
if ((start < authLen) && (authSpec[start] == chOpenSquare))
index = XMLString::indexOf(&(authSpec[start]), chCloseSquare);
if ((index != -1) && (XMLSize_t(index) < authLen))
// skip the ']'
index = ((start + index + 1) < authLen
&& authSpec[start + index + 1] == chColon) ? index+1 : -1;
// otherwise the first ':' after 'start' separates host and port
index = XMLString::indexOf(&(authSpec[start]), chColon);
if (index!=-1 && XMLSize_t(index) >= authLen)
index = -1;
// from here on 'index' is relative to 'start', or -1 when no port
// separator was found
host = &(authSpec[start]);
if (index != -1)
hostLen = index;
start += index + 1; // skip the :
// no separator: the host runs to the end of the authority
hostLen = authLen - start;
start = authLen;
// port is everything after ":"
int port = -1;
if ((hostLen) && // non empty host
(index != -1) && // ":" found
(start < authLen) ) // ":" is not the last
const XMLCh* portStr = &(authSpec[start]);
if (*portStr)
port = 0;
// accumulate the decimal value of the port digits
for (XMLSize_t i=0; i<(authLen - start); i++)
if (portStr[i] < chDigit_0 || portStr[i] > chDigit_9)
// Assume this is a registry-based authority.
port = -1;
hostLen = 0;
host = XMLUni::fgZeroLenString;
userInfoLen = 0;
userinfo = XMLUni::fgZeroLenString;
port = (port * 10) + (int) (portStr[i] - chDigit_0);
// try the server-based form first, then fall back to registry-based
return isValidServerBasedAuthority(host, hostLen, port, userinfo, userInfoLen)
|| isValidRegistryBasedAuthority(authSpec, authLen);
// Validates the path, query and fragment held in pathStr[0, pathStrLen).
//   pathStr, pathStrLen - text following the scheme/authority
//   isSchemePresent     - whether a scheme was found in front of this text
//   bAllowSpaces        - presumably permits literal spaces; the statement
//                         that consults it is not shown here - TODO confirm
// Returns false at the first malformed "%" escape or disallowed character;
// an empty buffer is accepted.
bool XMLUri::processPath(const XMLCh* const pathStr,
const XMLSize_t pathStrLen,
const bool isSchemePresent,
const bool bAllowSpaces/*=false*/)
if (pathStrLen != 0)
XMLSize_t index = 0;
XMLCh testChar = chNull;
// NOTE(review): this flag is true when there is no scheme or the text
// starts with '/'; in that case isPathCharacter() is applied below,
// otherwise isReservedCharacter() - confirm the name matches the intent.
bool isOpaque = (!isSchemePresent || *pathStr == chForwardSlash);
// path - everything up to query string or fragment
// RFC 2732 only allows '[' and ']' to appear in the opaque part.
while (index < pathStrLen)
testChar = pathStr[index];
// '?' or '#' ends the path part
if (testChar == chQuestion || testChar == chPound)
// '%' must be followed by two hex digits inside the buffer
if (testChar == chPercent)
if (index+2 >= pathStrLen ||
!XMLString::isHex(pathStr[index+1]) ||
return false;
else if (testChar==chSpace)
return false;
else if (!isUnreservedCharacter(testChar) &&
((isOpaque && !isPathCharacter(testChar)) ||
(!isOpaque && !isReservedCharacter(testChar))))
return false;
// query - starts with ? and up to fragment or end
// fragment - starts with #
bool isQuery = (testChar == chQuestion);
if (isQuery || testChar == chPound)
while (index < pathStrLen)
testChar = pathStr[index];
// the first '#' inside a query switches from query to fragment
if (testChar == chPound && isQuery) {
isQuery = false;
// '%' must be followed by two hex digits inside the buffer
if (testChar == chPercent)
if (index+2 >= pathStrLen ||
!XMLString::isHex(pathStr[index+1]) ||
return false;
else if (testChar==chSpace)
return false;
else if (!isReservedOrUnreservedCharacter(testChar))
return false;
} //if (pathStrLen...)
return true;
* [Bug7698]: filenames with embedded spaces in schemaLocation strings not handled properly
* This method is called when Scanner/TraverseSchema knows that the URI reference is
* for local file.
// Walks the NUL-terminated systemURI and recognises the three-character
// sequence '%', '2', '0', which is skipped as a unit.
//   systemURI     - source text
//   normalizedURI - output buffer; presumably receives the rewritten text,
//                   the statements that fill it are not shown here - TODO confirm
void XMLUri::normalizeURI(const XMLCh* const systemURI,
XMLBuffer& normalizedURI)
const XMLCh* pszSrc = systemURI;
while (*pszSrc) {
// The && chain stops at the first mismatch, so when '%' is the last
// character the look-ahead reads only the terminator that follows it.
if ((*(pszSrc) == chPercent)
&& (*(pszSrc+1) == chDigit_2)
&& (*(pszSrc+2) == chDigit_0))
pszSrc += 3;
* Support for Serialization/De-serialization
// Serialization entry point: the action taken depends on whether serEng is
// currently storing (serEng.isStoring()).
void XMLUri::serialize(XSerializeEngine& serEng)
if (serEng.isStoring())
// Constructs an XMLUri with no components: the port is set to -1, every
// component pointer and the cached full text are set to null, and 'manager'
// is stored as the memory manager.
XMLUri::XMLUri(MemoryManager* const manager)
: fPort(-1)
, fScheme(0)
, fUserInfo(0)
, fHost(0)
, fRegAuth(0)
, fPath(0)
, fQueryString(0)
, fFragment(0)
, fURIText(0)
, fMemoryManager(manager)