blob: b75f57f6c4d232841fd3b2855b27c7394d85a6d3 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 1999-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Id$
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/XMLUniDefs.hpp>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/internal/ReaderMgr.hpp>
#include <xercesc/internal/XMLScanner.hpp>
#include <xercesc/validators/DTD/DTDValidator.hpp>
// ---------------------------------------------------------------------------
// DTDValidator: Constructors and Destructor
// ---------------------------------------------------------------------------
//
//  Builds a DTD validator. The error reporter is handed straight to the
//  XMLValidator base class, the grammar pointer starts out null, and
//  reset() is invoked so the object begins in its reset state.
//
DTDValidator::DTDValidator(XMLErrorReporter* const errReporter)
    : XMLValidator(errReporter)
    , fDTDGrammar(0)
{
    reset();
}
//
//  Destructor. Nothing is released here; the body is intentionally empty.
//
DTDValidator::~DTDValidator()
{
}
// ---------------------------------------------------------------------------
// DTDValidator: Implementation of the XMLValidator interface
// ---------------------------------------------------------------------------
//
//  Checks the children of an element against the content model declared
//  for it.
//
//  elemDecl    The declaration of the element being checked. Must not be
//              null; a RuntimeException (Val_InvalidElemId) is thrown if
//              it is.
//  children    The child names to validate.
//  childCount  The number of entries in 'children'.
//
//  Returns -1 when the content is acceptable. For an EMPTY element that
//  has children, 0 is returned as the index of the first bad child. For
//  mixed and children models, whatever the content model's
//  validateContent() returns is passed straight back. An unrecognized
//  model type raises a RuntimeException (CM_UnknownCMType).
//
int DTDValidator::checkContent(XMLElementDecl* const  elemDecl
                              , QName** const         children
                              , const unsigned int    childCount)
{
    // Without a declaration there is nothing to validate against
    if (!elemDecl)
        ThrowXML(RuntimeException, XMLExcepts::Val_InvalidElemId);

    // The model type of the declaration decides how we validate
    switch (((DTDElementDecl*) elemDecl)->getModelType())
    {
        case DTDElementDecl::Empty :
            // No children allowed, so the very first one is the bad one
            if (childCount)
                return 0;
            break;

        case DTDElementDecl::Any :
            // Anything goes, so there is nothing to check
            break;

        case DTDElementDecl::Mixed_Simple :
        case DTDElementDecl::Children :
        {
            // Get the content model (or fault it in) and delegate to it
            const XMLContentModel* const contentModel = elemDecl->getContentModel();
            return contentModel->validateContent
            (
                children
                , childCount
                , getScanner()->getEmptyNamespaceId()
            );
        }

        default :
            ThrowXML(RuntimeException, XMLExcepts::CM_UnknownCMType);
    }

    // Nothing objected, so report success
    return -1;
}
//
//  Fills 'toFill' from the attribute definition 'attDef', using the
//  definition's value and type.
//
//  The URI id is always set to zero here. Per the original note, the
//  scanner that called us is expected to look at the stored prefix (if
//  any) and resolve the real URI id itself.
//
void DTDValidator::faultInAttr(XMLAttr& toFill, const XMLAttDef& attDef) const
{
    const XMLCh* const qualifiedName = attDef.getFullName();
    const int sepPos = XMLString::indexOf(qualifiedName, chColon);

    // No colon means no prefix: use the full name as is, with an empty prefix
    if (sepPos == -1)
    {
        toFill.set
        (
            0
            , qualifiedName
            , XMLUni::fgZeroLenString
            , attDef.getValue()
            , attDef.getType()
        );
        return;
    }

    //
    //  There is a colon. Work on a private copy of the name and overwrite
    //  the colon with a null, which leaves the prefix at the start of the
    //  buffer and the local part right after the null. The janitor frees
    //  the copy when we leave.
    //
    XMLCh* const splitBuf = XMLString::replicate(qualifiedName);
    ArrayJanitor<XMLCh> splitBufJan(splitBuf);
    splitBuf[sepPos] = chNull;

    toFill.set
    (
        0
        , splitBuf + sepPos + 1
        , splitBuf
        , attDef.getValue()
        , attDef.getType()
    );
}
//
//  Resets the validator. There is currently nothing to do here, so the
//  body is empty.
//
void DTDValidator::reset()
{
}
//
//  Reports whether this validator needs namespace processing. DTDs do not
//  support namespaces, so the answer is always false.
//
bool DTDValidator::requiresNamespaces() const
{
    return false;
}
//
//  Validates an attribute value against the DTD level constraints of its
//  attribute definition. Problems are reported through emitError(); the
//  method itself returns nothing.
//
//  attDef      The definition of the attribute. Its type, default type,
//              default/fixed value, full name and enumeration list are
//              all consulted.
//  attrValue   The null terminated value to check. It is not modified; a
//              private copy is made for tokenizing.
//
//  The checks, in order:
//
//  1)  A #FIXED attribute must match the declared value.
//  2)  CDATA attributes need nothing more.
//  3)  Any other type must not be empty.
//  4)  The value is split at whitespace and each token is checked as a
//      name or name token, depending on the type. ID/IDREF(S) tokens are
//      recorded in the scanner's ID ref list, ENTITY/ENTITIES tokens must
//      name an unparsed entity in the grammar, and NOTATION/enumeration
//      tokens must appear in the enumeration list.
//
void
DTDValidator::validateAttrValue(const   XMLAttDef*      attDef
                                , const XMLCh* const    attrValue)
{
    //
    //  Get quick refs to lots of the stuff in the passed objects in
    //  order to simplify the code below, which will reference them very
    //  often.
    //
    const XMLAttDef::AttTypes       type = attDef->getType();
    const XMLAttDef::DefAttTypes    defType = attDef->getDefaultType();
    const XMLCh* const              valueText = attDef->getValue();
    const XMLCh* const              fullName = attDef->getFullName();
    const XMLCh* const              enumList = attDef->getEnumeration();

    //
    //  If the default type is fixed, then make sure the passed value maps
    //  to the fixed value.
    //
    if (defType == XMLAttDef::Fixed)
    {
        if (XMLString::compareString(attrValue, valueText))
            emitError(XMLValid::NotSameAsFixedValue, fullName, attrValue, valueText);
    }

    //
    //  If its a CDATA attribute, then we are done with any DTD level
    //  validation else do the rest.
    //
    if (type == XMLAttDef::CData)
        return;

    // An empty string cannot be valid for any of the other types
    if (!attrValue[0])
    {
        emitError(XMLValid::InvalidEmptyAttValue, fullName);
        return;
    }

    // See whether we are doing multiple values or not
    const bool multipleValues =
    (
        (type == XMLAttDef::IDRefs)
        || (type == XMLAttDef::Entities)
        || (type == XMLAttDef::NmTokens)
        || (type == XMLAttDef::Notation)
        || (type == XMLAttDef::Enumeration)
    );

    // And whether we must check for a first name char
    const bool firstNameChar =
    (
        (type == XMLAttDef::ID)
        || (type == XMLAttDef::IDRef)
        || (type == XMLAttDef::IDRefs)
        || (type == XMLAttDef::Entity)
        || (type == XMLAttDef::Entities)
        || (type == XMLAttDef::Notation)
    );

    // Whether it requires ref checking stuff
    const bool isARefType =
    (
        (type == XMLAttDef::ID)
        || (type == XMLAttDef::IDRef)
        || (type == XMLAttDef::IDRefs)
    );

    // Trigger flag: set once the token just scanned ended at the real null
    bool alreadyCapped = false;

    //
    //  Make a copy of the text that we can mangle and get a pointer we can
    //  move through the value. Use a stack-based buffer when the value
    //  (plus its null) fits, else fall back to a heap copy that the
    //  janitor will clean up.
    //
    XMLCh tempBuffer[100];
    XMLCh* pszTmpVal = 0;
    ArrayJanitor<XMLCh> janTmpVal(0);

    if (XMLString::stringLen(attrValue) < sizeof(tempBuffer) / sizeof(tempBuffer[0]))
    {
        XMLString::copyString(tempBuffer, attrValue);
        pszTmpVal = tempBuffer;
    }
    else
    {
        janTmpVal.reset(XMLString::replicate(attrValue));
        pszTmpVal = janTmpVal.get();
    }

    //
    //  pszTmpVal always points at the start of the current token and
    //  valPtr is the scanning position within it.
    //
    XMLCh* valPtr = pszTmpVal;

    while (true)
    {
        //
        //  Make sure the first character is a valid first name char, i.e.
        //  if its a Name value. For NmToken values we don't treat the first
        //  char any differently.
        //
        if (firstNameChar)
        {
            //
            //  If the token is empty (which happens on the second or later
            //  pass when the value ends in whitespace) it cannot be a name.
            //  We must not step over its null, since that would move the
            //  scan pointer beyond the end of the buffer. So report it and
            //  give up.
            //
            if (!(*valPtr))
            {
                emitError(XMLValid::AttrValNotName, fullName);
                return;
            }

            // If its not, emit an error but try to keep going
            if (!XMLReader::isFirstNameChar(*valPtr))
                emitError(XMLValid::AttrValNotName, fullName);
            valPtr++;
        }

        // Make sure all the remaining chars are valid name chars
        while (*valPtr)
        {
            //
            //  If we hit a whitespace, its either a break between two
            //  or more values, or an error if we have a single value.
            //
            if (XMLReader::isWhitespace(*valPtr))
            {
                if (!multipleValues)
                {
                    emitError(XMLValid::NoMultipleValues, fullName);
                    return;
                }
                break;
            }

            if (!XMLReader::isNameChar(*valPtr))
            {
                emitError(XMLValid::AttrValNotName, fullName);
                return;
            }
            valPtr++;
        }

        //
        //  Cap it off at the current non-name char. If already capped,
        //  then remember this.
        //
        if (!(*valPtr))
            alreadyCapped = true;
        *valPtr = 0;

        //
        //  If this type of attribute requires that we track reference
        //  stuff, then handle that.
        //
        if (isARefType)
        {
            // Find the ref info for this name, or add one if its new
            XMLRefInfo* find = getScanner()->getIDRefList()->get(pszTmpVal);
            if (find)
            {
                // An ID value may only be declared once
                if (find->getDeclared() && (type == XMLAttDef::ID))
                    emitError(XMLValid::ReusedIDValue, pszTmpVal);
            }
            else
            {
                find = new XMLRefInfo(pszTmpVal);
                getScanner()->getIDRefList()->put((void*)find->getRefName(), find);
            }

            //
            //  Mark it declared or used, which might be redundant in some
            //  cases but not worth checking
            //
            if (type == XMLAttDef::ID)
                find->setDeclared(true);
            else
                find->setUsed(true);
        }
        else if ((type == XMLAttDef::Entity) || (type == XMLAttDef::Entities))
        {
            //
            //  If its referring to an entity, then look up the name in the
            //  general entity pool. If not there, then its an error. If its
            //  not an external unparsed entity, then its an error.
            //
            const XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(pszTmpVal);
            if (decl)
            {
                if (!decl->isUnparsed())
                    emitError(XMLValid::BadEntityRefAttr, fullName);
            }
            else
            {
                emitError
                (
                    XMLValid::UnknownEntityRefAttr
                    , fullName
                    , pszTmpVal
                );
            }
        }
        else if ((type == XMLAttDef::Notation) || (type == XMLAttDef::Enumeration))
        {
            //
            //  Make sure that this value maps to one of the enumeration or
            //  notation values in the enumList parameter. We don't have to
            //  look it up in the notation pool (if a notation) because we
            //  will look up the enumerated values themselves. If they are in
            //  the notation pool (after the DTD is parsed), then obviously
            //  this value will be legal since it matches one of them.
            //
            if (!XMLString::isInList(pszTmpVal, enumList))
                emitError(XMLValid::DoesNotMatchEnumList, fullName);
        }

        // If not doing multiple values, then we are done
        if (!multipleValues)
            break;

        //
        //  If we are at the end, then break out now, else move up to the
        //  next char and update the base pointer.
        //
        if (alreadyCapped)
            break;

        valPtr++;
        pszTmpVal = valPtr;
    }
}
void DTDValidator::preContentValidation(bool reuseGrammar,
bool validateDefAttr)
{
//
// Lets enumerate all of the elements in the element decl pool
// and put out an error for any that did not get declared.
// We also check all of the attributes as well.
//
NameIdPoolEnumerator<DTDElementDecl> elemEnum = fDTDGrammar->getElemEnumerator();
while (elemEnum.hasMoreElements())
{
const DTDElementDecl& curElem = elemEnum.nextElement();
const DTDElementDecl::CreateReasons reason = curElem.getCreateReason();
//
// See if this element decl was ever marked as declared. If
// not, then put out an error. In some cases its just
// a warning, such as being referenced in a content model.
//
if (reason != XMLElementDecl::Declared)
{
if (reason == XMLElementDecl::AttList)
{
getScanner()->emitError
(
XMLErrs::UndeclaredElemInAttList
, curElem.getFullName()
);
}
else if (reason == XMLElementDecl::AsRootElem)
{
// It's ok that the root element is not declared in the DTD
/*
emitError
(
XMLValid::UndeclaredElemInDocType
, curElem.getFullName()
);*/
}
else if (reason == XMLElementDecl::InContentModel)
{
getScanner()->emitError
(
XMLErrs::UndeclaredElemInCM
, curElem.getFullName()
);
}
else
{
#if defined(XERCES_DEBUG)
if(reuseGrammar && reason == XMLElementDecl::JustFaultIn){
}
else
ThrowXML(RuntimeException, XMLExcepts::DTD_UnknownCreateReason);
#endif
}
}
//
// Check all of the attributes of the current element.
// We check for:
//
// 1) Multiple ID attributes
// 2) That all of the default values of attributes are
// valid for their type.
// 3) That for any notation types, that their lists
// of possible values refer to declared notations.
//
XMLAttDefList& attDefList = curElem.getAttDefList();
bool seenId = false;
while (attDefList.hasMoreElements())
{
const XMLAttDef& curAttDef = attDefList.nextElement();
if (curAttDef.getType() == XMLAttDef::ID)
{
if (seenId)
{
emitError
(
XMLValid::MultipleIdAttrs
, curElem.getFullName()
);
break;
}
seenId = true;
}
else if (curAttDef.getType() == XMLAttDef::Notation)
{
//
// We need to verify that all of its possible values
// (in the enum list) refer to valid notations.
//
XMLCh* list = XMLString::replicate(curAttDef.getEnumeration());
ArrayJanitor<XMLCh> janList(list);
//
// Search forward for a space or a null. If a null,
// we are done. If a space, cap it and look it up.
//
bool breakFlag = false;
XMLCh* listPtr = list;
XMLCh* lastPtr = listPtr;
while (true)
{
while (*listPtr && (*listPtr != chSpace))
listPtr++;
//
// If at the end, indicate we need to break after
// this one. Else, cap it off here.
//
if (!*listPtr)
breakFlag = true;
else
*listPtr = chNull;
if (!fDTDGrammar->getNotationDecl(lastPtr))
{
emitError
(
XMLValid::UnknownNotRefAttr
, curAttDef.getFullName()
, lastPtr
);
}
// Break out if we hit the end last time
if (breakFlag)
break;
// Else move upwards and try again
listPtr++;
lastPtr = listPtr;
}
}
// If it has a default/fixed value, then validate it
if (validateDefAttr && curAttDef.getValue())
{
validateAttrValue
(
&curAttDef
, curAttDef.getValue()
);
}
}
}
//
// And enumerate all of the general entities. If any of them
// reference a notation, then make sure the notation exists.
//
NameIdPoolEnumerator<DTDEntityDecl> entEnum = fDTDGrammar->getEntityEnumerator();
while (entEnum.hasMoreElements())
{
const DTDEntityDecl& curEntity = entEnum.nextElement();
if (!curEntity.getNotationName())
continue;
// It has a notation name, so look it up
if (!fDTDGrammar->getNotationDecl(curEntity.getNotationName()))
{
emitError
(
XMLValid::NotationNotDeclared
, curEntity.getNotationName()
);
}
}
}
//
//  Hook for validation after the parse. Nothing is done here at this
//  time; per the original note, ID/IDREF validation is handled by the
//  scanner itself, since it is the same for every kind of validator.
//
void DTDValidator::postParseValidation()
{
}