blob: c661fbe8da542dc87567bfb1fb720f74610d31c1 [file] [log] [blame]
/* Copyright (c) 2012-2015 Tresys Technology, LLC. All rights reserved.
*
* Developed by: Tresys Technology, LLC
* http://www.tresys.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal with
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of Tresys Technology, nor the names of its contributors
* may be used to endorse or promote products derived from this Software
* without specific prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
* SOFTWARE.
*/
package edu.illinois.ncsa.daffodil.xml
import java.net.URI
import java.net.URISyntaxException
import scala.language.reflectiveCalls
import scala.util.Try
import edu.illinois.ncsa.daffodil.api.DaffodilTunables
import edu.illinois.ncsa.daffodil.api.UnqualifiedPathStepPolicy
import edu.illinois.ncsa.daffodil.equality.TypeEqual
import edu.illinois.ncsa.daffodil.exceptions.Assert
/**
* Please centralize QName handling here.
*
* This can be XSD-specific, but should be generic capabilities.
*
* There are two concrete classes of QNames for named things: Global and Local
*
* There are two concrete classes of referencing QNames, for references to
* global objects (as in ref="tns:bar" and type="xs:int") and for references
* which can be to local objects as well as global, such as in DPath step expressions like
* ../foo:bar/baz.
*
* There is a precedent for how xpath expressions work in XSD which is the xpath
* expressions that are used in the xs:selector and xs:field sub-elements of the
* xs:key and xs:unique constraints.
*
* Based on this, there are two kinds of named things: globally named things, and
* locally. The only locally named things are local element declarations. Global things
* include elements, types, groups, etc.
*
* These distinctions are necessary because of XSD's elementFormDefault concept.
* This makes matching against a local name either equivalent to matching against
* a global name (elementFormDefault="qualified"), or local name matching is
* namespace independent (elementFormDefault="unqualified"), which is the default
* behavior.
*
* The QName objects are always constructed with a prefix (if present), local
* name part, and namespace.
*
* Consider this case:
*
* <xs:element ref="bar" xmlns="someURN".../>
*
* In that case, the name "bar" is resolved in the default namespace to be
* {someURN}bar.
*
* However, in the absence of a default namespace binding, the name "bar" will
* resolve to {No_Namespace}bar, and that will only have a counterpart if
* there is a schema with no target namespace which defines bar as a global def
* of some sort.
*
* Now consider this case:
*
* <xs:schema targetNamespace="someURN">
* ...
* <xs:element name="foo" form="qualified">
* <xs:complexType>
* <xs:sequence>
* <xs:element name="bar" form="qualfied"..../>
*
* Now, in some other place we have a DPath such as
*
* <.... xmlns="someURN"
* <xs:selector xpath="../foo/bar"/>
*
* This does not match because despite the xmlns defining a default namespace,
* that is not used when considering path steps. Both foo and bar are qualified
* which means path steps referencing them MUST have prefixes on them.
* (This is the behavior of Xerces as of 2014-09-29)
*
* If there is no default namespace at the point of reference, then
* this will only match if
* (1) There is no targetNamespace surrounding the decl of element bar. (Which ought
* to draw a warning given that there is no namespace but there is an explicit request
* for qualified names.)
* (2) The schema's element form default is 'unqualified' (which is the
* default). In this case the local name is the only thing that has to match.
*
* This latter case is represented by a StepRef with NoNamespace for the namespace,
* matching against a LocalDeclQName, which will have NoNamespace by way of
* the elementFormDefault being unqualified.
*
*/
/*
* Use this factory to create the right kinds of QNames.
*/
object QName {
def resolveRef(qnameString: String, scope: scala.xml.NamespaceBinding, tunable: DaffodilTunables): Try[RefQName] =
RefQNameFactory.resolveRef(qnameString, scope, tunable)
/**
* Specialized getQName function for handling
* manually specified variables via the CLI.
*
* Variables will be of the format:
*
* 1. {nsURI}varName=value
* 2. {}varName=value // explicitly means NoNamespace
* 3. varName=value // unspecified namespace, i.e., might default
*/
def refQNameFromExtendedSyntax(extSyntax: String): Try[RefQName] = Try {
val res =
try {
extSyntax match {
case QNameRegex.ExtQName(prefix, uriString, local) => {
val pre = if (prefix eq null) None else Some(prefix)
val ns = (pre, uriString) match {
case (Some(pre), "") => throw new ExtendedQNameSyntaxException(Some(extSyntax), None)
case (Some(pre), null) => UnspecifiedNamespace
case (Some(pre), s) => NS(s)
case (None, "") => NoNamespace
case (None, null) => UnspecifiedNamespace
case (None, s) => NS(s)
}
val rqn = RefQName(pre, local, ns)
rqn
}
case _ => throw new ExtendedQNameSyntaxException(Some(extSyntax), None)
}
} catch {
case ex: URISyntaxException => throw new ExtendedQNameSyntaxException(Some(extSyntax), Some(ex))
case ia: IllegalArgumentException => {
val ex = ia.getCause()
if (ex ne null)
throw new ExtendedQNameSyntaxException(Some(extSyntax), Some(ex))
else
throw ia
}
}
res
}
def resolveStep(qnameString: String, scope: scala.xml.NamespaceBinding, tunable: DaffodilTunables): Try[StepQName] =
StepQNameFactory.resolveRef(qnameString, scope, tunable)
def createLocal(name: String, targetNamespace: NS, isQualified: Boolean,
scope: scala.xml.NamespaceBinding) = {
val ns = if (isQualified) targetNamespace else NoNamespace
// TODO: where we parse xmlSchemaDocument, a check for
// xs:schema with no target namespace, but elementFormDefault 'qualified'
// should emit a warning. It is not ALWAYS incorrect, as a schema
// designed for inclusion into another schema (via xs:include)
// can take a position on how its local element names are to
// be qualified without having a target namespace itself.
//
// But,... it is very likely to be an error.
//
LocalDeclQName(None, name, ns)
}
def createGlobal(name: String, targetNamespace: NS, scope: scala.xml.NamespaceBinding) = {
val optPrefix =
if (scope eq null) None
else {
val prefixes = NS.allPrefixes(targetNamespace, scope)
prefixes.length match {
case 0 => None
case 1 => Option(prefixes.head)
case _ => {
//
// suppose we have xmlns="..." xmlns:ex="..." xmlns:tns="..."
//
// We want to prefer ex as the prefix in this case.
// So we take the shortest prefix that isn't empty string.
//
val notNullPrefixes = prefixes.filter(_ ne null) // remove null prefix. There must be a non-empty one.
notNullPrefixes.foreach { p => Assert.invariant(p.length > 0) }
val pairs = notNullPrefixes.map { _.length } zip notNullPrefixes
val first = pairs.head // there has to be 1 at least. But there might be only 1
val minLengthPair = pairs.foldLeft(first) { case (x @ (xlen, _), y @ (ylen, _)) => if (xlen <= ylen) x else y }
val (n, shortest) = minLengthPair
Assert.invariant(n > 0)
Some(shortest)
}
}
}
GlobalQName(optPrefix, name, targetNamespace)
}
}
protected sealed abstract class QNameSyntaxExceptionBase(kind: String, offendingSyntax: Option[String], cause: Option[Throwable])
extends Exception(offendingSyntax.getOrElse(null), cause.getOrElse(null)) {
override def getMessage = {
val intro = "Invalid syntax for " + kind + " "
val details = (offendingSyntax, cause) match {
case (Some(syntax), Some(cause)) => "'%s'. Caused by: '%s'".format(syntax, cause)
case (None, Some(cause)) => "'%s'".format(cause.getMessage())
case (Some(syntax), None) => "'%s'.".format(syntax)
case _ => Assert.usageError("supply either offendingSyntax, or cause or both")
}
intro + details
}
}
class ExtendedQNameSyntaxException(offendingSyntax: Option[String], cause: Option[Throwable])
extends QNameSyntaxExceptionBase("extended QName", offendingSyntax, cause)
class QNameSyntaxException(offendingSyntax: Option[String], cause: Option[Throwable])
extends QNameSyntaxExceptionBase("QName", offendingSyntax, cause)
class QNameUndefinedPrefixException(pre: String)
extends Exception("Undefined QName prefix '%s'".format(pre))
trait QNameBase {
/**
* The prefix is not generally involved in matching, but they
* must show up in diagnostic messages. Mistakes by having the wrong prefix
* or by omitting one, are very common.
*/
def prefix: Option[String]
def local: String
def namespace: NS // No namespace is represented by the NoNamespace object.
override def toString = toPrettyString
/**
* For purposes of hashCode and equals, we disregard the prefix
*/
override lazy val hashCode = namespace.hashCode + local.hashCode
override def equals(other: Any) = {
val res = other match {
case qn: QNameBase => (local =:= qn.local && namespace =:= qn.namespace)
case _ => false
}
res
}
/**
* Provides name with namespace information. Uses prefix if that is
* appropriate, otherwise puts out the namespace in braces. Empty braces are
* the no-namespace indicator.
*
* Incorrectly defined names are not tolerated.
*/
def toPrettyString: String = {
(prefix, local, namespace) match {
case (Some(pre), local, NoNamespace) => Assert.invariantFailed("QName has prefix, but NoNamespace")
case (Some(pre), local, UnspecifiedNamespace) => pre + ":" + local
case (None, local, NoNamespace) => "{}" + local
case (None, local, UnspecifiedNamespace) => local
case (None, local, ns) => "{" + ns + "}" + local
case (Some(pre), local, ns) => pre + ":" + local
}
}
/**
* displays all components that are available.
*/
def toExtendedSyntax: String = {
(prefix, local, namespace) match {
case (Some(pre), local, NoNamespace) => Assert.invariantFailed("QName has prefix, but NoNamespace")
case (Some(pre), local, UnspecifiedNamespace) => Assert.invariantFailed("QName has prefix, but unspecified namespace")
case (None, local, NoNamespace) => "{}" + local
case (None, local, UnspecifiedNamespace) => local
case (None, local, ns) => "{" + ns + "}" + local
//
// This is a hack to avoid printing out tns prefixes just becasue a
// schema author has chosen to use the tns prefix for the same uri as
// the target namespace. Really we only want to print out a prefix
// if it's a meaningful prefix that will distinguish something.
//
case (Some("tns"), local, ns) => "{" + ns + "}" + local // ignore the "tns" prefix.
case (Some(pre), local, ns) => pre + ":{" + ns + "}" + local
}
}
/**
* Never displays the namespace. Never complains about inconsistencies.
* Provides back what the schema author ought to think of as the name
* of the thing.
*/
def diagnosticDebugName: String = {
(prefix, local, namespace) match {
case (Some(pre), local, NoNamespace) => pre + ":" + local // generally this is an error. Shouldn't have a prefix.
case (Some(pre), local, UnspecifiedNamespace) => pre + ":" + local
case (None, local, NoNamespace) => local
case (None, local, UnspecifiedNamespace) => local
case (None, local, ns) => local
//
// See comment above about tns hack
//
case (Some("tns"), local, ns) => local
case (Some(pre), local, ns) => pre + ":" + local
}
}
/**
* Creates a string suitable for use in an XML attribute as in 'dfdl:terminator="..."' or
* 'xsi:nil="true"'
*/
def toAttributeNameString: String = {
(prefix, local, namespace) match {
case (None, local, NoNamespace) => local
case (None, local, ns) => Assert.invariantFailed("QName has namespace, but no prefix defined.")
case _ => toPrettyString
}
}
/**
* Just turns into a prefix (optionally) then the local name e.g, foo:bar
* or if there is no prefix, just bar.
*/
def toQNameString: String = {
if (prefix.isDefined) prefix.get + ":" + local else local
}
def matches[Q <: QNameBase](other: Q): Boolean
}
/**
* common base trait for named things, both global and local
*/
sealed trait NamedQName
extends QNameBase {
if (prefix.isDefined) {
Assert.usage(!namespace.isNoNamespace)
Assert.usage(!namespace.isUnspecified)
}
}
/**
* QName for a local declaration.
*/
final case class LocalDeclQName(prefix: Option[String], local: String, namespace: NS)
extends NamedQName {
override def matches[Q <: QNameBase](other: Q): Boolean = {
other match {
case StepQName(_, `local`, `namespace`) => {
// exact match
//
// This means either this was unqualified so the namespace
// arg is NoNamespace, and the local matched that, or...
// it means this was qualified but the other matched that
// (which can happen even without a prefix if there is a default
// namespace i.e., xmlns="...")
true
}
case StepQName(_, `local`, NoNamespace) => {
//
// This case matches if the other has no NS qualifier
// but in this case, this local decl name DOES have
// some other URI as its namespace, so there is
// explicitly no match here.
//
false
}
case StepQName(_, `local`, _) => false // NS didn't match, even if local did we don't care.
case StepQName(_, notLocal, _) => false
case _ => Assert.usageError("other must be a StepQName")
}
}
}
/**
* QName for a global declaration or definition (of element, type, group, format, etc.)
*/
final case class GlobalQName(prefix: Option[String], local: String, namespace: NS)
extends NamedQName {
override def matches[Q <: QNameBase](other: Q): Boolean = {
other match {
// StepQNames match against global names in the case of a path
// step that refers to an element that is defined in its
// group, via an element reference.
case StepQName(_, `local`, `namespace`) => true // exact match
case StepQName(_, _, _) => false
// RefQNames match against global names in element references,
// group references, type references (i.e., type="..."), etc.
case RefQName(_, `local`, `namespace`) => true // exact match
case RefQName(_, _, _) => false
case _ => Assert.usageError("other must be a StepQName or RefQName")
}
}
}
/**
* base trait for Qnames that reference other things
*/
protected sealed trait RefQNameBase extends QNameBase
/**
* A qname as found in a ref="foo:bar" attribute, or a type="foo:barType" attribute.
* Or a variable reference e.g., \$foo:barVar in an expression.
*
* These are references to globally defined things.
*/
final case class RefQName(prefix: Option[String], local: String, namespace: NS)
extends RefQNameBase {
override def matches[Q <: QNameBase](other: Q): Boolean = {
other match {
case named: GlobalQName => named.matches(this)
case _ => Assert.usageError("other must be a GlobalQName")
}
}
def toStepQName = StepQName(prefix, local, namespace)
def toGlobalQName = GlobalQName(prefix, local, namespace)
}
/**
* A qname as found in a path step as in ../foo:bar/baz/quux
*
* Differs from RefQName in that it has to match up to LocalDeclQNames
* properly.
*/
final case class StepQName(prefix: Option[String], local: String, namespace: NS)
extends RefQNameBase {
override def matches[Q <: QNameBase](other: Q): Boolean = {
other match {
case named: NamedQName => named.matches(this) // let the named things decide how other things match them.
case _ => Assert.usageError("other must be a NamedQName")
}
}
/**
* Finds the matches in a list of things that have QNames.
* Used for finding if a named path step has corresponding element declaration.
*
* Handles local or global matches
*
*/
def findMatches[T <: { def namedQName: NamedQName }](candidates: Seq[T]): Seq[T] = {
val matched = candidates.filter { x =>
val other = x.namedQName
val res = matches(other)
res
}
matched
}
}
protected trait RefQNameFactoryBase[T] {
protected def resolveDefaultNamespace(scope: scala.xml.NamespaceBinding, tunable: DaffodilTunables): Option[String]
protected def constructor(prefix: Option[String], local: String, namespace: NS): T
def resolveRef(qnameString: String, scope: scala.xml.NamespaceBinding, tunable: DaffodilTunables): Try[T] = Try {
qnameString match {
case QNameRegex.QName(pre, local) => {
val prefix = Option(pre)
// note that the prefix, if defined, can never be ""
val optURI = prefix match {
case None => resolveDefaultNamespace(scope, tunable)
case Some(pre) => Option(scope.getURI(pre))
}
val ns = (prefix, optURI) match {
case (None, None) => NoNamespace
case (Some(pre), None) => throw new QNameUndefinedPrefixException(pre)
case (_, Some(ns)) => NS(ns)
}
val res = constructor(prefix, local, ns)
res
}
case _ => throw new QNameSyntaxException(Some(qnameString), None)
}
}
}
object RefQNameFactory extends RefQNameFactoryBase[RefQName] {
override def constructor(prefix: Option[String], local: String, namespace: NS) =
RefQName(prefix, local, namespace)
override def resolveDefaultNamespace(scope: scala.xml.NamespaceBinding, tunable: DaffodilTunables) =
Option(scope.getURI(null)) // could be a default namespace
}
object StepQNameFactory extends RefQNameFactoryBase[StepQName] {
override def constructor(prefix: Option[String], local: String, namespace: NS) =
StepQName(prefix, local, namespace)
/* This is what needs Tunables and propagates into Expression */
override def resolveDefaultNamespace(scope: scala.xml.NamespaceBinding, tunable: DaffodilTunables) = {
tunable.unqualifiedPathStepPolicy match {
case UnqualifiedPathStepPolicy.NoNamespace => None // don't consider default namespace
case UnqualifiedPathStepPolicy.DefaultNamespace => Option(scope.getURI(null)) // could be a default namespace
case UnqualifiedPathStepPolicy.PreferDefaultNamespace => Option(scope.getURI(null)) // could be a default namespace
}
}
}
object QNameRegex {
private val xC0_D6 = ("""\x{C0}-\x{D6}""")
private val xD8_F6 = """\x{D8}-\x{F6}"""
private val xF8_2FF = """\x{F8}-\x{2FF}"""
// private val x370_37D = """\x{370}-\x{37D}"""
private val x37F_1FFF = """\x{37F}-\x{1FFF}"""
private val x200C_200D = """\x{200c}|\x{200d}"""
private val x2070_218F = """\x{2070}-\x{218F}"""
private val x2C00_2FEF = """\x{2C00}-\x{2FEF}"""
private val x3001_D7FF = """\x{3001}-\x{D7FF}"""
private val xF900_FDCF = """\x{F900}-\x{FDCF}"""
private val xFDF0_FFFD = """\x{FDF0}-\x{FFFD}"""
private val x10000_EFFFF = """\x{10000}-\x{EFFFF}"""
private val range0_9 = """0-9"""
// private val xB7 = """\xB7"""
// private val x0300_036F = """\x{0300}-\x{036F}"""
// private val x203F_2040 = """\x{203F}-\x{2040}"""
private val ncNameStartChar = "A-Z_a-z" + xC0_D6 + xD8_F6 + xF8_2FF +
// x370_37D + // TODO: why is this one is left out? Add comments please.
x37F_1FFF + x200C_200D +
x2070_218F + x2C00_2FEF + x3001_D7FF + xF900_FDCF + xFDF0_FFFD + x10000_EFFFF
private val ncNameChar = ncNameStartChar + "\\-" + "\\." + range0_9 // + "|" + xB7 + "|" + x0300_036F + "|" + x203F_2040
// TODO: why are the above left out? Add comments please.
private val NCNameRegexString = "([" + ncNameStartChar + "](?:[" + ncNameChar + "])*)"
private val QNameRegexString = "(?:" + NCNameRegexString + "\\:)?" + NCNameRegexString
lazy val NCName = NCNameRegexString.r
lazy val QName = QNameRegexString.r
//
// extended syntax now supports a prefix and a namespace
// E.g., ex:{http://example.com}foo
//
private val URIPartRegexString = """(?:\{(.*)\})"""
private val PrefixPartRegexString = """(?:""" + NCNameRegexString + """\:)"""
private val ExtQNameRegexString = PrefixPartRegexString + "?" + URIPartRegexString + "?" + NCNameRegexString
lazy val ExtQName = ExtQNameRegexString.r
def isURISyntax(s: String): Boolean = {
try {
new URI(s)
true
} catch {
case _: URISyntaxException => false
}
}
}