package edu.illinois.ncsa.daffodil.dsom
/* Copyright (c) 2012-2013 Tresys Technology, LLC. All rights reserved.
* Developed by: Tresys Technology, LLC
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal with
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of Tresys Technology, nor the names of its contributors
* may be used to endorse or promote products derived from this Software
* without specific prior written permission.
import scala.xml._
import scala.xml.parsing._
import edu.illinois.ncsa.daffodil.xml._
import edu.illinois.ncsa.daffodil.exceptions._
import edu.illinois.ncsa.daffodil.schema.annotation.props._
import edu.illinois.ncsa.daffodil.schema.annotation.props.gen._
import scala.collection.JavaConversions._
import edu.illinois.ncsa.daffodil.grammar._
import edu.illinois.ncsa.daffodil.dsom.oolag.OOLAG
import edu.illinois.ncsa.daffodil.dsom.oolag.OOLAG._
import edu.illinois.ncsa.daffodil.api._
import edu.illinois.ncsa.daffodil.processors.VariableMap
import edu.illinois.ncsa.daffodil.util.Compile
import edu.illinois.ncsa.daffodil.processors.charset.USASCII7BitPackedCharset
import edu.illinois.ncsa.daffodil.processors.charset.CharsetUtils
import edu.illinois.ncsa.daffodil.compiler.RootSpec
import edu.illinois.ncsa.daffodil.util._
import edu.illinois.ncsa.daffodil.dsom.IIUtils._
import IIUtils._
import edu.illinois.ncsa.daffodil.dsom.DiagnosticUtils._
import edu.illinois.ncsa.daffodil.ExecutionMode
import edu.illinois.ncsa.daffodil.compiler.DaffodilTunableParameters
import edu.illinois.ncsa.daffodil.externalvars.ExternalVariablesLoader
import edu.illinois.ncsa.daffodil.externalvars.Binding
* The core root class of the DFDL Schema object model.
* Every schema component has a schema document, and a schema, and a namespace.
// Abstract base for schema components. It is constructed from the component's XML
// node and its parent component, both of which are passed on to SchemaComponentBase.
abstract class SchemaComponent(xmlArg: Node, val parent: SchemaComponent)
extends SchemaComponentBase(xmlArg, parent)
with ImplementsThrowsSDE
with GetAttributesMixin
with SchemaComponentIncludesAndImportsMixin
with ResolvesQNames
with FindPropertyMixin
with LookupLocation
with PropTypes {
// Primitive factory, obtained from the enclosing schema set.
lazy val primitiveFactory: PrimitiveFactoryBase = schemaSet.primitiveFactory
// Short alias for primitiveFactory.
lazy val prims = primitiveFactory
// The context of a component is its parent component.
val context: SchemaComponent = parent
* Anything non-annotated always returns property not found
* Override in annotated components
// Base-class property lookup: always answers NotFound, with empty lists for both the
// non-default and the default locations searched. `pname` is not consulted.
def findPropertyOption(pname: String): PropertyLookupResult = {
NotFound(Nil, Nil)
// FIXME: not sure why non-annotated schema components need to have findProperty
// on them at all. Who would call it polymorphically, not knowing whether they
// have an annotated schema component or not?
// The members below (other than schemaComponent) are answered by delegating to the
// parent component. SchemaSet, whose parent defaults to null, overrides schemaSet,
// schemaDocument and fileName instead of delegating.
lazy val schemaFile: Option[DFDLSchemaFile] = parent.schemaFile
lazy val schemaSet: SchemaSet = parent.schemaSet
lazy val schemaDocument: SchemaDocument = parent.schemaDocument
lazy val xmlSchemaDocument: XMLSchemaDocument = parent.xmlSchemaDocument
lazy val schema: Schema = parent.schema
// A schema component is its own schemaComponent.
override def schemaComponent: SchemaComponent = this
override lazy val fileName: String = parent.fileName
// A component is hidden when its enclosing component is hidden. The computation is
// wrapped in an LV named 'isHidden and exposed through that LV's value.
override lazy val isHidden: Boolean = isHidden_.value
private val isHidden_ = LV('isHidden) {
enclosingComponent match {
// Having no enclosing component is an invariant failure here; per the message, the
// root global element is expected to override isHidden rather than reach this code.
case None => Assert.invariantFailed("Root global element should be overriding this.")
case Some(ec) => ec.isHidden
// Re-types the inherited enclosingComponent as Option[SchemaComponent] by casting.
// The cast itself does not check the element type of the Option.
override def enclosingComponent = super.enclosingComponent.asInstanceOf[Option[SchemaComponent]]
// Nearest enclosing Term: None when there is no enclosing component, the enclosing
// component itself when it is a Term, otherwise that component's own enclosingTerm.
lazy val enclosingTerm: Option[Term] = {
enclosingComponent match {
case None => None
case Some(t: Term) => Some(t)
// Only reached when enclosingComponent is a Some holding a non-Term, so .get is defined.
case _ => enclosingComponent.get.enclosingTerm
* path is used in diagnostic messages and code debug
* messages; hence, it is very important that it be
* very dependable.
override lazy val path = {
// NOTE(review): the receiver of the { _.prettyName } block is not visible in this text;
// presumably a sequence of components whose prettyNames are joined with "::" - confirm.
val p = { _.prettyName }.mkString("::")
// The string form of a component is its prettyName.
override def toString = prettyName
* Includes instances. Ie., a global element will appear inside an element ref.
* a global group inside a group ref, a global type inside an element or for
* derived simple types inside another simple type, etc.
* Used in diagnostic messages and code debug messages
lazy val scPath: Seq[SchemaComponent] = {
val ec = enclosingComponent
// NOTE(review): the expression that binds `sc` below is not fully visible here;
// presumably it is applied to the enclosing component `ec` - confirm against the full file.
val scpOpt = {
sc =>
val parentPath = sc.scPath
// This component is always the last element of the result; when scpOpt is empty the
// result contains only this component.
val res = scpOpt.getOrElse(Nil) :+ this
* the easiest way to get an empty metadata object.
// Destructures the literal <foo/> with the scala.xml.Elem extractor and keeps only the
// third extracted field (the element's attribute metadata), binding it to emptyXMLMetadata.
private val scala.xml.Elem(_, _, emptyXMLMetadata, _, _*) = <foo/>
* Used as factory for the XML Node with the right namespace and prefix etc.
* Given "element" it creates <dfdl:element /> with the namespace definitions
* based on this schema component's corresponding XSD construct.
* Makes sure to inherit the scope so we have all the namespace bindings.
// Builds an element with prefix "dfdl" and local name `label`, carrying the empty
// attribute metadata and a freshly created binding for the "dfdl" prefix.
def newDFDLAnnotationXML(label: String) = {
// This is not a "wired" dfdl prefix.
// Rather, we are creating a new nested binding for the dfdl prefix to the right DFDL uri.
// Which applies to this element and what is inside it only.
// So we're indifferent to what the surrounding context might be using for namespace bindings.
// The new binding is chained onto this component's own scope (xml.scope), so the
// bindings already present in that scope stay reachable from the new element.
val dfdlBinding = new scala.xml.NamespaceBinding("dfdl", XMLUtils.DFDL_NAMESPACE.toString, xml.scope)
scala.xml.Elem("dfdl", label, emptyXMLMetadata, dfdlBinding)
* Needed by back-end to evaluate expressions.
lazy val namespaces = {
// Computed by XMLUtils.namespaceBindings from the namespace scope of this component's XML node.
val res = XMLUtils.namespaceBindings(xml.scope)
* Local components
// Marker trait for local components. Its self-type restricts it to SchemaComponent,
// and it currently declares no members.
trait LocalComponentMixin { self: SchemaComponent =>
// nothing currently - all components have a parent.
* Common Mixin for things that have a name attribute.
// Mixin for components that carry a "name" attribute. The self-type restricts it to
// SchemaComponentBase.
trait NamedMixin
extends GetAttributesMixin { self: SchemaComponentBase =>
// Class-derived name followed by the component's name in parentheses.
override def prettyName = Misc.getNameFromClass(this) + "(" + name + ")"
lazy val name = nameFromNameAttribute
// The LV reads the required "name" attribute; valueOrElse supplies "??name??" as the fallback.
private lazy val nameFromNameAttribute = nameFromNameAttribute_.valueOrElse("??name??")
private val nameFromNameAttribute_ = LV('nameFromNameAttribute) { getAttributeRequired("name") }
// Abstract members that the component mixing this in must provide.
def xml: Node
def schemaDocument: SchemaDocument
// By default a named component's namespace is the target namespace of its schema document.
lazy val namespace = schemaDocument.targetNamespace // can be "" meaning no namespace
lazy val prefix = {
// The prefix is looked up in the XML node's scope using the namespace's string form.
val prefix = xml.scope.getPrefix(namespace.toString) // can be null meaning no prefix
// cannot be ""
* All global components share these characteristics.
* The difference between this and the not-Global flavor
* has to do with the elementFormDefault attribute of the xs:schema
* element. Global things are always qualified
// Mixin for global components. It extends NamedMixin, and its self-type restricts it
// to SchemaComponent.
trait GlobalComponentMixin
extends NamedMixin { self: SchemaComponent =>
* polymorphic way to get back to what is referring to this global
// Abstract: the component that refers to this global one, if there is one.
def referringComponent: Option[SchemaComponent]
* Global components have a different way to find their enclosing component,
* which is to go back to their referring component (which will be None only for
* the root element).
override def enclosingComponent = {
// A global component's context must be a SchemaDocument; anything else is an invariant violation.
Assert.invariant(context.isInstanceOf[SchemaDocument]) // global things have schema documents as their parents.
* elementFormDefault is an attribute of the xs:schema element.
* In XML Schema it defaults to 'unqualified'. When it is 'qualified', the names of nested
* local element definitions are in the target namespace. So, if you have
* @example {{{
* <schema elementFormDefault='qualified'
* targetNamespace="myURI" xmlns:tns="myURI"...>
* <element name='foo'...>
* <complexType>
* <sequence>
* <element name='bar'.../>
* ...
* }}}
* Now a DFDL/Xpath expression to reach that 'bar' element looks like /tns:foo/tns:bar
* Contrariwise, if elementFormDefault='unqualified'...
* @example {{{
* <schema elementFormDefault='unqualified'
* targetNamespace="myURI" xmlns:tns="myURI"...>
* <element name='foo'...>
* <complexType>
* <sequence>
* <element name='bar'.../>
* ...
* }}}
* Now a path to reach element bar would look like /tns:foo/bar.
* See how 'bar' isn't preceded by the tns prefix. That's because the child elements are
* all 'no namespace' elements.
* This also affects what a result document is like from namespaces perspective.
* Suppose the above 'bar' element is an xs:int. Then with elementFormDefault='qualified', an
* instance would look like:
* @example {{{
* <tns:foo><tns:bar>42</tns:bar></tns:foo>
* }}}
* or the possibly nicer (for a large result)
* @example {{{
* <foo xmlns="myURI"><bar>42</bar></foo>
* }}}
* But if elementFormDefault='unqualified', the instance doc would be like:
* @example {{{
* <tns:foo><bar>42</bar></tns:foo>
* }}}
* In this case you really don't want to setup xmlns='myURI' because this happens:
* @example {{{
* <foo xmlns="myURI"><bar xmlns="">42</bar></foo>
* }}}
* That is, you must explicitly go to the no-namespace syntax. It doesn't happen implicitly.
* This trait is mixed into things that are affected by elementFormDefault.
* Namely the local element declaration class.
// Mixin for components whose namespace and prefix depend on the elementFormDefault
// value of the XML schema document.
trait ElementFormDefaultMixin
extends NamedMixin { self: SchemaComponent =>
* handle elementFormDefault to qualify
// Unqualified form puts the component in no namespace; otherwise it takes the
// schema document's target namespace.
override lazy val namespace =
if (xmlSchemaDocument.elementFormDefault == "unqualified")
NoNamespace // unqualified means no namespace
else xmlSchemaDocument.targetNamespace
// NOTE(review): the unqualified branch below yields "" as the prefix, while the comment
// on NamedMixin.prefix says the prefix cannot be "" - confirm which is intended.
override lazy val prefix =
if (xmlSchemaDocument.elementFormDefault == "unqualified")
"" // unqualified means no prefix
else {
// name is supposed to be qualified by the target namespace
val tns = namespace
// record this error on the schemaDocument
xmlSchemaDocument.schemaDefinitionUnless(tns != NoNamespace, "Must have a targetNamespace if elementFormDefault='qualified'.")
val prefix = {
// Reuse a prefix already bound to the namespace in this node's scope when there is one.
val existingPrefix = xml.scope.getPrefix(tns.toString)
if (existingPrefix != null) existingPrefix
else {
// There is no prefix bound to this namespace
// So we have to create a prefix. Let's try "tns", "tns1", "tns2" etc. until
// we find one that is not bound to a namespace.
// NOTE(review): flatMap on a plain Range evaluates every element eagerly, so the
// "does NOT create the giant list" remark below only holds if a lazy collection
// (view/iterator/stream) is involved. The rest of this expression is not visible
// here - confirm against the full file.
val newPrefix = (0 until Int.MaxValue).flatMap { i =>
// flatMap collapses the List(None, None, Some(tryPre),...) => List(tryPre,...)
// then we just take head of this list, and we get tryPre
// Note: this does NOT create the giant list of all Int values.
val uniqueSuffix = if (i == 0) "" else i.toString
// The stem comes from a tunable parameter rather than being hard-coded here.
val prefixStem = DaffodilTunableParameters.generatedNamespacePrefixStem
val tryPre = prefixStem + uniqueSuffix
if (xml.scope.getURI(tryPre) == null)
Some(tryPre) // tryPre is not bound to a namespace, so we can use it.
else None
* Shared characteristics of any annotated schema component.
* Not all components can carry DFDL annotations.
// Abstract base for schema components that can carry annotations; `xml` and `sc`
// are passed straight to SchemaComponent as its XML node and parent.
abstract class AnnotatedSchemaComponent(xml: Node, sc: SchemaComponent)
extends SchemaComponent(xml, sc)
with AnnotatedMixin {
// Registers these four members with requiredEvaluations at construction time.
requiredEvaluations(annotationObjs, shortFormPropertiesCorrect, nonDefaultPropertySources, defaultPropertySources)
// /**
// * only used for debugging
// */
// override lazy val properties: PropMap =
// (nonDefaultPropertySources.flatMap { } ++
// defaultPropertySources.flatMap { }).toMap
// The comments below list the checks intended for short-form properties.
final lazy val shortFormPropertiesCorrect: Boolean = {
// Check that any unprefixed properties are disjoint with ALL DFDL property names.
// Warning otherwise
// Ensure that some prefix is bound to the dfdl namespace. Warn otherwise.
// Warn if dfdl: is bound to something else than the DFDL namespace.
* Since validation of extra attributes on XML Schema elements is
* normally lax validation, we can't count on validation of DFDL schemas
* to tell us whether short-form annotations are correct or not.
* So, we have to do this check ourselves.
* TBD: change properties code generator to output the various lists of
* properties that we have to check against. (Might already be there...?)
// Always true in the visible code: no short-form validity check is performed here.
def shortFormAnnotationsAreValid: Boolean = true
// Abstract: the providers searched for non-default and for default properties.
def nonDefaultPropertySources: Seq[ChainPropProvider]
def defaultPropertySources: Seq[ChainPropProvider]
// Format chain of this component's own format annotation.
lazy val nonDefaultFormatChain: ChainPropProvider = formatAnnotation.getFormatChain()
lazy val defaultFormatChain: ChainPropProvider = {
// The default chain comes from the schema document's format annotation.
val res = schemaDocument.formatAnnotation.getFormatChain()
// Looks `pname` up across `sources`. The first Found result wins; when there is none,
// the non-default and default locations of every NotFound are merged into a single
// NotFound.
private def findDefaultOrNonDefaultProperty(
pname: String,
sources: Seq[ChainPropProvider]): PropertyLookupResult = {
// NOTE(review): the receiver of this block is not visible here; presumably `sources`
// mapped through chainFindProperty - confirm.
val seq = { _.chainFindProperty(pname) }
val optFound = seq.collectFirst { case found: Found => found }
val result = optFound match {
case Some(f @ Found(_, _)) => f
case None => {
// merge all the NotFound stuff.
val nonDefaults = seq.flatMap {
case NotFound(nd, d) => nd
// This branch is only taken when no Found was collected above, so meeting a Found
// here is treated as an invariant failure.
case _: Found => Assert.invariantFailed()
val defaults = seq.flatMap {
case NotFound(nd, d) => d
case _: Found => Assert.invariantFailed()
val nf = NotFound(nonDefaults, defaults)
// Looks `pname` up in the non-default property sources only.
private def findNonDefaultProperty(pname: String): PropertyLookupResult = {
val result = findDefaultOrNonDefaultProperty(pname, nonDefaultPropertySources)
result match {
case f: Found => f
// NOTE(review): the body of this case is not visible in this text.
case NotFound(nd, d) =>
// Looks `pname` up in the default property sources only.
private def findDefaultProperty(pname: String): PropertyLookupResult = {
val result = findDefaultOrNonDefaultProperty(pname, defaultPropertySources)
val fixup = result match {
case f: Found => f
case NotFound(nd, d) =>
// The locations reported in the first (non-default) position are moved into the
// second (default) position; `d` is not carried over.
NotFound(Seq(), nd) // we want the places we searched shown as default locations searched
// Full property lookup: non-default sources first, then default sources. A Found from
// either stage is returned as is; otherwise the result is a NotFound listing the
// non-default locations from the first stage and the default locations from the second.
override def findPropertyOption(pname: String): PropertyLookupResult = {
// first try in regular properties
val regularResult = findNonDefaultProperty(pname)
regularResult match {
case f: Found => f
case NotFound(nonDefaultLocsTried1, defaultLocsTried1) => {
// The default sources are only consulted after the non-default lookup has failed.
val defaultResult = findDefaultProperty(pname)
defaultResult match {
case f: Found => f
case NotFound(nonDefaultLocsTried2, defaultLocsTried2) => {
// did not find it at all. Return a NotFound with all the places we
// looked non-default and default.
// defaultLocsTried1 and nonDefaultLocsTried2 are bound above but not used.
val nonDefaultPlaces = nonDefaultLocsTried1
val defaultPlaces = defaultLocsTried2
NotFound(nonDefaultPlaces, defaultPlaces)
* Every component that can be annotated.
* Review Note:
* It's no longer clear that this separation is strictly speaking needed.
* It's possible that this could be collapsed back into AnnotatedSchemaComponent
* or made smaller anyway.
// Mixin for annotated components. Its self-type restricts it to AnnotatedSchemaComponent.
trait AnnotatedMixin
extends CommonRuntimeValuedPropertiesMixin
with EncodingMixin
with EscapeSchemeRefMixin { self: AnnotatedSchemaComponent =>
// Abstract members that the component mixing this in must provide.
def xml: Node
def prettyName: String
def path: String
* Anything annotated must be able to construct the
* appropriate DFDLAnnotation object from the xml.
// Abstract: builds the DFDLAnnotation object for one annotation XML node.
def annotationFactory(node: Node): DFDLAnnotation
lazy val annotationNode = {
// Child elements of this component's XML node that are labelled "annotation".
val ann = xml \ "annotation"
* dais = Dfdl App Info nodeSeq
lazy val dais = {
val ais = (annotationNode \ "appinfo")
// Keep only appinfo elements whose source attribute marks them as DFDL appinfo.
val dais = ais.filter { ai =>
ai.attribute("source") match {
// An appinfo without a source attribute is never kept.
case None => false
case Some(n) => {
val str = n.text
// Keep in mind. As the DFDL standard evolves, and new versions
// come out, this code may change to tolerate different source
// attributes that call out distinct versions of the standard.
val officialAppinfoSourceAttribute = XMLUtils.dfdlAppinfoSource
val hasRightSource = (str == officialAppinfoSourceAttribute)
// A source that merely contains both "ogf" and "dfdl" is tolerated, with a warning.
val isAcceptable = str.contains("ogf") && str.contains("dfdl")
schemaDefinitionWarningWhen(!hasRightSource && isAcceptable,
"The xs:appinfo source attribute value '%s' should be '%s'.", str, officialAppinfoSourceAttribute)
(hasRightSource || isAcceptable)
* The DFDL annotations on the component, as objects
* that are subtypes of DFDLAnnotation.
// Exposed through an LV named 'annotationObjs.
lazy val annotationObjs = annotationObjs_.value
private val annotationObjs_ = LV('annotationObjs) {
// println(dais)
dais.flatMap { dai =>
val children = dai.child
// Text nodes among the appinfo children are skipped; the remaining children are mapped.
// NOTE(review): the body of the map is not visible in this text.
val res = children.filterNot { _.isInstanceOf[Text] }.map { child =>
* Here we establish an invariant, which is that every annotatable schema component definitely has an
* annotation object. It may have no properties on it, but it will be there. Hence, we can
* delegate various property-related attribute calculations to it.
* To realize this, every concrete class must implement (or inherit) an implementation of
* emptyFormatFactory, which constructs an empty format annotation,
* and isMyFormatAnnotation which tests if an annotation is the corresponding kind.
* Given that, formatAnnotation then either finds the right annotation, or constructs one, but our invariant
* is imposed. There *is* a formatAnnotation.
// Abstract: builds an empty format annotation for this kind of component.
def emptyFormatFactory: DFDLFormatAnnotation
// Abstract: tells whether `a` is the kind of format annotation belonging to this component.
def isMyFormatAnnotation(a: DFDLAnnotation): Boolean
// Exposed through an LV named 'formatAnnotation.
lazy val formatAnnotation = formatAnnotation_.value
private val formatAnnotation_ = LV('formatAnnotation) {
// Collect the format annotations among annotationObjs that isMyFormatAnnotation accepts.
val format = annotationObjs.collect { case fa: DFDLFormatAnnotation if isMyFormatAnnotation(fa) => fa }
// None found: use the empty one. Exactly one: use it. More than one: schema definition error.
val res = format match {
case Seq() => emptyFormatFactory // does make things with the right namespace scopes attached!
case Seq(fa) => fa
case _ => schemaDefinitionError("Only one format annotation is allowed at each annotation point.")
* Annotations can contain expressions, so we need to be able to compile them.
* We need our own instance so that the expression compiler has this schema
* component as its context.
// Expression compiler constructed with this component as its argument.
lazy val expressionCompiler = new ExpressionCompiler(this)
// Delegates to the format annotation's justThisOneProperties.
lazy val justThisOneProperties = formatAnnotation.justThisOneProperties
* The other kind of DFDL annotations are DFDL 'statements'.
* This trait is everything shared by schema components that can have
* statements.
* Factory for creating the corresponding DFDLAnnotation objects.
// Mixin for annotated components that can carry DFDL statements.
trait DFDLStatementMixin extends ThrowsSDE { self: AnnotatedSchemaComponent =>
// Maps a DFDL statement element to its annotation object. dfdl:assert,
// dfdl:discriminator, dfdl:setVariable and dfdl:newVariableInstance are recognised;
// any other node is a schema definition error.
// Inside this method the `self` parameter shadows the trait's self-type alias of the same name.
def annotationFactoryForDFDLStatement(node: Node, self: AnnotatedSchemaComponent): DFDLAnnotation = {
node match {
// `content` is bound by each pattern but not used; the whole `node` is passed on.
case <dfdl:assert>{ content @ _* }</dfdl:assert> => new DFDLAssert(node, self)
case <dfdl:discriminator>{ content @ _* }</dfdl:discriminator> => new DFDLDiscriminator(node, self)
case <dfdl:setVariable>{ content @ _* }</dfdl:setVariable> => new DFDLSetVariable(node, self)
case <dfdl:newVariableInstance>{ content @ _* }</dfdl:newVariableInstance> => new DFDLNewVariableInstance(node, self)
case _ => SDE("Invalid DFDL annotation found: %s", node)
* Validation won't check whether these are validly in place on a DFDL schema, so
* we allow any annotated object to have them, and then we can do checking on this list
* to enforce rules about which kinds of statements are allowed and where.
* Implement these abstract methods to do the right thing w.r.t. combining
* statements from group refs and their referenced groups, element refs and their elements,
* element decls and their simple types, simpleTypes and their base simpleTypes.
* The local ingredients are here for doing the needed combining and also for checking.
* E.g., dfdl:newVariableInstance isn't allowed on simpleType, can only have one discriminator per
* annotation point, and per combined annotation point, discriminators and assertions exclude each other, etc.
// Abstract statement lists, supplied by the concrete component.
def statements: Seq[DFDLStatement]
def newVariableInstanceStatements: Seq[DFDLNewVariableInstance]
// Set-variable statements, then discriminators, then asserts, in that order.
final lazy val notNewVariableInstanceStatements = setVariableStatements ++ discriminatorStatements ++ assertStatements
def assertStatements: Seq[DFDLAssert]
def discriminatorStatements: Seq[DFDLDiscriminator]
def setVariableStatements: Seq[DFDLSetVariable]
// "Local" lists are derived from this component's own annotation objects only.
final lazy val localStatements = this.annotationObjs.collect { case st: DFDLStatement => st }
final lazy val localNewVariableInstanceStatements = localStatements.collect { case nve: DFDLNewVariableInstance => nve }
// Local statements with the new-variable-instance statements removed.
final lazy val localNotNewVariableInstanceStatements = localStatements.diff(localNewVariableInstanceStatements)
// Local discriminators and local asserts are computed together, so the check that
// they do not coexist runs before either list is exposed.
final lazy val (localDiscriminatorStatements,
localAssertStatements) = {
val discrims = localStatements.collect { case disc: DFDLDiscriminator => disc }
val asserts = localStatements.collect { case asrt: DFDLAssert => asrt }
checkDiscriminatorsAssertsDisjoint(discrims, asserts)
// Enforces two rules through schemaDefinitionUnless: at most one discriminator, and
// not both discriminators and asserts. Returns the two sequences unchanged as a pair.
final def checkDiscriminatorsAssertsDisjoint(discrims: Seq[DFDLDiscriminator], asserts: Seq[DFDLAssert]): (Seq[DFDLDiscriminator], Seq[DFDLAssert]) = {
schemaDefinitionUnless(discrims.size <= 1, "At most one discriminator allowed at same location: %s", discrims)
// At least one of the two sequences must be empty.
schemaDefinitionUnless(asserts == Nil || discrims == Nil,
"Cannot have both dfdl:discriminator annotations and dfdl:assert annotations at the same location.")
(discrims, asserts)
// Requires that the variable names collected for `svs` contain no duplicates.
final def checkDistinctVariableNames(svs: Seq[DFDLSetVariable]) = {
// NOTE(review): the receiver of this block is not visible here; presumably `svs`
// mapped to each statement's defv.extName - confirm.
val names = { _.defv.extName }
// Duplicates exist exactly when removing them shrinks the collection.
val areAllDistinct = names.distinct.size == names.size
schemaDefinitionUnless(areAllDistinct, "Variable names must all be distinct at the same location: %s", names)
final lazy val localSetVariableStatements = {
// The set-variable statements among this component's local statements.
val svs = localStatements.collect { case sv: DFDLSetVariable => sv }
* A schema set is exactly that, a set of schemas. Each schema has
* a target namespace (or 'no namespace'), so a schema set is
* conceptually a mapping from a namespace URI (or empty string, meaning no
* namespace) onto schema.
* Constructing these from XML Nodes is a unit-test
* interface. The real constructor takes a sequence of file names,
* and you can optionally specify a root element via the rootSpec argument.
* A schema set is a SchemaComponent (derived from that base), so as to inherit
* the error/warning accumulation behavior that all SchemaComponents share.
* A schema set invokes our XML Loader, which can produce validation errors, and
* those have to be gathered so we can give the user back a group of them, not
* just one.
* Schema set is however, a kind of a fake SchemaComponent in that it
* doesn't correspond to any user-specified schema object. And unlike other
* schema components obviously it does not live within a schema document.
// Primary constructor. rootSpec defaults to None, checkAllTopLevelArg to false and
// parent to null. The component's XML node is the placeholder <schemaSet/>.
class SchemaSet(
externalVariablesSource: Seq[Binding],
primFactory: PrimitiveFactoryBase,
schemaFilesArg: Seq[File],
rootSpec: Option[RootSpec] = None,
checkAllTopLevelArg: Boolean = false,
parent: SchemaComponent = null)
extends SchemaComponent(<schemaSet/>, parent) // a fake schema component
with SchemaSetIncludesAndImportsMixin {
// The primitive factory is the constructor argument rather than being taken from a schema set.
override lazy val primitiveFactory = primFactory
// NOTE(review): the body of this constructor-time check is not visible in this text.
if (checkAllTopLevel) {
// A schema set is its own schemaSet.
override lazy val schemaSet = this
// These things are needed to satisfy the contract of being a schema component.
// A schema set has no enclosing component.
override lazy val enclosingComponent = None
// Asking a schema set for its schemaDocument is reported as a usage error.
override lazy val schemaDocument = Assert.usageError("schemaDocument should not be called on SchemaSet")
* Let's use the filename for the first schema document, rather than giving no information at all.
* It would appear that this is only used for informational purposes
* and as such, doesn't need to be a URL. Can just be String.
// Path of the first schema file. Indexing with (0) means an empty schemaFilesArg
// fails when this value is first evaluated.
override lazy val fileName = schemaFilesArg(0).getPath()
lazy val schemaFiles = schemaFilesArg
lazy val checkAllTopLevel = checkAllTopLevelArg
// Warnings and errors are forwarded to oolagWarn and oolagError.
override def warn(th: Diagnostic) = oolagWarn(th)
override def error(th: Diagnostic) = oolagError(th)
* This constructor for unit testing only
// Auxiliary constructor taking the schema as an XML Node. rootNamespace and root
// default to null, extVars to an empty Seq.
// NOTE(review): the delegating call to the primary constructor is not visible in this text.
def this(primFact: PrimitiveFactoryBase, sch: Node, rootNamespace: String = null, root: String = null, extVars: Seq[Binding] = Seq.empty) =
// The node is written to a temporary file, which becomes the only schema file.
val file = XMLUtils.convertNodeToTempFile(sch)
val files = List(file)
// No root name means no RootSpec; a root name without a namespace gives a RootSpec
// with None as its namespace; otherwise the namespace string is wrapped in NS.
if (root == null) None else {
if (rootNamespace == null) Some(RootSpec(None, root))
else Some(RootSpec(Some(NS(rootNamespace)), root))
* Registry used to map from infoset nodes back to the schema components
* that made them.
lazy val schemaComponentRegistry = new SchemaComponentRegistry()
lazy val isValid = {
// Evaluated under OOLAG.keepGoing with `false` as its first argument.
val isV = OOLAG.keepGoing(false) {
val files = allSchemaFiles
// NOTE(review): the receiver of this block is not visible here; presumably `files`
// mapped to each file's isValid - confirm.
val fileValids = { _.isValid }
// True only when there is at least one entry and every entry is true.
val res = fileValids.length > 0 && fileValids.fold(true) { _ && _ }
lazy val validationDiagnostics = {
val files = allSchemaFiles
// Concatenates the validation diagnostics of every schema file.
val res = files.flatMap { _.validationDiagnostics }
// Exposed through an LV named 'schemas.
lazy val schemas = schemas_.value
private val schemas_ = LV('schemas) {
// NOTE(review): the receivers of the blocks in this LV are not visible in this text;
// presumably the schema documents are paired with their target namespace - confirm.
val schemaPairs = { sd => (sd.targetNamespace, sd) }
// groupBy is deterministic if the hashCode of the key element is deterministic.
// our NS objects hashCode is same as their underlying string.
// Alas, being deterministic doesn't mean it is in an order we expect.
// but at least it is deterministic.
val schemaGroups = schemaPairs.groupBy { _._1 } // group by the namespace identifier
val schemas = {
case (ns, pairs) => {
val sds = { case (ns, s) => s }
// One Schema per namespace, built from that namespace's documents with this set as its schema set.
val sch = new Schema(ns, sds.toSeq, this)
* For checking uniqueness of global definitions in their namespaces
// One entry per global definition: (namespace, string, kind symbol, component).
private type UC = (NS, String, Symbol, SchemaComponent)
// Exposed through an LV named 'allTopLevels.
lazy val allTopLevels: Seq[UC] = allTopLevels_.value
private val allTopLevels_ = LV('allTopLevels) {
val res = schemas.flatMap { schema =>
val ns = schema.namespace
// NOTE(review): in each block below the receiver and the second tuple element are not
// visible in this text; presumably each kind of global definition of `schema` is
// mapped to a UC carrying its name - confirm against the full file.
val geds = { g =>
(ns,, 'Element, g)
val stds = { g =>
(ns,, 'SimpleType, g)
val ctds = { g =>
(ns,, 'ComplexType, g)
val gds = { g =>
(ns,, 'Group, g)
val dfs = { g =>
(ns,, 'DefineFormat, g)
val dess = { g =>
(ns,, 'DefineEscapeScheme, g)
val dvs = { g =>
(ns,, 'DefineVariable, g)
// All seven kinds concatenated, in this fixed order.
val all = geds ++ stds ++ ctds ++ gds ++ dfs ++ dess ++ dvs
// Exposed through an LV named 'groupedTopLevels.
lazy val groupedTopLevels = groupedTopLevels_.value
private val groupedTopLevels_ = LV('groupedTopLevels) {
// Definitions collide when they share kind, namespace and name.
val grouped = allTopLevels.groupBy {
case (ns, name, kind, obj) => {
(kind, ns, name)
// NOTE(review): the receivers of the blocks below are not visible in this text.
val grouped2 = {
case (idFields, seq) => {
val onlyObj = { case (ns, name, kind, obj) => obj }
// More than one object in a group is reported with SDEButContinue, listing the locations.
if (onlyObj.length > 1) {
val (ns, name, kind) = idFields
val obj = onlyObj.head
val locations = onlyObj.asInstanceOf[Seq[LookupLocation]] // don't like this downcast
SDEButContinue("multiple definitions for %s {%s}%s.\n%s", kind.toString, ns, name, { _.locationDescription }.mkString("\n"))
(idFields, onlyObj)
// Flattens the groups back into one sequence of components.
val res = grouped2.flatMap { case (_, topLevelThing) => topLevelThing }.toSeq
// The trick with this is when to call it. If you call it, as
// a consequence of computing all of this, it will have to parse
// every file, every included/imported file, etc.
// Runs the duplicate check by forcing groupedTopLevels, which reports duplicates as it is computed.
def checkForDuplicateTopLevels() = {
groupedTopLevels // demand this.
* When the user (of the API) doesn't specify a root element namespace, just a
* root element name, then this searches for a single element having that name, and if it is
* unambiguous, it is used as the root.
// Searches every schema for a global element declaration called `name`. Zero
// candidates and more than one candidate are both schema definition errors.
def findRootElement(name: String) = {
// log(Info("%s searching for root element with name %s", Misc.getNameFromClass(this), name))
val candidates = schemas.flatMap { _.getGlobalElementDecl(name) }
schemaDefinitionUnless(candidates.length != 0, "No root element found for %s in any available namespace", name)
// NOTE(review): part of the argument list on the next line is not visible in this text.
schemaDefinitionUnless(candidates.length <= 1, "Root element %s is ambiguous. Candidates are %s.", { gef => + " in namespace: " + gef.schemaDocument.targetNamespace })
// After the two checks above exactly one candidate remains.
Assert.invariant(candidates.length == 1)
val gef = candidates(0)
// The candidate is a factory; forRoot() produces the root element from it.
val re = gef.forRoot()
* Given a RootSpec, get the global element it specifies. Error if ambiguous
* or not found.
// Resolves a RootSpec to a global element.
def getGlobalElement(rootSpec: RootSpec) = {
rootSpec match {
// Namespace given: look the element up in that namespace; a miss is a schema definition error.
case RootSpec(Some(rootNamespaceName), rootElementName) => {
val geFactory = getGlobalElementDecl(rootNamespaceName, rootElementName)
val ge = geFactory match {
case None => schemaDefinitionError("No global element found for %s", rootSpec)
case Some(f) => f.forRoot()
// NOTE(review): the body of the no-namespace case is not visible in this text.
case RootSpec(None, rootElementName) => {
case _ => Assert.impossible()
* Cache of whatever the rootElem decision was
// Mutable state; both start as None and are assigned inside rootElement(...).
var rootElemOpt: Option[GlobalElementDecl] = None
var rootSpecFromPF: Option[RootSpec] = None
* Since the root element can be specified by an API call on the
* Compiler class, or by an API call on the ProcessorFactory, this
* method reconciles the two. E.g., you can't specify the root both
* places, it's one or the other.
* Also, if you don't specify a root element at all, this
* grabs the first element declaration of the first schema file
* to use as the root.
// Determines the root element from the two possible RootSpec sources and records the
// outcome in rootSpecFromPF and rootElemOpt.
def rootElement(rootSpecFromProcessorFactory: Option[RootSpec]): GlobalElementDecl = {
// Side effect: remember the spec this call was given.
rootSpecFromPF = rootSpecFromProcessorFactory
val rootSpecFromCompiler = rootSpec
val re =
(rootSpecFromCompiler, rootSpecFromProcessorFactory) match {
// NOTE(review): the bodies of the two single-source cases are not visible in this text.
case (Some(rs), None) =>
case (None, Some(rs)) =>
case (None, None) => {
// if the root element and rootNamespace aren't provided at all, then
// the first element of the first schema document is the root
val sDocs = this.allSchemaDocuments
val firstSchemaDocument = sDocs(0)
val gdeclf = firstSchemaDocument.globalElementDecls
val firstElement = {
// A first schema document with no global elements is a schema definition error.
schemaDefinitionUnless(gdeclf.length >= 1, "No global elements in: " + firstSchemaDocument.fileName)
val rootElement = gdeclf(0).forRoot()
// The remaining combination is both sources supplying a spec.
case _ => Assert.invariantFailed("illegal combination of root element specifications")
// Side effect: cache the chosen root element.
rootElemOpt = Some(re)
// Show root as either "{...ns...}name"
// or if there is no namespace just "name (in no namespace)"
val (nsSpecifier, comment) =
if (re.targetNamespace == NoNamespace) ("", " (in no namespace)")
else ("{" + re.targetNamespace + "}", "")
// log(Info("Found root element %s%s%s", nsSpecifier,, comment))
* Retrieve schema by namespace name.
* If the schema has no namespace, then use ""
def getSchema(namespace: NS) = {
// First schema whose target namespace equals `namespace`, if there is one.
val schemaForNamespace = schemas.find { s => s.targetNamespace == namespace }
* XML Schema global objects.
* Given a namespace and name, try to retrieve the named object
* These all return factories for the objects, not the objects themselves.
// Two-step lookup: find the schema for `namespace`, then ask it for `name`.
def getGlobalElementDecl(namespace: NS, name: String) = {
// getSchema(namespace).flatMap { _.getGlobalElementDecl(name) }
val s = getSchema(namespace)
// The lambda parameter `s` shadows the outer `s`; inside the block it is the schema itself.
val res = s.flatMap { s =>
val ged = s.getGlobalElementDecl(name)
// Two-step lookup of a global simple type definition: find the schema for
// `namespace`, then ask that schema for `name`. Empty when either step finds nothing.
def getGlobalSimpleTypeDef(namespace: NS, name: String) =
  for {
    sch <- getSchema(namespace)
    std <- sch.getGlobalSimpleTypeDef(name)
  } yield std
// Two-step lookup of a global complex type definition: find the schema for
// `namespace`, then ask that schema for `name`. Empty when either step finds nothing.
def getGlobalComplexTypeDef(namespace: NS, name: String) =
  for {
    sch <- getSchema(namespace)
    ctd <- sch.getGlobalComplexTypeDef(name)
  } yield ctd
// Two-step lookup of a global group definition: find the schema for `namespace`,
// then ask that schema for `name`. Empty when either step finds nothing.
def getGlobalGroupDef(namespace: NS, name: String) =
  for {
    sch <- getSchema(namespace)
    ggd <- sch.getGlobalGroupDef(name)
  } yield ggd
* DFDL Schema top-level global objects
// Default format of the schema for `namespace`, or empty when no schema has that
// namespace. `name` is accepted for signature compatibility but is not consulted.
// Uses map directly: wrapping the result in Some inside flatMap was equivalent.
def getDefaultFormat(namespace: NS, name: String) = getSchema(namespace).map { sch => sch.getDefaultFormat }
// Two-step lookup: find the schema for `namespace`, then ask it for the named define-format.
def getDefineFormat(namespace: NS, name: String) = {
val s = getSchema(namespace)
s.flatMap { _.getDefineFormat(name) }
// All define-formats of the schema for `namespace`. Unlike the lookups above, a missing
// schema is reported as a schema definition error on the supplied `context`.
def getDefineFormats(namespace: NS, context: ThrowsSDE) = getSchema(namespace) match {
case None => context.schemaDefinitionError("Failed to find a schema for namespace: " + namespace)
case Some(sch) => sch.getDefineFormats()
def getDefineVariable(namespace: NS, name: String) = {
// Two-step lookup: find the schema for `namespace`, then ask it for the named define-variable.
val res = getSchema(namespace).flatMap { _.getDefineVariable(name) }
// Two-step lookup of a define-escape-scheme: find the schema for `namespace`, then
// ask that schema for `name`. Empty when either step finds nothing.
def getDefineEscapeScheme(namespace: NS, name: String) =
  for {
    sch <- getSchema(namespace)
    des <- sch.getDefineEscapeScheme(name)
  } yield des
// Primitive types are only looked up for the XSD namespace.
def getPrimitiveType(ns: NS, localName: String) = {
// NOTE(review): the result of the non-XSD branch, and the left-hand side of the
// comparison inside find, are not visible in this text.
if (ns != XMLUtils.XSD_NAMESPACE) // must check namespace
PrimType.allPrimitiveTypes.find { == localName }
lazy val variableMap = {
// Start from the define-variables of every schema document...
val dvs = allSchemaDocuments.flatMap { _.defineVariables }
val vmap = VariableMap.create(dvs)
// ...then pass that map, this schema set and the external bindings to the external-variables loader.
val finalVMap =
ExternalVariablesLoader.loadVariablesByBinding(externalVariablesSource, this, vmap)
// The registry's contextMap, under a shorter name.
private lazy val elements = schemaComponentRegistry.contextMap
// Filters the registered (uuid, component) entries by namespace and name.
def getIDByName(name: String, ns: org.jdom.Namespace) = {
val matches = elements.filter {
case (uuid, eb) => {
// NOTE(review): the left-hand side of the name comparison is not visible in this text.
eb.targetNamespace.toJDOM == ns && == name
// Converts the infoset via XMLUtils.elem2Element, then sets a "context" attribute (in
// XMLUtils.INT_NS_OBJECT) to the matching uuid on the root element and on every
// descendant element that getIDByName finds a match for.
def getSCIDAugmentedInfoset(origInfoset: Node) = {
val infoset = XMLUtils.elem2Element(origInfoset)
val rootMatch = getIDByName(infoset.getName(), infoset.getNamespace())
rootMatch match {
case None => // Nothing to do here
case Some((uuid, _)) => {
infoset.setAttribute("context", uuid.toString(), XMLUtils.INT_NS_OBJECT)
// The ElementFilter restricts the descendants to elements; the cast gives the iterator that element type.
val it = infoset.getDescendants(new org.jdom.filter.ElementFilter).asInstanceOf[java.util.Iterator[org.jdom.Element]]
// Iterating a java.util.Iterator in a for loop relies on the JavaConversions import in this file.
for (e <- it) {
val name = e.getName()
val ns = e.getNamespace()
val theMatch = getIDByName(name, ns)
theMatch match {
case None => // Nothing to do here
case Some((uuid, _)) => {
e.setAttribute("context", uuid.toString(), XMLUtils.INT_NS_OBJECT)
* A schema is all the schema documents sharing a single target namespace.
* That is, one can write several schema documents which all have the
* same target namespace, and in that case all those schema documents make up
* the 'schema'.
class Schema(val namespace: NS, schemaDocs: Seq[SchemaDocument], schemaSetArg: SchemaSet)
extends SchemaComponent(<fake/>, schemaSetArg) {
// The target namespace of a Schema is the namespace it was constructed with.
override lazy val targetNamespace: NS = namespace
// A Schema has no enclosing component.
override lazy val enclosingComponent = None
// A Schema is made up of schema documents rather than living in one, so
// asking it for its schema document is a usage error.
override lazy val schemaDocument = Assert.usageError("schemaDocument should not be called on Schema")
override lazy val schemaSet = schemaSetArg
// The schema documents handed to the constructor.
lazy val schemaDocuments = schemaDocs
/**
 * Enforces that a name resolves to at most one definition.
 *
 * @param scs  all definitions found for `name`
 * @param name the name that was looked up; used only in the diagnostic
 * @return None when nothing was found, Some(definition) when exactly one was
 *         found. More than one is reported as a schema definition error
 *         listing where each conflicting definition is located.
 */
private def noneOrOne[T](scs: Seq[T], name: String): Option[T] = {
  scs match {
    case Nil => None
    case Seq(sc) => Some(sc) // exactly one is good
    case s => {
      // Describe the location of every conflicting definition.
      val locations = s.map { thing =>
        thing match {
          case df: DFDLDefiningAnnotation => df.asAnnotation.fileDescription
          case sc: SchemaComponent => sc.fileDescription
          case _ => Assert.invariantFailed("should only be a SchemaComponent or a DFDLDefiningAnnotation")
        }
      }
      // SDE is used here the same way annotationFactory uses it on a
      // SchemaComponent: format string plus arguments, never returns normally.
      SDE(
        "More than one definition for name: %s\n" +
          "defined %s",
        name, locations.mkString("\n and also "))
    }
  }
}
/**
 * Given a name, retrieve the appropriate object.
 *
 * This just scans each schema document in the schema, checking each one.
 */
// Collects the global element declaration named `name` from every schema
// document of this schema; noneOrOne rejects duplicate definitions.
def getGlobalElementDecl(name: String) =
  noneOrOne(schemaDocuments.flatMap { _.getGlobalElementDecl(name) }, name)
// By-name lookups across all schema documents of this schema. Each one
// gathers the hits from every document and hands them to noneOrOne, which
// rejects duplicate definitions.
def getGlobalSimpleTypeDef(name: String) = {
  val found = schemaDocuments.flatMap { sd => sd.getGlobalSimpleTypeDef(name) }
  noneOrOne(found, name)
}

def getGlobalComplexTypeDef(name: String) = {
  val found = schemaDocuments.flatMap { sd => sd.getGlobalComplexTypeDef(name) }
  noneOrOne(found, name)
}

def getGlobalGroupDef(name: String) = {
  val found = schemaDocuments.flatMap { sd => sd.getGlobalGroupDef(name) }
  noneOrOne(found, name)
}

def getDefineFormat(name: String) = {
  val found = schemaDocuments.flatMap { sd => sd.getDefineFormat(name) }
  noneOrOne(found, name)
}

// All defineFormat annotations of all schema documents (no uniqueness check).
def getDefineFormats() = schemaDocuments.flatMap { sd => sd.defineFormats }

def getDefineVariable(name: String) = {
  val found = schemaDocuments.flatMap { sd => sd.getDefineVariable(name) }
  noneOrOne(found, name)
}

def getDefineEscapeScheme(name: String) = {
  val found = schemaDocuments.flatMap { sd => sd.getDefineEscapeScheme(name) }
  noneOrOne(found, name)
}
// The default format of each schema document, one entry per document.
def getDefaultFormat = schemaDocuments.map { sd => sd.getDefaultFormat }
// used for bulk checking of uniqueness
// Each collection is the concatenation of the corresponding collection of
// every schema document in this schema.
lazy val globalElementDecls = schemaDocuments.flatMap { sd => sd.globalElementDecls }
lazy val globalGroupDefs = schemaDocuments.flatMap { sd => sd.globalGroupDefs }
lazy val globalSimpleTypeDefs = schemaDocuments.flatMap { sd => sd.globalSimpleTypeDefs }
lazy val globalComplexTypeDefs = schemaDocuments.flatMap { sd => sd.globalComplexTypeDefs }
lazy val defineFormats = schemaDocuments.flatMap { sd => sd.defineFormats }
lazy val defineEscapeSchemes = schemaDocuments.flatMap { sd => sd.defineEscapeSchemes }
lazy val defineVariables = schemaDocuments.flatMap { sd => sd.defineVariables }
/**
 * A schema document corresponds to one file, usually named with an ".xsd" extension.
 * The root element of a schema document is xsd:schema where xsd is the namespace for
 * XML Schema.
 *
 * A schema document is important because it is the unit of lexical scoping of format
 * properties in DFDL.
 *
 * Specifically, note that two schema documents may have the same target namespace, but
 * different default formats scoped over their contents.
 *
 * When dealing with plain XML and XSD (not DFDL), the concept of a Schema with a single
 * target namespace is used more than the concept of a schema document, which is usually
 * only needed to issue diagnostic error messages ("In file foo.xsd, line 938...").
 *
 * Conversely, for DFDL, the concept of a schema document is used more, because schema documents
 * are where default formats are specified, so it is very important which schema document
 * a schema component was defined within.
 */
/**
 * Common to both types we use for dealing with
 * schema documents.
 */
trait SchemaDocumentMixin { self: SchemaComponent =>

  // A schema document has no enclosing component.
  override lazy val enclosingComponent: Option[SchemaComponent] = None

}
/**
 * Handles everything about schema documents that has nothing to
 * do with DFDL: things like namespace, include, import, elementFormDefault,
 * etc.
 */
class XMLSchemaDocument(xmlArg: Node,
schemaSetArg: SchemaSet,
iiArg: Option[IIBase],
sfArg: Option[DFDLSchemaFile],
seenBeforeArg: IIMap)
extends SchemaComponent(xmlArg, sfArg.getOrElse(schemaSetArg))
with SchemaDocumentMixin
with SchemaDocIncludesAndImportsMixin {
// The IIMap passed in at construction.
lazy val seenBefore = seenBeforeArg
// The optional IIBase passed in at construction. This is a strict val, unlike
// its lazy neighbours.
val ii = iiArg
// The schema file, when one was supplied at construction.
override lazy val schemaFile = sfArg
// This object is itself the XML schema document.
override lazy val xmlSchemaDocument = this
/**
 * Error checks on the xs:schema element itself (see checkUnsupportedAttributes).
 *
 * E.g., we don't support the xsi:schemaLocation attribute, so we issue a warning for that,
 * and for some other attributes as well.
 */
/**
 * Validates a form-default attribute value.
 *
 * @param str  the attribute value
 * @param kind which form default is being checked; used only in the diagnostic
 * @return `str` itself when it is "unqualified" or "qualified"; any other
 *         value is reported as a schema definition error
 */
def qualOrUnqual(str: String, kind: String) = {
  val recognized = str == "unqualified" || str == "qualified"
  if (recognized) str
  else schemaDefinitionError("Unrecognized value for %s FormDefault='%s'.", kind, str)
}
// The elementFormDefault attribute of the schema element: "unqualified" when
// the attribute is absent or empty, otherwise the value checked by qualOrUnqual.
lazy val elementFormDefault = {
  (xml \ "@elementFormDefault").text match {
    case "" => "unqualified"
    case specified => qualOrUnqual(specified, "element")
  }
}
// The attributeFormDefault attribute of the schema element: "unqualified" when
// the attribute is absent or empty, otherwise the value checked by qualOrUnqual.
lazy val attributeFormDefault = {
  (xml \ "@attributeFormDefault").text match {
    case "" => "unqualified"
    case specified => qualOrUnqual(specified, "attribute")
  }
}
/**
 * Checks the schema element for attributes that are ignored or unsupported.
 *
 * Issues a warning for each of schemaLocation, blockDefault and finalDefault
 * that is present, and a schema definition error when attributeFormDefault is
 * anything other than "unqualified".
 *
 * Evaluates to true when at least one of the ignored attributes is present.
 */
lazy val checkUnsupportedAttributes = checkUnsupportedAttributes_.value
private val checkUnsupportedAttributes_ = LV('checkUnsupportedAttributes) {
  // NOTE(review): `\ "@schemaLocation"` selects an unprefixed attribute; if the
  // intent is xsi:schemaLocation this may need the "@{uri}name" form -- confirm.
  val hasSchemaLocation = (xml \ "@schemaLocation").text != ""
  val hasBlockDefault = (xml \ "@blockDefault").text != ""
  val hasFinalDefault = (xml \ "@finalDefault").text != ""
  schemaDefinitionWarningUnless(!hasSchemaLocation, "schemaLocation is ignored.")
  schemaDefinitionWarningUnless(!hasBlockDefault, "blockDefault is ignored")
  schemaDefinitionWarningUnless(!hasFinalDefault, "finalDefault is ignored")
  schemaDefinitionUnless(attributeFormDefault == "unqualified", "attributeFormDefault='qualified' is not yet implemented.")
  // The flag is the value of the LV, so it has to be the last expression.
  hasSchemaLocation || hasBlockDefault || hasFinalDefault
}
/**
 * Handles only things specific to DFDL about schema documents.
 *
 * I.e., default format properties, named format properties, etc.
 */
class SchemaDocument(xmlSDoc: XMLSchemaDocument)
extends AnnotatedSchemaComponent(xmlSDoc.xml, xmlSDoc)
with SchemaDocumentMixin
with Format_AnnotationMixin
with SeparatorSuppressionPolicyMixin {
/**
 * Implements the selectivity so that if you specify a root element
 * to the compiler, then only that root element (and things reached from it)
 * is compiled. Otherwise all top-level elements are compiled.
 */
if (schemaSet.checkAllTopLevel) { { _.forRoot() }
// TODO: about defineVariables:
// only include these if they have default values or external values.
// Those then have to be evaluated before any processing,
// and may depend on other variables with default values or external values.
// Not these, because we'll pick these up when elements reference them.
// And we don't compile them independently of that (since they could be very
// incomplete and would lead to many errors for missing this or that.)
// Note: don't include these. They get checked if used.
// globalSimpleTypeDefs
// globalComplexTypeDefs
// globalGroupDefs
// A SchemaDocument is its own schema document.
override lazy val schemaDocument = this

// The Schema registered in the schema set for this document's target
// namespace. Not finding one is an invariant failure.
override lazy val schema = schemaSet.getSchema(targetNamespace) match {
  case Some(sch) => sch
  case None => Assert.invariantFailed("schema not found for schema document's namespace.")
}
// lazy val shortFormAnnotationsAreValid: Boolean = {
// val dfdlns = XMLUtils.DFDL_NAMESPACE
// val attrs = xml.attributes
// // Check that any prefixed properties in the DFDL namespace are allowed on
// // this specific annotated schema component.
// val dfdlAttrs = attrs.filter{ a => a.isPrefixed && a.}
// }
// A schema document has no non-default property sources.
lazy val nonDefaultPropertySources = Seq()
// The default property sources of a schema document: exactly its default
// format chain.
lazy val defaultPropertySources = defaultPropertySources_.value
private val defaultPropertySources_ = LV('defaultPropertySources) {
  Seq(this.defaultFormatChain)
}
/**
 * Builds the DFDL annotation object for an annotation element found at the
 * top level of the schema document.
 *
 * Recognizes dfdl:format, dfdl:defineFormat, dfdl:defineEscapeScheme and
 * dfdl:defineVariable. Any other element is reported as a schema definition
 * error.
 */
def annotationFactory(node: Node): DFDLAnnotation = {
  node match {
    case <dfdl:format>{ content @ _* }</dfdl:format> => new DFDLFormat(node, this)
    case <dfdl:defineFormat>{ content @ _* }</dfdl:defineFormat> => new DFDLDefineFormat(node, this)
    case <dfdl:defineEscapeScheme>{ content @ _* }</dfdl:defineEscapeScheme> => new DFDLDefineEscapeScheme(node, this)
    case <dfdl:defineVariable>{ content @ _* }</dfdl:defineVariable> => new DFDLDefineVariable(node, this)
    case _ => {
      // Show the offending tag as "prefix:label", or just "label" when it has
      // no (or an empty) prefix.
      val qualifier = Option(node.prefix).filter { _ != "" }.map { _ + ":" }.getOrElse("")
      // we make a junk component so as to avail ourselves of
      // a uniform way to do diagnostics, line numbers, etc.
      val junk = new JunkComponent(node, this)
      junk.SDE("Invalid dfdl annotation found: %s", qualifier + node.label)
    }
  }
}
// Creates a DFDLFormat from freshly made "format" annotation XML, attached to
// this schema document.
def emptyFormatFactory = {
  val emptyFormatXML = newDFDLAnnotationXML("format")
  new DFDLFormat(emptyFormatXML, this)
}
// At schema-document level the format annotation is a DFDLFormat.
def isMyFormatAnnotation(a: DFDLAnnotation) = a match {
  case _: DFDLFormat => true
  case _ => false
}
/**
 * Design note about factories for global elements, and recursive types.
 *
 * The point of these factories is that every local site that uses a global def/decl
 * needs a copy so that the def/decl can have attributes which depend on the context
 * where it is used. That is, we can't share global defs/decls because the contexts change
 * their meaning.
 *
 * This works as is, so long as the DFDL Schema doesn't have recursion in it. Recursion would create
 * an infinite tree of local sites and copies. (There's an issue: DFDL-80 in Jira about putting
 * in the check to rule out recursion.)
 *
 * But recursion would be a very cool experimental feature, potentially useful for investigations
 * towards DFDL v2.0 in the future.
 *
 * What's cool: if these factories are changed to memoize, that is, to return the exact same global def/decl
 * object if they are called from the same local site, then recursion "just works". Nothing will diverge
 * creating infinite structures, and furthermore, the "contextual" information will be right. That
 * is to say, the first place some global structure is used is the "top" entry. It gets a copy.
 * If that global ultimately has someplace that recurses back to that global structure, it has to be from some other
 * local site inside it, so that's a different local site, so it will get a copy of the global. But
 * that's where it ends, because the next "unwind" of the recursion will be at this same local site, so
 * it would be returned the exact same def/decl object.
 *
 * Of course there are runtime/backend complexities also: relative paths and variables with newVariableInstance,
 * all of which can go arbitrarily deep in the recursive case.
 */
// One factory per top-level "element" child of the schema element.
lazy val globalElementDecls = (xml \ "element").map {
  new edu.illinois.ncsa.daffodil.dsom.GlobalElementDeclFactory(_, this)
}
// One factory per top-level "simpleType", "complexType" and "group" child of
// the schema element, respectively.
lazy val globalSimpleTypeDefs =
  for (node <- xml \ "simpleType") yield new GlobalSimpleTypeDefFactory(node, this)
lazy val globalComplexTypeDefs =
  for (node <- xml \ "complexType") yield new GlobalComplexTypeDefFactory(node, this)
lazy val globalGroupDefs =
  for (node <- xml \ "group") yield new GlobalGroupDefFactory(node, this)
// The format annotation of this schema document, viewed as a DFDLFormat.
lazy val defaultFormat = formatAnnotation.asInstanceOf[DFDLFormat]

// The annotation objects of this schema document, selected by kind.
lazy val defineFormats = annotationObjs.collect {
  case fmt: DFDLDefineFormat => fmt
}
lazy val defineEscapeSchemes = annotationObjs.collect {
  case scheme: DFDLDefineEscapeScheme => scheme
}
lazy val defineVariables = annotationObjs.collect {
  case variable: DFDLDefineVariable => variable
}
/**
 * By-name getters for the global things that can be referenced.
 */
// Finds the global element declaration factory with the given name in this
// schema document, if any.
// assumes each factory exposes its local name as `name` -- TODO confirm
def getGlobalElementDecl(name: String) = globalElementDecls.find { _.name == name }
// By-name lookups within this schema document. Each yields the first
// definition whose name equals `name`, or None.
// assumes every definition/factory exposes its local name as `name` -- TODO confirm
def getGlobalSimpleTypeDef(name: String) = globalSimpleTypeDefs.find { _.name == name }
def getGlobalComplexTypeDef(name: String) = globalComplexTypeDefs.find { _.name == name }
def getGlobalGroupDef(name: String) = globalGroupDefs.find { _.name == name }
def getDefineFormat(name: String) = defineFormats.find { _.name == name }
// Finds the defineVariable annotation with the given name in this schema
// document, if any.
// assumes DFDLDefineVariable exposes its local name as `name` -- TODO confirm
def getDefineVariable(name: String) = defineVariables.find { _.name == name }
// Accessor for this schema document's default format.
def getDefaultFormat = defaultFormat
// Finds the defineEscapeScheme annotation with the given name in this schema
// document, if any.
// assumes DFDLDefineEscapeScheme exposes its local name as `name` -- TODO confirm
def getDefineEscapeScheme(name: String) = defineEscapeSchemes.find { _.name == name }
/**
 * Used to encapsulate junk XML. E.g., if the user gets the prefix wrong, and types
 * xs:format instead of dfdl:format.
 *
 * We want this to be a schema component so we don't have to have another
 * mechanism for reporting file and line numbers and diagnostic stuff.
 */
class JunkComponent(xml: Node, sc: SchemaComponent) extends SchemaComponent(xml, sc)