| /* Copyright (c) 2012-2015 Tresys Technology, LLC. All rights reserved. |
| * |
| * Developed by: Tresys Technology, LLC |
| * http://www.tresys.com |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy of |
| * this software and associated documentation files (the "Software"), to deal with |
| * the Software without restriction, including without limitation the rights to |
| * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
| * of the Software, and to permit persons to whom the Software is furnished to do |
| * so, subject to the following conditions: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimers. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimers in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * 3. Neither the names of Tresys Technology, nor the names of its contributors |
| * may be used to endorse or promote products derived from this Software |
| * without specific prior written permission. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE |
| * SOFTWARE. |
| */ |
| |
| package edu.illinois.ncsa.daffodil.dsom |
| |
| import scala.xml.Node |
| import edu.illinois.ncsa.daffodil.externalvars.Binding |
| import edu.illinois.ncsa.daffodil.compiler.RootSpec |
| import edu.illinois.ncsa.daffodil.exceptions.Assert |
| import edu.illinois.ncsa.daffodil.xml._ |
| import edu.illinois.ncsa.daffodil.api.Diagnostic |
| import edu.illinois.ncsa.daffodil.xml.XMLUtils |
| import edu.illinois.ncsa.daffodil.xml.NS |
| import edu.illinois.ncsa.daffodil.dsom.oolag.OOLAG |
| import edu.illinois.ncsa.daffodil.exceptions.ThrowsSDE |
| import edu.illinois.ncsa.daffodil.processors.VariableMapFactory |
| import edu.illinois.ncsa.daffodil.externalvars.ExternalVariablesLoader |
| import edu.illinois.ncsa.daffodil.dpath.NodeInfo |
| import java.io.File |
| import edu.illinois.ncsa.daffodil.xml.DFDLCatalogResolver |
| import edu.illinois.ncsa.daffodil.api.DaffodilSchemaSource |
| import edu.illinois.ncsa.daffodil.api.UnitTestSchemaSource |
| |
/**
 * A schema set is exactly that, a set of schemas. Each schema has
 * a target namespace (or 'no namespace'), so a schema set is
 * conceptually a mapping from a namespace onto a schema.
 *
 * Constructing these from XML Nodes is a unit-test
 * interface. The real constructor takes a sequence of schema sources,
 * and you can optionally specify a root element via the rootSpec argument.
 *
 * A schema set is a SchemaComponent (derived from that base), so as to inherit
 * the error/warning accumulation behavior that all SchemaComponents share.
 * A schema set invokes our XML Loader, which can produce validation errors, and
 * those have to be gathered so we can give the user back a group of them, not
 * just one.
 *
 * Schema set is however, a kind of a fake SchemaComponent in that it
 * doesn't correspond to any user-specified schema object. And unlike other
 * schema components it does not live within a schema document.
 *
 * @param rootSpec optional root element specification supplied at construction time;
 *   reconciled with the one given to `rootElement`.
 * @param externalVariables externally supplied variable bindings; they are loaded into
 *   the variable map built by `variableMap`.
 * @param schemaSourcesArg the schema sources making up this set.
 * @param validateDFDLSchemas when true, `validateSchemaFiles` runs the loader's schema
 *   validation on every schema source.
 * @param checkAllTopLevelArg when true, the duplicate top-level check and the collection
 *   of all top-level definitions are registered as required evaluations.
 * @param parent the parent component handed to the SchemaComponent base class
 *   (the unit-test constructor passes null).
 */
final class SchemaSet(
  rootSpec: Option[RootSpec],
  externalVariables: Seq[Binding],
  schemaSourcesArg: Seq[DaffodilSchemaSource],
  val validateDFDLSchemas: Boolean,
  checkAllTopLevelArg: Boolean,
  parent: SchemaComponent)
  extends SchemaComponent(<schemaSet/>, parent) // a fake schema component
  with SchemaSetIncludesAndImportsMixin {

  // Register the attributes that must be evaluated for this component.
  // The order of these registrations is kept exactly as it has always been.
  requiredEvaluations(isValid)
  if (checkAllTopLevel) {
    requiredEvaluations(checkForDuplicateTopLevels())
    requiredEvaluations(this.allTopLevels)
  }
  requiredEvaluations(validateSchemaFiles)
  requiredEvaluations(variableMap)

  lazy val resolver = DFDLCatalogResolver.get

  override lazy val schemaSet = this

  // These things are needed to satisfy the contract of being a schema component.
  final override protected def enclosingComponentDef = None
  override lazy val schemaDocument = Assert.usageError("schemaDocument should not be called on SchemaSet")

  override lazy val lineAttribute: Option[String] = None

  lazy val schemaSources = schemaSourcesArg

  /**
   * Let's use the uri for the first schema document, rather than giving no information at all.
   *
   * It would appear that this is only used for informational purposes
   * and as such, doesn't need to be a URL. Can just be String.
   */
  override lazy val uriString: String = schemaSources(0).uriForLoading.toString

  /**
   * We need to use the loader here to validate the DFDL Schema.
   * Problems reported by the loader are routed to this schema set through
   * a ValidateSchemasErrorHandler.
   */
  private lazy val loader = new DaffodilXMLLoader(new ValidateSchemasErrorHandler(this))

  /**
   * Validates the DFDL Schema sources given in schemaSourcesArg, but only
   * when validateDFDLSchemas is true.
   *
   * A DFDLSchemaValidationException thrown by the loader is turned into a
   * schema definition error carrying that exception's message.
   */
  private def validateSchemaFiles = LV('validateSchemaFiles) {
    // TODO: DFDL-400 remove this flag check once we've fixed all affected tests.
    if (validateDFDLSchemas) {
      schemaSources.foreach { source =>
        try {
          loader.validateSchema(source)
        } catch {
          case e: DFDLSchemaValidationException =>
            // The message is arbitrary text (it may contain '%'), so it is passed
            // as a format argument rather than being used as the format string.
            SDE("%s", DiagnosticUtils.getSomeMessage(e).get)
        }
      }
    }
  }.value

  lazy val checkAllTopLevel = checkAllTopLevelArg

  override def warn(th: Diagnostic) = oolagWarn(th)
  override def error(th: Diagnostic) = oolagError(th)

  /**
   * This constructor for unit testing only.
   *
   * @param sch the schema as an XML node; wrapped in a UnitTestSchemaSource.
   * @param rootNamespace namespace of the root element, or null for none given.
   * @param root local name of the root element, or null for no root specification.
   * @param extVars external variable bindings.
   * @param optTmpDir optional directory passed through to the UnitTestSchemaSource.
   */
  def this(sch: Node, rootNamespace: String = null, root: String = null, extVars: Seq[Binding] = Seq.empty, optTmpDir: Option[File] = None) =
    this(
      // null root => no RootSpec; null rootNamespace => RootSpec without a namespace
      Option(root).map { r => RootSpec(Option(rootNamespace).map { nsName => NS(nsName) }, r) },
      extVars,
      List(UnitTestSchemaSource(sch, Option(root).getOrElse("anon"), optTmpDir)),
      false, // validateDFDLSchemas
      false, // checkAllTopLevelArg
      null) // parent

  lazy val schemaFileList = schemas.map { _.uriString }

  /**
   * True only if there is at least one schema file and every schema file is valid.
   * Evaluated under OOLAG.keepGoing with false as the supplied fallback value.
   */
  lazy val isValid = {
    OOLAG.keepGoing(false) {
      // Compute validity of every file first (no short-circuit), then combine.
      val fileValids = allSchemaFiles.map { _.isValid }
      fileValids.length > 0 && fileValids.forall { v => v }
    }
  }

  /**
   * The validation diagnostics of all schema files, concatenated.
   */
  lazy val validationDiagnostics = allSchemaFiles.flatMap { _.validationDiagnostics }

  /**
   * One Schema per target namespace, each holding the schema documents
   * that share that namespace.
   */
  lazy val schemas = LV('schemas) {
    val schemaPairs = allSchemaDocuments.map { sd => (sd.targetNamespace, sd) }
    //
    // groupBy is deterministic if the hashCode of the key element is deterministic.
    // our NS objects hashCode is same as their underlying string.
    //
    // Alas, being deterministic doesn't mean it is in an order we expect.
    // but at least it is deterministic.
    val schemaGroups = schemaPairs.groupBy { _._1 } // group by the namespace identifier
    val perNamespace = schemaGroups.map {
      case (ns, pairs) => {
        val sds = pairs.map { case (_, sd) => sd }
        new Schema(ns, sds.toSeq, this)
      }
    }
    perNamespace.toSeq
  }.value

  /**
   * For checking uniqueness of global definitions in their namespaces.
   *
   * Tuple layout: (namespace, name, kind of definition, the defining object).
   */
  private type UC = (NS, String, Symbol, SchemaComponent)

  /**
   * Every top-level definition of every schema, tagged with its namespace,
   * name and kind.
   */
  private def allTopLevels: Seq[UC] = LV('allTopLevels) {
    val res = schemas.flatMap { schema =>
      val ns = schema.namespace
      val geds = schema.globalElementDecls.map { g => (ns, g.name, 'Element, g) }
      val stds = schema.globalSimpleTypeDefs.map { g => (ns, g.name, 'SimpleType, g) }
      val ctds = schema.globalComplexTypeDefs.map { g => (ns, g.name, 'ComplexType, g) }
      val gds = schema.globalGroupDefs.map { g => (ns, g.name, 'Group, g) }
      val dfs = schema.defineFormats.map { g => (ns, g.name, 'DefineFormat, g) }
      val dess = schema.defineEscapeSchemes.map { g => (ns, g.name, 'DefineEscapeScheme, g) }
      val dvs = schema.defineVariables.map { g => (ns, g.name, 'DefineVariable, g) }
      geds ++ stds ++ ctds ++ gds ++ dfs ++ dess ++ dvs
    }
    res.asInstanceOf[Seq[UC]]
  }.value

  /**
   * Groups the top-level definitions by (kind, namespace, name) and issues
   * a continuing schema definition error for every group with more than one member.
   *
   * @return all the top-level objects, in group order.
   */
  private def groupedTopLevels = LV('groupedTopLevels) {
    val grouped = allTopLevels.groupBy {
      case (ns, name, kind, _) => (kind, ns, name)
    }
    val grouped2 = grouped.map {
      case (idFields, seq) => {
        val onlyObj = seq.map { case (_, _, _, obj) => obj }
        if (onlyObj.length > 1) {
          // The key was built as (kind, ns, name); it must be taken apart in that same order.
          val (kind, ns, name) = idFields
          val locations = onlyObj.asInstanceOf[Seq[LookupLocation]] // don't like this downcast
          SDEButContinue("multiple definitions for %s {%s}%s.\n%s", kind.name, ns, name,
            locations.map { _.locationDescription }.mkString("\n"))
        }
        (idFields, onlyObj)
      }
    }
    val res = grouped2.flatMap { case (_, topLevelThing) => topLevelThing }.toSeq
    res
  }.value

  // The trick with this is when to call it. If you call it, as
  // a consequence of computing all of this, it will have to parse
  // every file, every included/imported file, etc.
  def checkForDuplicateTopLevels() = {
    groupedTopLevels // demand this.
  }

  /**
   * When the user (of the API) doesn't specify a root element namespace, just a
   * root element name, then this searches for a single element having that name, and if it is
   * unambiguous, it is used as the root.
   *
   * Issues a schema definition error when no element, or more than one element,
   * of that name is found.
   */
  private def findRootElement(name: String) = {
    // log(Info("%s searching for root element with name %s", Misc.getNameFromClass(this), name))
    val candidates = schemas.flatMap { _.getGlobalElementDecl(name) }
    schemaDefinitionUnless(candidates.length != 0, "No root element found for %s in any available namespace", name)
    // Two placeholders, two arguments: the requested name, then the candidate list.
    schemaDefinitionUnless(candidates.length <= 1, "Root element %s is ambiguous. Candidates are %s.",
      name,
      candidates.map { gef =>
        {
          val tns = gef.schemaDocument.targetNamespace
          Assert.invariant(!tns.isUnspecified)
          gef.name + " " + tns.explainForMsg
        }
      }.mkString(", "))
    Assert.invariant(candidates.length == 1)
    val gef = candidates(0)
    val re = gef.forRoot()
    re
  }

  /**
   * Given a RootSpec, get the global element it specifies. Error if ambiguous
   * or not found.
   */
  private def getGlobalElement(rootSpec: RootSpec) = {
    rootSpec match {
      case RootSpec(Some(rootNamespaceName), rootElementName) => {
        val qn = RefQName(None, rootElementName, rootNamespaceName)
        getGlobalElementDecl(qn) match {
          case None => schemaDefinitionError("No global element found for %s", rootSpec)
          case Some(f) => f.forRoot()
        }
      }
      case RootSpec(None, rootElementName) => {
        // no namespace given: search all schemas by local name
        findRootElement(rootElementName)
      }
      case _ => Assert.impossible()
    }
  }

  /**
   * Since the root element can be specified by an API call on the
   * Compiler class, or by an API call on the ProcessorFactory, this
   * method reconciles the two. E.g., you can't specify the root both
   * places, it's one or the other.
   *
   * Also, if you don't specify a root element at all, this
   * grabs the first element declaration of the first schema file
   * to use as the root.
   *
   * @param rootSpecFromProcessorFactory the root specification from the ProcessorFactory, if any.
   * @return the global element declaration to use as the root.
   */
  def rootElement(rootSpecFromProcessorFactory: Option[RootSpec]): GlobalElementDecl = {
    val rootSpecFromCompiler = rootSpec
    val re =
      (rootSpecFromCompiler, rootSpecFromProcessorFactory) match {
        case (Some(rs), None) =>
          getGlobalElement(rs)

        case (None, Some(rs)) =>
          getGlobalElement(rs)

        case (None, None) => {
          // if the root element and rootNamespace aren't provided at all, then
          // the first element of the first schema document is the root
          val sDocs = this.allSchemaDocuments
          assuming(sDocs.length > 0)
          val firstSchemaDocument = sDocs(0)
          val gdeclf = firstSchemaDocument.globalElementDecls
          // The URI is passed as an argument, since it may itself contain '%' (e.g. %20).
          schemaDefinitionUnless(gdeclf.length >= 1, "No global elements in: %s", firstSchemaDocument.uriString)
          gdeclf(0).forRoot()
        }
        case _ => Assert.invariantFailed("illegal combination of root element specifications")
      }
    re
  }

  /**
   * Retrieve schema by namespace.
   *
   * @param namespace the target namespace to look for.
   * @return the schema whose target namespace equals the argument, if there is one.
   */
  def getSchema(namespace: NS) = {
    schemas.find { s => s.targetNamespace == namespace }
  }

  /**
   * XML Schema global objects.
   * Given a namespace and name, try to retrieve the named object
   *
   * These all return factories for the objects, not the objects themselves.
   */
  def getGlobalElementDecl(refQName: RefQName) =
    getSchema(refQName.namespace).flatMap { _.getGlobalElementDecl(refQName.local) }
  def getGlobalSimpleTypeDef(refQName: RefQName) = getSchema(refQName.namespace).flatMap { _.getGlobalSimpleTypeDef(refQName.local) }
  def getGlobalComplexTypeDef(refQName: RefQName) = getSchema(refQName.namespace).flatMap { _.getGlobalComplexTypeDef(refQName.local) }
  def getGlobalGroupDef(refQName: RefQName) = getSchema(refQName.namespace).flatMap { _.getGlobalGroupDef(refQName.local) }

  /**
   * DFDL Schema top-level global objects
   */
  def getDefineFormat(refQName: RefQName) =
    getSchema(refQName.namespace).flatMap { _.getDefineFormat(refQName.local) }

  /**
   * All the format definitions of the schema for the given namespace.
   * Reports a schema definition error against `context` when there is no such schema.
   */
  def getDefineFormats(namespace: NS, context: ThrowsSDE) = getSchema(namespace) match {
    case None => context.schemaDefinitionError("Failed to find a schema for namespace: %s", namespace)
    case Some(sch) => sch.getDefineFormats()
  }

  /**
   * Looks the variable up in the schema for its namespace first, and falls
   * back to the predefined variables when it is not found there.
   */
  def getDefineVariable(refQName: RefQName) = {
    val fromSchema = getSchema(refQName.namespace).flatMap { _.getDefineVariable(refQName.local) }
    // orElse takes its argument by name, so predefinedVars is only consulted on a miss
    fromSchema.orElse {
      this.predefinedVars.find { dfv =>
        dfv.namespace == refQName.namespace && dfv.name == refQName.local
      }
    }
  }
  def getDefineEscapeScheme(refQName: RefQName) = getSchema(refQName.namespace).flatMap { _.getDefineEscapeScheme(refQName.local) }

  /**
   * The primitive type for the given name; None unless the name is in the
   * XML Schema namespace.
   */
  def getPrimType(refQName: RefQName) = {
    if (refQName.namespace != XMLUtils.XSD_NAMESPACE) // must check namespace
      None
    else
      NodeInfo.PrimType.fromNameString(refQName.local)
  }

  /**
   * Creates a DFDLDefineVariable object for the predefined variable.
   *
   * @param theName The variable name.
   * @param theType The type of the variable. ex. xs:string
   * @param theDefaultValue The default value of the variable.
   * @param nsURI The namespace URI of the variable; used for both its namespace
   *   and its target namespace.
   * @param sdoc The schema document the definition is attached to.
   *
   * @return A single DFDLDefineVariable
   */
  private def generateDefineVariable(theName: String, theType: String, theDefaultValue: String, nsURI: String, sdoc: SchemaDocument) = {
    val dfv = new DFDLDefineVariable(
      <dfdl:defineVariable name={ theName } type={ theType } defaultValue={ theDefaultValue } xmlns:xs={ XMLUtils.XSD_NAMESPACE.toString }/>, sdoc) {
      override lazy val namespace = NS(nsURI)
      override lazy val targetNamespace = NS(nsURI)
    }
    dfv
  }

  private lazy val schemaDocForGlobalVars = {
    //
    // OOLAG no longer catches broad classes of exceptions like index out of bounds
    //
    // This avoids OOLAG masking what are coding errors and disguising them as some sort of
    // error in the DFDL schema; however, it also requires that attributes that are
    // evaluated to determine if there is an error in an object (and to force gathering of diagnostics)
    // cannot assume that other data structures are correct.
    //
    // In this case, if the schema document isn't valid, then there won't even be
    // any schemas or schemaDocuments, so we'll index-out-of-bounds, and OOLAG
    // won't suppress that. So we code defensively.
    //
    assuming(schemas.length > 0)
    assuming(schemas(0).schemaDocuments.length > 0)
    this.schemas(0).schemaDocuments(0)
  }

  // We'll declare these here at the SchemaSet level since they're global.
  // All four are xs:string variables in the DFDL namespace.
  lazy val predefinedVars = {
    val nsURI = XMLUtils.DFDL_NAMESPACE.toStringOrNullIfNoNS

    val encDFV = generateDefineVariable("encoding", "xs:string", "UTF-8", nsURI, schemaDocForGlobalVars)
    val boDFV = generateDefineVariable("byteOrder", "xs:string", "bigEndian", nsURI, schemaDocForGlobalVars)
    val binDFV = generateDefineVariable("binaryFloatRep", "xs:string", "ieee", nsURI, schemaDocForGlobalVars)
    val outDFV = generateDefineVariable("outputNewLine", "xs:string", "%LF;", nsURI, schemaDocForGlobalVars)

    Seq(encDFV, boDFV, binDFV, outDFV)
  }

  /**
   * Determines if any of the externally defined variables
   * were specified expecting Daffodil to figure out the
   * namespace. If so, Daffodil attempts to guess the
   * namespace and will SDE if there is any ambiguity.
   *
   * @param allDefinedVariables The list of all DFDLDefineVariables in the SchemaSet.
   *
   * @return A list of external variables updated with any found namespaces:
   *   first the bindings that already had a namespace, then the resolved ones.
   */
  private def resolveExternalVariableNamespaces(allDefinedVariables: Seq[DFDLDefineVariable]) = {
    val finalExternalVariables: scala.collection.mutable.Queue[Binding] = scala.collection.mutable.Queue.empty

    val (extVarsWithNS, extVarsWithoutNS) = externalVariables.partition { b => b.hasNamespaceSpecified }

    extVarsWithNS.foreach { b => finalExternalVariables.enqueue(b) }

    extVarsWithoutNS.foreach { v =>
      Assert.invariant(v.varQName.namespace.isUnspecified)
      val matchingDVs = allDefinedVariables.filter { dv =>
        // just compare local names. We're searching for an unambiguous match
        v.varQName.local == dv.namedQName.local
      }

      matchingDVs.length match {
        case 0 => this.SDE("Could not find the externally defined variable %s.", v.varQName)
        case x: Int if x > 1 =>
          this.SDE("The externally defined variable %s is ambiguous. " +
            "A namespace is required to resolve the ambiguity.\nFound:\t%s",
            v.varQName, matchingDVs.mkString(", "))
        case _ => // This is OK, we have exactly 1 match
      }

      val newNS = matchingDVs.head.namespace
      val newBinding = Binding(v.varQName.local, Some(newNS), v.varValue)
      finalExternalVariables.enqueue(newBinding)
    }
    finalExternalVariables
  }

  /**
   * The variable map for this schema set: built from the variables defined in all
   * schema documents plus the predefined variables, and then loaded with the
   * external variable bindings.
   */
  override def variableMap = LV('variableMap) {
    val dvs = allSchemaDocuments.flatMap { _.defineVariables }
    val alldvs = dvs.union(predefinedVars)
    val vmap = VariableMapFactory.create(alldvs)

    // At this point we want to try to figure out which, if any, external
    // variables did not have a namespace specified.
    val finalExternalVariables = resolveExternalVariableNamespaces(alldvs)

    val finalVMap =
      ExternalVariablesLoader.loadVariables(finalExternalVariables, this, vmap)

    finalVMap
  }.value

}
| |
/**
 * SAX error handler used while validating the schemas of a schema set.
 *
 * Each SAX callback is converted into a schema definition diagnostic located at
 * the schema set's schemaFileLocation and handed back to the schema set.
 *
 * @param sset the schema set that receives the diagnostics.
 */
class ValidateSchemasErrorHandler(sset: SchemaSet) extends org.xml.sax.ErrorHandler {
  import org.xml.sax.SAXParseException

  /** Reports the SAX warning to the schema set as a SchemaDefinitionWarning. */
  def warning(exception: SAXParseException) =
    sset.warn(new SchemaDefinitionWarning(sset.schemaFileLocation, "Warning loading schema due to %s", exception))

  /** Reports the SAX error to the schema set as a SchemaDefinitionError. */
  def error(exception: SAXParseException) =
    sset.error(new SchemaDefinitionError(sset.schemaFileLocation, "Error loading schema due to %s", exception))

  /** Fatal errors are handled exactly like ordinary errors. */
  def fatalError(exception: SAXParseException) = error(exception)
}