blob: 3eefc1c0e834a806dc94a73597c6e284dfd8fa57 [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.daffodil.dsom
import scala.runtime.ScalaRunTime.stringOf
import org.apache.daffodil.api.UnqualifiedPathStepPolicy
import org.apache.daffodil.api.WarnID
import org.apache.daffodil.dpath.DState
import org.apache.daffodil.dpath.NodeInfo
import org.apache.daffodil.dpath.NodeInfo.PrimType
import org.apache.daffodil.exceptions.HasSchemaFileLocation
import org.apache.daffodil.exceptions.SchemaFileLocation
import org.apache.daffodil.infoset.DataValue
import org.apache.daffodil.processors.ParseOrUnparseState
import org.apache.daffodil.processors.Suspension
import org.apache.daffodil.processors.TypeCalculatorCompiler.TypeCalcMap
import org.apache.daffodil.processors.VariableMap
import org.apache.daffodil.util.Delay
import org.apache.daffodil.util.Maybe
import org.apache.daffodil.util.MaybeULong
import org.apache.daffodil.util.PreSerialization
import org.apache.daffodil.xml.NS
import org.apache.daffodil.xml.NamedQName
import org.apache.daffodil.xml.NoNamespace
import org.apache.daffodil.xml.StepQName
// Mixin for objects that can report two sets of element compile infos
// (DPathElementCompileInfo) that they reference.
trait ContentValueReferencedElementInfoMixin {
// NOTE(review): presumably the elements whose *content* is referenced by expressions - confirm against implementers.
def contentReferencedElementInfos: Set[DPathElementCompileInfo]
// NOTE(review): presumably the elements whose *value* is referenced by expressions - confirm against implementers.
def valueReferencedElementInfos: Set[DPathElementCompileInfo]
* For the DFDL path/expression language, this provides the place to
* type check the expression (SDE if not properly typed)
* and provides the opportunity to compile it for efficient evaluation.
* The schemaNode is the schema component
* where the path is being evaluated which due to scoping, may not
* be the same one where it is defined. It is the combination of a
* property valued expression with a schema node that defines
* an evaluation of an expression.
* TODO: Consider - that an expression could be constant in some contexts, not others.
* E.g., if a DFDL schema defines a format where the delimiters are in a header record,
* then those are constant once you are parsing the body records. This does imply
* keeping around the xpath compiler at runtime, which may not be desirable from a
* code size perspective. Whether it's worth it to compile or not is also a question
* of how often each xpath will be repeated.
* TODO: provide enough scope information for this to optimize.
abstract class CompiledExpression[+T <: AnyRef](
val qName: NamedQName,
value: AnyRef)
extends ContentValueReferencedElementInfoMixin with Serializable {
final def toBriefXML(depth: Int = -1) = {
// Brief form: the pretty-printed expression wrapped in single quotes.
// The depth argument plays no part in the rendering.
val quote = "'"
quote + prettyExpr + quote
* Note use of the `stringOf(v)` below.
* Turns out `x.toString` creates some crappy printed representations,
* particularly for `Array[Byte]`. It prints a useless thing like "[@0909280".
* Use of `stringOf` prints "Array(....)".
lazy val prettyExpr = stringOf(value)
* Tells us if the expression is the constant empty string (that is, it is "").
final lazy val isConstantEmptyString = value == ""
* Tells us if the expression can match the empty string. We know it can if the expression
* is exactly the DFDL entity %ES; or %WSP*; but we do not know whether it can if it is a more
* complicated constant or runtime expression.
final lazy val isKnownCanMatchEmptyString = value == "%ES;" || value == "%WSP*;"
* used to obtain a constant value.
* isConstant must be true or this will throw.
@deprecated("Code should just call evaluate(...) on an Evaluatable object.", "2016-02-18")
def constant: T
def isConstant: Boolean
def evaluate(state: ParseOrUnparseState): T
def run(dstate: DState): Unit
* The target type of the expression. This is the type that we want the expression to create.
def targetType: NodeInfo.Kind
* Note that since we can reference variables, and those might never have been read,
* the act of evaluating them changes the variableMap state potentially.
* Use for outputValueCalc.
* The whereBlockedLocation is modified via its block(...) method to indicate where the
* expression blocked (for forward progress checking).
def evaluateForwardReferencing(state: ParseOrUnparseState, whereBlockedLocation: Suspension): Maybe[T]
// Debug rendering. This goes through value.toString rather than stringOf, so it
// does not get the Array(...) formatting that prettyExpr provides.
override def toString(): String = "CompiledExpression(" + value.toString + ")"
/**
 * Holder for the shared empty set of referenced element infos.
 *
 * Note that the member is named `None`, so inside this object (or after
 * importing its members) that name refers to this set, not to scala.None.
 */
object ReferencedElementInfos {
val None: Set[DPathElementCompileInfo] = Set.empty[DPathElementCompileInfo]
// A CompiledExpression whose result is a fixed value supplied at construction.
// qn and value are handed to the CompiledExpression superclass; kind is
// reported back as the target type.
final case class ConstantExpression[+T <: AnyRef](
qn: NamedQName,
kind: NodeInfo.Kind,
value: T) extends CompiledExpression[T](qn, value) {
// The target type is exactly the kind given to the constructor.
def targetType = kind
// The kind NodeInfo.fromObject reports for the constant itself; computed on first use.
lazy val sourceType: NodeInfo.Kind = NodeInfo.fromObject(value)
// Evaluation does not consult the state; it simply yields the constant.
override def evaluate(state: ParseOrUnparseState) = value
def evaluate(dstate: DState, state: ParseOrUnparseState) = {
// Running the expression stores the constant as dstate's current value.
override def run(dstate: DState) = dstate.setCurrentValue(DataValue.unsafeFromAnyRef(value))
final def evaluateForwardReferencing(state: ParseOrUnparseState, whereBlockedLocation: Suspension): Maybe[T] = {
// whereBlockedLocation is ignored since a constant expression cannot block.
// There is never a block location to report: this is always MaybeULong.Nope.
def expressionEvaluationBlockLocation = MaybeULong.Nope
// constant is always available here because isConstant is unconditionally true.
def constant: T = value
def isConstant = true
// Both referenced-element sets are the shared empty set from ReferencedElementInfos.
override def contentReferencedElementInfos = ReferencedElementInfos.None
override def valueReferencedElementInfos = ReferencedElementInfos.None
* This class is to contain only things that are needed to do
* DPath Expression Compilation. Nothing else.
* This exists because some things have to be compiled (e.g., DPath expressions)
* which then become part of the runtime data for elements or other.
* It becomes circular if all the information is bundled together on the
* RuntimeData or ElementRuntimeData objects. So we split it out:
* everything needed to compile expressions gets computed separately
* (first) and kept on this object, and then subsequently ERD data
* structures are created which reference these.
* In other words, it's just necessary layering of the different
* phases of computation.
* Some of this dependency is artificial. If every individual attribute was
* computed separately, none bundled together in common data structures,
* AND everything was computed lazily, then this would probably all
* just sort itself out and not be circular. What makes the circularity
* is that the runtime data structures (ElementRuntimeData in particular),
* are not lazy. Everything part of them is forced to be evaluated when those are
* constructed. So anything that needs even one member of an ERD
* is artificially dependent on *everything* in the ERD.
* Similarly these DPath compiler data structures.... anything that depends on them
* is artificially dependent on ALL of their members' values.
* So the separation of DPath compiler info from runtime data structures is
* really as close as we get in Daffodil to organizing the compilation of schemas
* into "passes".
class DPathCompileInfo(
parentsDelay: Delay[Seq[DPathCompileInfo]],
val variableMap: VariableMap,
val namespaces: scala.xml.NamespaceBinding,
val path: String,
override val schemaFileLocation: SchemaFileLocation,
val unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy,
typeCalcMapArg: TypeCalcMap)
extends ImplementsThrowsSDE
with PreSerialization
with HasSchemaFileLocation {
def initialize: Unit = {
* This "parents" val is a backpointer to all DPathCompileInfo's that
* reference this DPathCompileInfo. The problem with this is that when
* elements are shared, these backpointers create a highly connected graph
* that requires a large stack to serialize using the default java
* serialization as it jumps around parents and children. To avoid this large
* stack requirement, we make the parents backpointer transient. This
* prevents jumping back up to parents during serialization and results in
* only needing a stack depth relative to the schema depth. Once all that
* serialization is completed and all the DPathCompileInfo's are serialized,
* we then manually traverse all the DPathCompileInfo's again and serialize
* the parent sequences (via the serializeParents method). Because all the
* DPathCompileInfo's are already serialized, this just serializes the
* Sequence objects and the stack depth is again relative to the schema
* depth.
lazy val parents = parentsDelay.value
def serializeParents(oos: Unit = {
def deserializeParents(ois: Unit = {
val deserializedParents = ois.readObject().asInstanceOf[Seq[DPathCompileInfo]]
// Set the parents field via reflection so that it can be a val rather than a var
val clazz = this.getClass
val parentsField = try {
} catch {
case e: java.lang.NoSuchFieldException =>
parentsField.set(this, deserializedParents) // set the value to the deserialized value
* This map(identity) pattern appears to work around an unidentified bug with serialization.
lazy val typeCalcMap: TypeCalcMap =
def diagnosticDebugName = path
final private def writeObject(out: Unit = serializeObject(out)
// Identifies this compile info by its path string when printed.
override def toString = "DPathCompileInfo(%s)".format(path)
* The contract here supports the semantics of ".." in paths.
* First we establish the invariant of being on an element. If the
* schema component is an element we're there. Otherwise we move
* outward until we are an element. If there isn't one we return None
* Then we move outward to the enclosing element - and if there
* isn't one we return None. (Which most likely will lead to an SDE.)
* The map(identity) is used to work around a bug related to serialization of Lists
* and the SerializationProxy object.
final lazy val enclosingElementCompileInfos: Seq[DPathElementCompileInfo] = {
val eci = elementCompileInfos.flatMap {
val res = eci.flatMap {
* The contract here supports the semantics of "." in paths.
* If this is an element we're done. If not we move outward
* until we reach an enclosing element.
* This is used because paths refer to elements, so we have to
* walk upward until we get elements. At that point we can
* then navigate element to element.
* The map(identity) is used to work around a bug related to serialization of
* Lists and the SerializationProxy object.
final lazy val elementCompileInfos: Seq[DPathElementCompileInfo] = {
this match {
case e: DPathElementCompileInfo => Seq(e)
case d: DPathCompileInfo => {
val eci = d.parents
eci flatMap { ci => ci.elementCompileInfos }
* This class is to contain only things that are needed to do
* DPath Expression Compilation. Nothing else.
* This exists because some things have to be compiled (e.g., DPath expressions)
* which then become part of the runtime data for elements or other.
* It becomes circular if all the information is bundled together on the
* RuntimeData or ElementRuntimeData objects. So we split it out:
* everything needed to compile expressions gets computed separately
* (first) and kept on this object, and then subsequently ERD data
* structures are created which reference these.
class DPathElementCompileInfo
parentsDelay: Delay[Seq[DPathElementCompileInfo]],
// parentsArg is a transient due to serialization order issues,
// there is no delay/lazy/by-name involvement here.
variableMap: VariableMap,
// This next arg must be a Delay as we're creating a circular
// structure here. Element's compile info points down to their children. Children
// point back to their parents, which may be multiple parents if they are shared.
// We choose a Delay object not call-by-name because it has features to enable
// the GC to collect the closure objects that are created from the calling context
// to realize the by-name arg expression.
elementChildrenCompileInfoDelay: Delay[Seq[DPathElementCompileInfo]],
namespaces: scala.xml.NamespaceBinding,
path: String,
val name: String,
val isArray: Boolean,
val namedQName: NamedQName,
val optPrimType: Option[PrimType],
sfl: SchemaFileLocation,
override val unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy,
typeCalcMap: TypeCalcMap,
val sscd: String,
val isOutputValueCalc: Boolean)
extends DPathCompileInfo(
variableMap, namespaces, path, sfl,
typeCalcMap) {
* Cyclic objects require initialization
override lazy val initialize: Unit = {
override def serializeParents(oos: Unit = {
elementChildrenCompileInfo.foreach { _.serializeParents(oos) }
override def deserializeParents(ois: Unit = {
elementChildrenCompileInfo.foreach { _.deserializeParents(ois) }
lazy val elementChildrenCompileInfo = elementChildrenCompileInfoDelay.value
override def preSerialization: Any = {
// An element that has a primitive type reports that type; one without
// a primitive type is reported as NodeInfo.Complex.
final def typeNode: NodeInfo.Kind =
optPrimType.getOrElse(NodeInfo.Complex)
* Stores whether or not this element is used in any path step expressions
* during schema compilation. Note that this needs to be a var since its
* value is determined during DPath compilation, which requires that the
* DPathElementCompileInfo already exists. So this must be a mutable value
* that can be flipped during schema compilation.
* Note that in the case of multiple child element decls with the same name,
* we must make sure ALL of them get this var set.
* This is done on the Seq returned when findNamedMatches is called.
var isReferencedByExpressions = false
// Prints the element's name (the `name` constructor field) rather than its path.
override def toString = "DPathElementCompileInfo(%s)".format(name)
final private def writeObject(out: Unit = serializeObject(out)
final lazy val rootElement: DPathElementCompileInfo =
if (elementCompileInfos.isEmpty) this
else if (enclosingElementCompileInfos.isEmpty) this
* Marks compile infos to record that the element is referenced by an expression.
* We must indicate for all children having this path step as their name
* that they are referenced by expression. Expressions that end in such
* a path step are considered "query style" expressions as they may
* return more than one node, which DFDL v1.0 doesn't allow. (They also may
* not return multiple, as the different path step children could be in
* different choice branches.) Either way, we have to indicate that they are
* ALL referenced by this path step.
def indicateReferencedByExpression(matches: Seq[DPathElementCompileInfo]): Unit = {
// Every entry in matches is flagged, not only the first one.
for (matchedInfo <- matches) {
matchedInfo.isReferencedByExpressions = true
* Finds a child ERD that matches a StepQName. This is for matching up
* path steps (for example) to their corresponding ERD.
* TODO: Must eventually change to support query-style expressions where there
* can be more than one such child.
final def findNamedChild(
step: StepQName,
expr: ImplementsThrowsOrSavesSDE): DPathElementCompileInfo = {
val matches = findNamedMatches(step, elementChildrenCompileInfo, expr)
final def findRoot(
step: StepQName,
expr: ImplementsThrowsOrSavesSDE): DPathElementCompileInfo = {
val matches = findNamedMatches(step, Seq(this), expr)
private def findNamedMatches(step: StepQName, possibles: Seq[DPathElementCompileInfo],
expr: ImplementsThrowsOrSavesSDE): Seq[DPathElementCompileInfo] = {
val matchesERD: Seq[DPathElementCompileInfo] = step.findMatches(possibles)
val retryMatchesERD =
if (matchesERD.isEmpty &&
unqualifiedPathStepPolicy == UnqualifiedPathStepPolicy.PreferDefaultNamespace &&
step.prefix.isEmpty && step.namespace != NoNamespace) {
// we failed to find a match with the default namespace. Since the
// default namespace was assumed but didn't match, the unqualified path
// step policy allows us to try to match NoNamespace elements.
val noNamespaceStep = step.copy(namespace = NoNamespace)
} else {
retryMatchesERD.length match {
case 0 => noMatchError(step, possibles)
case 1 => // ok
case _ => queryMatchWarning(step, retryMatchesERD, expr)
final def findNamedChildren(step: StepQName, possibles: Seq[DPathElementCompileInfo]): Seq[DPathElementCompileInfo] = {
val matchesERD = step.findMatches(possibles)
val retryMatchesERD =
if (matchesERD.isEmpty &&
unqualifiedPathStepPolicy == UnqualifiedPathStepPolicy.PreferDefaultNamespace &&
step.prefix.isEmpty && step.namespace != NoNamespace) {
// we failed to find a match with the default namespace. Since the
// default namespace was assumed but didn't match, the unqualified path
// step policy allows us to try to match NoNamespace elements.
val noNamespaceStep = step.copy(namespace = NoNamespace)
} else {
* Issues a good diagnostic with suggestions about near-misses on names
* like missing prefixes.
final def noMatchError(
step: StepQName,
possibles: Seq[DPathElementCompileInfo] = this.elementChildrenCompileInfo) = {
// didn't find an exact match.
// So all the rest of this is about providing a meaningful
// and helpful diagnostic message.
// Did the local name match at all?
val localOnlyERDMatches = {
val localName = step.local
if (step.namespace == NoNamespace) Nil
else { _.namedQName }.collect {
case localMatch if localMatch.local == localName => localMatch
// If the local name matched, then perhaps the user just forgot
// to put on a prefix.
// We want to suggest use of a prefix that is bound to the
// desired namespace already.. that is from within our current scope
val withStepsQNamePrefixes = { qn =>
val stepPrefixForNS = NS.allPrefixes(qn.namespace, this.namespaces)
val proposedStep = stepPrefixForNS match {
case Nil => qn
case Seq(hd, _*) => StepQName(Some(hd), qn.local, qn.namespace)
val interestingCandidates = { _.toPrettyString }.mkString(", ")
if (interestingCandidates.length > 0) {
"No element corresponding to step %s found,\nbut elements with the same local name were found (%s).\nPerhaps a prefix is incorrect or missing on the step name?",
step.toPrettyString, interestingCandidates)
} else {
// There weren't even any local name matches.
val interestingCandidates = { _.namedQName }.mkString(", ")
if (interestingCandidates != "")
"No element corresponding to step %s found. Possibilities for this step include: %s.",
step.toPrettyString, interestingCandidates)
"No element corresponding to step %s found.",
final def queryMatchWarning(step: StepQName, matches: Seq[DPathElementCompileInfo],
expr: ImplementsThrowsOrSavesSDE) = {
expr.SDW(WarnID.QueryStylePathExpression, "Statically ambiguous or query-style paths not supported in step path: '%s'. Matches are at locations:\n%s",
step,"- ", "\n- ", ""))