blob: 95ce0d04aa907950611500149236a15bba105179 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.daffodil.core.dsom
import java.lang.{ Integer => JInt }
import java.util.UUID
import org.apache.daffodil.core.dsom.walker.TermView
import org.apache.daffodil.core.grammar.TermGrammarMixin
import org.apache.daffodil.lib.api.WarnID
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.schema.annotation.props.Found
import org.apache.daffodil.lib.schema.annotation.props.NotFound
import org.apache.daffodil.lib.schema.annotation.props.SeparatorSuppressionPolicy
import org.apache.daffodil.lib.schema.annotation.props.gen.LengthKind
import org.apache.daffodil.lib.schema.annotation.props.gen.OccursCountKind
import org.apache.daffodil.lib.schema.annotation.props.gen.YesNo
/**
* Mixin for objects that are shared, but have consistency checks to be run
* that are based on the concrete Term objects they are associated with.
*
* E.g., DFDL statements may have checks that need to know the encoding
* (if it is known at compile time). We call this on each statement to enable
* the checking code to be expressed on that statement where it is relevant,
* but have it be callable from the concrete Term once it is created.
*
* This is a way to avoid use of backpointers from shared objects to every
* thing referencing them.
*/
trait HasTermCheck {

  /**
   * Runs this object's consistency checks against the supplied concrete Term.
   *
   * This is the public, non-overridable entry point; the checking itself is
   * delegated to the protected `check` hook. Keeping the two separate leaves
   * room to later avoid repeated check calls by memoizing the results here.
   *
   * @param term the concrete Term this shared object is checked against
   */
  final def checkTerm(term: Term): Unit = check(term)

  /**
   * Hook to override with the checks that require information about the
   * concrete Term.
   *
   * Because the Term is passed in, the checking code needs no backpointer to
   * it. The default implementation does nothing.
   *
   * @param term the concrete Term to check against
   */
  protected def check(term: Term): Unit = ()
}
/**
* Term, and what is and isn't a Term, is a key concept in DSOM.
*
* From the kinds of elements, ElementRef and LocalElementDecl are Term. A GlobalElementDecl is *not* a Term.
* From the kinds of sequences, LocalSequence and SequenceGroupRef are Term. GlobalSequenceGroupDef is *not* a Term.
* From the kinds of choices, Choice and ChoiceGroupRef are Term. GlobalChoiceGroupDef is *not* a Term.
*
* Terms are the things we actually generate parsers/unparsers for. Non-Terms just
* contribute information used by Terms.
*/
trait Term
extends AnnotatedSchemaComponent
with ResolvesScopedProperties
with ResolvesDFDLStatementMixin
with TermRuntimeValuedPropertiesMixin
with TermGrammarMixin
with DelimitedRuntimeValuedPropertiesMixin
with InitiatedTerminatedMixin
with TermEncodingMixin
with EscapeSchemeRefMixin
with TermView {
// Constructor-time registrations. requiredEvaluationsIfActivated is defined
// outside this file view; judging by its name, the values passed here are
// forced when the component is activated rather than left to lazy evaluation.
// NOTE(review): confirm against its definition. The order of the four calls
// is kept exactly as written.
requiredEvaluationsIfActivated(annotationObjs)
requiredEvaluationsIfActivated(nonDefaultPropertySources)
requiredEvaluationsIfActivated(defaultPropertySources)
requiredEvaluationsIfActivated(termChecks)
/**
* Calls checkTerm on each of this term's statements, passing this term as the
* concrete Term to check against. Being a lazy val, the body is evaluated on
* first access and the (Unit) result is then cached.
*/
private lazy val termChecks = {
statements.foreach { _.checkTerm(this) }
}
/**
* Short alias for dpathCompileInfo.
*
* The abbreviation is analogous to trd; tci is the compile-time counterpart.
*/
final def tci = dpathCompileInfo
/**
 * Recursively walks this Term and its term children and records a warning
 * for every DFDL property that was defined but never accessed.
 *
 * Whether a property was accessed is determined from the property cache, so
 * this must only be evaluated after all property accesses are complete
 * (e.g. schema compilation has finished); otherwise there can be false
 * positives.
 *
 * Properties defined on this term are reported against this term. Properties
 * defined on the component it refers to (if any) are reported against that
 * referred-to component.
 */
final lazy val checkUnusedProperties: Unit = {
  // Properties defined directly on this term.
  val ownProps = formatAnnotation.justThisOneProperties
  // The referred-to component (if any) paired with the properties defined on
  // it, so the warning can later be issued on that component.
  val referredWithProps = optReferredToComponent.map { referred =>
    (referred, referred.formatAnnotation.justThisOneProperties)
  }
  // A property counts as used if it is present in the property cache.
  val accessedProps = propCache

  ownProps.foreach { case (prop, (value, _)) =>
    if (!accessedProps.contains(prop)) {
      SDW(WarnID.IgnoreDFDLProperty, "DFDL property was ignored: %s=\"%s\"", prop, value)
    }
  }

  referredWithProps.foreach { case (referred, referredProps) =>
    referredProps.foreach { case (prop, (value, _)) =>
      if (!accessedProps.contains(prop)) {
        referred.SDW(
          WarnID.IgnoreDFDLProperty,
          "DFDL property was ignored: %s=\"%s\"",
          prop,
          value
        )
      }
    }
  }

  // Recurse into the child terms.
  termChildren.foreach(_.checkUnusedProperties)
}
/**
* One-based position of this term among the group members of its lexically
* enclosing group: priorSiblings takes the first (position - 1) members and
* laterSiblings drops the first position members.
*/
def position: Int
/**
 * Looks up the "ignoreCase" property for this term.
 *
 * @return Some(YesNo) built from the property's value and location when the
 *         lookup result is Found, otherwise None.
 */
def optIgnoreCase: Option[YesNo] =
  Some(findPropertyOption("ignoreCase")).collect {
    case Found(propValue, propLocation, _, _) => YesNo(propValue, propLocation)
  }
/**
* True if this term is scalar, meaning it has no dimension: exactly one
* occurrence.
*
* Terms include both model groups and elements. In DFDL v1.0 model groups
* are always scalar, because DFDL v1.0 doesn't allow min/max occurs on
* model groups.
*/
def isScalar: Boolean
/**
* Determines if the element is optional, as in has zero or one instance only.
*
* There are two senses of optional:
*
* 1) Optional as in "might not be present", for any reason.
* Consistent with this is Required meaning "must occur", for any
* reason. So all the occurrences of an array that has a fixed number of
* occurrences are required, and some of the occurrences of an array
* that has a variable number of occurrences are optional.
*
* 2) Optional as in minOccurs="0" maxOccurs="1".
*
* Consistent with (2) is defining array as maxOccurs >= 2, and Required
* as minOccurs=maxOccurs=1, but there are also special cases for
* occursCountKind parsed and stopValue, since those do not examine min/max
* occurs - min/max occurs are only used for validation in those
* occursCountKinds.
*
* The DFDL spec is not believed to be entirely consistent here either.
*/
override def isOptional: Boolean
/**
* True if this term is an array. An array can have more than one occurrence.
*
* An optional element (minOccurs=0, maxOccurs=1) is an array only
* if occursCountKind is parsed, because then the max/min are ignored.
*/
override def isArray: Boolean
// The children of this term that are elements (ElementBase), per the declared
// return type. NOTE(review): whether this looks through nested model groups
// is not visible from here -- confirm against the implementations.
def elementChildren: Seq[ElementBase]
/**
* The alignment of this term, as an integer number of bits. This takes into
* account the representation, type, charset encoding and alignment-related
* properties.
*/
def alignmentValueInBits: JInt
/**
* True if this term is known to have some text aspect. This can be the value,
* or it can be delimiters.
*
* False only if this term cannot ever have text in it. Example: a sequence
* with no delimiters. Example: a binary int with no delimiters.
*
* Note: this is not recursive - it does not roll-up from children terms.
* TODO: it does have to deal with the prefix length situation. The type of the
* prefix may be textual.
*
* The default here is simply hasDelimiters. Override in element base to take
* simple type or prefix length situations into account.
*/
lazy val couldHaveText = hasDelimiters
// TODO: if we add recursive types capability to DFDL this will have to change
// but so will many of these compiler passes up and down through the DSOM objects.
/**
* The termChildren are the children that are Terms, i.e., derived from the Term
* base class. This is to make it clear we're not talking about the XML
* structures inside the XML parent (which might include annotations, etc.).
*
* For elements this is Nil for simple types, a single model group for
* complex types. For model groups there can be more children.
*/
def termChildren: Seq[Term]
// Identifier for this Term instance: a random UUID generated once, when the
// instance is initialized (tID is an eagerly initialized val).
final val tID = UUID.randomUUID()
/**
 * True if this term is represented, i.e., it is not an element that carries
 * the inputValueCalc property.
 *
 * Terms that are not elements are always represented. An element is
 * represented exactly when looking up inputValueCalc yields NotFound. When an
 * element does carry inputValueCalc, a schema definition error is raised if
 * that element is optional or is not scalar.
 */
final lazy val isRepresented = this match {
  case elem: ElementBase =>
    elem.inputValueCalcOption match {
      case _: NotFound => true
      case _ =>
        // inputValueCalc is present: it is only legal on required scalars.
        if (isOptional) {
          SDE("inputValueCalc property can not appear on optional elements")
        }
        if (!isScalar) {
          SDE("inputValueCalc property can not appear on array elements")
        }
        false
    }
  case _ => true
}
/**
 * Answers whether this term appears anywhere inside an unordered sequence.
 *
 * A term can be in both ordered and unordered sequences if it appears in a
 * global sequence def whose group refs specify sequenceKind='ordered' on some
 * and sequenceKind='unordered' on others.
 *
 * Only the lexical parent is consulted: false when there is no lexical
 * parent, or when it is a choice def or a complex type. Any other kind of
 * lexical parent is an invariant failure.
 */
final lazy val isEverInUnorderedSequence: Boolean =
  optLexicalParent.exists {
    case seq: SequenceTermBase => !seq.isOrdered
    case globalSeqDef: GlobalSequenceGroupDef =>
      // Unordered if any group ref to this global def is unordered. The
      // partial function assumes every such ref is a SequenceGroupRef; any
      // other kind of ref would raise a MatchError.
      globalSeqDef.schemaSet.root.groupRefsTo(globalSeqDef).exists {
        case seqRef: SequenceGroupRef => !seqRef.isOrdered
      }
    case _: ChoiceDefMixin | _: ComplexTypeBase => false
    case other => Assert.invariantFailed("Unexpected lexical parent: " + other)
  }
/**
 * The group definition that immediately lexically encloses this term, if any.
 *
 * Determined from the lexical parent:
 *  - a Choice, LocalSequence or GlobalGroupDef parent is itself the answer;
 *  - the implied sequence wrapped around a choice branch is skipped, giving
 *    the enclosing group def of that implied sequence (the actual choice);
 *  - a RepTypeQuasiElementDecl parent defers to its own enclosing group def;
 *  - a SchemaDocument, complex type or simple type parent yields None;
 *  - no lexical parent yields None.
 *
 * Any other kind of lexical parent is an invariant failure.
 */
final lazy val immediatelyEnclosingGroupDef: Option[GroupDefLike] =
  optLexicalParent.flatMap[GroupDefLike] {
    case choice: Choice => Some(choice)
    //
    // skip past the implied sequence that is wrapped around choice branches
    // to the actual choice
    //
    case impliedSeq: ChoiceBranchImpliedSequence => impliedSeq.immediatelyEnclosingGroupDef
    case localSeq: LocalSequence => Some(localSeq)
    case _: SchemaDocument =>
      // we must be the Root elementRef or a quasi node
      Assert.invariant(this.isInstanceOf[Root] || this.isInstanceOf[QuasiElementDeclBase])
      None
    case globalGroupDef: GlobalGroupDef => Some(globalGroupDef)
    case _: ComplexTypeBase => None
    case repType: RepTypeQuasiElementDecl => repType.immediatelyEnclosingGroupDef
    case _: SimpleTypeBase => None
    case other =>
      // The message names this member; it previously named
      // immediatelyEnclosingModelGroup, which is a different member.
      Assert.invariantFailed(
        "immediatelyEnclosingGroupDef called on " + this + " with lexical parent " + other
      )
  }
/**
 * The immediately enclosing group def, but only when it is a ModelGroup;
 * None when there is no enclosing group def or it is not a ModelGroup.
 */
lazy val immediatelyEnclosingModelGroup: Option[ModelGroup] =
  immediatelyEnclosingGroupDef.collect { case modelGroup: ModelGroup => modelGroup }
/**
 * Siblings before this in the lexically enclosing sequence: the first
 * (position - 1) group members, position being one-based.
 *
 * Nil if enclosed by a choice def, or this is the root.
 */
final lazy val priorSiblings =
  optLexicalParent
    .collect {
      case seqTerm: SequenceTermBase => seqTerm.groupMembers
      case globalSeqDef: GlobalSequenceGroupDef => globalSeqDef.groupMembers
    }
    .map { members => members.take(position - 1) }
    .getOrElse(Nil)
/**
 * Siblings after this in the lexically enclosing sequence: the group members
 * left after dropping the first position members, position being one-based.
 *
 * Nil if enclosed by a choice def, or this is the root.
 */
final lazy val laterSiblings =
  optLexicalParent
    .collect {
      case seqTerm: SequenceTermBase => seqTerm.groupMembers
      case globalSeqDef: GlobalSequenceGroupDef => globalSeqDef.groupMembers
    }
    .map { members => members.drop(position) }
    .getOrElse(Nil)
/**
 * All group members (including this term itself) of the lexically enclosing
 * sequence or global sequence group def.
 *
 * Nil if enclosed by a choice def, or this is the root.
 */
final lazy val allSiblings =
  optLexicalParent
    .collect {
      case seqTerm: SequenceTermBase => seqTerm.groupMembers
      case globalSeqDef: GlobalSequenceGroupDef => globalSeqDef.groupMembers
    }
    .getOrElse(Nil)
// final lazy val laterElementSiblings = laterSiblings.collect { case elt: ElementBase => elt }
// The nearest sibling before this term (the last of priorSiblings), if any.
final lazy val priorSibling = priorSiblings.lastOption
// The nearest sibling after this term (the first of laterSiblings), if any.
final lazy val nextSibling = laterSiblings.headOption
/**
* Does this term always have statically required instances in the data stream?
*
* This excludes elements that have no representation, e.g., elements with
* dfdl:inputValueCalc.
*
* Terms that are optional, either via an element having zero occurrences or
* via a choice branch, fail this test.
*/
def hasStaticallyRequiredOccurrencesInDataRepresentation: Boolean
/**
* True if the term has some syntax, either itself or recursively within
* itself, that must appear in the data stream.
*
* False only if the term has possibly no representation whatsoever in the
* data stream.
*/
def hasKnownRequiredSyntax: Boolean
/**
* True if this term can have a varying number of occurrences.
*
* The default defined here is false. Overridden for elements. See
* [[ParticleMixin.isVariableOccurrences]]
*/
def isVariableOccurrences: Boolean = false
/**
 * The concept of potentially trailing is defined in the DFDL specification.
 *
 * This concept applies only to terms that are direct children of a sequence.
 *
 * It is true for terms that may be absent from the representation and that,
 * furthermore, may be last in a sequence, so that whether they are trailing
 * (and hence whether their separator may be absent) is a relevant issue.
 *
 * If an element is an array and has some required instances, then it is not
 * potentially trailing, as some instances will have to appear, with
 * separators.
 *
 * This concept applies only to elements and model groups that have
 * representation in the data stream.
 *
 * Previously there was a misguided notion that, since only DFDL elements can
 * have minOccurs/maxOccurs, potentially trailing didn't apply to model groups
 * (sequences and choices, the other kind of Term). But this is not the case.
 *
 * A sequence/choice which has no framing, and whose content doesn't exist
 * (no child elements, and any contained model groups recursively with no
 * framing and no content) effectively "disappears" from the data stream, and
 * in some cases need not have a separator.
 *
 * This is computed by way of couldBePotentiallyTrailing, which says that the
 * term in isolation, looking only at its own characteristics and disregarding
 * its following siblings in any given sequence, has the characteristics of
 * being potentially trailing.
 *
 * That is then combined with the same question asked of the following
 * siblings to determine whether a given term, that is a child of a sequence,
 * is in fact potentially trailing within that sequence.
 *
 * These two concepts are mutually recursive, since a sequence that is
 * entirely composed of potentially trailing children satisfies
 * couldBePotentiallyTrailing in whatever sequence encloses it.
 */
final lazy val isPotentiallyTrailing: Boolean =
  // The later siblings are consulted only if this term itself qualifies.
  couldBePotentiallyTrailing && laterSiblings.forall { sibling => sibling.isPotentiallyTrailing }
/**
 * True if this term, looking only at its own characteristics and not at its
 * following siblings, has the characteristics of being potentially trailing.
 *
 * For an element, all of these must hold: it is represented; minOccurs is 0,
 * or it is non-scalar with minOccurs > 0 and is the last declared represented
 * term in its sequence; occursCountKind is implicit or parsed; lengthKind is
 * delimited; and it has no discriminator statements.
 *
 * For a model group, all of these must hold: it is represented; it has no
 * framing; if it is a sequence, it has a separator and its
 * separatorSuppressionPolicy is not never; it has no statements; and every
 * represented member could itself be potentially trailing.
 *
 * The conditions are evaluated left to right with short-circuiting, so a
 * later condition is evaluated only when all earlier ones held.
 */
final lazy val couldBePotentiallyTrailing: Boolean = {
  import SeparatorSuppressionPolicy._
  this match {
    case elem: ElementBase =>
      // Local defs, each used exactly once below, so each is evaluated only
      // if the && chain actually reaches it.
      def occurrencesMayBeAbsent =
        elem.minOccurs == 0 ||
          (!elem.isScalar && elem.minOccurs > 0 && elem.isLastDeclaredRepresentedInSequence)
      def occursCountKindAllowed =
        (elem.occursCountKind eq OccursCountKind.Implicit) ||
          (elem.occursCountKind eq OccursCountKind.Parsed)
      def lengthKindAllowed = elem.lengthKind eq LengthKind.Delimited
      def lacksDiscriminators =
        statements.forall { stmt => !stmt.isInstanceOf[DFDLDiscriminator] }

      isRepresented &&
      occurrencesMayBeAbsent &&
      occursCountKindAllowed &&
      lengthKindAllowed &&
      lacksDiscriminators

    case group: ModelGroup =>
      // Sequences need a separator and a suppression policy other than Never;
      // model groups that are not sequences pass this condition.
      def separatorSuppressionAllowed = group match {
        case seq: SequenceTermBase =>
          seq.hasSeparator &&
          (seq.separatorSuppressionPolicy match {
            case Never => false
            case TrailingEmpty | TrailingEmptyStrict | AnyEmpty => true
          })
        case _ => true
      }
      def membersCouldAllBeTrailing =
        group.representedMembers.forall { member => member.couldBePotentiallyTrailing }

      isRepresented &&
      !group.hasFraming &&
      separatorSuppressionAllowed &&
      statements.isEmpty &&
      membersCouldAllBeTrailing
  }
}
/**
* Returns a list of sibling terms that could appear before this.
*
* Uses only the lexically enclosing group/groupDef.
*
* Nil if enclosed by a choice or this is root.
*
* The computation itself lives in potentialPriorTermsDef; here it is wrapped
* in LV(...) and read out through .value. NOTE(review): LV is defined outside
* this file view -- confirm what the wrapper adds around the computation.
*/
lazy val potentialPriorTerms: Seq[Term] = LV('potentialPriorTerms) {
potentialPriorTermsDef
}.value
// Computes the terms that could appear immediately before this one, based on
// the lexical parent only (no lexical parent yields an empty result):
//  - unordered sequence parent: every represented group member;
//  - other sequence def parent: the run of represented prior siblings nearest
//    to this term that are not scalar elements, followed by the first scalar
//    element found behind that run (if any);
//  - any other kind of parent: Nil.
private def potentialPriorTermsDef = {
optLexicalParent.toSeq.flatMap { lp =>
lp match {
// Unordered: any represented member of the sequence may come before this
// one. Note the filter is over all group members, not only prior siblings.
case sq: SequenceTermBase if !sq.isOrdered =>
sq.groupMembers.filter { _.isRepresented }
case sq: SequenceDefMixin => {
val psibs = priorSiblings
val representedPriorSiblings = psibs.filter { _.isRepresented }
// Walk backwards from the nearest prior sibling. span keeps the leading
// run for which the predicate is true (anything that is not a scalar
// element) and stops at the first scalar element, which becomes the head
// of the second part.
val (optionalPotentialPriorReversed, requiredPotentialPriorReversed) =
representedPriorSiblings.reverse.toStream.span { sib =>
sib match {
case eb: ElementBase if eb.isScalar =>
false
case _ =>
true
}
}
// Put the skippable siblings back into declaration order.
val optionalPotentialPrior = optionalPotentialPriorReversed.reverse
// The nearest scalar element behind them, if there is one.
val firstNonOptional = requiredPotentialPriorReversed.headOption
// The scalar element is appended after the skippable siblings, even though
// it is declared before them.
optionalPotentialPrior ++ firstNonOptional
}
case _ => Nil
}
}
}
/**
 * True if the values of this term can be figured out during unparsing, or
 * need not appear in the infoset at all.
 *
 * Usually this means the term/its descendants have a default value (i.e.
 * are defaultable), have dfdl:outputValueCalc defined, or are optional
 * (minOccurs=0).
 *
 * Note that this currently only requires OVC and optionality, since defaults
 * aren't fully implemented everywhere. This may need to change when defaults
 * are fully implemented.
 *
 * A sequence qualifies when all of its group members do, and a choice when at
 * least one of its group members does. A complex element defers to the group
 * of its complex type. Any other element qualifies when it is not
 * represented or can be absent from the unparse infoset.
 */
lazy val canUnparseIfHidden: Boolean = this match {
  case seq: SequenceTermBase =>
    seq.groupMembers.forall { member => member.canUnparseIfHidden }
  case choice: ChoiceTermBase =>
    choice.groupMembers.exists { branch => branch.canUnparseIfHidden }
  case elem: ElementBase =>
    if (elem.isComplexType) elem.complexType.group.canUnparseIfHidden
    else !elem.isRepresented || elem.canBeAbsentFromUnparseInfoset
}
/**
 * True if this term is the last one in the enclosing sequence that is
 * represented in the data stream (a term that is not represented is, e.g.,
 * an element with dfdl:inputValueCalc). In other words, none of the later
 * siblings is represented.
 *
 * This bears on whether the enclosing sequence's separator (if one is
 * defined) is relevant.
 */
final lazy val isLastDeclaredRepresentedInSequence =
  !laterSiblings.exists { sibling => sibling.isRepresented }
/**
 * The child terms of this term: the group members for a model group, the
 * single group of the complex type for a complex element, and no children
 * for any other element.
 */
final protected lazy val realChildren: Seq[Term] = this match {
  case group: ModelGroup => group.groupMembers.asInstanceOf[Seq[Term]]
  case elem: ElementBase =>
    if (elem.isComplexType) Seq(elem.complexType.group) else Seq()
}
}