daffodil-runtime1/src/main/scala/edu/illinois/ncsa/daffodil/processors/Parser.scala - daffodil - Git at Google

 /* Copyright (c) 2012-2015 Tresys Technology, LLC. All rights reserved.
  *
  * Developed by: Tresys Technology, LLC
  *               http://www.tresys.com
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy of
  * this software and associated documentation files (the "Software"), to deal with
  * the Software without restriction, including without limitation the rights to
  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  * of the Software, and to permit persons to whom the Software is furnished to do
  * so, subject to the following conditions:
  *
  *  1. Redistributions of source code must retain the above copyright notice,
  *     this list of conditions and the following disclaimers.
  *
  *  2. Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimers in the
  *     documentation and/or other materials provided with the distribution.
  *
  *  3. Neither the names of Tresys Technology, nor the names of its contributors
  *     may be used to endorse or promote products derived from this Software
  *     without specific prior written permission.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
  * SOFTWARE.
  */

 package edu.illinois.ncsa.daffodil.processors

 import edu.illinois.ncsa.daffodil.api.Diagnostic
 import edu.illinois.ncsa.daffodil.dsom.RuntimeSchemaDefinitionError
 import edu.illinois.ncsa.daffodil.exceptions.Assert
 import edu.illinois.ncsa.daffodil.util._
 import Maybe._
 import edu.illinois.ncsa.daffodil.api._

 object Processor {

   /**
    * Entry point for one-time initialization of a processor and everything beneath it.
    *
    * This is really part of compiling the DFDL schema and so should be called from there.
    * That ensures it is not, for example, being run from multiple threads. If called from
    * the runtime module, compilation might actually end up happening at run time.
    */
   def initialize(proc: Processor): Unit = ensureCompiled(proc)

   /**
    * Marks `proc` as initialized, then visits its runtime dependencies followed by its
    * child processors. A processor that is already marked is skipped entirely.
    */
   private def ensureCompiled(proc: Processor): Unit = {
     if (!proc.isInitialized) {
       // The flag is set before descending, so reaching this processor again is a no-op.
       proc.isInitialized = true
       proc.runtimeDependencies.foreach { dep => ensureCompiled(dep) }
       proc.childProcessors.foreach { child => ensureCompiled(child) }
     }
   }

   /** Calls `ensureCompiled` on `ev`, then does the same for each of its runtime dependencies. */
   private def ensureCompiled(ev: EvaluatableBase[AnyRef]): Unit = {
     ev.ensureCompiled
     ev.runtimeDependencies.foreach { dep => ensureCompiled(dep) }
   }
 }

 /**
  * Base trait for processors. Things common to both unparser and parser go here.
  */
 trait Processor
   extends ToBriefXMLImpl
   with Logging
   with Serializable {

   /** Starts out false; `Processor.initialize` sets it to true when it visits this processor. */
   var isInitialized: Boolean = false

   /** Runtime data for this processor. */
   def context: RuntimeData

   /** Processors nested directly beneath this one. */
   def childProcessors: Seq[Processor]

   /** Evaluatables that `Processor.initialize` compiles along with this processor. */
   def runtimeDependencies: Seq[Evaluatable[AnyRef]]
 }

 /** A processor that has no child processors. */
 trait PrimProcessor extends Processor {
   override def childProcessors: Seq[Processor] = List.empty[Processor]
 }

 /**
  * Mixin shared by text parsers and unparsers for applying the charset-encoding
  * related settings to the data stream.
  */
 trait TextParserUnparserRuntimeBase {

   /**
    * Applies the UTF-16 width and the default encoding error policy from the
    * term's encoding info to the state's data stream.
    *
    * Does nothing when the state has no data stream.
    */
   final protected def setupEncoding(state: ParseOrUnparseState, erd: TermRuntimeData): Unit = {
     if (!state.dataStream.isEmpty) {
       val info = erd.encodingInfo
       val stream = state.dataStream.get
       stream.setMaybeUTF16Width(info.optionUTF16Width)
       stream.setEncodingErrorPolicy(info.defaultEncodingErrorPolicy)
     }
   }

 }

 /**
  * Parser-side mixin that prepares the data input stream for decoding text.
  */
 trait TextParserRuntimeMixin extends TextParserUnparserRuntimeBase {

   def context: RuntimeData

   /**
    * Present so that the reflective scanner finds that this parser depends on
    * the charset evaluatable of its term's encoding info.
    */
   lazy val deps = Seq(context.asInstanceOf[TermRuntimeData].encodingInfo.charsetEv)

   /**
    * Supplies the decoder to install on the input stream.
    *
    * Override this in selected derived classes, such as the hexBinary ones, in
    * order to force use of specific encodings.
    */
   protected def decoder(state: PState, trd: TermRuntimeData) = trd.encodingInfo.getDecoder(state)

   /**
    * Applies the shared encoding settings, then the encoding error policy and
    * finally the decoder to the state's data input stream.
    */
   final protected def setupDecoder(state: PState, erd: TermRuntimeData): Unit = {
     setupEncoding(state, erd)
     val input = state.dataInputStream
     input.setEncodingErrorPolicy(erd.encodingInfo.defaultEncodingErrorPolicy)
     // The decoder must be set after the above since this will compute other settings based on those.
     input.setDecoder(decoder(state, erd))
   }

 }

 /** Mixin shared by binary parsers and unparsers for applying byte order and bit order to the data stream. */
 trait BinaryParserUnparserRuntimeMixin {

   /**
    * Evaluates `byteOrdEv` against the state and sets the resulting byte order,
    * together with the element's default bit order, on the state's data stream.
    *
    * Does nothing when the state has no data stream.
    */
   final protected def setupByteOrder(state: ParseOrUnparseState, erd: ElementRuntimeData, byteOrdEv: ByteOrderEv): Unit = {
     if (!state.dataStream.isEmpty) {
       val stream = state.dataStream.get
       stream.setByteOrder(byteOrdEv.evaluate(state))
       stream.setBitOrder(erd.defaultBitOrder)
     }
   }
 }

 /**
  * Encapsulates lower-level parsing with a uniform interface
  *
  * A parser can have sub-parsers. See also PrimParser which is a parser with no sub-parsers.
  *
  * This trait is preferred (and PrimParser) over the ParserObject because this requires one to
  * explicitly manage runtimeDependencies, whereas ParserObject hides that detail, which isn't helpful.
  */
 trait Parser
   extends Processor {

   /** Name for this parser, obtained from its class. */
   protected def parserName = Misc.getNameFromClass(this)

   /**
    * Marks the state as failed with a ParseError built from this parser's schema
    * file location, the state's current location, the message and its arguments.
    *
    * @param pstate state to mark as failed
    * @param s      message text handed to ParseError
    * @param args   arguments that accompany the message
    */
   def PE(pstate: PState, s: String, args: Any*) = {
     pstate.setFailed(new ParseError(One(context.schemaFileLocation), One(pstate.currentLocation), s, args: _*))
   }

   /**
    * Long form synonym for PE; takes the same arguments.
    */
   def processingError(state: PState, str: String, args: Any*) =
     // `args: _*` forwards the individual values. Passing `args` bare would hand PE
     // one single argument (the whole Seq) instead of the caller's arguments.
     PE(state, str, args: _*)

   /** The parsing work itself, supplied by implementations; invoked through parse1. */
   protected def parse(pstate: PState): Unit

   /**
    * Runs this parser on the state.
    *
    * Asserts that this parser has been initialized, then calls parse, bracketed by the
    * `before` and `after` hooks of `pstate.dataProc` when one is defined.
    */
   final def parse1(pstate: PState): Unit = {
     Assert.invariant(isInitialized)
     if (pstate.dataProc.isDefined) pstate.dataProc.get.before(pstate, this)
     parse(pstate)
     if (pstate.dataProc.isDefined) pstate.dataProc.get.after(pstate, this)
   }

   // TODO: other methods for things like asking for the ending position of something
   // which would enable fixed-length formats to skip over data and not parse it at all.

 }

 /**
  * Deprecated and to be phased out. Use the trait Parser instead.
  *
  * A Parser whose context is passed as a constructor argument and whose
  * runtimeDependencies are fixed to Nil.
  */
 abstract class ParserObject(override val context: RuntimeData) extends Parser {
   override lazy val runtimeDependencies = Nil
 }

 /**
  * BriefXML is XML-style output, but intended for specific purposes. It is NOT
  * an XML serialization of the data structure. It's an XML-style string, suitable to
  * manipulate, by people, in XML tooling. E.g., can stick into an XML editor to
  * then get it all indented nicely, use a structure editor to expand/collapse subregions,
  * but it is NOT intended to capture all of the state of the object.
  */
 trait ToBriefXMLImpl {

   private lazy val nom_ : String = Misc.getNameFromClass(this)

   /** Element name used in the brief XML; defaults to a name obtained from the class. */
   def nom = nom_

   /** Attribute text placed after the element name; empty means no attributes. */
   protected def briefXMLAttributes: String = ""

   def childProcessors: Seq[Processor]

   // TODO: make this create a DOM tree, not a single string (because of size limits)
   /**
    * Renders this object, and recursively its child processors, as a brief XML-style string.
    *
    * @param depthLimit number of levels to render; 0 yields "...". The default of -1 is
    *                   decremented at each level, so it does not cut the output off.
    * @return the XML-style text. Once the accumulated text for the children reaches
    *         3000 characters, each further child is shown as "...".
    */
   def toBriefXML(depthLimit: Int = -1): String = {
     if (depthLimit == 0) "..."
     else {
       // Only built when an element is actually going to be emitted.
       val eltStartText = nom + (if (briefXMLAttributes == "") "" else " " + briefXMLAttributes + " ")
       if (childProcessors.length == 0) "<" + eltStartText + "/>"
       else {
         val lessDepth = depthLimit - 1
         val sb = new StringBuilder
         childProcessors.foreach { cp =>
           // hack! Children past the size cap are not rendered at all, rather than
           // rendering the whole subtree and then throwing the text away.
           if (sb.size < 3000) sb.append(cp.toBriefXML(lessDepth))
           else sb.append("...")
         }
         "<" + eltStartText + ">" + sb + "</" + nom + ">"
       }
     }
   }

   override def toString = toBriefXML() // pParser.toString + " ~ " + qParser.toString
 }

 /**
  * Runs its child parsers one after another, stopping as soon as the state's
  * status is no longer Success.
  */
 class SeqCompParser(context: RuntimeData, val childParsers: Array[Parser])
   extends ParserObject(context) {

   override def childProcessors = childParsers

   override def nom = "seq"

   val numChildParsers = childParsers.size

   /** Invokes parse1 on each child in array order until one leaves a non-Success status. */
   def parse(pstate: PState): Unit = {
     var idx = 0
     var keepGoing = true
     while (keepGoing && idx < numChildParsers) {
       childParsers(idx).parse1(pstate)
       if (pstate.status eq Success) idx += 1
       else keepGoing = false
     }
   }

 }

 /**
  * Parser for a set of alternatives: tries each child parser in order until one
  * succeeds, or until a failing alternative is found to have its discriminator set.
  */
 class AltCompParser(context: RuntimeData, val childParsers: Seq[Parser])
   extends ParserObject(context) {

   override lazy val childProcessors = childParsers

   override def nom = "alt"

   /**
    * Tries the alternatives in order.
    *
    * A discriminator is pushed on entry and popped on exit. Before each alternative a
    * mark is taken on the state. Then:
    *  - if the alternative succeeds, the mark is discarded and the loop ends;
    *  - if it fails and the discriminator is true, the mark is discarded (nothing is
    *    unwound), the state is set failed with an AltParseFailed, and the loop ends;
    *  - if it fails and the discriminator is not true, the state is reset to the mark
    *    and the next alternative is tried.
    *
    * If every alternative fails, the state is set failed with an AltParseFailed that
    * carries one ParseAlternativeFailed per alternative, in the order they were tried.
    */
   def parse(pstate: PState): Unit = {
     // Mark taken before the alternative currently being tried; null whenever no mark is outstanding.
     var pBefore: PState.Mark = null

     pstate.pushDiscriminator
     // Built by prepending, so newest first; reversed before being reported.
     var diagnostics: Seq[Diagnostic] = Nil
     var i = 0
     var parser: Parser = null
     val limit = childParsers.length
     // Becomes true once the outcome is decided (a success, or a failure with discriminator set).
     var returnFlag = false
     while (!returnFlag && i < limit) {
       parser = childParsers(i)
       i += 1
       log(LogLevel.Debug, "Trying choice alternative: %s", parser)
       pBefore = pstate.mark
       parser.parse1(pstate)
       if (pstate.status eq Success) {
         log(LogLevel.Debug, "Choice alternative success: %s", parser)
         pstate.discard(pBefore)
         pBefore = null
         returnFlag = true
       } else {
         // If we get here, then we had a failure
         log(LogLevel.Debug, "Choice alternative failed: %s", parser)
         //
         // capture diagnostics
         //
         val diag = new ParseAlternativeFailed(context.schemaFileLocation, pstate, pstate.diagnostics)
         diagnostics = diag +: diagnostics
         //
         // check for discriminator evaluated to true.
         //
         if (pstate.discriminator == true) {
           log(LogLevel.Debug, "Failure, but discriminator true. Additional alternatives discarded.")
           // If so, then we don't run the next alternative, we
           // consume this discriminator status result (so it doesn't ripple upward)
           // and return the failed state with all the diagnostics.
           //
           val allDiags = new AltParseFailed(context.schemaFileLocation, pstate, diagnostics.reverse)
           pstate.discard(pBefore) // because disc set, we don't unwind side effects on input stream & infoset
           pBefore = null
           pstate.setFailed(allDiags)
           returnFlag = true
         } else {
           //
           // Here we have a failure, but no discriminator was set, so we try the next alternative.
           // Which means we just go around the loop
           //
           // But we have to unwind side-effects on input-stream, infoset, variables, etc.
           pstate.reset(pBefore)
           pBefore = null
           returnFlag = false
         }
       }
     }
     Assert.invariant(i <= limit)
     if (returnFlag == false) {
       Assert.invariant(i == limit)
       // Out of alternatives. All of them failed.
       val allDiags = new AltParseFailed(context.schemaFileLocation, pstate, diagnostics.reverse)
       pstate.setFailed(allDiags)
       log(LogLevel.Debug, "All AltParser alternatives failed.")
     }

     // Every mark taken in the loop must have been discarded or reset by now.
     Assert.invariant(pBefore eq null)
     pstate.popDiscriminator
   }

 }

 /**
  * Placeholder parser whose parse raises an SDE saying the parser for `rd` is
  * not yet implemented.
  *
  * NOTE(review): the ParserObject context is passed as null here, so inherited
  * members that dereference `context` (such as PE) cannot be used on this parser.
  */
 case class DummyParser(rd: RuntimeData) extends ParserObject(null) {

   override def childProcessors = Nil

   override def toBriefXML(depthLimit: Int = -1) = "<dummy/>"

   override def toString = rd match {
     case null => "Dummy[null]"
     case data => "Dummy[" + data + "]"
   }

   def parse(pstate: PState): Unit = {
     pstate.SDE("Parser for " + rd + " is not yet implemented.")
   }
 }

 /**
  * Parser whose parse always tosses a RuntimeSchemaDefinitionError saying the
  * schema was compiled without parse support.
  */
 case class NotParsableParser(context: ElementRuntimeData) extends Parser {

   override lazy val childProcessors = Nil
   override lazy val runtimeDependencies = Nil

   def parse(state: PState): Unit = {
     // state.SDE can't be used here because it needs the infoset to determine the
     // context, and no infoset will exist when this is called. So the SDE is
     // created manually and tossed through the context.
     context.toss(
       new RuntimeSchemaDefinitionError(
         context.schemaFileLocation,
         state,
         "This schema was compiled without parse support. Check the parseUnparsePolicy tunable or daf:parseUnparsePolicy property."))
   }
 }
	/* Copyright (c) 2012-2015 Tresys Technology, LLC. All rights reserved.
	*
	* Developed by: Tresys Technology, LLC
	* http://www.tresys.com
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy of
	* this software and associated documentation files (the "Software"), to deal with
	* the Software without restriction, including without limitation the rights to
	* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	* of the Software, and to permit persons to whom the Software is furnished to do
	* so, subject to the following conditions:
	*
	* 1. Redistributions of source code must retain the above copyright notice,
	* this list of conditions and the following disclaimers.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimers in the
	* documentation and/or other materials provided with the distribution.
	*
	* 3. Neither the names of Tresys Technology, nor the names of its contributors
	* may be used to endorse or promote products derived from this Software
	* without specific prior written permission.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
	* SOFTWARE.
	*/

	package edu.illinois.ncsa.daffodil.processors

	import edu.illinois.ncsa.daffodil.api.Diagnostic
	import edu.illinois.ncsa.daffodil.dsom.RuntimeSchemaDefinitionError
	import edu.illinois.ncsa.daffodil.exceptions.Assert
	import edu.illinois.ncsa.daffodil.util._
	import Maybe._
	import edu.illinois.ncsa.daffodil.api._

	object Processor {

	  /**
	   * Entry point for one-time initialization of a processor and everything beneath it.
	   *
	   * This is really part of compiling the DFDL schema and so should be called from there.
	   * That ensures it is not, for example, being run from multiple threads. If called from
	   * the runtime module, compilation might actually end up happening at run time.
	   */
	  def initialize(proc: Processor): Unit = ensureCompiled(proc)

	  /**
	   * Marks `proc` as initialized, then visits its runtime dependencies followed by its
	   * child processors. A processor that is already marked is skipped entirely.
	   */
	  private def ensureCompiled(proc: Processor): Unit = {
	    if (!proc.isInitialized) {
	      // The flag is set before descending, so reaching this processor again is a no-op.
	      proc.isInitialized = true
	      proc.runtimeDependencies.foreach { dep => ensureCompiled(dep) }
	      proc.childProcessors.foreach { child => ensureCompiled(child) }
	    }
	  }

	  /** Calls `ensureCompiled` on `ev`, then does the same for each of its runtime dependencies. */
	  private def ensureCompiled(ev: EvaluatableBase[AnyRef]): Unit = {
	    ev.ensureCompiled
	    ev.runtimeDependencies.foreach { dep => ensureCompiled(dep) }
	  }
	}

	/**
	 * Base trait for processors. Things common to both unparser and parser go here.
	 */
	trait Processor
	  extends ToBriefXMLImpl
	  with Logging
	  with Serializable {

	  /** Starts out false; `Processor.initialize` sets it to true when it visits this processor. */
	  var isInitialized: Boolean = false

	  /** Runtime data for this processor. */
	  def context: RuntimeData

	  /** Processors nested directly beneath this one. */
	  def childProcessors: Seq[Processor]

	  /** Evaluatables that `Processor.initialize` compiles along with this processor. */
	  def runtimeDependencies: Seq[Evaluatable[AnyRef]]
	}

	/** A processor that has no child processors. */
	trait PrimProcessor extends Processor {
	  override def childProcessors: Seq[Processor] = List.empty[Processor]
	}

	/**
	 * Mixin shared by text parsers and unparsers for applying the charset-encoding
	 * related settings to the data stream.
	 */
	trait TextParserUnparserRuntimeBase {

	  /**
	   * Applies the UTF-16 width and the default encoding error policy from the
	   * term's encoding info to the state's data stream.
	   *
	   * Does nothing when the state has no data stream.
	   */
	  final protected def setupEncoding(state: ParseOrUnparseState, erd: TermRuntimeData): Unit = {
	    if (!state.dataStream.isEmpty) {
	      val info = erd.encodingInfo
	      val stream = state.dataStream.get
	      stream.setMaybeUTF16Width(info.optionUTF16Width)
	      stream.setEncodingErrorPolicy(info.defaultEncodingErrorPolicy)
	    }
	  }

	}

	/**
	 * Parser-side mixin that prepares the data input stream for decoding text.
	 */
	trait TextParserRuntimeMixin extends TextParserUnparserRuntimeBase {

	  def context: RuntimeData

	  /**
	   * Present so that the reflective scanner finds that this parser depends on
	   * the charset evaluatable of its term's encoding info.
	   */
	  lazy val deps = Seq(context.asInstanceOf[TermRuntimeData].encodingInfo.charsetEv)

	  /**
	   * Supplies the decoder to install on the input stream.
	   *
	   * Override this in selected derived classes, such as the hexBinary ones, in
	   * order to force use of specific encodings.
	   */
	  protected def decoder(state: PState, trd: TermRuntimeData) = trd.encodingInfo.getDecoder(state)

	  /**
	   * Applies the shared encoding settings, then the encoding error policy and
	   * finally the decoder to the state's data input stream.
	   */
	  final protected def setupDecoder(state: PState, erd: TermRuntimeData): Unit = {
	    setupEncoding(state, erd)
	    val input = state.dataInputStream
	    input.setEncodingErrorPolicy(erd.encodingInfo.defaultEncodingErrorPolicy)
	    // The decoder must be set after the above since this will compute other settings based on those.
	    input.setDecoder(decoder(state, erd))
	  }

	}

	/** Mixin shared by binary parsers and unparsers for applying byte order and bit order to the data stream. */
	trait BinaryParserUnparserRuntimeMixin {

	  /**
	   * Evaluates `byteOrdEv` against the state and sets the resulting byte order,
	   * together with the element's default bit order, on the state's data stream.
	   *
	   * Does nothing when the state has no data stream.
	   */
	  final protected def setupByteOrder(state: ParseOrUnparseState, erd: ElementRuntimeData, byteOrdEv: ByteOrderEv): Unit = {
	    if (!state.dataStream.isEmpty) {
	      val stream = state.dataStream.get
	      stream.setByteOrder(byteOrdEv.evaluate(state))
	      stream.setBitOrder(erd.defaultBitOrder)
	    }
	  }
	}

	/**
	 * Encapsulates lower-level parsing with a uniform interface
	 *
	 * A parser can have sub-parsers. See also PrimParser which is a parser with no sub-parsers.
	 *
	 * This trait is preferred (and PrimParser) over the ParserObject because this requires one to
	 * explicitly manage runtimeDependencies, whereas ParserObject hides that detail, which isn't helpful.
	 */
	trait Parser
	  extends Processor {

	  /** Name for this parser, obtained from its class. */
	  protected def parserName = Misc.getNameFromClass(this)

	  /**
	   * Marks the state as failed with a ParseError built from this parser's schema
	   * file location, the state's current location, the message and its arguments.
	   *
	   * @param pstate state to mark as failed
	   * @param s      message text handed to ParseError
	   * @param args   arguments that accompany the message
	   */
	  def PE(pstate: PState, s: String, args: Any*) = {
	    pstate.setFailed(new ParseError(One(context.schemaFileLocation), One(pstate.currentLocation), s, args: _*))
	  }

	  /**
	   * Long form synonym for PE; takes the same arguments.
	   */
	  def processingError(state: PState, str: String, args: Any*) =
	    // `args: _*` forwards the individual values. Passing `args` bare would hand PE
	    // one single argument (the whole Seq) instead of the caller's arguments.
	    PE(state, str, args: _*)

	  /** The parsing work itself, supplied by implementations; invoked through parse1. */
	  protected def parse(pstate: PState): Unit

	  /**
	   * Runs this parser on the state.
	   *
	   * Asserts that this parser has been initialized, then calls parse, bracketed by the
	   * `before` and `after` hooks of `pstate.dataProc` when one is defined.
	   */
	  final def parse1(pstate: PState): Unit = {
	    Assert.invariant(isInitialized)
	    if (pstate.dataProc.isDefined) pstate.dataProc.get.before(pstate, this)
	    parse(pstate)
	    if (pstate.dataProc.isDefined) pstate.dataProc.get.after(pstate, this)
	  }

	  // TODO: other methods for things like asking for the ending position of something
	  // which would enable fixed-length formats to skip over data and not parse it at all.

	}

	/**
	 * Deprecated and to be phased out. Use the trait Parser instead.
	 *
	 * A Parser whose context is passed as a constructor argument and whose
	 * runtimeDependencies are fixed to Nil.
	 */
	abstract class ParserObject(override val context: RuntimeData) extends Parser {
	  override lazy val runtimeDependencies = Nil
	}

	/**
	 * BriefXML is XML-style output, but intended for specific purposes. It is NOT
	 * an XML serialization of the data structure. It's an XML-style string, suitable to
	 * manipulate, by people, in XML tooling. E.g., can stick into an XML editor to
	 * then get it all indented nicely, use a structure editor to expand/collapse subregions,
	 * but it is NOT intended to capture all of the state of the object.
	 */
	trait ToBriefXMLImpl {

	  private lazy val nom_ : String = Misc.getNameFromClass(this)

	  /** Element name used in the brief XML; defaults to a name obtained from the class. */
	  def nom = nom_

	  /** Attribute text placed after the element name; empty means no attributes. */
	  protected def briefXMLAttributes: String = ""

	  def childProcessors: Seq[Processor]

	  // TODO: make this create a DOM tree, not a single string (because of size limits)
	  /**
	   * Renders this object, and recursively its child processors, as a brief XML-style string.
	   *
	   * @param depthLimit number of levels to render; 0 yields "...". The default of -1 is
	   *                   decremented at each level, so it does not cut the output off.
	   * @return the XML-style text. Once the accumulated text for the children reaches
	   *         3000 characters, each further child is shown as "...".
	   */
	  def toBriefXML(depthLimit: Int = -1): String = {
	    if (depthLimit == 0) "..."
	    else {
	      // Only built when an element is actually going to be emitted.
	      val eltStartText = nom + (if (briefXMLAttributes == "") "" else " " + briefXMLAttributes + " ")
	      if (childProcessors.length == 0) "<" + eltStartText + "/>"
	      else {
	        val lessDepth = depthLimit - 1
	        val sb = new StringBuilder
	        childProcessors.foreach { cp =>
	          // hack! Children past the size cap are not rendered at all, rather than
	          // rendering the whole subtree and then throwing the text away.
	          if (sb.size < 3000) sb.append(cp.toBriefXML(lessDepth))
	          else sb.append("...")
	        }
	        "<" + eltStartText + ">" + sb + "</" + nom + ">"
	      }
	    }
	  }

	  override def toString = toBriefXML() // pParser.toString + " ~ " + qParser.toString
	}

	/**
	 * Runs its child parsers one after another, stopping as soon as the state's
	 * status is no longer Success.
	 */
	class SeqCompParser(context: RuntimeData, val childParsers: Array[Parser])
	  extends ParserObject(context) {

	  override def childProcessors = childParsers

	  override def nom = "seq"

	  val numChildParsers = childParsers.size

	  /** Invokes parse1 on each child in array order until one leaves a non-Success status. */
	  def parse(pstate: PState): Unit = {
	    var idx = 0
	    var keepGoing = true
	    while (keepGoing && idx < numChildParsers) {
	      childParsers(idx).parse1(pstate)
	      if (pstate.status eq Success) idx += 1
	      else keepGoing = false
	    }
	  }

	}

	/**
	* Parser for a set of alternatives: tries each child parser in order until one
	* succeeds, or until a failing alternative is found to have its discriminator set.
	*/
	class AltCompParser(context: RuntimeData, val childParsers: Seq[Parser])
	extends ParserObject(context) {

	override lazy val childProcessors = childParsers

	override def nom = "alt"

	/**
	* Tries the alternatives in order.
	*
	* A discriminator is pushed on entry and popped on exit. Before each alternative a
	* mark is taken on the state. Then:
	*  - if the alternative succeeds, the mark is discarded and the loop ends;
	*  - if it fails and the discriminator is true, the mark is discarded (nothing is
	*    unwound), the state is set failed with an AltParseFailed, and the loop ends;
	*  - if it fails and the discriminator is not true, the state is reset to the mark
	*    and the next alternative is tried.
	*
	* If every alternative fails, the state is set failed with an AltParseFailed that
	* carries one ParseAlternativeFailed per alternative, in the order they were tried.
	*/
	def parse(pstate: PState): Unit = {
	// Mark taken before the alternative currently being tried; null whenever no mark is outstanding.
	var pBefore: PState.Mark = null

	pstate.pushDiscriminator
	// Built by prepending, so newest first; reversed before being reported.
	var diagnostics: Seq[Diagnostic] = Nil
	var i = 0
	var parser: Parser = null
	val limit = childParsers.length
	// Becomes true once the outcome is decided (a success, or a failure with discriminator set).
	var returnFlag = false
	while (!returnFlag && i < limit) {
	parser = childParsers(i)
	i += 1
	log(LogLevel.Debug, "Trying choice alternative: %s", parser)
	pBefore = pstate.mark
	parser.parse1(pstate)
	if (pstate.status eq Success) {
	log(LogLevel.Debug, "Choice alternative success: %s", parser)
	pstate.discard(pBefore)
	pBefore = null
	returnFlag = true
	} else {
	// If we get here, then we had a failure
	log(LogLevel.Debug, "Choice alternative failed: %s", parser)
	//
	// capture diagnostics
	//
	val diag = new ParseAlternativeFailed(context.schemaFileLocation, pstate, pstate.diagnostics)
	diagnostics = diag +: diagnostics
	//
	// check for discriminator evaluated to true.
	//
	if (pstate.discriminator == true) {
	log(LogLevel.Debug, "Failure, but discriminator true. Additional alternatives discarded.")
	// If so, then we don't run the next alternative, we
	// consume this discriminator status result (so it doesn't ripple upward)
	// and return the failed state with all the diagnostics.
	//
	val allDiags = new AltParseFailed(context.schemaFileLocation, pstate, diagnostics.reverse)
	pstate.discard(pBefore) // because disc set, we don't unwind side effects on input stream & infoset
	pBefore = null
	pstate.setFailed(allDiags)
	returnFlag = true
	} else {
	//
	// Here we have a failure, but no discriminator was set, so we try the next alternative.
	// Which means we just go around the loop
	//
	// But we have to unwind side-effects on input-stream, infoset, variables, etc.
	pstate.reset(pBefore)
	pBefore = null
	returnFlag = false
	}
	}
	}
	Assert.invariant(i <= limit)
	if (returnFlag == false) {
	Assert.invariant(i == limit)
	// Out of alternatives. All of them failed.
	val allDiags = new AltParseFailed(context.schemaFileLocation, pstate, diagnostics.reverse)
	pstate.setFailed(allDiags)
	log(LogLevel.Debug, "All AltParser alternatives failed.")
	}

	// Every mark taken in the loop must have been discarded or reset by now.
	Assert.invariant(pBefore eq null)
	pstate.popDiscriminator
	}

	}

	/**
	 * Placeholder parser whose parse raises an SDE saying the parser for `rd` is
	 * not yet implemented.
	 *
	 * NOTE(review): the ParserObject context is passed as null here, so inherited
	 * members that dereference `context` (such as PE) cannot be used on this parser.
	 */
	case class DummyParser(rd: RuntimeData) extends ParserObject(null) {

	  override def childProcessors = Nil

	  override def toBriefXML(depthLimit: Int = -1) = "<dummy/>"

	  override def toString = rd match {
	    case null => "Dummy[null]"
	    case data => "Dummy[" + data + "]"
	  }

	  def parse(pstate: PState): Unit = {
	    pstate.SDE("Parser for " + rd + " is not yet implemented.")
	  }
	}

	/**
	 * Parser whose parse always tosses a RuntimeSchemaDefinitionError saying the
	 * schema was compiled without parse support.
	 */
	case class NotParsableParser(context: ElementRuntimeData) extends Parser {

	  override lazy val childProcessors = Nil
	  override lazy val runtimeDependencies = Nil

	  def parse(state: PState): Unit = {
	    // state.SDE can't be used here because it needs the infoset to determine the
	    // context, and no infoset will exist when this is called. So the SDE is
	    // created manually and tossed through the context.
	    context.toss(
	      new RuntimeSchemaDefinitionError(
	        context.schemaFileLocation,
	        state,
	        "This schema was compiled without parse support. Check the parseUnparsePolicy tunable or daf:parseUnparsePolicy property."))
	  }
	}