daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/tdml/TDMLRunner.scala - daffodil - Git at Google

 package edu.illinois.ncsa.daffodil.tdml

 /* Copyright (c) 2012-2013 Tresys Technology, LLC. All rights reserved.
  *
  * Developed by: Tresys Technology, LLC
  *               http://www.tresys.com
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy of
  * this software and associated documentation files (the "Software"), to deal with
  * the Software without restriction, including without limitation the rights to
  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  * of the Software, and to permit persons to whom the Software is furnished to do
  * so, subject to the following conditions:
  *
  *  1. Redistributions of source code must retain the above copyright notice,
  *     this list of conditions and the following disclaimers.
  *
  *  2. Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimers in the
  *     documentation and/or other materials provided with the distribution.
  *
  *  3. Neither the names of Tresys Technology, nor the names of its contributors
  *     may be used to endorse or promote products derived from this Software
  *     without specific prior written permission.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
  * SOFTWARE.
  */

 import java.io.File
 import scala.Array.canBuildFrom
 import scala.xml.NodeSeq.seqToNodeSeq
 import scala.xml._
 import scala.util.matching.Regex
 import scala.util.matching.Regex.Match
 import org.scalatest.junit.JUnitSuite
 import edu.illinois.ncsa.daffodil.Implicits.using
 import edu.illinois.ncsa.daffodil.compiler.Compiler
 import edu.illinois.ncsa.daffodil.xml.XMLUtils
 import edu.illinois.ncsa.daffodil.util._
 import edu.illinois.ncsa.daffodil.api._
 import junit.framework.Assert.assertEquals
 import junit.framework.Assert.assertTrue
 import junit.framework.Assert.fail
 import edu.illinois.ncsa.daffodil.util.Misc._
 import java.io.FileInputStream
 import java.io.FileNotFoundException
 import org.xml.sax.InputSource
 import java.io.StringReader
 import javax.xml.transform.stream.StreamSource
 import java.net.URL
 import java.net.URI
 import edu.illinois.ncsa.daffodil.exceptions.Assert
 import java.nio.ByteBuffer
 import java.nio.charset.CharsetEncoder
 import com.ibm.icu.charset.CharsetICU
 import java.nio.CharBuffer
 import java.io.InputStream
 import edu.illinois.ncsa.daffodil.processors.GeneralParseFailure
 import edu.illinois.ncsa.daffodil.dsom.EntityReplacer
 import edu.illinois.ncsa.daffodil.xml.DaffodilXMLLoader
 import edu.illinois.ncsa.daffodil.processors.IteratorInputStream
 import edu.illinois.ncsa.daffodil.processors.DFDLCharCounter
 import edu.illinois.ncsa.daffodil.processors.IterableReadableByteChannel

 /**
  * Parses and runs tests expressed in IBM's contributed tdml "Test Data Markup Language"
  */

 //
 // TODO: validate the infoset XML (expected result) against the DFDL Schema, that is using it as an XML Schema
 // for the infoset. This would prevent errors where the infoset instance and the schema drift apart under maintenance.
 //
 // TODO: validate the actual result against the DFDL Schema using it as an XML Schema.
 //
 /**
  * TDML test suite runner
  *
  * Keep this independent of Daffodil, so that it can be used to run tests against other DFDL implementations as well.
  * E.g., it should only need an API specified as a collection of Scala traits, and some simple way to inject
  * dependency on one factory to create processors.
  *
  *
  * Use the validateTDMLFile arg to bypass validation of the TDML document itself.
  *
  * This is used for testing whether one can detect validation errors
  * in the DFDL schema.
  *
  * Without this, you can't get to the validation errors, because it
  * rejects the TDML file itself.
  */

 class DFDLTestSuite(aNodeFileOrURL: Any, validateTDMLFile: Boolean = true)
   extends Logging {

   val errorHandler = new org.xml.sax.ErrorHandler {
     def warning(exception: SAXParseException) = {
       loadingExceptions == exception +: loadingExceptions
       System.err.println("TDMLRunner Warning: " + exception.getMessage())
     }

     def error(exception: SAXParseException) = {
       loadingExceptions = exception :: loadingExceptions
       System.err.println("TDMLRunner Error: " + exception.getMessage())
       isLoadingError = true
     }
     def fatalError(exception: SAXParseException) = {
       loadingExceptions == exception +: loadingExceptions
       System.err.println("TDMLRunner Fatal Error: " + exception.getMessage())
       isLoadingError = true
     }
   }

   var isLoadingError: Boolean = false

   var loadingExceptions: List[Exception] = Nil

   def getLoadingDiagnosticMessages() = {
     val msgs = loadingExceptions.map { _.toString() }.mkString(" ")
     msgs
   }

   /**
    * our loader here accumulates load-time errors here on the
    * test suite object.
    */
   val loader = new DaffodilXMLLoader(errorHandler)
   loader.setValidation(validateTDMLFile)

   val (ts, tdmlFile, tsInputSource) = {
     val tuple = aNodeFileOrURL match {
       case tsNode: Node => {
         val tempFileName = XMLUtils.convertNodeToTempFile(tsNode)
         val newNode = loader.loadFile(tempFileName)
         val tempFile = new File(tempFileName)
         (newNode, null, new InputSource(tempFile.toURI().toASCIIString()))
       }
       case tdmlFile: File => {
         log(LogLevel.Debug, "loading TDML file: %s", tdmlFile)
         val res = (loader.loadFile(tdmlFile), tdmlFile, new InputSource(tdmlFile.toURI().toASCIIString()))
         log(LogLevel.Debug, "done loading TDML file: %s", tdmlFile)
         res
       }
       case tsURL: URL => {
         val res = (loader.load(tsURL), null, new InputSource(tsURL.toURI().toASCIIString()))
         res
       }
       case _ => Assert.usageError("not a Node, File, or URL")
     }
     tuple
   }

   lazy val isTDMLFileValid = !this.isLoadingError

   var checkAllTopLevel: Boolean = false
   def setCheckAllTopLevel(flag: Boolean) {
     checkAllTopLevel = flag
   }

   val parserTestCases = (ts \ "parserTestCase").map { node => ParserTestCase(node, this) }
   //
   // Note: IBM started this TDML file format. They call an unparser test a "serializer" test.
   // We will use their TDML file names, but in the code here, we call it an UnparserTestCase
   //
   val unparserTestCases = (ts \ "serializerTestCase").map { node => UnparserTestCase(node, this) }
   val testCases: Seq[TestCase] = parserTestCases ++
     unparserTestCases
   val suiteName = (ts \ "@suiteName").text
   val suiteID = (ts \ "@ID").text
   val description = (ts \ "@description").text
   val embeddedSchemas = (ts \ "defineSchema").map { node => DefinedSchema(node, this) }

   private val embeddedSchemaGroups = embeddedSchemas.groupBy { _.name }

   embeddedSchemaGroups.foreach {
     case (name, Seq(sch)) => // ok
     case (name, seq) =>
       Assert.usageError("More than one definition for embedded schema " + name)
   }

   def runAllTests(schema: Option[Node] = None) {
     if (isTDMLFileValid)
       testCases.map { _.run(schema) }
     else {
       log(Error("TDML file %s is not valid.", tsInputSource.getSystemId))
     }
   }

   def runOneTest(testName: String, schema: Option[Node] = None) {
     runOneTestWithDataVolumes(testName, schema)
   }

   def runOneTestWithDataVolumes(testName: String, schema: Option[Node] = None): (Long, Long) = {
     if (isTDMLFileValid) {
       val testCase = testCases.find(_.name == testName)
       testCase match {
         case None => throw new Exception("test " + testName + " was not found.")
         case Some(tc) => {
           return tc.run(schema)
         }
       }
     } else {
       log(Error("TDML file %s is not valid.", tsInputSource.getSystemId))
       val msgs = this.loadingExceptions.map { _.toString }.mkString(" ")
       throw new Exception(msgs)
     }
   }

   /**
    * Try a few possibilities to find the model/schema/tdml resources
    *
    * IBM's suites have funny model paths in them. We don't have that file structure,
    * so we look for the schema/model/tdml resources in the working directory, and in the same
    * directory as the tdml file, and some other variations.
    */
   def findTDMLResource(fileName: String): File = {
     val firstTry = new File(fileName)
     if (firstTry.exists()) return firstTry
     // see if it can be found relative to the tdml test file, like next to it.
     val sysId = tsInputSource.getSystemId()
     if (sysId != null) {
       val sysFile = new File(new URI(sysId))
       if (sysFile.exists()) {
         // the system Id of the tdml file was a file.
         val sysPath = sysFile.getParent()
         val resourceFileName = sysPath + File.separator + fileName
         log(LogLevel.Debug, "TDML resource name is: %s", resourceFileName)
         val resourceFile = new File(resourceFileName)
         if (resourceFile.exists()) return resourceFile
       }
     }
     // try ignoring the directory part
     val parts = fileName.split("/")
     if (parts.length > 1) {
       val filePart = parts.last
       val secondTry = findTDMLResource(filePart) // recursively
       if (secondTry.exists()) return secondTry;
     }
     throw new FileNotFoundException("Unable to find tdml resource " + fileName + ".")
   }

   def findModel(modelName: String): Node = {
     // schemas defined with defineSchema take priority as names.
     val es = embeddedSchemas.find { defSch => defSch.name == modelName }
     es match {
       case Some(defschema) => defschema.xsdSchema
       case None => {
         val file = findTDMLResource(modelName)
         val schema = {
           val res = (new DaffodilXMLLoader(errorHandler)).loadFile(file)
           res
         }
         schema
       }
     }
   }

 }

 abstract class TestCase(ptc: NodeSeq, val parent: DFDLTestSuite)
   extends Logging {

   def toOpt[T](n: Seq[T]) = {
     n match {
       case Seq() => None
       case Seq(a) => Some(a)
       // ok for it to error if there is more than one in sequence.
     }
   }

   val document = toOpt(ptc \ "document").map { node => new Document(node, this) }
   val infoset = toOpt(ptc \ "infoset").map { node => new Infoset(node, this) }
   val errors = toOpt(ptc \ "errors").map { node => new ExpectedErrors(node, this) }
   val warnings = toOpt(ptc \ "warnings").map { node => new ExpectedWarnings(node, this) }

   val name = (ptc \ "@name").text
   val ptcID = (ptc \ "@ID").text
   val id = name + (if (ptcID != "") "(" + ptcID + ")" else "")
   val root = (ptc \ "@root").text
   val model = (ptc \ "@model").text
   val description = (ptc \ "@description").text
   val unsupported = (ptc \ "@unsupported").text match {
     case "true" => true
     case "false" => false
     case _ => false
   }

   def findModel(modelName: String): Node = {
     if (modelName == "") {
       suppliedSchema match {
         case None => throw new Exception("No model.")
         case Some(s) => return s
       }
     } else
       parent.findModel(modelName)
   }

   var suppliedSchema: Option[Node] = None

   protected def runProcessor(processor: DFDL.ProcessorFactory,
                              data: Option[DFDL.Input],
                              nBits: Option[Long],
                              infoset: Option[Infoset],
                              errors: Option[ExpectedErrors],
                              warnings: Option[ExpectedWarnings]): Unit

   def run(schema: Option[Node] = None): (Long, Long) = {
     suppliedSchema = schema
     val sch = schema match {
       case Some(sch) => {
         if (model != "") throw new Exception("You supplied a model attribute, and a schema argument. Can't have both.")
         sch
       }
       case None => {
         if (model == "") throw new Exception("No model was found.")
         val schemaNode = findModel(model)
         schemaNode
       }
     }
     val compiler = Compiler()
     compiler.setDistinguishedRootNode(root, null)
     compiler.setCheckAllTopLevel(parent.checkAllTopLevel)
     val pf = compiler.compile(sch)
     val data = document.map { _.data }
     val nBits = document.map { _.nBits }

     runProcessor(pf, data, nBits, infoset, errors, warnings)
     val bytesProcessed = IterableReadableByteChannel.getAndResetCalls
     val charsProcessed = DFDLCharCounter.getAndResetCount
     println("Bytes processed: " + bytesProcessed)
     println("Characters processed: " + charsProcessed)
     (bytesProcessed, charsProcessed)
     // if we get here, the test passed. If we don't get here then some exception was
     // thrown either during the run of the test or during the comparison.
     // log(LogLevel.Debug, "Test %s passed.", id))
   }

   def verifyAllDiagnosticsFound(actual: WithDiagnostics, expectedDiags: Option[ErrorWarningBase]) = {
     val actualDiags = actual.getDiagnostics
     if (actualDiags.length == 0) {
       throw new Exception("""No diagnostic objects found.""")
     } else {
       actualDiags.foreach { ad => log(Error(ad.toString)) }
     }
     val actualDiagMsgs = actualDiags.map { _.toString }
     val expectedDiagMsgs = expectedDiags.map { _.messages }.getOrElse(Nil)
     // must find each expected warning message within some actual warning message.
     expectedDiagMsgs.foreach {
       expected =>
         {
           val wasFound = actualDiagMsgs.exists {
             actual => actual.toLowerCase.contains(expected.toLowerCase)
           }
           if (!wasFound) {
             throw new Exception("""Did not find diagnostic message """" +
               expected + """" in any of the actual diagnostic messages: """ + "\n" +
               actualDiagMsgs.mkString("\n"))
           }
         }
     }
   }

 }

 case class ParserTestCase(ptc: NodeSeq, parentArg: DFDLTestSuite)
   extends TestCase(ptc, parentArg) {

   def runProcessor(pf: DFDL.ProcessorFactory,
                    data: Option[DFDL.Input],
                    lengthLimitInBits: Option[Long],
                    optInfoset: Option[Infoset],
                    optErrors: Option[ExpectedErrors],
                    warnings: Option[ExpectedWarnings]) = {

     val nBits = lengthLimitInBits.get
     val dataToParse = data.get
     (optInfoset, optErrors) match {
       case (Some(infoset), None) => runParseExpectSuccess(pf, dataToParse, nBits, infoset, warnings)
       case (None, Some(errors)) => runParseExpectErrors(pf, dataToParse, nBits, errors, warnings)
       case _ => throw new Exception("Invariant broken. Should be Some None, or None Some only.")
     }

   }

   def verifyParseInfoset(actual: DFDL.ParseResult, infoset: Infoset) {
     val trimmed = Utility.trim(actual.result)
     //
     // Attributes on the XML like xsi:type and also namespaces (I think) are
     // making things fail these comparisons, so we strip all attributes off (since DFDL doesn't
     // use attributes at all)
     //
     val actualNoAttrs = XMLUtils.removeAttributes(trimmed)
     //
     // Would be great to validate the actuals against the DFDL schema, used as
     // an XML schema on the returned infoset XML.
     // Getting this to work is a bigger issue. What with stripping of attributes
     // and that our internal Daffodil XML Catalog has a special treatment of the
     // mapping of the XML Schema URI.
     // etc.
     //
     // TODO: Fix so we can validate here.
     //

     // Something about the way XML is constructed is different between our jdom-converted
     // results and the ones created by scala directly parsing the TDML test files.
     //
     // This has something to do with values being lists of text nodes and entities
     // and not just simple strings. I.e., if you write: <foo>a&#x5E74;</foo>, that's not
     // an element with a string as its value. It's an element with several text nodes as
     // its values.
     //
     // so we run the expected stuff through the same converters that were used to
     // convert the actual.
     val expected = XMLUtils.element2Elem(XMLUtils.elem2Element(infoset.contents))
     // infoset.contents already has attributes removed.

     if (expected != actualNoAttrs) {
       val diffs = XMLUtils.computeDiff(expected, actualNoAttrs)
       if (diffs.length > 0) {
         //throw new Exception("Comparison failed. Expected: " + expected + " but got " + actualNoAttrs)
         throw new Exception("""
 Comparison failed.
 Expected
           %s
 Actual
           %s
 Differences were (path, expected, actual):
  %s""".format(
           expected.toString, actualNoAttrs.toString, diffs.map { _.toString }.mkString("\n")))
       }
     }
   }

   def runParseExpectErrors(pf: DFDL.ProcessorFactory,
                            dataToParse: DFDL.Input,
                            lengthLimitInBits: Long,
                            errors: ExpectedErrors,
                            warnings: Option[ExpectedWarnings]) {

     val objectToDiagnose =
       if (pf.isError) pf
       else {
         val processor = pf.onPath("/")
         if (processor.isError) processor
         else {
           val actual = processor.parse(dataToParse, lengthLimitInBits)
           if (actual.isError) actual
           else {
             val loc: DataLocation = actual.resultState.currentLocation
             if (!loc.isAtEnd) {
               actual.addDiagnostic(new GeneralParseFailure("Left over data: " + loc.toString))
               actual
             } else {
               // We did not get an error!!
               // val diags = actual.getDiagnostics().map(_.getMessage()).foldLeft("")(_ + "\n" + _)
               throw new Exception("Expected error. Didn't get one. Actual result was " + actual.briefResult) // if you just assertTrue(actual.canProceed), and it fails, you get NOTHING useful.
             }
           }
         }
       }
     // check for any test-specified errors
     verifyAllDiagnosticsFound(objectToDiagnose, Some(errors))

     // TODO Implement Warnings
     // check for any test-specified warnings
     // verifyAllDiagnosticsFound(objectToDiagnose, warnings)

   }

   def runParseExpectSuccess(pf: DFDL.ProcessorFactory,
                             dataToParse: DFDL.Input,
                             lengthLimitInBits: Long,
                             infoset: Infoset,
                             warnings: Option[ExpectedWarnings]) {

     val isError = pf.isError
     val diags = pf.getDiagnostics.map(_.getMessage).mkString("\n")
     if (pf.isError) {
       throw new Exception(diags)
     } else {
       val processor = pf.onPath("/")
       if (processor.isError) {
         val diags = processor.getDiagnostics.map(_.getMessage).mkString("\n")
         throw new Exception(diags)
       }
       val actual = processor.parse(dataToParse, lengthLimitInBits)

       if (!actual.canProceed) {
         // Means there was an error, not just warnings.
         val diags = actual.getDiagnostics.map(_.getMessage).mkString("\n")
         throw new Exception(diags) // if you just assertTrue(objectToDiagnose.canProceed), and it fails, you get NOTHING useful.
       }

       val loc: DataLocation = actual.resultState.currentLocation
       val leftOverException = if (!loc.isAtEnd) {
         val leftOverMsg = "Left over data: " + loc.toString
         println(leftOverMsg)
         Some(new Exception(leftOverMsg))
       } else None

       verifyParseInfoset(actual, infoset)

       leftOverException.map { throw _ } // if we get here, throw the left over data exception.

       // TODO: Implement Warnings
       // check for any test-specified warnings
       // verifyAllDiagnosticsFound(actual, warnings)

       // if we get here, the test passed. If we don't get here then some exception was
       // thrown either during the run of the test or during the comparison.
     }
   }
 }

 case class UnparserTestCase(ptc: NodeSeq, parentArg: DFDLTestSuite)
   extends TestCase(ptc, parentArg) {

   def runProcessor(pf: DFDL.ProcessorFactory,
                    optData: Option[DFDL.Input],
                    optNBits: Option[Long],
                    optInfoset: Option[Infoset],
                    optErrors: Option[ExpectedErrors],
                    warnings: Option[ExpectedWarnings]) = {

     val infoset = optInfoset.get

     (optData, optErrors) match {
       case (Some(data), None) => runUnparserExpectSuccess(pf, data, infoset, warnings)
       case (_, Some(errors)) => runUnparserExpectErrors(pf, optData, infoset, errors, warnings)
       case _ => throw new Exception("Invariant broken. Should be Some None, or None Some only.")
     }

   }

   def verifyData(data: DFDL.Input, outStream: java.io.ByteArrayOutputStream) {
     val actualBytes = outStream.toByteArray

     val inbuf = java.nio.ByteBuffer.allocate(1024 * 1024) // TODO: allow override? Detect overrun?
     val readCount = data.read(inbuf)
     data.close()
     if (readCount == -1) {
       // example data was of size 0 (could not read anything). We're not supposed to get any actual data.
       if (actualBytes.length > 0) {
         throw new Exception("Unexpected data was created.")
       }
       return // we're done. Nothing equals nothing.
     }

     Assert.invariant(readCount == inbuf.position())

     // compare expected data to what was output.
     val expectedBytes = inbuf.array().toList.slice(0, readCount)
     if (actualBytes.length != readCount) {
       throw new Exception("output data length " + actualBytes.length + " for " + actualBytes.toList +
         " doesn't match expected value " + readCount + " for " + expectedBytes)
     }

     val pairs = expectedBytes zip actualBytes zip Stream.from(1)
     pairs.foreach {
       case ((expected, actual), index) =>
         if (expected != actual) {
           val msg = "Unparsed data differs at byte %d. Expected 0x%02x. Actual was 0x%02x.".format(index, expected, actual)
           throw new Exception(msg)
         }
     }
   }

   def runUnparserExpectSuccess(pf: DFDL.ProcessorFactory,
                                data: DFDL.Input,
                                infoset: Infoset,
                                warnings: Option[ExpectedWarnings]) {

     val outStream = new java.io.ByteArrayOutputStream()
     val output = java.nio.channels.Channels.newChannel(outStream)
     val node = infoset.contents
     if (pf.isError) {
       val diags = pf.getDiagnostics.map(_.getMessage).mkString("\n")
       throw new Exception(diags)
     }
     val processor = pf.onPath("/")
     if (processor.isError) {
       val diags = processor.getDiagnostics.map(_.getMessage).mkString("\n")
       throw new Exception(diags)
     }
     val actual = processor.unparse(output, node)
     output.close()

     verifyData(data, outStream)

     // TODO: Implement Warnings - check for any test-specified warnings
     // verifyAllDiagnosticsFound(actual, warnings)

   }

   def runUnparserExpectErrors(pf: DFDL.ProcessorFactory,
                               optData: Option[DFDL.Input],
                               infoset: Infoset,
                               errors: ExpectedErrors,
                               warnings: Option[ExpectedWarnings]) {

     val outStream = new java.io.ByteArrayOutputStream()
     val output = java.nio.channels.Channels.newChannel(outStream)
     val node = infoset.contents
     if (pf.isError) {
       // check for any test-specified errors
       verifyAllDiagnosticsFound(pf, Some(errors))

       // check for any test-specified warnings
       verifyAllDiagnosticsFound(pf, warnings)
     }
     val processor = pf.onPath("/")
     if (processor.isError) {
       val diags = processor.getDiagnostics.map(_.getMessage).mkString("\n")
       throw new Exception(diags)
     }
     val actual = processor.unparse(output, node)
     output.close()
     val actualBytes = outStream.toByteArray()

     // Verify that some partial output has shown up in the bytes.
     optData.map { data => verifyData(data, outStream) }

     // check for any test-specified errors
     verifyAllDiagnosticsFound(actual, Some(errors))

     // check for any test-specified warnings
     verifyAllDiagnosticsFound(actual, warnings)

   }

 }

 case class DefinedSchema(xml: Node, parent: DFDLTestSuite) {
   val name = (xml \ "@name").text.toString

   val defineFormats = (xml \ "defineFormat")
   val defaultFormats = (xml \ "format")
   val defineVariables = (xml \ "defineVariable")
   val defineEscapeSchemes = (xml \ "defineEscapeScheme")

   val globalElementDecls = (xml \ "element")
   val globalSimpleTypeDefs = (xml \ "simpleType")
   val globalComplexTypeDefs = (xml \ "complexType")
   val globalGroupDefs = (xml \ "group")

   val dfdlTopLevels = defineFormats ++ defaultFormats ++ defineVariables ++ defineEscapeSchemes
   val xsdTopLevels = globalElementDecls ++ globalSimpleTypeDefs ++
     globalComplexTypeDefs ++ globalGroupDefs
   val fileName = parent.ts.attribute(XMLUtils.INT_NS, XMLUtils.FILE_ATTRIBUTE_NAME) match {
     case Some(seqNodes) => seqNodes.toString
     case None => ""
   }
   val xsdSchema = TestUtils.dfdlTestSchema(dfdlTopLevels, xsdTopLevels, fileName)
 }

 sealed abstract class DocumentContentType
 case object ContentTypeText extends DocumentContentType
 case object ContentTypeByte extends DocumentContentType
 case object ContentTypeBits extends DocumentContentType
 case object ContentTypeFile extends DocumentContentType
 // TODO: add capability to specify character set encoding into which text is to be converted (all UTF-8 currently)

 case class Document(d: NodeSeq, parent: TestCase) {

   val Seq(<document>{ children @ _* }</document>) = d

   val actualDocumentPartElementChildren = children.toList.flatMap {
     child =>
       child match {
         case <documentPart>{ _* }</documentPart> => List(new DocumentPart(child, this))
         case _ => Nil
       }
   }

   // check that document element either contains text content directly with no other documentPart children,
   // or it contains ONLY documentPart children (and whitespace around them).
   //
   if (actualDocumentPartElementChildren.length > 0) {
     children.foreach { child =>
       child match {
         case <documentPart>{ _* }</documentPart> => // ok
         case scala.xml.Text(s) if (s.matches("""\s+""")) => // whitespace text nodes ok
         case x => Assert.usageError("Illegal TDML data document content '" + x + "'")
       }
     }
   }

   val documentParts =
     if (actualDocumentPartElementChildren.length > 0) actualDocumentPartElementChildren
     else List(new DocumentPart(<documentPart type="text">{ children }</documentPart>, this))

   /**
    * When data is coming from the TDML file as small test data, then
    * Due to alignment, and bits-granularity issues, everything is lowered into
    * bits first, and then concatenated, and then converted back into bytes
    *
    * These are all lazy val, since if data is coming from a file these aren't
    * needed at all.
    */
   lazy val documentBits = documentParts.map { _.contentAsBits }.mkString
   lazy val nBits: Long =
     if (isDPFile) -1
     else documentBits.length
   lazy val nFragBits = (nBits % 8).toInt
   lazy val nAddOnBits = if (nFragBits == 0) 0 else 8 - nFragBits
   lazy val addOnBits = (1 to nAddOnBits) collect { case _ => "0" } mkString
   lazy val documentBitsFullBytes = documentBits + addOnBits
   lazy val documentBytes = {
     Assert.usage(!isDPFile, "Cannot call documentBytes if documentPart type is file.")
     bits2Bytes(documentBitsFullBytes)
   }

   /**
    * data coming from a file?
    */
   val isDPFile = {
     val res = documentParts.length > 0 &&
       documentParts(0).partContentType == ContentTypeFile
     if (res) {
       Assert.usage(documentParts.length == 1, "There can be only one documentPart of type file, and it must be the only documentPart.")
     }
     res
   }

   /**
    * this 'data' is the kind our parser's parse method expects.
    */
   lazy val data = {
     if (isDPFile) {
       // direct I/O to the file. No 'bits' lowering involved.
       val dp = documentParts(0)
       val input = dp.fileDataInput
       input
     } else {
       // assemble the input from the various pieces, having lowered
       // everything to bits.
       val bytes = documentBytes.toArray
       val inputStream = new java.io.ByteArrayInputStream(bytes);
       val rbc = java.nio.channels.Channels.newChannel(inputStream);
       rbc.asInstanceOf[DFDL.Input]
     }
   }

 }

 case class DocumentPart(part: Node, parent: Document) {
   val validHexDigits = "0123456789abcdefABCDEF"
   val validBinaryDigits = "01"

   lazy val replaceDFDLEntities: Boolean = {
     val res = (part \ "@replaceDFDLEntities")
     if (res.length == 0) { true }
     else { res(0).toString().toBoolean }
   }
   lazy val partContentType = (part \ "@type").toString match {
     case "text" => ContentTypeText
     case "byte" => ContentTypeByte
     case "bits" => ContentTypeBits
     case "file" => ContentTypeFile
     case _ => Assert.invariantFailed("invalid content type.")
   }
   lazy val encoder = CharsetICU.forNameICU("UTF-8").newEncoder()
   lazy val partRawContent = part.child.text

   lazy val contentAsBits = {
     val res = partContentType match {
       case ContentTypeText => textContentAsBits
       case ContentTypeByte => hexContentAsBits
       case ContentTypeBits => bitDigits
       case ContentTypeFile =>
         Assert.invariantFailed("shouldn't do contentAsBits for file documentPart type")
     }
     res
   }

   lazy val textContentWithoutEntities = {
     if (replaceDFDLEntities) {
       EntityReplacer.replaceAll(partRawContent)
     } else partRawContent
   }

   lazy val textContentToBytes = {
     // Fails here if we use getBytes("UTF-8") because that uses the utf-8 encoder,
     // and that will fail on things like unpaired surrogate characters that we allow
     // in our data and our infoset.
     // So instead we must do our own UTF-8-like encoding of the data
     // so that we can put in codepoints we want.
     val bytes = utf8LikeEncode(textContentWithoutEntities)
     // val bytes = replacedRawContent.getBytes("UTF-8") //must specify charset name (JIRA DFDL-257)
     bytes.toArray
   }

   def byteList(args: Int*) = args.map { _.toByte }

   def utf8LikeEncode(s: String): Seq[Byte] = {
     //
     // Scala/Java strings represent characters above 0xFFFF as a surrogate pair
     // of two codepoints.
     //
     // We want to handle both properly match surrogate pairs, and isolated surrogate characters.
     // That means if we see an isolated low (second) surrogate character, we have to know
     // whether it was preceded by a high surrogate or not.
     //
     // For every 16-bit code point, do do this right we need to potentially also see the previous
     // or next codepoint.
     //
     val bytes = XMLUtils.walkUnicodeString(s)(utf8LikeEncoding).flatten
     // val bytes = tuples.flatMap { case ((prevcp, cp), nextcp) => utf8LikeEncoding(prevcp, cp, nextcp) }
     bytes
   }

   /**
    * Encode in the style of utf-8 (see wikipedia article on utf-8)
    *
    * Variation is that we accept some things that a conventional utf-8 encoder
    * rejects. Examples are illegal codepoints such as isolated Unicode surrogates
    * (not making up a surrogate pair).
    *
    * We also assume we're being handed surrogate pairs for any of the
    * 4-byte character representations.
    *
    */

   def utf8LikeEncoding(prev: Char, c: Char, next: Char): Seq[Byte] = {
     // handles 16-bit codepoints only
     Assert.usage(prev <= 0xFFFF)
     Assert.usage(c <= 0xFFFF)
     Assert.usage(next <= 0xFFFF)

     val i = c.toInt
     val byte1 = ((i >> 8) & 0xFF)
     val byte2 = (i & 0xFF)

     def threeByteEncode() = {
       val low6 = byte2 & 0x3F
       val mid6 = ((byte1 & 0x0F) << 2) | (byte2 >> 6)
       val high4 = byte1 >> 4
       byteList(high4 | 0xE0, mid6 | 0x80, low6 | 0x80)
     }

     /**
      * create 4-byte utf-8 encoding from surrogate pair found
      * in a scala string.
      */
     def fourByteEncode(leadingSurrogate: Char, trailingSurrogate: Char) = {
       val h = leadingSurrogate.toInt // aka 'h for high surrogate'
       val l = trailingSurrogate.toInt // aka 'l for low surrogate'
       val cp = 0x10000 + ((h - 0xD800) * 0x400) + (l - 0xDC00)
       val byte1 = (cp >> 24) & 0xFF
       val byte2 = (cp >> 16) & 0xFF
       val byte3 = (cp >> 8) & 0xFF
       val byte4 = cp & 0xFF
       val low6 = byte4 & 0x3F
       val midlow6 = ((byte3 & 0x0F) << 2) | (byte4 >> 6)
       val midhig6 = ((byte2 & 0x03) << 4) | byte3 >> 4
       val high3 = byte2 >> 2
       byteList(high3 | 0xF0, midhig6 | 0x80, midlow6 | 0x80, low6 | 0x80)
     }

     val res = i match {
       case _ if (i <= 0x7F) => byteList(byte2)
       case _ if (i <= 0x7FF) => {
         val low6 = byte2 & 0x3F
         val high5 = ((byte1 & 0x07) << 2) | (byte2 >> 6)
         byteList(high5 | 0xC0, low6 | 0x80)
       }
       case _ if (XMLUtils.isLeadingSurrogate(c)) => {
         // High (initial) Surrogate character case.
         if (XMLUtils.isTrailingSurrogate(next)) {
           // Next codepoint is a low surrogate.
           // We need to create a 4-byte representation from the
           // two surrogate characters.
           fourByteEncode(c, next)
         } else {
           // isolated high surrogate codepoint case.
           threeByteEncode()
         }
       }
       case _ if (XMLUtils.isTrailingSurrogate(c)) => {
         // Low (subsequent) Surrogate character case.
         if (XMLUtils.isLeadingSurrogate(prev)) {
           // Previous codepoint was a high surrogate.
           // This codepoint was handled as part of converting the
           // surrogate pair.
           // so we output no bytes at all.
           List()
         } else {
           // Isolated low-surrogate codepoint case.
           threeByteEncode()
         }

       }
       case _ if (i <= 0xFFFF) => {
         threeByteEncode()
       }

       case _ => Assert.invariantFailed("char code out of range.")
     }
     res
   }

   lazy val textContentAsBits = bytes2Bits(textContentToBytes)

   lazy val hexContentAsBits = hex2Bits(hexDigits)

   // Note: anything that is not a valid hex digit (or binary digit for binary) is simply skipped
   // TODO: we should check for whitespace and other characters we want to allow, and verify them.
   // TODO: Or better, validate this in the XML Schema for tdml via a pattern facet
   // TODO: Consider whether to support a comment syntax. When showing data examples this may be useful.
   //
   lazy val hexDigits = partRawContent.flatMap { ch => if (validHexDigits.contains(ch)) List(ch) else Nil }

   lazy val bitContentToBytes = bits2Bytes(bitDigits).toList

   lazy val bitDigits = partRawContent.flatMap {
     ch =>
       {
         if (validBinaryDigits.contains(ch))
           List(ch)
         else Nil
       }
   }

   lazy val fileDataInput = {
     val file = parent.parent.parent.findTDMLResource(partRawContent)
     val fis = new FileInputStream(file)
     //    val fileBytes = Stream.continually(fis.read()).takeWhile(_ != -1).map(_.toByte).toArray
     //    bytes2Bits(fileBytes)
     val rbc = fis.getChannel()
     rbc.asInstanceOf[DFDL.Input]
   }
 }

 case class Infoset(i: NodeSeq, parent: TestCase) {
   lazy val Seq(dfdlInfoset) = (i \ "dfdlInfoset").map { node => new DFDLInfoset(Utility.trim(node), this) }
   lazy val contents = dfdlInfoset.contents
 }

 case class DFDLInfoset(di: Node, parent: Infoset) {
   lazy val Seq(contents) = {
     val c = di.child(0)
     val expected = Utility.trim(c) // must be exactly one root element in here.
     val expectedNoAttrs = XMLUtils.removeAttributes(expected)
     //
     // Let's validate the expected content against the schema
     // Just to be sure they don't drift.
     //
     //    val ptc = parent.parent
     //    val schemaNode = ptc.findModel(ptc.model)
     //
     // This is causing trouble, with the stripped attributes, etc.
     // TODO: Fix so we can validate these expected results against
     // the DFDL schema used as a XSD for the expected infoset XML.
     //
     expectedNoAttrs
   }
 }

 abstract class ErrorWarningBase(n: NodeSeq, parent: TestCase) {
   lazy val matchAttrib = (n \ "@match").text
   protected def diagnosticNodes: Seq[Node]
   lazy val messages = diagnosticNodes.map { _.text }
 }

 case class ExpectedErrors(node: NodeSeq, parent: TestCase)
   extends ErrorWarningBase(node, parent) {

   val diagnosticNodes = node \\ "error"

 }

 case class ExpectedWarnings(node: NodeSeq, parent: TestCase)
   extends ErrorWarningBase(node, parent) {

   val diagnosticNodes = node \\ "warning"

 }