/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.daffodil.debugger.dap
import cats.Show
import cats.data._
import cats.effect._
import cats.effect.std._
import cats.syntax.all._
import com.google.gson.JsonObject
import com.microsoft.java.debug.core.protocol._
import com.microsoft.java.debug.core.protocol.Messages.Request
import com.microsoft.java.debug.core.protocol.Requests._
import fs2._
import fs2.concurrent._
import java.io._
import java.net.URI
import java.nio.file._
import org.apache.commons.io.FileUtils
import org.apache.daffodil.api.Diagnostic
import org.apache.daffodil.debugger.dap.{BuildInfo => DAPBuildInfo}
import org.apache.daffodil.debugger.Debugger
import org.apache.daffodil.exceptions.SchemaFileLocation
import org.apache.daffodil.infoset._
import org.apache.daffodil.processors.dfa.DFADelimiter
import org.apache.daffodil.processors.parsers._
import org.apache.daffodil.processors._
import org.apache.daffodil.sapi.ValidationMode
import org.apache.daffodil.sapi.infoset.XMLTextInfosetOutputter
import org.apache.daffodil.sapi.infoset.JsonInfosetOutputter
import org.apache.daffodil.sapi.io.InputSourceDataInputStream
import org.apache.daffodil.tdml.TDML
import org.apache.daffodil.util.Misc
import org.typelevel.log4cats.Logger
import org.typelevel.log4cats.slf4j.Slf4jLogger
import scala.collection.JavaConverters._
import scala.util.Try
/** A running Daffodil parse. */
trait Parse {
/** Run the parse, returning the bytes of the final infoset. */
def run(): Stream[IO, Byte]
/** Request the active parse to stop. */
def close(): IO[Unit]
}
/** Constructors and supporting types for a running Daffodil parse. */
object Parse {
implicit val logger: Logger[IO] = Slf4jLogger.getLogger
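// Example wiring (a sketch; the schema path, data stream, debugger instance, and
// output path are hypothetical):
//
//   Parse(Paths.get("schema.dfdl.xsd"), dataStream, debugger, "xml", Map.empty, Map.empty)
//     .flatMap(parse => parse.run().through(fs2.io.file.Files[IO].writeAll(out)).compile.drain)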
def apply(
schema: Path,
data: InputStream,
debugger: Debugger,
infosetFormat: String,
variables: Map[String, String],
tunables: Map[String, String]
): IO[Parse] =
for {
dp <- Compiler()
.compile(schema, tunables)
.map(p =>
p.withDebugger(debugger)
.withDebugging(true)
.withExternalVariables(variables)
.withValidationMode(ValidationMode.Limited)
)
done <- Ref[IO].of(false)
pleaseStop <- Deferred[IO, Unit]
} yield new Parse {
def run(): Stream[IO, Byte] =
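// readOutputStream inverts the push-based OutputStream that Daffodil writes into
// a pull-based Stream[IO, Byte] of infoset bytes, using a 4096-byte chunk size.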
fs2.io
.readOutputStream(4096) { os =>
val stopper =
pleaseStop.get *> IO.canceled // will cancel the concurrent parse effect
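// Only "xml" and "json" are expected here; any other value would raise a MatchError.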
val infosetOutputter = infosetFormat match {
case "xml" => new XMLTextInfosetOutputter(os, true)
case "json" => new JsonInfosetOutputter(os, true)
}
val parse =
IO.interruptibleMany {
dp.parse(
new InputSourceDataInputStream(data),
infosetOutputter
) // WARNING: parse doesn't close the OutputStream, so closed below
}.guaranteeCase(outcome => Logger[IO].debug(s"parse finished: $outcome"))
.void
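// `&>` runs stopper and parse concurrently, keeping parse's result; if
// `pleaseStop` fires, stopper's cancellation tears down the pair, which
// interrupts the blocking parse started with `interruptibleMany`.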
stopper &> parse.guarantee(IO(os.close) *> done.set(true))
}
def close(): IO[Unit] =
done.get.flatMap {
case false =>
Logger[IO].debug("interrupting parse") *> pleaseStop
.complete(())
.void
case true =>
Logger[IO].debug("parse done, no interruption") *> IO.unit
}
}
class Debugee(
schema: DAPodil.Source,
data: DAPodil.Source,
outputData: Signal[IO, DAPodil.Data],
outputState: Stream[IO, DAPodil.Debugee.State],
stateSink: QueueSink[IO, Option[DAPodil.Debugee.State]],
outputEvents: Stream[IO, Events.DebugEvent],
breakpoints: Breakpoints,
control: Control
) extends DAPodil.Debugee {
def data(): Signal[IO, DAPodil.Data] =
outputData
def state(): Stream[IO, DAPodil.Debugee.State] =
outputState
def events(): Stream[IO, Events.DebugEvent] =
outputEvents
/** We return only the "static" sources of the schema and data file. Any additional sources are announced to the
* debugger via source change events; the debugger then fetches their content via `sourceContent`.
*/
def sources(): IO[List[DAPodil.Source]] =
IO.pure(List(schema, data))
def sourceContent(ref: DAPodil.Source.Ref): IO[Option[DAPodil.Source.Content]] =
IO.pure(None) // no additional source content available; infoset and data position info sent as custom events
def sourceChanges(): Stream[IO, DAPodil.Source] =
Stream.empty
def step(): IO[Unit] =
control.step() *> stateSink.offer(
Some(
DAPodil.Debugee.State
.Stopped(DAPodil.Debugee.State.Stopped.Reason.Step)
)
)
def continue(): IO[Unit] =
control.continue() *> stateSink.offer(Some(DAPodil.Debugee.State.Running))
def pause(): IO[Unit] =
control.pause() *> stateSink.offer(
Some(
DAPodil.Debugee.State
.Stopped(DAPodil.Debugee.State.Stopped.Reason.Pause)
)
)
def setBreakpoints(uri: URI, lines: List[DAPodil.Line]): IO[Unit] =
breakpoints.setBreakpoints(uri, lines)
def eval(args: EvaluateArguments): IO[Option[Types.Variable]] =
args.expression match {
case name => // check all variables in the given frame
data().get.map { data =>
for {
frame <- data.stack.findFrame(DAPodil.Frame.Id(args.frameId))
variable <- frame.scopes.collectFirstSome { scope =>
scope.variables.values.toList
.collectFirstSome(_.find(_.name == name))
}
} yield variable
}
}
}
object Debugee {
sealed trait LaunchArgs
object LaunchArgs {
case class Manual(
schemaPath: Path,
dataPath: Path,
stopOnEntry: Boolean,
infosetFormat: String,
infosetOutput: LaunchArgs.InfosetOutput,
variables: Map[String, String],
tunables: Map[String, String]
) extends Arguments
with LaunchArgs {
def data: IO[InputStream] =
IO.blocking(FileUtils.readFileToByteArray(dataPath.toFile))
.map(new ByteArrayInputStream(_))
}
sealed trait InfosetOutput
object InfosetOutput {
case object None extends InfosetOutput
case object Console extends InfosetOutput
case class File(path: Path) extends InfosetOutput
}
sealed trait TDMLConfig extends LaunchArgs
object TDMLConfig {
case class Generate(
schemaPath: Path,
dataPath: Path,
stopOnEntry: Boolean,
infosetFormat: String,
infosetOutput: LaunchArgs.InfosetOutput,
name: String,
description: String,
path: String,
variables: Map[String, String],
tunables: Map[String, String]
) extends TDMLConfig
case class Append(
schemaPath: Path,
dataPath: Path,
stopOnEntry: Boolean,
infosetFormat: String,
infosetOutput: LaunchArgs.InfosetOutput,
name: String,
description: String,
path: String,
variables: Map[String, String],
tunables: Map[String, String]
) extends TDMLConfig
case class Execute(
stopOnEntry: Boolean,
infosetFormat: String,
infosetOutput: LaunchArgs.InfosetOutput,
name: String,
description: String,
path: String,
variables: Map[String, String],
tunables: Map[String, String]
) extends TDMLConfig
}
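// A launch request sketch covering the fields parsed below (values are illustrative):
//
//   {
//     "program": "path/to/schema.dfdl.xsd",
//     "data": "path/to/data.bin",
//     "stopOnEntry": true,
//     "infosetFormat": "xml",
//     "infosetOutput": { "type": "file", "path": "infoset.xml" },
//     "variables": {},
//     "tunables": {},
//     "tdmlConfig": { "action": "generate", "name": "test", "description": "...", "path": "test.tdml" }
//   }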
def parse(arguments: JsonObject): EitherNel[String, LaunchArgs] =
// Determine, based on the presence of the tdmlConfig object in the launch config, whether
// this is a "normal" DFDL operation or if we should attempt to parse the values from
// the tdmlConfig object.
Option(arguments.getAsJsonObject("tdmlConfig")) match {
case None => parseManual(arguments)
case Some(tdmlConfig) => parseTDML(arguments, tdmlConfig)
}
// Parse a launch config that does not have a tdmlConfig object
def parseManual(arguments: JsonObject): EitherNel[String, LaunchArgs] =
(
parseProgram(arguments),
parseData(arguments),
parseStopOnEntry(arguments),
parseInfosetFormat(arguments),
parseInfosetOutput(arguments),
parseVariables(arguments),
parseTunables(arguments)
).parMapN(LaunchArgs.Manual.apply)
}
// Parse a tdmlConfig object from the launch config
//
// tdmlConfig: {
// action: '',
// name: '',
// description: '',
// path: ''
// }
//
// The action field is parsed first.
// If it is a valid action ('generate' | 'append' | 'execute'), create a LaunchArgs object of the appropriate type.
// If it is missing, fall back to a LaunchArgs.Manual, ignoring any other fields in the tdmlConfig object;
// any other value is reported as an error.
//
// arguments: Launch config
// tdmlConfig: tdmlConfig object from the launch config
def parseTDML(arguments: JsonObject, tdmlConfig: JsonObject): EitherNel[String, LaunchArgs] =
Option(tdmlConfig.getAsJsonPrimitive("action")) match {
case None =>
(
parseProgram(arguments),
parseData(arguments),
parseStopOnEntry(arguments),
parseInfosetFormat(arguments),
parseInfosetOutput(arguments),
parseVariables(arguments),
parseTunables(arguments)
).parMapN(LaunchArgs.Manual.apply)
case Some(action) =>
action.getAsString() match {
case "generate" =>
(
parseProgram(arguments),
parseData(arguments),
parseStopOnEntry(arguments),
parseInfosetFormat(arguments),
parseInfosetOutput(arguments, true),
parseTDMLName(tdmlConfig),
parseTDMLDescription(tdmlConfig),
parseTDMLPath(tdmlConfig),
parseVariables(arguments),
parseTunables(arguments)
).parMapN(LaunchArgs.TDMLConfig.Generate.apply)
case "append" =>
(
parseProgram(arguments),
parseData(arguments),
parseStopOnEntry(arguments),
parseInfosetFormat(arguments),
parseInfosetOutput(arguments, true),
parseTDMLName(tdmlConfig),
parseTDMLDescription(tdmlConfig),
parseTDMLPath(tdmlConfig),
parseVariables(arguments),
parseTunables(arguments)
).parMapN(LaunchArgs.TDMLConfig.Append.apply)
case "execute" =>
(
parseStopOnEntry(arguments),
parseInfosetFormat(arguments),
parseInfosetOutput(arguments),
parseTDMLName(tdmlConfig),
parseTDMLDescription(tdmlConfig),
parseTDMLPath(tdmlConfig),
parseVariables(arguments),
parseTunables(arguments)
).parMapN(LaunchArgs.TDMLConfig.Execute.apply)
case invalidType =>
Left(
s"invalid 'tdmlConfig.action': '$invalidType', must be 'generate', 'append', or 'execute'"
).toEitherNel
}
}
// Parse the program field from the launch config
// Returns an error if the program field is missing or is an invalid path
// A path that would require creating a new directory is not valid,
// e.g. /path/to/<existing>/<non-existing>/file.tdml
//
// arguments: Launch config
def parseProgram(arguments: JsonObject) =
Option(arguments.getAsJsonPrimitive("program"))
.toRight("missing 'program' field from launch request")
.flatMap(path =>
Either
.catchNonFatal(Paths.get(path.getAsString))
.leftMap(t => s"'program' field from launch request is not a valid path: $t")
.ensureOr(path => s"program file at $path doesn't exist")(_.toFile().exists())
)
.toEitherNel
// Parse the data field from the launch config
// Returns an error if the data field is missing or is an invalid path
// A path that would require creating a new directory is not valid,
// e.g. /path/to/<existing>/<non-existing>/file.tdml
//
// arguments: Launch config
def parseData(arguments: JsonObject) =
Option(arguments.getAsJsonPrimitive("data"))
.toRight("missing 'data' field from launch request")
.flatMap(path =>
Either
.catchNonFatal(Paths.get(path.getAsString))
.leftMap(t => s"'data' field from launch request is not a valid path: $t")
.ensureOr(path => s"data file at $path doesn't exist")(_.toFile().exists())
)
.toEitherNel
// Parse the stopOnEntry field from the launch config
// Defaults to true
//
// arguments: Launch config
def parseStopOnEntry(arguments: JsonObject) =
Option(arguments.getAsJsonPrimitive("stopOnEntry"))
.map(_.getAsBoolean())
.getOrElse(true)
.asRight[String]
.toEitherNel
// Parse the infosetFormat field from the launch config
// Defaults to "xml"
//
// arguments: Launch config
def parseInfosetFormat(arguments: JsonObject) =
Option(arguments.getAsJsonPrimitive("infosetFormat"))
.map(_.getAsString())
.getOrElse("xml")
.asRight[String]
.toEitherNel
// Parse the infosetOutput object from the launch config
//
// infosetOutput: {
// type: '',
// path: ''
// }
//
// Type must be 'none' | 'console' | 'file'
// If type is 'file', there must be a 'path' field that contains a valid path
// for the resulting infoset to be written to
// A path that would require creating a new directory is not valid,
// e.g. /path/to/<existing>/<non-existing>/file.tdml
//
// arguments: Launch config
// requireFile: Whether the type field must be set to 'file'. This is required
//              for the TDML generate and append operations. Returns an error if this
//              flag is true and the type field is set to a value other than 'file'
def parseInfosetOutput(arguments: JsonObject, requireFile: Boolean = false) =
Option(arguments.getAsJsonObject("infosetOutput")) match {
case None => Right(LaunchArgs.InfosetOutput.Console).toEitherNel
case Some(infosetOutput) =>
Option(infosetOutput.getAsJsonPrimitive("type")) match {
case None => Right(LaunchArgs.InfosetOutput.Console).toEitherNel
case Some(typ) =>
typ.getAsString() match {
case "none" =>
if (requireFile)
Left("'type' field in 'infosetOutput' must be set to 'file'").toEitherNel
else
Right(LaunchArgs.InfosetOutput.None).toEitherNel
case "console" =>
if (requireFile)
Left("'type' field in 'infosetOutput' must be set to 'file'").toEitherNel
else
Right(LaunchArgs.InfosetOutput.Console).toEitherNel
case "file" =>
Option(infosetOutput.getAsJsonPrimitive("path"))
.toRight("missing 'infosetOutput.path' field from launch request")
.flatMap(path =>
Either
.catchNonFatal(LaunchArgs.InfosetOutput.File(Paths.get(path.getAsString)))
.leftMap(t => s"'infosetOutput.path' field from launch request is not a valid path: $t")
.ensureOr(file => s"can't write to infoset output file at ${file.path}") { f =>
val file = f.path.toFile
// An empty path string defaults to the workspace directory, which is why we
// must ensure the infoset output target is not a directory before checking writability
!file.isDirectory() && (file.canWrite || (!file.exists && file.getParentFile.canWrite))
}
)
.toEitherNel
case invalidType =>
Left(
s"invalid 'infosetOutput.type': '$invalidType', must be 'none', 'console', or 'file'"
).toEitherNel
}
}
}
// Parse the variables object from the launch config
//
// arguments: Launch config
def parseVariables(arguments: JsonObject) =
Option(arguments.getAsJsonObject("variables"))
.map(_.getAsJsonObject().entrySet().asScala.map(kv => kv.getKey -> kv.getValue.getAsString()).toMap)
.getOrElse(Map.empty[String, String])
.asRight[String]
.toEitherNel
// Parse the tunables object from the launch config
//
// arguments: Launch config
def parseTunables(arguments: JsonObject) =
Option(arguments.getAsJsonObject("tunables"))
.map(_.getAsJsonObject().entrySet().asScala.map(kv => kv.getKey -> kv.getValue.getAsString()).toMap)
.getOrElse(Map.empty[String, String])
.asRight[String]
.toEitherNel
// The following functions granularly parse the tdmlConfig object from the launch config
//
// tdmlConfig: {
// action: '',
// name: '',
// description: '',
// path: ''
// }
//
// The action field is parsed elsewhere. If these functions are reached, a valid action was found.
// Parse the name field from the tdmlConfig object from the launch config
// Returns an error if the field is missing or is an empty string
//
// tdmlConfig: tdmlConfig object from the launch config
def parseTDMLName(tdmlConfig: JsonObject) =
Option(tdmlConfig.getAsJsonPrimitive("name"))
.toRight("missing 'tdmlConfig.name' field from launch request")
.map(_.getAsString())
.flatMap(name => Either.cond(name.length() > 0, name, "'name' field from 'tdmlConfig' object cannot be empty"))
.toEitherNel
// Parse the description field from the tdmlConfig object from the launch config
// Returns an error if the field is missing or is an empty string
//
// tdmlConfig: tdmlConfig object from the launch config
def parseTDMLDescription(tdmlConfig: JsonObject) =
Option(tdmlConfig.getAsJsonPrimitive("description"))
.toRight("missing 'tdmlConfig.description' field from launch request")
.map(_.getAsString())
.flatMap(description =>
Either
.cond(description.length() > 0, description, "'description' field from 'tdmlConfig' object cannot be empty")
)
.toEitherNel
// Parse the path field from the tdmlConfig object from the launch config
// Returns an error if the field is missing or is an invalid path
// A path that would require creating a new directory is not valid,
// e.g. /path/to/<existing>/<non-existing>/file.tdml
//
// tdmlConfig: tdmlConfig object from the launch config
def parseTDMLPath(tdmlConfig: JsonObject) =
Option(tdmlConfig.getAsJsonPrimitive("path"))
.toRight("missing 'tdmlConfig.path' field from launch request")
.flatMap(path =>
Either
.catchNonFatal(Paths.get(path.getAsString).toFile().getAbsolutePath())
.leftMap(t => s"'infosetOutput.path' field from launch request is not a valid path: $t")
.ensureOr(file => s"can't write to infoset output file at ${file}") { f =>
val file = Paths.get(f).toFile()
// If an empty string is passed in, it will be set to the workspace directory by default
// This is inside the Java code, so we have to make sure that the TDML file we
// are working with is not a directory
!file
.isDirectory() && (file.canWrite || (!file.exists && file.getParentFile.canWrite))
}
)
.toEitherNel
}
val infosetSource =
DAPodil.Source(Paths.get("infoset"), Some(DAPodil.Source.Ref(1)))
val dataDumpSource =
DAPodil.Source(Paths.get("data"), Some(DAPodil.Source.Ref(2)))
def debugee(request: Request): EitherNel[String, Resource[IO, DAPodil.Debugee]] =
Debugee.LaunchArgs.parse(request.arguments).map {
case args: Debugee.LaunchArgs.Manual => debugee(args)
case Debugee.LaunchArgs.TDMLConfig
.Generate(
schemaPath,
dataPath,
stopOnEntry,
infosetFormat,
infosetOutput,
name,
description,
tdmlPath,
variables,
tunables
) =>
// Create a LaunchArgs.Manual, run the debugee with it, and then generate the TDML file
debugee(
Debugee.LaunchArgs
.Manual(schemaPath, dataPath, stopOnEntry, infosetFormat, infosetOutput, variables, tunables)
).onFinalize(
infosetOutput match {
case Debugee.LaunchArgs.InfosetOutput.File(path) =>
IO(TDML.generate(path, schemaPath, dataPath, name, description, tdmlPath))
case _ =>
// This case should never be hit: the launch config is validated before this
// code runs a DFDL operation. If the user tries to generate a TDML file with an
// infosetOutput type of 'none' or 'console', an error is displayed and
// execution is aborted before the DFDL operation begins.
IO.unit
}
)
case Debugee.LaunchArgs.TDMLConfig
.Append(
schemaPath,
dataPath,
stopOnEntry,
infosetFormat,
infosetOutput,
name,
description,
tdmlPath,
variables,
tunables
) =>
// Create a LaunchArgs.Manual, run the debugee with it, and then append to the existing TDML file
debugee(
Debugee.LaunchArgs
.Manual(schemaPath, dataPath, stopOnEntry, infosetFormat, infosetOutput, variables, tunables)
).onFinalize(
infosetOutput match {
case Debugee.LaunchArgs.InfosetOutput.File(path) =>
IO(TDML.append(path, schemaPath, dataPath, name, description, tdmlPath))
case _ =>
// This case should never be hit: the launch config is validated before this
// code runs a DFDL operation. If the user tries to append to a TDML file with an
// infosetOutput type of 'none' or 'console', an error is displayed and
// execution is aborted before the DFDL operation begins.
IO.unit
}
)
case Debugee.LaunchArgs.TDMLConfig
.Execute(stopOnEntry, infosetFormat, infosetOutput, name, description, tdmlPath, variables, tunables) =>
// From a TDML file, create a LaunchArgs.Manual from the named test, run the debugee with it
Resource.eval(IO(TDML.execute(name, description, tdmlPath))).flatMap {
case None =>
Resource.raiseError[IO, Debugee, Throwable](
new RuntimeException(s"couldn't execute TDML with name $name, description $description, path $tdmlPath")
)
case Some((schemaPath, dataPath)) =>
debugee(
Debugee.LaunchArgs
.Manual(schemaPath, dataPath, stopOnEntry, infosetFormat, infosetOutput, variables, tunables)
)
}
}
def debugee(args: Debugee.LaunchArgs.Manual): Resource[IO, DAPodil.Debugee] =
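// Wiring sketch: bounded queues carry debugger callbacks (events, state, infoset)
// from the Daffodil parse to the DAP layer; `parsing` routes infoset bytes per the
// launch config, and the streams below run concurrently for the Resource's lifetime.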
for {
data <- Resource.eval(Queue.bounded[IO, Option[DAPodil.Data]](10))
state <- Resource.eval(Queue.bounded[IO, Option[DAPodil.Debugee.State]](10))
dapEvents <- Resource.eval(Queue.bounded[IO, Option[Events.DebugEvent]](10))
breakpoints <- Resource.eval(Breakpoints())
infoset <- Resource.eval(
Queue.bounded[IO, Option[String]](10)
) // TODO: it's a bit incongruous to have a separate channel for infoset changes, vs. streaming Parse.Event values
control <- Resource.eval(Control.stopped())
latestData <- Stream
.fromQueueNoneTerminated(data)
.holdResource(DAPodil.Data.empty)
latestInfoset <- Resource.eval(SignallingRef[IO, String](""))
infosetChanges = Stream
.fromQueueNoneTerminated(infoset)
.evalTap(latestInfoset.set)
.evalTap(content =>
dapEvents.offer(
Some(
InfosetEvent(
content,
args.infosetFormat match {
case "xml" => "text/xml"
case "json" => "application/json"
}
)
)
)
)
.onFinalizeCase(ec => Logger[IO].debug(s"infosetChanges (orig): $ec"))
events <- Resource.eval(Queue.bounded[IO, Option[Event]](10))
debugger <- DaffodilDebugger
.resource(state, events, breakpoints, control, infoset, args.infosetFormat)
parse <- Resource.eval(
args.data.flatMap(in => Parse(args.schemaPath, in, debugger, args.infosetFormat, args.variables, args.tunables))
)
parsing = args.infosetOutput match {
case Debugee.LaunchArgs.InfosetOutput.None =>
parse.run().drain
case Debugee.LaunchArgs.InfosetOutput.Console =>
parse
.run()
.through(text.utf8.decode)
.foldMonoid
.evalTap(_ => Logger[IO].debug("done collecting infoset XML output"))
.map(infosetXML => Some(Events.OutputEvent.createConsoleOutput(infosetXML)))
.enqueueUnterminated(dapEvents) // later handling will terminate dapEvents
case Debugee.LaunchArgs.InfosetOutput.File(path) =>
parse
.run()
.through(fs2.io.file.Files[IO].writeAll(fs2.io.file.Path.fromNioPath(path)))
}
nextFrameId <- Resource.eval(
Next.int.map(_.map(DAPodil.Frame.Id.apply)).flatTap(_.next())
) // `.flatTap(_.next())`: ids start at 1
nextRef <- Resource.eval(
Next.int.map(_.map(DAPodil.VariablesReference.apply)).flatTap(_.next())
) // `.flatTap(_.next())`: ids start at 1
// convert Parse.Event values to DAPodil.Data values
deliverParseData = Stream
.fromQueueNoneTerminated(events)
.evalTap {
case start: Event.StartElement if start.isStepping =>
dapEvents.offer(Some(DataEvent(start.state.currentLocation.bytePos1b)))
case _ => IO.unit
}
.through(fromParse(nextFrameId, nextRef))
.enqueueNoneTerminated(data)
debugee = new Debugee(
DAPodil.Source(args.schemaPath, None),
DAPodil.Source(args.dataPath, None),
latestData,
Stream.fromQueueNoneTerminated(state),
state,
Stream.fromQueueNoneTerminated(dapEvents),
breakpoints,
control
)
startup = dapEvents.offer(Some(ConfigEvent(args))) *>
(if (args.stopOnEntry)
control.step() *> state.offer(
Some(
DAPodil.Debugee.State
.Stopped(DAPodil.Debugee.State.Stopped.Reason.Entry)
)
) // don't use debugee.step as we need to send Stopped.Reason.Entry, not Stopped.Reason.Step
else debugee.continue())
_ <- Stream
.emit(debugee)
.concurrently(
Stream(
Stream.eval(startup),
parsing.onFinalizeCase(ec => Logger[IO].debug(s"parsing: $ec")),
deliverParseData.onFinalizeCase {
case ec @ Resource.ExitCase.Errored(e) =>
Logger[IO].warn(e)(s"deliverParseData: $ec")
case ec => Logger[IO].debug(s"deliverParseData: $ec")
} ++ Stream.eval(
dapEvents.offer(None) // ensure dapEvents is terminated when the parse is terminated
),
infosetChanges
).parJoinUnbounded
)
.compile
.resource
.lastOrError
_ <- Resource.onFinalize(Logger[IO].debug("signalling a stop") *> parse.close())
} yield debugee
/** Translate parse events to updated Daffodil state. */
def fromParse(
frameIds: Next[DAPodil.Frame.Id],
variableRefs: Next[DAPodil.VariablesReference]
): Stream[IO, Parse.Event] => Stream[IO, DAPodil.Data] =
events =>
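// StartElement pushes a frame and EndElement pops one, so each emitted
// DAPodil.Data mirrors the parser's current position in the element tree.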
events.evalScan(DAPodil.Data.empty) {
case (_, Parse.Event.Init(_)) => IO.pure(DAPodil.Data.empty)
case (prev, startElement: Parse.Event.StartElement) =>
createFrame(startElement, frameIds, variableRefs).map(prev.push)
case (prev, Parse.Event.EndElement(_)) => IO.pure(prev.pop)
case (prev, _: Parse.Event.Fini.type) => IO.pure(prev)
}
/** Transform Daffodil state to a DAP stack frame.
*
* @see
* https://microsoft.github.io/debug-adapter-protocol/specification#Types_StackFrame
*/
def createFrame(
startElement: Parse.Event.StartElement,
frameIds: Next[DAPodil.Frame.Id],
variableRefs: Next[DAPodil.VariablesReference]
): IO[DAPodil.Frame] =
for {
ids <- (frameIds.next, variableRefs.next, variableRefs.next, variableRefs.next).tupled
(frameId, parseScopeId, schemaScopeId, dataScopeId) = ids
stackFrame = new Types.StackFrame(
/* It must be unique across all threads.
* This id can be used to retrieve the scopes of the frame with the
* 'scopesRequest' or to restart the execution of a stackframe.
*/
frameId.value,
startElement.name.map(_.value).getOrElse("???"),
/* If sourceReference > 0 the contents of the source must be retrieved through
* the SourceRequest (even if a path is specified). */
Try(
Paths
.get(URI.create(startElement.schemaLocation.uriString))
.toString()
)
.fold(
_ =>
new Types.Source(
startElement.schemaLocation.uriString,
null,
0
), // there is no valid path if the location is a schema contained in a jar file; see #76.
path => new Types.Source(path, 0)
),
startElement.schemaLocation.lineNumber
.map(_.toInt)
.getOrElse(1), // line numbers start at 1 according to InitializeRequest
0 // column numbers start at 1 according to InitializeRequest, but set to 0 to ignore it; column calculation by Daffodil uses 1 tab = 2 spaces(?), but breakpoints use 1 character per tab
)
schemaScope <- schemaScope(schemaScopeId, startElement.state, variableRefs)
parseScope <- parseScope(parseScopeId, startElement, variableRefs)
} yield DAPodil.Frame(
frameId,
stackFrame,
List(
parseScope,
schemaScope,
dataScope(dataScopeId, startElement.state)
)
)
def parseScope(
ref: DAPodil.VariablesReference,
event: Event.StartElement,
refs: Next[DAPodil.VariablesReference]
): IO[DAPodil.Frame.Scope] =
(refs.next, refs.next, refs.next, refs.next, refs.next).mapN {
(pouRef, delimiterStackRef, diagnosticsRef, diagnosticsValidationsRef, diagnosticsErrorsRef) =>
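// Each fresh VariablesReference names a child group that the DAP client expands
// lazily via a `variables` request; the Map below wires each ref to its children.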
val hidden = event.state.withinHiddenNest
val childIndex = if (event.state.childPos != -1) Some(event.state.childPos) else None
val groupIndex = if (event.state.groupPos != -1) Some(event.state.groupPos) else None
val occursIndex = if (event.state.arrayPos != -1) Some(event.state.arrayPos) else None
val foundDelimiter = for {
dpr <- event.state.delimitedParseResult.toScalaOption
dv <- dpr.matchedDelimiterValue.toScalaOption
} yield Misc.remapStringToVisibleGlyphs(dv)
val foundField = for {
dpr <- event.state.delimitedParseResult.toScalaOption
f <- dpr.field.toScalaOption
} yield Misc.remapStringToVisibleGlyphs(f)
val pouRootVariable =
new Types.Variable("Points of uncertainty", "", null, pouRef.value, null)
val pouVariables =
event.pointsOfUncertainty.map(pou => new Types.Variable(s"${pou.name.value}", s"${pou.context}"))
val delimiterStackRootVariable =
new Types.Variable("Delimiters", "", null, delimiterStackRef.value, null)
val delimiterStackVariables =
event.delimiterStack.map(delimiter =>
new Types.Variable(s"${delimiter.kind}:${delimiter.value.delimType}", s"${delimiter.value.lookingFor}")
)
val diagnosticsRootVariable =
new Types.Variable("Diagnostics", "", null, diagnosticsRef.value, null)
val diagnosticsValidationsVariable =
new Types.Variable("Validations", "", null, diagnosticsValidationsRef.value, null)
val diagnosticsErrorsVariable =
new Types.Variable("Errors", "", null, diagnosticsErrorsRef.value, null)
// TODO: Get better values than toString() when https://issues.apache.org/jira/browse/DAFFODIL-1200 is completed.
val diagnosticsValidations: List[Types.Variable] =
event.diagnostics.filter(_.isValidation).zipWithIndex.map { case (diag, i) =>
new Types.Variable(i.toString, diag.toString().replace("Validation Error: ", ""))
}
val diagnosticsErrors: List[Types.Variable] =
event.diagnostics.filterNot(_.isValidation).zipWithIndex.map { case (diag, i) =>
new Types.Variable(i.toString, diag.toString())
}
val parseVariables: List[Types.Variable] =
(List(
new Types.Variable("hidden", hidden.toString, "bool", 0, null),
pouRootVariable,
delimiterStackRootVariable,
diagnosticsRootVariable
) ++ childIndex.map(ci => new Types.Variable("childIndex", ci.toString)).toList
++ groupIndex
.map(gi => new Types.Variable("groupIndex", gi.toString))
.toList
++ occursIndex
.map(oi => new Types.Variable("occursIndex", oi.toString))
.toList
++ foundDelimiter.map(fd => new Types.Variable("foundDelimiter", fd)).toList
++ foundField.map(ff => new Types.Variable("foundField", ff)).toList)
.sortBy(_.name)
DAPodil.Frame.Scope(
"Parse",
ref,
Map(
ref -> parseVariables,
pouRef -> pouVariables,
delimiterStackRef -> delimiterStackVariables,
diagnosticsRef -> List(diagnosticsValidationsVariable, diagnosticsErrorsVariable),
diagnosticsValidationsRef -> diagnosticsValidations,
diagnosticsErrorsRef -> diagnosticsErrors
)
)
}
// a.k.a. Daffodil variables
def schemaScope(
scopeRef: DAPodil.VariablesReference,
state: StateForDebugger,
refs: Next[DAPodil.VariablesReference]
): IO[DAPodil.Frame.Scope] =
state.variableMapForDebugger.qnames.toList
.groupBy(_.namespace) // TODO: handle NoNamespace or UnspecifiedNamespace as top-level?
.toList
.flatTraverse { case (ns, vs) =>
// every namespace is a DAP variable in the current scope, and links to its set of Daffodil-as-DAP variables
refs.next.map { ref =>
List(scopeRef -> List(new Types.Variable(ns.toString(), "", null, ref.value, null))) ++
List(
ref -> vs
.sortBy(_.toPrettyString)
.fproduct(state.variableMapForDebugger.find)
.map { case (name, value) =>
new Types.Variable(
name.toQNameString,
value
.flatMap(v => Option(v.value.value).map(_.toString) orElse Some("null"))
.getOrElse("???"),
value
.map(_.state match {
case VariableDefined => "default"
case VariableRead => "read"
case VariableSet => "set"
case VariableUndefined => "undefined"
case VariableBeingDefined => "being defined"
case VariableInProcess => "in process"
})
.getOrElse("???"),
0,
null
)
}
)
}
}
.map { refVars =>
val sv = refVars.foldMap(Map(_)) // combine values of map to accumulate namespaces
DAPodil.Frame.Scope(
"Schema",
scopeRef,
sv
)
}
def dataScope(ref: DAPodil.VariablesReference, state: StateForDebugger): DAPodil.Frame.Scope = {
val bytePos1b = state.currentLocation.bytePos1b
val dataVariables: List[Types.Variable] =
List(new Types.Variable("bytePos1b", bytePos1b.toString, "number", 0, null))
DAPodil.Frame.Scope(
"Data",
ref,
Map(ref -> dataVariables)
)
}
/** An algebraic data type that reifies the Daffodil `Debugger` callbacks. */
sealed trait Event
object Event {
case class Init(state: StateForDebugger) extends Event
case class StartElement(
state: StateForDebugger,
isStepping: Boolean,
name: Option[ElementName],
schemaLocation: SchemaFileLocation,
pointsOfUncertainty: List[PointOfUncertainty],
delimiterStack: List[Delimiter],
diagnostics: List[Diagnostic]
) extends Event {
// PState is mutable, so we copy all the information we might need downstream.
def this(pstate: PState, isStepping: Boolean) =
this(
pstate.copyStateForDebugger,
isStepping,
pstate.currentNode.toScalaOption.map(element => ElementName(element.name)),
pstate.schemaFileLocation,
pstate.pointsOfUncertainty.iterator.toList.map(mark =>
PointOfUncertainty(
Option(mark.disMark).as(mark.bitPos0b),
mark.context.schemaFileLocation,
ElementName(mark.element.name),
mark.context.toString()
)
),
pstate.mpstate.delimiters.toList.zipWithIndex.map { case (delimiter, i) =>
Delimiter(if (i < pstate.mpstate.delimitersLocalIndexStack.top) "remote" else "local", delimiter)
},
pstate.diagnostics
)
}
case class EndElement(state: StateForDebugger) extends Event
case object Fini extends Event
implicit val show: Show[Event] = Show.fromToString
}
case class ElementName(value: String) extends AnyVal
case class PointOfUncertainty(
bitPosition: Option[Long],
location: SchemaFileLocation,
name: ElementName,
context: String
)
case class Delimiter(kind: String, value: DFADelimiter)
trait Breakpoints {
def setBreakpoints(uri: URI, lines: List[DAPodil.Line]): IO[Unit]
def shouldBreak(location: DAPodil.Location): IO[Boolean]
}
object Breakpoints {
def apply(): IO[Breakpoints] =
for {
breakpoints <- Ref[IO].of(DAPodil.Breakpoints.empty)
} yield new Breakpoints {
def setBreakpoints(uri: URI, lines: List[DAPodil.Line]): IO[Unit] =
breakpoints.update(bp => bp.set(uri, lines))
def shouldBreak(location: DAPodil.Location): IO[Boolean] =
for {
bp <- breakpoints.get
} yield bp.contains(location)
}
}
case class ConfigEvent(launchArgs: ConfigEvent.LaunchArgs, buildInfo: ConfigEvent.BuildInfo)
extends Events.DebugEvent("daffodil.config")
object ConfigEvent {
sealed trait LaunchArgs
object LaunchArgs {
case class Manual(
schemaPath: String,
dataPath: String,
stopOnEntry: Boolean,
infosetFormat: String,
infosetOutput: InfosetOutput,
variables: Map[String, String],
tunables: Map[String, String]
) extends LaunchArgs
case class TDMLConfig(action: String, name: String, description: String, path: String) extends LaunchArgs
object TDMLConfig {
def apply(that: Debugee.LaunchArgs.TDMLConfig): TDMLConfig =
that match {
case Debugee.LaunchArgs.TDMLConfig.Generate(_, _, _, _, _, name, description, path, _, _) =>
TDMLConfig("generate", name, description, path)
case Debugee.LaunchArgs.TDMLConfig.Append(_, _, _, _, _, name, description, path, _, _) =>
TDMLConfig("append", name, description, path)
case Debugee.LaunchArgs.TDMLConfig.Execute(_, _, _, name, description, path, _, _) =>
TDMLConfig("execute", name, description, path)
}
}
}
sealed trait InfosetOutput {
val `type`: String =
this match {
case InfosetOutput.None => "none"
case InfosetOutput.Console => "console"
case InfosetOutput.File(_) => "file"
}
}
object InfosetOutput {
case object None extends InfosetOutput
case object Console extends InfosetOutput
case class File(path: String) extends InfosetOutput
def apply(that: Debugee.LaunchArgs.InfosetOutput): InfosetOutput =
that match {
case Debugee.LaunchArgs.InfosetOutput.None => None
case Debugee.LaunchArgs.InfosetOutput.Console => Console
case Debugee.LaunchArgs.InfosetOutput.File(path) =>
File(path.toString)
}
}
case class BuildInfo(version: String, daffodilVersion: String, scalaVersion: String)
def apply(launchArgs: Debugee.LaunchArgs): ConfigEvent =
ConfigEvent(
launchArgs match {
case Debugee.LaunchArgs
.Manual(schemaPath, dataPath, stopOnEntry, infosetFormat, infosetOutput, variables, tunables) =>
ConfigEvent.LaunchArgs.Manual(
schemaPath.toString,
dataPath.toString(),
stopOnEntry,
infosetFormat,
InfosetOutput(infosetOutput),
variables,
tunables
)
case tdmlConfig: Debugee.LaunchArgs.TDMLConfig =>
ConfigEvent.LaunchArgs.TDMLConfig(tdmlConfig)
},
BuildInfo(
DAPBuildInfo.version,
DAPBuildInfo.daffodilVersion,
DAPBuildInfo.scalaVersion
)
)
}
case class DataEvent(bytePos1b: Long) extends Events.DebugEvent("daffodil.data")
case class InfosetEvent(content: String, mimeType: String) extends Events.DebugEvent("daffodil.infoset")
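// ConfigEvent, DataEvent, and InfosetEvent are the custom "daffodil.*" DAP events;
// a client that understands them (e.g. the Daffodil VS Code extension) can render
// the launch configuration, data position, and live infoset.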
/** Behavior of a stepping debugger that can be running or stopped. */
sealed trait Control {
/** Blocks if the current state is stopped, unblocking when a `step` or `continue` happens. Returns true if blocking
* happened, false otherwise.
*/
def await(): IO[Boolean]
/** Start running. */
def continue(): IO[Unit]
/** If stopped, advance one "step", remaining stopped.
*
* IMPORTANT: Shouldn't return until any work blocked by an `await` completes; otherwise the work that was
* waiting would race with the code that observes the `step` as complete.
*/
def step(): IO[Unit]
/** Stop running. */
def pause(): IO[Unit]
}
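// Interplay sketch: the parse fiber calls `await()` at each element boundary
// (startElement/endElement); `step()` wakes exactly one blocked `await()` and then
// itself waits until the parse fiber blocks in `await()` again, so one "step"
// advances the parse by exactly one element.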
object Control {
implicit val logger: Logger[IO] = Slf4jLogger.getLogger
sealed trait State
case class AwaitingFirstAwait(waiterArrived: Deferred[IO, Unit]) extends State
case object Running extends State
case class Stopped(whenContinued: Deferred[IO, Unit], nextAwaitStarted: Deferred[IO, Unit]) extends State
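// Transitions (sketch):
//   AwaitingFirstAwait --await()--> Stopped (first waiter has arrived)
//   Stopped --continue()--> Running       Running --pause()--> Stopped
//   Stopped --step()--> Stopped (wakes one await(), waits for the next)
//   Running --await()--> Running (returns false without blocking)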
/** Create a control initialized to the `Stopped` state. */
def stopped(): IO[Control] =
for {
waiterArrived <- Deferred[IO, Unit]
state <- Ref[IO].of[State](AwaitingFirstAwait(waiterArrived))
} yield new Control {
def await(): IO[Boolean] =
for {
nextContinue <- Deferred[IO, Unit]
nextAwaitStarted <- Deferred[IO, Unit]
awaited <- state.modify {
case AwaitingFirstAwait(waiterArrived) =>
Stopped(nextContinue, nextAwaitStarted) -> waiterArrived
.complete(()) *> nextContinue.get.as(true)
case Running => Running -> IO.pure(false)
case s @ Stopped(whenContinued, nextAwaitStarted) =>
s -> nextAwaitStarted.complete(()) *> // signal next await happened
whenContinued.get.as(true) // block
}.flatten
} yield awaited
def step(): IO[Unit] =
for {
nextContinue <- Deferred[IO, Unit]
nextAwaitStarted <- Deferred[IO, Unit]
_ <- state.modify {
case s @ AwaitingFirstAwait(waiterArrived) =>
s -> waiterArrived.get *> step
case Running => Running -> IO.unit
case Stopped(whenContinued, _) =>
Stopped(nextContinue, nextAwaitStarted) -> (
whenContinued.complete(()) *> // wake up await-ers
nextAwaitStarted.get // block until next await is invoked
)
}.flatten
} yield ()
def continue(): IO[Unit] =
state.modify {
case s @ AwaitingFirstAwait(waiterArrived) =>
s -> waiterArrived.get *> continue
case Running => Running -> IO.unit
case Stopped(whenContinued, _) =>
Running -> whenContinued.complete(()).void // wake up await-ers
}.flatten
def pause(): IO[Unit] =
for {
nextContinue <- Deferred[IO, Unit]
nextAwaitStarted <- Deferred[IO, Unit]
_ <- state.update {
case Running => Stopped(nextContinue, nextAwaitStarted)
case s: AwaitingFirstAwait => s
case s: Stopped => s
}
} yield ()
}
}
/** The Daffodil `Debugger` interface is asynchronously invoked from a running parse, and always returns `Unit`. In
* order to invoke effects like `IO` but return `Unit`, we use a `Dispatcher` to execute the effects at this
* "outermost" layer (with respect to the effects).
*/
class DaffodilDebugger(
dispatcher: Dispatcher[IO],
state: QueueSink[IO, Option[DAPodil.Debugee.State]],
breakpoints: Breakpoints,
control: Control,
events: QueueSink[IO, Option[Event]],
infoset: QueueSink[IO, Option[String]],
infosetFormat: String
) extends Debugger {
implicit val logger: Logger[IO] = Slf4jLogger.getLogger
override def init(pstate: PState, processor: Parser): Unit =
dispatcher.unsafeRunSync {
events.offer(Some(Event.Init(pstate.copyStateForDebugger)))
}
override def fini(processor: Parser): Unit =
dispatcher.unsafeRunSync {
events.offer(Some(Event.Fini)) *>
events.offer(None) *> // no more events after this
state.offer(None) *> // no more states == the parse is terminated
infoset.offer(None) *>
Logger[IO].debug("infoset queue completed")
}
override def startElement(pstate: PState, processor: Parser): Unit =
dispatcher.unsafeRunSync {
// Generating the infoset requires a PState, not a StateForDebugger, so we can't generate it later from the Event.StartElement (which contains the StateForDebugger).
lazy val setInfoset = {
var node = pstate.infoset
while (node.diParent != null) node = node.diParent
node match {
case d: DIDocument if d.contents.size == 0 => IO.unit
case _ => infoset.offer(Some(infosetToString(node)))
}
}
logger.debug("pre-control await") *>
// may block until external control says to unblock, for stepping behavior
control.await
.ifM(
events.offer(Some(new Event.StartElement(pstate, isStepping = true))) *> setInfoset,
events.offer(Some(new Event.StartElement(pstate, isStepping = false)))
) *>
logger.debug("post-control await") *>
logger.debug("pre-checkBreakpoints") *>
// TODO: there's a race between the readers of `events` and `state`, so somebody could react to a hit breakpoint before the event data was committed
checkBreakpoints(createLocation(pstate.schemaFileLocation))
}
def infosetToString(ie: InfosetElement): String = {
val bos = new java.io.ByteArrayOutputStream()
val infosetOutputter = infosetFormat match {
case "xml" => new XMLTextInfosetOutputter(bos, true)
case "json" => new JsonInfosetOutputter(bos, true)
}
val iw = InfosetWalker(
ie.asInstanceOf[DIElement],
infosetOutputter,
walkHidden = false,
ignoreBlocks = true,
releaseUnneededInfoset = false
)
iw.walk(lastWalk = true)
bos.toString("UTF-8")
}
/** If the current location is a breakpoint, pause the control and update the state to notify the breakpoint was
* hit.
*/
def checkBreakpoints(location: DAPodil.Location): IO[Unit] =
breakpoints
.shouldBreak(location)
.ifM(
control.pause() *>
state
.offer(
Some(
DAPodil.Debugee.State.Stopped(
DAPodil.Debugee.State.Stopped.Reason
.BreakpointHit(location)
)
)
),
IO.unit
)
def createLocation(loc: SchemaFileLocation): DAPodil.Location =
DAPodil.Location(
URI.create(loc.uriString).normalize,
DAPodil.Line(loc.lineNumber.map(_.toInt).getOrElse(0))
)
override def endElement(pstate: PState, processor: Parser): Unit =
dispatcher.unsafeRunSync {
control.await *> // ensure no events while debugger is paused
events.offer(Some(Event.EndElement(pstate.copyStateForDebugger)))
}
}
object DaffodilDebugger {
def resource(
state: QueueSink[IO, Option[DAPodil.Debugee.State]],
events: QueueSink[IO, Option[Event]],
breakpoints: Breakpoints,
control: Control,
infoset: QueueSink[IO, Option[String]],
infosetFormat: String
): Resource[IO, DaffodilDebugger] =
for {
dispatcher <- Dispatcher[IO]
} yield new DaffodilDebugger(
dispatcher,
state,
breakpoints,
control,
events,
infoset,
infosetFormat
)
}
}