// Source: daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/processors/DFDLJavaIOStreamDecoder.scala (daffodil, Git at Google)

 package edu.illinois.ncsa.daffodil.processors

 /* Copyright (c) 2012-2013 Tresys Technology, LLC. All rights reserved.
  *
  * Developed by: Tresys Technology, LLC
  *               http://www.tresys.com
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy of
  * this software and associated documentation files (the "Software"), to deal with
  * the Software without restriction, including without limitation the rights to
  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  * of the Software, and to permit persons to whom the Software is furnished to do
  * so, subject to the following conditions:
  *
  *  1. Redistributions of source code must retain the above copyright notice,
  *     this list of conditions and the following disclaimers.
  *
  *  2. Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimers in the
  *     documentation and/or other materials provided with the distribution.
  *
  *  3. Neither the names of Tresys Technology, nor the names of its contributors
  *     may be used to endorse or promote products derived from this Software
  *     without specific prior written permission.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
  * SOFTWARE.
  */

 import java.nio.CharBuffer
 import java.io.FileInputStream
 import java.nio.charset.Charset
 import sun.nio.cs.HistoricallyNamedCharset
 import java.nio.channels.ReadableByteChannel
 import java.nio.channels.Channels
 import java.io.IOException
 import java.nio.channels.FileChannel
 import java.nio.charset.CharsetDecoder
 import edu.illinois.ncsa.daffodil.exceptions.Assert
 import java.nio.charset.CodingErrorAction
 import java.nio.ByteBuffer
 import java.io.InputStream
 import scala.util.control.Breaks._
 import java.nio.charset.CoderResult
 import edu.illinois.ncsa.daffodil.processors.charset.USASCII7BitPackedCharset
 import edu.illinois.ncsa.daffodil.processors.charset.CharsetUtils
 import edu.illinois.ncsa.daffodil.processors.charset.SupportsInitialBitOffset
 import edu.illinois.ncsa.daffodil.compiler.Compiler
 import java.nio.charset.MalformedInputException
 import edu.illinois.ncsa.daffodil.processors.charset.CharacterSetAlignmentError

 /**
  * Companion factory for [[DFDLJavaIOStreamDecoder]].
  *
  * The decoder class is a re-implementation of Java's StreamDecoder, made so that
  * malformed input can be treated as the end of data (which is what DFDL wants)
  * instead of being ignored, replaced, or raised as an error.
  *
  * The implementation mostly stays true to the Java code it was derived from so as
  * to preserve future inter-operation potential, although the modifications for
  * bit-level positioning make that a lot less likely.
  */
 object DFDLJavaIOStreamDecoder {

   // Capacity, in bytes, of the buffer placed between the channel and the decoder.
   private val DefaultByteBufferSize: Int = Compiler.readerByteBufferSize.toInt

   /**
    * Builds a decoder that pulls bytes from `in` and decodes them with `charset`.
    *
    * @param in source of bytes. A FileInputStream contributes its own channel; any
    *           other stream is wrapped with `Channels.newChannel`.
    * @param charset character set used for decoding.
    * @param bitOffset0to7 bit offset within the first byte, passed to the decoder.
    * @param bitLimit upper bound, passed to the decoder.
    * @return a new decoder over an initially empty byte buffer.
    */
   def forInputStreamReader(in: InputStream, charset: Charset, bitOffset0to7: Int, bitLimit: Long): DFDLJavaIOStreamDecoder = {

     val channel: ReadableByteChannel = in match {
       case fileStream: FileInputStream => fileStream.getChannel()
       case other => Channels.newChannel(other)
     }

     // flip() on a freshly allocated buffer leaves position == limit == 0, i.e. the
     // buffer starts out empty and in read mode.
     val emptyBuffer = ByteBuffer.allocateDirect(DefaultByteBufferSize)
     emptyBuffer.flip()

     new DFDLJavaIOStreamDecoder(bitOffset0to7, bitLimit, charset, emptyBuffer, in, channel)
   }

 }

 /**
  * A `java.io.Reader` that decodes characters from a byte channel, derived from
  * Java's StreamDecoder.
  *
  * The purpose of re-implementing this class is to gain control over
  * how the StreamDecoder handles malformed input.  In DFDL we want the
  * malformed input error to be treated as the end of data.  Java's
  * StreamDecoder only ignores, replaces or treats it as an error.
  *
  * Forces the decoder to REPORT on malformed input.
  *
  * We also have to implement the upper-bound aka bitLimit, and not
  * allow consumption of data past it.
  * NOTE(review): bitLimit is stored as a member, but nothing in this class reads it.
  *
  * Instances are not shared across threads (one instance per parallel execution is
  * assumed), so the `lock.synchronized` sections of the Java original are omitted.
  *
  * @param bitOffsetWithinAByte starting bit offset within the first byte, 0 to 7 inclusive.
  * @param bitLimit upper bound on the data that may be consumed (presumably in bits).
  * @param cs charset whose decoder converts bytes to characters.
  * @param bb buffer of not-yet-decoded bytes, kept in read mode between calls.
  * @param in underlying stream; used for `available()` and as a fallback when closing.
  * @param ch channel the bytes are read from; must not be null.
  */
 class DFDLJavaIOStreamDecoder private (bitOffsetWithinAByte: Int, val bitLimit: Long, var cs: Charset, var bb: ByteBuffer,
   var in: InputStream, var ch: ReadableByteChannel)
   extends java.io.Reader {

   Assert.usage(ch != null)
   Assert.usage(bitOffsetWithinAByte >= 0)
   Assert.usage(bitOffsetWithinAByte <= 7)

   // REPORT is required so that implRead gets malformed input back as a CoderResult
   // and can turn it into end-of-data.
   val decoder = cs.newDecoder().onMalformedInput(CodingErrorAction.REPORT)

   //
   // Now we deal with bit positioning. If the decoder is capable of dealing with a starting bit offset
   // then we configure it that way. If not we insist we are byte aligned and fail if not.
   //
   decoder match {
     case decoderWithBits: SupportsInitialBitOffset =>
       decoderWithBits.setInitialBitOffset(bitOffsetWithinAByte)
     case _ if (bitOffsetWithinAByte == 0) => // ok. Do nothing. We are, luckily, aligned to a byte.
     // We're counting on the parser surrounding us to catch this error and turn it into
     // a ParseError.
     case _ => throw new CharacterSetAlignmentError(cs.name, 8, bitOffsetWithinAByte)
   }

   @volatile
   private var isOpen: Boolean = true

   /** Throws `java.io.IOException("Stream closed")` once this reader has been closed. */
   def ensureOpen: Unit = { if (!isOpen) throw new java.io.IOException("Stream closed") }

   // One-character pushback filled by read0 when a two-character read yields two characters.
   private var haveLeftoverChar: Boolean = false
   private var leftoverChar: Char = 0

   /** Returns `encodingName` while the reader is open, and null after it has been closed. */
   def getEncoding: String = if (isOpen) encodingName else null

   /** Reads one character. Returns it as an Int, or -1 when no more characters are available. */
   override def read: Int = read0

   // Leftover char is needed because of utf-16 and the surrogate pair stuff.
   // In that encoding, we need to read two codepoints, and if they are a surrogate pair
   // synthesize one character from them.
   //
   // Presumably this happens in the decoder, but in the case where it isn't a surrogate
   // pair, we save work by returning two decoded characters, not one, since we've already
   // decoded the second one. (This little optimization seems very not worth it to me, but
   // is necessary if you can't peek forward 2 bytes into the underlying bytes without
   // consuming them.)
   //
   // Issue is that UTF-16 is really a variable-width 2-byte or 4-byte character encoding. But
   // in the early days of unicode and java, it was treated as a fixed width 2-byte.
   //
   // Hence, in DFDL we provide control.
   // This depends on dfdl:utf16Width="variable". If dfdl:utf16Width="fixed", then
   // we do NOT process surrogate pairs, and return them each as a separate codepoint.
   //
   // So at some point our decoder must implement utf16Width="fixed" which means it must
   // not do this surrogate pair processing. That means either we find a feature for that
   // in CharsetDecoder (it may be there as this is a common need), or we'll have to clone
   // and reimplement that class too.
   //
   // TBD: implement utf16Width="variable". For now we can SDE on utf16Width="variable".
   //
   // This code assumes implicitly that it is ok if the byte stream has been advanced
   // further (by one character decoding) than it would have to produce one character only.
   // So long as we're never asking this stream of characters for a byte position we're ok.
   //
   // Note: we're not sharing this object across threads. We're assuming one instance per, for
   // any sort of parallel execution; hence, we don't do lock.synchronized
   // (in java this was a synchronized method).
   //
   private def read0: Int = {
     if (haveLeftoverChar) {
       // Return the leftover char, if there is one
       haveLeftoverChar = false
       leftoverChar.toInt
     } else {
       // Convert more bytes. Always ask for two characters; implRead requires at least two.
       val cb: Array[Char] = new Array[Char](2)
       read(cb, 0, 2) match {
         case -1 => -1
         case 2 =>
           leftoverChar = cb(1)
           haveLeftoverChar = true
           cb(0).toInt
         case 1 => cb(0).toInt
         case _ => -1 // TODO: assert false : n
       }
     }
   }

   /**
    * Reads up to `length` characters into `cbuf` starting at index `offset`.
    * All lengths and offsets are in character units.
    *
    * @param cbuf destination array.
    * @param offset index in `cbuf` of the first character to store.
    * @param length maximum number of characters to read.
    * @return the number of characters stored, 0 if `length` is 0, or -1 if nothing was
    *         stored because the end of data (or malformed input) was reached.
    * @throws IndexOutOfBoundsException if `offset`/`length` do not describe a range inside `cbuf`.
    * @throws java.io.IOException if the reader has been closed.
    */
   def read(cbuf: Array[Char], offset: Int, length: Int): Int = {
     var off: Int = offset
     var len: Int = length

     ensureOpen
     if ((off < 0) || (off > cbuf.length) || (len < 0)
       || ((off + len) > cbuf.length) || ((off + len) < 0)) {
       throw new IndexOutOfBoundsException()
     }

     if (len == 0) { return 0 }
     var n: Int = 0

     if (haveLeftoverChar) {
       // Copy the leftover char into the buffer
       cbuf(off) = leftoverChar
       off += 1
       len -= 1
       haveLeftoverChar = false
       n = 1
       if ((len == 0) || !implReady) {
         // Return now if this is all we can produce w/o blocking
         DFDLCharCounter.incr(n)
         return n
       }
     }

     // If length is 1, then we recursively end up back here with length 2
     // (via read0), and that's why it's not a stack overflow.
     if (len == 1) {
       // Treat single-character array reads just like read()
       // NOTE(review): read0 goes through read(), which already calls DFDLCharCounter.incr,
       // so characters delivered on this path look like they are counted more than once.
       // Confirm whether the counter is meant to be exact.
       val c: Int = read0
       if (c == -1) {
         if (n == 0) return -1
         DFDLCharCounter.incr(n)
         return n
       }
       cbuf(off) = c.toChar
       DFDLCharCounter.incr(n + 1)
       return n + 1
     }

     val nr: Int = implRead(cbuf, off, off + len)
     // implRead returns -1 at end of data. If a leftover char was already stored (n == 1),
     // report that one character; n + nr would be 0 and the character would be lost.
     val res: Int = if (nr < 0) { if (n == 1) 1 else nr } else n + nr
     if (res > 0) DFDLCharCounter.incr(res)
     res
   }

   /**
    * True when a character is pending or `implReady` reports that more input is at hand.
    *
    * @throws java.io.IOException if the reader has been closed.
    */
   override def ready: Boolean = {
     ensureOpen
     haveLeftoverChar || implReady
   }

   /** Closes the underlying channel (or stream) once; later calls do nothing. */
   def close: Unit = {
     if (isOpen) {
       implClose
       isOpen = false
     }
   }

   /**
    * Moves undecoded bytes to the front of `bb` and reads more bytes from the channel.
    *
    * The Java original also had a branch that read from the InputStream when there was
    * no channel; it is not needed here because the constructor insists on a channel.
    *
    * @return the channel's negative result at end-of-stream, otherwise the number of
    *         bytes now available in `bb`.
    */
   private def readBytes: Int = {
     bb.compact()
     val n: Int =
       try {
         // Read from the channel
         ch.read(bb)
       } finally {
         // Flip even when an IOException is thrown,
         // otherwise the stream will stutter
         bb.flip()
       }
     if (n < 0) n
     else {
       val rem: Int = bb.remaining()
       assert(rem != 0)
       rem
     }
   }

   /**
    * Decodes bytes into `cbuf(off)` up to, but not including, `cbuf(end)`.
    *
    * Contains the change that allows us to treat malformed data as
    * end of data.  However, this requires that the decoder is set
    * to REPORT the issue rather than IGNORE or REPLACE it.
    *
    * @param cbuf destination array.
    * @param off index of the first character to store.
    * @param end index one past the last character that may be stored; must be at least off + 2.
    * @return the number of characters decoded, or -1 if none were decoded because the end
    *         of data or malformed input was reached.
    */
   def implRead(cbuf: Array[Char], off: Int, end: Int): Int = {
     // In order to handle surrogate pairs, this method requires that
     // the invoker attempt to read at least two characters.  Saving the
     // extra character, if any, at a higher level is easier
     // than dealing with it here.
     assert(end - off > 1)
     var cb: CharBuffer = CharBuffer.wrap(cbuf, off, end - off)
     if (cb.position() != 0) {
       // Ensure that cb[0] == cbuf[off]
       cb = cb.slice()
     }

     var eof: Boolean = false
     var continue: Boolean = false // No continue exists in Scala, so we fabricate one with a flag.

     breakable {
       while (true) {
         val cr: CoderResult = decoder.decode(bb, cb, eof) // decode bb to cb
         if (cr.isUnderflow) { // no more data, or not enough data to complete a character.
           if (eof) { break } // eof flag set last time around this loop, so an underflow just reinforces our eof.
           if (!cb.hasRemaining()) { break } // no more room in cb, so break out.
           if ((cb.position() > 0) && !inReady) { break } // we've got at least one character, and more input isn't ready. So break out.
           val n: Int = readBytes // try to add more bytes to the bb
           if (n < 0) { // the channel reported end-of-stream
             eof = true // so we're at EOF.
             if ((cb.position() == 0) && (!bb.hasRemaining())) { break } // no characters decoded and no bytes left to decode, so break out.
             decoder.reset() // TODO ??? why reset. Can we actually discard any decoder state (such as mid-character)?
           }
           continue = true
         }

         if (!continue) {
           if (cr.isOverflow) {
             assert(cb.position() > 0)
             break
           }
           Assert.invariant(cr.isMalformed)
           // DFDL Implementors:
           // The whole reason we reimplemented this code in scala is
           // to change this behavior here.
           //
           // DFDL needs decode errors to behave as if end of data was reached.
           // So instead of throwing (cr.throwException), just set eof true.
           eof = true
           //
           // Setting eof to true isn't enough, because we may have successfully
           // decoded some characters already, in which case we return a count of
           // that many. Only when nothing was decoded do we return -1 (below), so
           // the calling context (the addMore member of Page[T] in PagedSeq.scala)
           // knows we are done and no more can be provided.
           //
           // Doc for the CharsetDecoder class says
           //     "The malformed bytes begin at the input buffer's (possibly incremented)
           //      position"
           //
           // So the byte buffer position is left at the end of the correctly decoded
           // characters.
           //
           // We do not try to back the buffer position up by cr.length(): the invariant
           // bb.position() >= cr.length() doesn't hold. bb.position() can be zero while
           // cr.length is 1 or more (when the very first thing is malformed, bb does not
           // get advanced). Similarly, bb.position() may have advanced 2 bytes when a
           // malformed sequence of length 4 is encountered.
           //
           break
         }
         continue = false
       } // end while loop
     }

     if (eof) {
       // ## Need to flush decoder
       decoder.reset()
     }

     if (cb.position() == 0) {
       if (eof) return -1 // note: we have to return -1, having decoded zero characters successfully.
       assert(false)
     }

     cb.position()
   }

   /** The charset's historical name when it has one, otherwise its canonical name. */
   def encodingName: String = cs match {
     case historical: HistoricallyNamedCharset => historical.historicalName
     case _ => cs.name()
   }

   // True when the stream reports available bytes or the channel is a FileChannel.
   // An IOException from available() is treated as "not ready".
   private def inReady: Boolean = {
     try {
       ((in != null) && (in.available() > 0)) || ch.isInstanceOf[FileChannel] // ## RBC.available()?
     } catch {
       case e: IOException => false
     }
   }

   /** True when undecoded bytes are buffered or the underlying input is ready. */
   def implReady: Boolean = bb.hasRemaining() || inReady

   /** Closes the channel if there is one, otherwise the input stream. */
   def implClose: Unit = {
     if (ch != null) ch.close()
     else in.close()
   }

 }
	package edu.illinois.ncsa.daffodil.processors

	/* Copyright (c) 2012-2013 Tresys Technology, LLC. All rights reserved.
	*
	* Developed by: Tresys Technology, LLC
	* http://www.tresys.com
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy of
	* this software and associated documentation files (the "Software"), to deal with
	* the Software without restriction, including without limitation the rights to
	* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	* of the Software, and to permit persons to whom the Software is furnished to do
	* so, subject to the following conditions:
	*
	* 1. Redistributions of source code must retain the above copyright notice,
	* this list of conditions and the following disclaimers.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimers in the
	* documentation and/or other materials provided with the distribution.
	*
	* 3. Neither the names of Tresys Technology, nor the names of its contributors
	* may be used to endorse or promote products derived from this Software
	* without specific prior written permission.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
	* SOFTWARE.
	*/

	import java.nio.CharBuffer
	import java.io.FileInputStream
	import java.nio.charset.Charset
	import sun.nio.cs.HistoricallyNamedCharset
	import java.nio.channels.ReadableByteChannel
	import java.nio.channels.Channels
	import java.io.IOException
	import java.nio.channels.FileChannel
	import java.nio.charset.CharsetDecoder
	import edu.illinois.ncsa.daffodil.exceptions.Assert
	import java.nio.charset.CodingErrorAction
	import java.nio.ByteBuffer
	import java.io.InputStream
	import scala.util.control.Breaks._
	import java.nio.charset.CoderResult
	import edu.illinois.ncsa.daffodil.processors.charset.USASCII7BitPackedCharset
	import edu.illinois.ncsa.daffodil.processors.charset.CharsetUtils
	import edu.illinois.ncsa.daffodil.processors.charset.SupportsInitialBitOffset
	import edu.illinois.ncsa.daffodil.compiler.Compiler
	import java.nio.charset.MalformedInputException
	import edu.illinois.ncsa.daffodil.processors.charset.CharacterSetAlignmentError

	/**
	 * Companion factory for [[DFDLJavaIOStreamDecoder]].
	 *
	 * The decoder class is a re-implementation of Java's StreamDecoder, made so that
	 * malformed input can be treated as the end of data (which is what DFDL wants)
	 * instead of being ignored, replaced, or raised as an error.
	 *
	 * The implementation mostly stays true to the Java code it was derived from so as
	 * to preserve future inter-operation potential, although the modifications for
	 * bit-level positioning make that a lot less likely.
	 */
	object DFDLJavaIOStreamDecoder {

	  // Capacity, in bytes, of the buffer placed between the channel and the decoder.
	  private val DefaultByteBufferSize: Int = Compiler.readerByteBufferSize.toInt

	  /**
	   * Builds a decoder that pulls bytes from `in` and decodes them with `charset`.
	   *
	   * @param in source of bytes. A FileInputStream contributes its own channel; any
	   *           other stream is wrapped with `Channels.newChannel`.
	   * @param charset character set used for decoding.
	   * @param bitOffset0to7 bit offset within the first byte, passed to the decoder.
	   * @param bitLimit upper bound, passed to the decoder.
	   * @return a new decoder over an initially empty byte buffer.
	   */
	  def forInputStreamReader(in: InputStream, charset: Charset, bitOffset0to7: Int, bitLimit: Long): DFDLJavaIOStreamDecoder = {

	    val channel: ReadableByteChannel = in match {
	      case fileStream: FileInputStream => fileStream.getChannel()
	      case other => Channels.newChannel(other)
	    }

	    // flip() on a freshly allocated buffer leaves position == limit == 0, i.e. the
	    // buffer starts out empty and in read mode.
	    val emptyBuffer = ByteBuffer.allocateDirect(DefaultByteBufferSize)
	    emptyBuffer.flip()

	    new DFDLJavaIOStreamDecoder(bitOffset0to7, bitLimit, charset, emptyBuffer, in, channel)
	  }

	}

	/**
	 * A `java.io.Reader` that decodes characters from a byte channel, derived from
	 * Java's StreamDecoder.
	 *
	 * The purpose of re-implementing this class is to gain control over
	 * how the StreamDecoder handles malformed input. In DFDL we want the
	 * malformed input error to be treated as the end of data. Java's
	 * StreamDecoder only ignores, replaces or treats it as an error.
	 *
	 * Forces the decoder to REPORT on malformed input.
	 *
	 * We also have to implement the upper-bound aka bitLimit, and not
	 * allow consumption of data past it.
	 * NOTE(review): bitLimit is stored as a member, but nothing in this class reads it.
	 *
	 * Instances are not shared across threads (one instance per parallel execution is
	 * assumed), so the `lock.synchronized` sections of the Java original are omitted.
	 *
	 * @param bitOffsetWithinAByte starting bit offset within the first byte, 0 to 7 inclusive.
	 * @param bitLimit upper bound on the data that may be consumed (presumably in bits).
	 * @param cs charset whose decoder converts bytes to characters.
	 * @param bb buffer of not-yet-decoded bytes, kept in read mode between calls.
	 * @param in underlying stream; used for `available()` and as a fallback when closing.
	 * @param ch channel the bytes are read from; must not be null.
	 */
	class DFDLJavaIOStreamDecoder private (bitOffsetWithinAByte: Int, val bitLimit: Long, var cs: Charset, var bb: ByteBuffer,
	  var in: InputStream, var ch: ReadableByteChannel)
	  extends java.io.Reader {

	  Assert.usage(ch != null)
	  Assert.usage(bitOffsetWithinAByte >= 0)
	  Assert.usage(bitOffsetWithinAByte <= 7)

	  // REPORT is required so that implRead gets malformed input back as a CoderResult
	  // and can turn it into end-of-data.
	  val decoder = cs.newDecoder().onMalformedInput(CodingErrorAction.REPORT)

	  //
	  // Now we deal with bit positioning. If the decoder is capable of dealing with a starting bit offset
	  // then we configure it that way. If not we insist we are byte aligned and fail if not.
	  //
	  decoder match {
	    case decoderWithBits: SupportsInitialBitOffset =>
	      decoderWithBits.setInitialBitOffset(bitOffsetWithinAByte)
	    case _ if (bitOffsetWithinAByte == 0) => // ok. Do nothing. We are, luckily, aligned to a byte.
	    // We're counting on the parser surrounding us to catch this error and turn it into
	    // a ParseError.
	    case _ => throw new CharacterSetAlignmentError(cs.name, 8, bitOffsetWithinAByte)
	  }

	  @volatile
	  private var isOpen: Boolean = true

	  /** Throws `java.io.IOException("Stream closed")` once this reader has been closed. */
	  def ensureOpen: Unit = { if (!isOpen) throw new java.io.IOException("Stream closed") }

	  // One-character pushback filled by read0 when a two-character read yields two characters.
	  private var haveLeftoverChar: Boolean = false
	  private var leftoverChar: Char = 0

	  /** Returns `encodingName` while the reader is open, and null after it has been closed. */
	  def getEncoding: String = if (isOpen) encodingName else null

	  /** Reads one character. Returns it as an Int, or -1 when no more characters are available. */
	  override def read: Int = read0

	  // Leftover char is needed because of utf-16 and the surrogate pair stuff.
	  // In that encoding, we need to read two codepoints, and if they are a surrogate pair
	  // synthesize one character from them.
	  //
	  // Presumably this happens in the decoder, but in the case where it isn't a surrogate
	  // pair, we save work by returning two decoded characters, not one, since we've already
	  // decoded the second one. (This little optimization seems very not worth it to me, but
	  // is necessary if you can't peek forward 2 bytes into the underlying bytes without
	  // consuming them.)
	  //
	  // Issue is that UTF-16 is really a variable-width 2-byte or 4-byte character encoding. But
	  // in the early days of unicode and java, it was treated as a fixed width 2-byte.
	  //
	  // Hence, in DFDL we provide control.
	  // This depends on dfdl:utf16Width="variable". If dfdl:utf16Width="fixed", then
	  // we do NOT process surrogate pairs, and return them each as a separate codepoint.
	  //
	  // So at some point our decoder must implement utf16Width="fixed" which means it must
	  // not do this surrogate pair processing. That means either we find a feature for that
	  // in CharsetDecoder (it may be there as this is a common need), or we'll have to clone
	  // and reimplement that class too.
	  //
	  // TBD: implement utf16Width="variable". For now we can SDE on utf16Width="variable".
	  //
	  // This code assumes implicitly that it is ok if the byte stream has been advanced
	  // further (by one character decoding) than it would have to produce one character only.
	  // So long as we're never asking this stream of characters for a byte position we're ok.
	  //
	  // Note: we're not sharing this object across threads. We're assuming one instance per, for
	  // any sort of parallel execution; hence, we don't do lock.synchronized
	  // (in java this was a synchronized method).
	  //
	  private def read0: Int = {
	    if (haveLeftoverChar) {
	      // Return the leftover char, if there is one
	      haveLeftoverChar = false
	      leftoverChar.toInt
	    } else {
	      // Convert more bytes. Always ask for two characters; implRead requires at least two.
	      val cb: Array[Char] = new Array[Char](2)
	      read(cb, 0, 2) match {
	        case -1 => -1
	        case 2 =>
	          leftoverChar = cb(1)
	          haveLeftoverChar = true
	          cb(0).toInt
	        case 1 => cb(0).toInt
	        case _ => -1 // TODO: assert false : n
	      }
	    }
	  }

	  /**
	   * Reads up to `length` characters into `cbuf` starting at index `offset`.
	   * All lengths and offsets are in character units.
	   *
	   * @param cbuf destination array.
	   * @param offset index in `cbuf` of the first character to store.
	   * @param length maximum number of characters to read.
	   * @return the number of characters stored, 0 if `length` is 0, or -1 if nothing was
	   *         stored because the end of data (or malformed input) was reached.
	   * @throws IndexOutOfBoundsException if `offset`/`length` do not describe a range inside `cbuf`.
	   * @throws java.io.IOException if the reader has been closed.
	   */
	  def read(cbuf: Array[Char], offset: Int, length: Int): Int = {
	    var off: Int = offset
	    var len: Int = length

	    ensureOpen
	    if ((off < 0) || (off > cbuf.length) || (len < 0)
	      || ((off + len) > cbuf.length) || ((off + len) < 0)) {
	      throw new IndexOutOfBoundsException()
	    }

	    if (len == 0) { return 0 }
	    var n: Int = 0

	    if (haveLeftoverChar) {
	      // Copy the leftover char into the buffer
	      cbuf(off) = leftoverChar
	      off += 1
	      len -= 1
	      haveLeftoverChar = false
	      n = 1
	      if ((len == 0) || !implReady) {
	        // Return now if this is all we can produce w/o blocking
	        DFDLCharCounter.incr(n)
	        return n
	      }
	    }

	    // If length is 1, then we recursively end up back here with length 2
	    // (via read0), and that's why it's not a stack overflow.
	    if (len == 1) {
	      // Treat single-character array reads just like read()
	      // NOTE(review): read0 goes through read(), which already calls DFDLCharCounter.incr,
	      // so characters delivered on this path look like they are counted more than once.
	      // Confirm whether the counter is meant to be exact.
	      val c: Int = read0
	      if (c == -1) {
	        if (n == 0) return -1
	        DFDLCharCounter.incr(n)
	        return n
	      }
	      cbuf(off) = c.toChar
	      DFDLCharCounter.incr(n + 1)
	      return n + 1
	    }

	    val nr: Int = implRead(cbuf, off, off + len)
	    // implRead returns -1 at end of data. If a leftover char was already stored (n == 1),
	    // report that one character; n + nr would be 0 and the character would be lost.
	    val res: Int = if (nr < 0) { if (n == 1) 1 else nr } else n + nr
	    if (res > 0) DFDLCharCounter.incr(res)
	    res
	  }

	  /**
	   * True when a character is pending or `implReady` reports that more input is at hand.
	   *
	   * @throws java.io.IOException if the reader has been closed.
	   */
	  override def ready: Boolean = {
	    ensureOpen
	    haveLeftoverChar || implReady
	  }

	  /** Closes the underlying channel (or stream) once; later calls do nothing. */
	  def close: Unit = {
	    if (isOpen) {
	      implClose
	      isOpen = false
	    }
	  }

	  /**
	   * Moves undecoded bytes to the front of `bb` and reads more bytes from the channel.
	   *
	   * The Java original also had a branch that read from the InputStream when there was
	   * no channel; it is not needed here because the constructor insists on a channel.
	   *
	   * @return the channel's negative result at end-of-stream, otherwise the number of
	   *         bytes now available in `bb`.
	   */
	  private def readBytes: Int = {
	    bb.compact()
	    val n: Int =
	      try {
	        // Read from the channel
	        ch.read(bb)
	      } finally {
	        // Flip even when an IOException is thrown,
	        // otherwise the stream will stutter
	        bb.flip()
	      }
	    if (n < 0) n
	    else {
	      val rem: Int = bb.remaining()
	      assert(rem != 0)
	      rem
	    }
	  }

	  /**
	   * Decodes bytes into `cbuf(off)` up to, but not including, `cbuf(end)`.
	   *
	   * Contains the change that allows us to treat malformed data as
	   * end of data. However, this requires that the decoder is set
	   * to REPORT the issue rather than IGNORE or REPLACE it.
	   *
	   * @param cbuf destination array.
	   * @param off index of the first character to store.
	   * @param end index one past the last character that may be stored; must be at least off + 2.
	   * @return the number of characters decoded, or -1 if none were decoded because the end
	   *         of data or malformed input was reached.
	   */
	  def implRead(cbuf: Array[Char], off: Int, end: Int): Int = {
	    // In order to handle surrogate pairs, this method requires that
	    // the invoker attempt to read at least two characters. Saving the
	    // extra character, if any, at a higher level is easier
	    // than dealing with it here.
	    assert(end - off > 1)
	    var cb: CharBuffer = CharBuffer.wrap(cbuf, off, end - off)
	    if (cb.position() != 0) {
	      // Ensure that cb[0] == cbuf[off]
	      cb = cb.slice()
	    }

	    var eof: Boolean = false
	    var continue: Boolean = false // No continue exists in Scala, so we fabricate one with a flag.

	    breakable {
	      while (true) {
	        val cr: CoderResult = decoder.decode(bb, cb, eof) // decode bb to cb
	        if (cr.isUnderflow) { // no more data, or not enough data to complete a character.
	          if (eof) { break } // eof flag set last time around this loop, so an underflow just reinforces our eof.
	          if (!cb.hasRemaining()) { break } // no more room in cb, so break out.
	          if ((cb.position() > 0) && !inReady) { break } // we've got at least one character, and more input isn't ready. So break out.
	          val n: Int = readBytes // try to add more bytes to the bb
	          if (n < 0) { // the channel reported end-of-stream
	            eof = true // so we're at EOF.
	            if ((cb.position() == 0) && (!bb.hasRemaining())) { break } // no characters decoded and no bytes left to decode, so break out.
	            decoder.reset() // TODO ??? why reset. Can we actually discard any decoder state (such as mid-character)?
	          }
	          continue = true
	        }

	        if (!continue) {
	          if (cr.isOverflow) {
	            assert(cb.position() > 0)
	            break
	          }
	          Assert.invariant(cr.isMalformed)
	          // DFDL Implementors:
	          // The whole reason we reimplemented this code in scala is
	          // to change this behavior here.
	          //
	          // DFDL needs decode errors to behave as if end of data was reached.
	          // So instead of throwing (cr.throwException), just set eof true.
	          eof = true
	          //
	          // Setting eof to true isn't enough, because we may have successfully
	          // decoded some characters already, in which case we return a count of
	          // that many. Only when nothing was decoded do we return -1 (below), so
	          // the calling context (the addMore member of Page[T] in PagedSeq.scala)
	          // knows we are done and no more can be provided.
	          //
	          // Doc for the CharsetDecoder class says
	          //     "The malformed bytes begin at the input buffer's (possibly incremented)
	          //      position"
	          //
	          // So the byte buffer position is left at the end of the correctly decoded
	          // characters.
	          //
	          // We do not try to back the buffer position up by cr.length(): the invariant
	          // bb.position() >= cr.length() doesn't hold. bb.position() can be zero while
	          // cr.length is 1 or more (when the very first thing is malformed, bb does not
	          // get advanced). Similarly, bb.position() may have advanced 2 bytes when a
	          // malformed sequence of length 4 is encountered.
	          //
	          break
	        }
	        continue = false
	      } // end while loop
	    }

	    if (eof) {
	      // ## Need to flush decoder
	      decoder.reset()
	    }

	    if (cb.position() == 0) {
	      if (eof) return -1 // note: we have to return -1, having decoded zero characters successfully.
	      assert(false)
	    }

	    cb.position()
	  }

	  /** The charset's historical name when it has one, otherwise its canonical name. */
	  def encodingName: String = cs match {
	    case historical: HistoricallyNamedCharset => historical.historicalName
	    case _ => cs.name()
	  }

	  // True when the stream reports available bytes or the channel is a FileChannel.
	  // An IOException from available() is treated as "not ready".
	  private def inReady: Boolean = {
	    try {
	      ((in != null) && (in.available() > 0)) || ch.isInstanceOf[FileChannel] // ## RBC.available()?
	    } catch {
	      case e: IOException => false
	    }
	  }

	  /** True when undecoded bytes are buffered or the underlying input is ready. */
	  def implReady: Boolean = bb.hasRemaining() || inReady

	  /** Closes the channel if there is one, otherwise the input stream. */
	  def implClose: Unit = {
	    if (ch != null) ch.close()
	    else in.close()
	  }

	}