blob: b88edfa2a3d45877f97576a73bd52561bb3b6e04 [file] [log] [blame]
/* Copyright (c) 2012-2015 Tresys Technology, LLC. All rights reserved.
*
* Developed by: Tresys Technology, LLC
* http://www.tresys.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal with
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of Tresys Technology, nor the names of its contributors
* may be used to endorse or promote products derived from this Software
* without specific prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
* SOFTWARE.
*/
package edu.illinois.ncsa.daffodil.processors.charset
import edu.illinois.ncsa.daffodil.exceptions.Assert
import edu.illinois.ncsa.daffodil.util.Maybe
import edu.illinois.ncsa.daffodil.util.Maybe._
import java.nio.charset.Charset
import java.nio.charset.IllegalCharsetNameException
import java.io.UnsupportedEncodingException
import java.nio.ByteBuffer
import java.nio.CharBuffer
import java.nio.charset.CodingErrorAction
/**
 * Serializable wrapper pairing a charset name with the `Charset` it names.
 *
 * Only `charsetName` is serialized. The `charset` member is `@transient`
 * and lazy, so after de-serialization it is looked up again from
 * `charsetName` the first time it is accessed.
 *
 * @param charsetName name handed to `CharsetUtils.getCharset` to obtain the charset
 */
class DFDLCharset(val charsetName: String) extends Serializable {

  @transient lazy val charset: Charset = CharsetUtils.getCharset(charsetName)

  // Touch the lazy val during construction so that an invalid name is
  // reported as soon as the object is created instead of at first use.
  charset
}
/**
 * Helpers for looking up charsets by name and for probing JVM decoder behavior.
 */
object CharsetUtils {

  /**
   * Looks up the `Charset` for the given name.
   *
   * The names "US-ASCII-7-BIT-PACKED" (deprecated) and
   * "X-DFDL-US-ASCII-7-BIT-PACKED" (official) are matched case-insensitively
   * and map to `USASCII7BitPackedCharset`. Every other name is passed to
   * `Charset.forName`.
   *
   * The case-insensitive match uses `equalsIgnoreCase`, not the no-argument
   * `toUpperCase()`: the latter depends on the JVM default locale, and under
   * e.g. a Turkish locale a lowercase 'i' upper-cases to a dotted capital I,
   * so a lowercase spelling of these names would fail to match.
   *
   * @param charsetName the charset name; asserted (via `Assert.usage`) to be
   *                    neither null nor the empty string. There is no notion
   *                    of a default charset in DFDL, so tolerating a missing
   *                    name would just lead to bugs.
   * @return the charset for the name
   * @throws UnsupportedEncodingException if the name is syntactically illegal
   *         (`Charset.forName` threw `IllegalCharsetNameException`)
   *
   * Note: a syntactically legal name for which the JVM has no charset makes
   * `Charset.forName` throw `UnsupportedCharsetException`; that exception is
   * not caught here and propagates to the caller unchanged.
   */
  def getCharset(charsetName: String): Charset = {
    // We should throw if the name is null. Tolerating this would just lead to bugs.
    Assert.usage(charsetName != null)
    Assert.usage(charsetName != "")
    val isSevenBitPacked =
      charsetName.equalsIgnoreCase("US-ASCII-7-BIT-PACKED") || // deprecated name
        charsetName.equalsIgnoreCase("X-DFDL-US-ASCII-7-BIT-PACKED") // new official name
    val maybeCharset = try {
      val cs =
        if (isSevenBitPacked) USASCII7BitPackedCharset
        else Charset.forName(charsetName)
      One(cs)
    } catch {
      case e: IllegalCharsetNameException => Nope
    }
    if (maybeCharset.isEmpty) throw new UnsupportedEncodingException(charsetName)
    maybeCharset.value
  }

  /**
   * Subtle bug in decoders in Java 7 when there is room for only 1
   * character in the CharBuffer.
   *
   * While we could just test for Java 8, which doesn't have this bug,
   * it is worthwhile to keep this in case we end up trying to support
   * Java 7 at some point in the future.
   *
   * The probe decodes a malformed UTF-8 input into a CharBuffer of capacity 1
   * with both error actions set to REPORT. A correct decoder reports an error;
   * a decoder with the bug reports overflow without consuming any input or
   * producing any output. Any other outcome is treated as an invariant failure.
   */
  lazy val hasJava7DecoderBug = {
    val decoder = Charset.forName("utf-8").newDecoder()
    decoder.onMalformedInput(CodingErrorAction.REPORT)
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT)
    val bb = ByteBuffer.allocate(6)
    // 0xF0 announces a 4-byte UTF-8 sequence, but the remaining bytes of the
    // freshly allocated buffer are zero, which are not continuation bytes.
    bb.put(-16.toByte) // invalid first utf-8 byte
    bb.limit(6).position(0)
    val cb = CharBuffer.allocate(1) // room for exactly one char triggers the bug
    val cr = decoder.decode(bb, cb, true)
    if (cr.isOverflow && // This is the bug!
      cb.position == 0 &&
      bb.position == 0) true
    else if (cr.isError) false // no bug
    //    else if (cr.isOverflow && // This is what *should* happen if CodingErrorAction.REPLACE is used.
    //      cb.position == 1 &&
    //      bb.position == 1 &&
    //      cb.get(0) == this.unicodeReplacementChar) false
    else
      Assert.invariantFailed("Unexpected decoder behavior. " + cr)
  }

  /** The Unicode replacement character, U+FFFD. */
  val unicodeReplacementChar = '\uFFFD'
}
/**
 * Exception whose message reports that a character set's required alignment
 * differs from the alignment that was actually found.
 *
 * @param csName                  name of the character set, as shown in the message
 * @param requiredAlignmentInBits alignment, in bits, the character set requires
 * @param alignmentInBitsWas      alignment, in bits, that was found
 */
class CharacterSetAlignmentError(csName: String, requiredAlignmentInBits: Int, alignmentInBitsWas: Int)
  extends Exception(
    String.format(
      "Character set %s requires %s alignment (bits), but alignment was %s (bits)",
      csName,
      Int.box(requiredAlignmentInBits), // box explicitly: String.format takes Object varargs
      Int.box(alignmentInBitsWas)))