// blob: 7364458618da8c1226c8c2f9eda0583211a712f7 [file] [log] [blame]
/* Copyright (c) 2012-2015 Tresys Technology, LLC. All rights reserved.
*
* Developed by: Tresys Technology, LLC
* http://www.tresys.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal with
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of Tresys Technology, nor the names of its contributors
* may be used to endorse or promote products derived from this Software
* without specific prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
* SOFTWARE.
*/
package edu.illinois.ncsa.daffodil.processors
import java.nio.charset.CharsetDecoder
import java.nio.charset.CharsetEncoder
import edu.illinois.ncsa.daffodil.dsom._
import edu.illinois.ncsa.daffodil.exceptions.Assert
import edu.illinois.ncsa.daffodil.exceptions.SchemaFileLocation
import edu.illinois.ncsa.daffodil.exceptions.ThrowsSDE
import edu.illinois.ncsa.daffodil.processors.charset.DFDLCharset
import edu.illinois.ncsa.daffodil.schema.annotation.props.gen._
import edu.illinois.ncsa.daffodil.schema.annotation.props.gen.EncodingErrorPolicy
import edu.illinois.ncsa.daffodil.schema.annotation.props.gen.UTF16Width
import edu.illinois.ncsa.daffodil.util.PreSerialization
import edu.illinois.ncsa.daffodil.util.TransientParam
import java.nio.charset.StandardCharsets
/**
* To eliminate circularities between RuntimeData objects and the
* encoding compiled expression, all information derived from encodings
* must come from one of these objects.
*
* That way we can construct this separately, and the compilation of the
* compiled expression for encoding can happily insist on a runtimeData object
* existing to provide the information it generally needs.
*/
/**
 * Encoding-related definitions that are identical in the schema compiler
 * and at runtime.
 *
 * Implementors provide the four abstract members below; every other
 * member is derived from them.
 */
trait KnownEncodingMixin { self: ThrowsSDE =>

  def isKnownEncoding: Boolean
  def charsetEv: CharsetEv
  def knownEncodingAlignmentInBits: Int
  def optionUTF16Width: Option[UTF16Width]

  /**
   * Name of the encoding, read from the constant value of charsetEv.
   * Asserts (as an invariant) that the encoding is known.
   *
   * Note that the canonical form for encoding names is all upper case.
   */
  final lazy val knownEncodingName = {
    Assert.invariant(isKnownEncoding)
    charsetEv.optConstant.get.charsetName
  }

  /**
   * The DFDLCharset constructed from knownEncodingName.
   */
  final lazy val knownEncodingCharset: DFDLCharset = new DFDLCharset(knownEncodingName)

  /**
   * True only when the encoding is known and its charset has a fixed
   * width defined.
   *
   * A fixed width enables optimizations and random-access, whereas
   * variable-width character sets require scanning to determine
   * their end.
   */
  final lazy val knownEncodingIsFixedWidth: Boolean =
    isKnownEncoding && knownEncodingCharset.maybeFixedWidth.isDefined

  /**
   * Minimum code point width, in bits, of the known encoding's charset.
   */
  final lazy val knownEncodingWidthInBits =
    encodingMinimumCodePointWidthInBits(knownEncodingCharset)

  /**
   * Minimum code point width in bits for the given charset.
   *
   * UTF-8 yields 8. For every other charset the value of its
   * maybeFixedWidth is returned; `.get` is applied unconditionally, so
   * callers are presumably expected to pass only charsets that have a
   * fixed width defined (NOTE(review): confirm against DFDLCharset).
   */
  final def encodingMinimumCodePointWidthInBits(cs: DFDLCharset) = {
    if (StandardCharsets.UTF_8 == cs.charset) 8
    else cs.maybeFixedWidth.get
  }

  /**
   * True when the encoding is known and its name, upper-cased, begins
   * with "UTF".
   */
  final lazy val knownEncodingIsUnicode: Boolean =
    isKnownEncoding && knownEncodingName.toUpperCase.startsWith("UTF")

  /**
   * True when the encoding is not known, or when it is known and its
   * alignment is 8 bits.
   */
  final lazy val mustBeAnEncodingWith8BitAlignment: Boolean =
    if (isKnownEncoding) knownEncodingAlignmentInBits == 8 else true

  /**
   * Negation of knownEncodingIsFixedWidth; hence also true whenever the
   * encoding is not known.
   */
  final lazy val couldBeVariableWidthEncoding: Boolean = !knownEncodingIsFixedWidth

  /**
   * Converts a length in characters to a length in bits by multiplying
   * by knownEncodingWidthInBits.
   *
   * Usage requirement (checked): the encoding is known and fixed width.
   */
  final def knownFixedWidthEncodingInCharsToBits(nChars: Long): Long = {
    Assert.usage(isKnownEncoding)
    Assert.usage(knownEncodingIsFixedWidth)
    knownEncodingWidthInBits * nChars
  }
}
/**
 * This is the object we serialize.
 *
 * At compile time an encodingInfo is created for ourselves, with a
 * schema component supplied as context.
 *
 * At runtime an encodingInfo is created with a TermRuntimeData object
 * supplied as context.
 *
 * The two by-name constructor arguments are only evaluated through the
 * lazy vals termRuntimeData and charsetEv.
 */
final class EncodingRuntimeData(
  @TransientParam termRuntimeDataArg: => TermRuntimeData,
  @TransientParam charsetEvArg: => CharsetEv,
  override val schemaFileLocation: SchemaFileLocation,
  val optionUTF16Width: Option[UTF16Width],
  val defaultEncodingErrorPolicy: EncodingErrorPolicy,
  val summaryEncoding: EncodingLattice,
  val isKnownEncoding: Boolean,
  val isScannable: Boolean,
  override val knownEncodingAlignmentInBits: Int)
  extends KnownEncodingMixin with ImplementsThrowsSDE with PreSerialization {

  lazy val termRuntimeData = termRuntimeDataArg
  lazy val charsetEv = charsetEvArg

  lazy val runtimeDependencies = List(charsetEv)

  /**
   * Evaluates charsetEv against the state and asks the state for a
   * decoder for the resulting charset.
   */
  def getDecoder(state: ParseOrUnparseState): CharsetDecoder =
    state.getDecoder(charsetEv.evaluate(state).charset)

  /**
   * Evaluates charsetEv against the state and asks the state for an
   * encoder for the resulting charset.
   */
  def getEncoder(state: ParseOrUnparseState): CharsetEncoder =
    getEncoder(state, charsetEv.evaluate(state))

  /**
   * Asks the state for an encoder for the charset of the supplied
   * DFDLCharset; charsetEv is not consulted.
   */
  def getEncoder(state: ParseOrUnparseState, dcs: DFDLCharset): CharsetEncoder =
    state.getEncoder(dcs.charset)

  /**
   * The DFDLCharset obtained by evaluating charsetEv against the state.
   */
  def getDFDLCharset(state: ParseOrUnparseState): DFDLCharset =
    charsetEv.evaluate(state)

  /**
   * Runs the inherited preSerialization, then references both lazy vals
   * so that they are evaluated here. The method's result is the
   * charsetEv value.
   */
  override def preSerialization: Any = {
    super.preSerialization
    termRuntimeData
    charsetEv
  }

  /** Hands the output stream to serializeObject. */
  @throws(classOf[java.io.IOException])
  private def writeObject(out: java.io.ObjectOutputStream): Unit = serializeObject(out)

  /**
   * True when the term's alignment value in bits equals the known
   * encoding alignment in bits, i.e. there are no alignment properties
   * that would explicitly create a need to align in a way that is not
   * on a suitable boundary for a character.
   */
  lazy val hasTextAlignment: Boolean =
    knownEncodingAlignmentInBits == termRuntimeData.alignmentValueInBits
}