| /* Copyright (c) 2016 Tresys Technology, LLC. All rights reserved. |
| * |
| * Developed by: Tresys Technology, LLC |
| * http://www.tresys.com |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy of |
| * this software and associated documentation files (the "Software"), to deal with |
| * the Software without restriction, including without limitation the rights to |
| * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
| * of the Software, and to permit persons to whom the Software is furnished to do |
| * so, subject to the following conditions: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimers. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimers in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * 3. Neither the names of Tresys Technology, nor the names of its contributors |
| * may be used to endorse or promote products derived from this Software |
| * without specific prior written permission. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE |
| * SOFTWARE. |
| */ |
| |
| package edu.illinois.ncsa.daffodil.dsom |
| |
| import edu.illinois.ncsa.daffodil.processors.EncodingRuntimeData |
| import edu.illinois.ncsa.daffodil.schema.annotation.props.gen.Representation |
| import edu.illinois.ncsa.daffodil.schema.annotation.props.gen.EncodingErrorPolicy |
| import edu.illinois.ncsa.daffodil.processors.KnownEncodingMixin |
| import edu.illinois.ncsa.daffodil.io.NonByteSizeCharset |
| |
/**
 * Character-encoding related members mixed into every [[Term]].
 *
 * Since an encoding can be computed at runtime, this trait creates values
 * that tell us whether the encoding is known or not, so that things can be
 * decided at schema compile time when possible.
 */
trait TermEncodingMixin extends KnownEncodingMixin { self: Term =>

  // Registers the pre-serialization form of encodingInfo as a required
  // evaluation (presumably so it is forced, and any diagnostics it raises are
  // reported, during schema compilation -- confirm against requiredEvaluations).
  requiredEvaluations(encodingInfo.preSerialization)

  /**
   * The encodingErrorPolicy in effect for this term.
   *
   * When the tunable `requireEncodingErrorPolicyProperty` is set, the
   * `encodingErrorPolicy` property is used as-is; otherwise the optional
   * property is consulted and `EncodingErrorPolicy.Replace` is the fallback.
   *
   * A policy of `EncodingErrorPolicy.Error` is reported via
   * `notYetImplemented` (see DFDL-935).
   */
  protected final lazy val defaultEncodingErrorPolicy = {
    val policy =
      if (self.tunable.requireEncodingErrorPolicyProperty) {
        encodingErrorPolicy
      } else {
        optionEncodingErrorPolicy.getOrElse(EncodingErrorPolicy.Replace)
      }
    if (policy == EncodingErrorPolicy.Error) {
      // DFDL-935 to enable
      notYetImplemented("dfdl:encodingErrorPolicy=\"error\"")
    }
    policy
  }

  /**
   * True when the encoding is a compile-time constant, i.e., `encodingEv`
   * is constant.
   *
   * As a side effect, when the constant encoding name starts with "UTF-16"
   * (compared in upper case), `utf16Width` is demanded so that its checking
   * is performed. That demand does not affect the value returned here.
   */
  final lazy val isKnownEncoding: Boolean = {
    val isKnown = this.encodingEv.isConstant
    if (isKnown) {
      // .get is guarded by isConstant above.
      val encName = encodingEv.optConstant.get.toUpperCase()
      if (encName.startsWith("UTF-16")) {
        utf16Width // demand this so checking is done
      }
    }
    isKnown
  }

  /**
   * When the encoding is known, this tells us the mandatory
   * alignment required. This is always 1 or 8.
   *
   * A charset that is a [[NonByteSizeCharset]] yields 1; every other known
   * charset yields 8. Unknown (runtime) encodings are always assumed to be
   * 8-bit aligned.
   *
   * Also issues a schema definition warning when the deprecated encoding
   * name US-ASCII-7-BIT-PACKED is used.
   */
  override final lazy val knownEncodingAlignmentInBits: Int = {
    if (isKnownEncoding) {
      schemaDefinitionWarningWhen(knownEncodingName == "US-ASCII-7-BIT-PACKED",
        "Character set encoding name US-ASCII-7-BIT-PACKED is deprecated. " +
          "Please update your DFDL schema to use the name X-DFDL-US-ASCII-7-BIT-PACKED.")
      // .get is guarded by isKnownEncoding above.
      val cs = charsetEv.optConstant.get
      cs.charset match {
        case _: NonByteSizeCharset => 1
        case _ => 8
      }
    } else 8 // unknown encodings always assumed to be 8-bit aligned.
  }

  /**
   * Runtime data object bundling the encoding-related facts computed by
   * this trait (charset evaluatable, UTF-16 width, error policy, summary
   * encoding, known-ness, scannability, and known alignment).
   */
  lazy val encodingInfo: EncodingRuntimeData =
    new EncodingRuntimeData(termRuntimeData, charsetEv, schemaFileLocation, optionUTF16Width,
      defaultEncodingErrorPolicy, summaryEncoding, isKnownEncoding, isScannable,
      knownEncodingAlignmentInBits)

  /**
   * True if this element itself consists only of text. No binary stuff like alignment
   * or skips.
   * <p>
   * Not recursive into contained children.
   * <p>
   * For an element: no skip regions, text-compatible alignment, and either a
   * simple type with implied text representation or a complex type.
   * For a model group: no skip regions and text-compatible alignment.
   * For a group reference: whatever the referenced group answers.
   */
  final lazy val isLocallyTextOnly: Boolean = {
    this match {
      case eb: ElementBase =>
        eb.hasNoSkipRegions &&
          hasTextAlignment &&
          ((eb.isSimpleType && eb.impliedRepresentation == Representation.Text) ||
            eb.isComplexType)
      case mg: ModelGroup =>
        mg.hasNoSkipRegions &&
          hasTextAlignment
      case gr: GroupRef =>
        gr.group.isLocallyTextOnly
    }
  }

  /**
   * True if it is sensible to scan this data e.g., with a regular expression.
   * Requires that all children have same encoding as enclosing groups and
   * elements, requires that there is no leading or trailing alignment regions,
   * skips. We have to be able to determine that we are for sure going to
   * always be properly aligned for text.
   * <p>
   * Caveat: we only care that the encoding is the same if the term
   * actually could have text (couldHaveText is an LV) as part of its
   * representation. For example, a sequence
   * with no initiator, terminator, nor separators can have any encoding at all,
   * without disqualifying an element containing it from being scannable. There
   * has to be text that would be part of the scan.
   * <p>
   * If the root element isScannable, and encodingErrorPolicy is 'replace',
   * then we can use a lower-overhead I/O layer - basically we can use a java.io.InputStreamReader
   * directly.
   * <p>
   * We are going to depend on the fact that if the encoding is going to be this
   * X-DFDL-US-ASCII-7-BIT-PACKED thingy (7-bits wide code units, so aligned at 1 bit) that
   * this encoding must be specified statically in the schema.
   * <p>
   * If an encoding is determined at runtime, then we will
   * insist on it being 8-bit aligned code units.
   * <p>
   * Concretely: a term that is not represented is scannable; otherwise the
   * term is scannable unless its summaryEncoding is Mixed, Binary, NoText,
   * or Runtime.
   */
  final lazy val isScannable: Boolean = {
    if (!isRepresented) true
    else {
      summaryEncoding match {
        case Mixed | Binary | NoText | Runtime => false
        case _ => true
      }
    }
  }

  /**
   * Combines two points of the encoding lattice.
   * <ul>
   * <li>Equal values combine to themselves.</li>
   * <li>Mixed combined with anything is Mixed.</li>
   * <li>Binary combined with anything other than Binary is Mixed. Note that
   * this includes NoText, because Binary is tested before NoText.</li>
   * <li>NoText combined with any remaining value yields that other value.</li>
   * <li>Any two other differing values (e.g., different encoding names, or
   * a name and Runtime) combine to Mixed.</li>
   * </ul>
   * The order of the cases is significant.
   */
  private def combinedEncoding(
    s1: EncodingLattice,
    s2: EncodingLattice): EncodingLattice = {
    (s1, s2) match {
      case (x, y) if (x == y) => x
      case (Mixed, _) => Mixed
      case (_, Mixed) => Mixed
      // (Binary, Binary) is already handled by the equality case above.
      case (Binary, _) => Mixed
      case (_, Binary) => Mixed
      case (NoText, x) => x
      case (x, NoText) => x
      case _ => Mixed
    }
  }

  /**
   * Roll up from the bottom. This is abstract interpretation.
   * The top (aka conflicting encodings) is "mixed"
   * The bottom is "noText" (combines with anything)
   * The values are encoding names, or "runtime" for expressions.
   * <p>
   * By doing expression analysis we could do a better job
   * here and determine when things that use expressions
   * to get the encoding are all going to get the same
   * expression value. For now, if it is an expression
   * then we lose.
   * <p>
   * This term's own value is folded together with the summaryEncoding of
   * each of its termChildren using combinedEncoding.
   */
  final lazy val summaryEncoding: EncodingLattice = {
    // The order of these tests is significant; the first that applies wins.
    val myEnc: EncodingLattice =
      if (!isRepresented) NoText
      else if (!isLocallyTextOnly) Binary
      else if (!couldHaveText) NoText
      else if (!isKnownEncoding) Runtime
      else NamedEncoding(this.knownEncodingName)
    val childEncs: Seq[EncodingLattice] = termChildren.map { child => child.summaryEncoding }
    childEncs.fold(myEnc) { (acc, enc) => combinedEncoding(acc, enc) }
  }

  // Disabled code, retained for reference.
  //
  // Returns true if this term either cannot conflict because it has no textual
  // aspects, or if it couldHaveText then the encoding must be same.
  //
  //  private def hasCompatibleEncoding(t2: Term): Boolean = {
  //    Assert.usage(isKnownEncoding)
  //    Assert.usage(t2.isKnownEncoding)
  //    if (!couldHaveText) true
  //    else if (!t2.couldHaveText) true
  //    else this.knownEncodingCharset == t2.knownEncodingCharset
  //  }

  /**
   * no alignment properties that would explicitly create
   * a need to align in a way that is not on a suitable boundary
   * for a character.
   * <p>
   * That is, alignmentValueInBits is an exact multiple of
   * knownEncodingAlignmentInBits.
   */
  final lazy val hasTextAlignment: Boolean = {
    val av = alignmentValueInBits
    val kav = this.knownEncodingAlignmentInBits
    av % kav == 0
  }

}