// blob: d7bd2685fb5120b7ae784c58716a8b9f17a0875d [file] [log] [blame]
package edu.illinois.ncsa.daffodil.processors
/* Copyright (c) 2012-2013 Tresys Technology, LLC. All rights reserved.
*
* Developed by: Tresys Technology, LLC
* http://www.tresys.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal with
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of Tresys Technology, nor the names of its contributors
* may be used to endorse or promote products derived from this Software
* without specific prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
* SOFTWARE.
*/
import scala.util.parsing.combinator.RegexParsers
import edu.illinois.ncsa.daffodil.util.Logging
import scala.util.parsing.input.Reader
import edu.illinois.ncsa.daffodil.util.Logging
import edu.illinois.ncsa.daffodil.util.LogLevel
import edu.illinois.ncsa.daffodil.util._
import scala.collection.mutable.Queue
import java.util.regex.Pattern
import edu.illinois.ncsa.daffodil.exceptions.Assert
import edu.illinois.ncsa.daffodil.processors.DelimiterType._
import edu.illinois.ncsa.daffodil.processors.DelimiterLocation._
import edu.illinois.ncsa.daffodil.exceptions.Assert
import scala.Array.canBuildFrom
import scala.language.reflectiveCalls
import scala.language.reflectiveCalls
/**
 * Text justification options: None, Left, Right and Center.
 *
 * NOTE: the value names `None`, `Left` and `Right` coincide with names from the
 * standard library, so prefer the qualified form (e.g. `TextJustificationType.None`)
 * wherever both could be in scope.
 */
object TextJustificationType extends Enum {
  /** Common supertype of the justification values; sealed, so all values are declared here. */
  sealed trait Type extends EnumValueType

  case object None extends Type
  case object Left extends Type
  case object Right extends Type
  case object Center extends Type
}
/**
 * Common base of the two delimiter-parse outcomes, DelimParseSuccess and
 * DelimParseFailure.
 *
 * @param nextArg the reader associated with this result
 */
sealed abstract class DelimParseResult(nextArg: Reader[Char]) {
  /** Whether this result represents a successful parse. */
  def isSuccess: Boolean

  /** The reader that was supplied at construction, returned as-is. */
  def nextReader: Reader[Char] = nextArg

  /**
   * The same reader as `nextReader`, downcast to DFDLCharReader.
   * The cast is unchecked: it fails with a ClassCastException if the reader is
   * some other Reader implementation.
   */
  def next: DFDLCharReader = nextReader.asInstanceOf[DFDLCharReader]
}
/**
 * Successful outcome of a delimiter parse.
 *
 * Every constructor parameter is a public member, as for any case class.
 *
 * @param delimiter     the delimiter string recorded for this result
 * @param delimiterType the kind of delimiter, as a DelimiterType value
 * @param delimiterLoc  the location of the delimiter, as a DelimiterLocation value
 * @param numBits       a length in bits; DelimParseSuccessFactory fills it with the
 *                      bit length of the content
 * @param fieldArg      the field text, also exposed through `field` and `get`
 * @param nextArg       the reader handed on to DelimParseResult
 * @param numCharsRead  a length in characters; DelimParseSuccessFactory fills it
 *                      with the character count of the content
 */
case class DelimParseSuccess(
  delimiter: String,
  delimiterType: DelimiterType.Type,
  delimiterLoc: DelimiterLocation.Type,
  numBits: Int,
  fieldArg: String,
  nextArg: Reader[Char],
  numCharsRead: Int)
  extends DelimParseResult(nextArg) {

  /** Always true for a success. */
  def isSuccess: Boolean = true

  /** The field text given at construction. */
  def field: String = fieldArg

  /** Alias for `field`. */
  def get: String = field
}
/**
 * Failed outcome of a delimiter parse.
 *
 * @param msgArg  the failure message, also exposed through `msg`
 * @param nextArg the reader handed on to DelimParseResult
 */
case class DelimParseFailure(msgArg: String, nextArg: Reader[Char])
  extends DelimParseResult(nextArg) {

  /** Always false for a failure. */
  def isSuccess: Boolean = false

  /** The failure message given at construction. */
  def msg: String = msgArg
}
class DFDLDelimParserCommon(stringBitLengthFunction: String => Int) extends RegexParsers with DebugRegexParsers {
/**
 * Converts a parser-combinator Success[String] into a DelimParseSuccess.
 *
 * This object has to be nested because it has as an argument type Success[String]
 * and that type is only available to things that implement the scala...Parsers trait.
 *
 * This is why you don't want to ball up all your stuff into a trait: you make reuse
 * by derivation work, but you make reuse by encapsulation very difficult.
 */
object DelimParseSuccessFactory {
  /**
   * If content is supplied then it is used to determine the field length.
   * If None then the extracted field value itself is used.
   *
   * @param res           the successful parse whose result becomes the field and
   *                      whose remaining input becomes the next reader
   * @param delimiter     delimiter string stored in the result
   * @param delimiterType delimiter kind stored in the result
   * @param contentOpt    optional text to measure instead of the field itself
   * @param dLoc          delimiter location stored in the result
   * @return a DelimParseSuccess whose numBits is stringBitLengthFunction applied to
   *         the measured text and whose numCharsRead is that text's length
   */
  def apply(res: Success[String], delimiter: String, delimiterType: DelimiterType.Type, contentOpt: Option[String],
    dLoc: DelimiterLocation.Type): DelimParseSuccess =
    res match {
      case Success(parsedField, remaining) =>
        // The text that is measured: the explicit content if given, else the field.
        val measured = contentOpt.getOrElse(res.get)
        val charCount = measured.length
        val bitCount = stringBitLengthFunction(measured)
        new DelimParseSuccess(delimiter, delimiterType, dLoc, bitCount, parsedField, remaining, charCount)
    }
}
// RegexParsers skips leading whitespace before literal and regex parsers when this
// flag is true; it is switched off here so whitespace in the input is not silently
// dropped before a parser is applied.
override val skipWhitespace = false
/**
 * Need a parser that will always fail.
 * Essentially a parser to reflect the ability to pass in an empty Separator
 * or empty Terminator Set.
 *
 * The returned parser never consumes input: the Failure it produces carries the
 * same input it was applied to.
 *
 * @param expected not used by the implementation
 * @param name     label that prefixes the failure message
 * @return a parser whose result is always a Failure
 */
def parserAlwaysFail[T](expected: String)(name: String): Parser[T] =
  Parser[T] { in => Failure(name + " expected to fail.", in) }
/**
 * Compiles each delimiter in delimList and returns two index-aligned arrays:
 *
 *  - the Parser[String] representation of each delimiter, and
 *  - the regex string representation of each delimiter.
 *
 * Both arrays follow the order produced by sortDelims.
 *
 * @param delimList the delimiter strings to compile
 * @return (parsers, regex strings), one entry per delimiter
 */
def buildDelims(delimList: Set[String]): (Array[Parser[String]], Array[String]) = {
  // We probably always want delims ordered:
  // Multi-char delims containing WSP+/*, WSP+, WSP*, multi-char delims, WSP, single-char delims
  val compiled = sortDelims(delimList).map { str =>
    val d = new Delimiter()
    d.compile(str)
    val delimRegex = d.delimRegExParseDelim // The regex representing the actual delimiter
    // The type ascription triggers the Regex => Parser[String] implicit conversion.
    val delimParser: Parser[String] = delimRegex.r
    (delimParser, delimRegex)
  }
  val (parsers, regexes) = compiled.unzip
  (parsers.toArray, regexes.toArray)
}
/**
 * Orders the delimiters into the sequence in which they should be tried:
 *
 *  1. delimiters that contain "%WSP*;" or "%WSP+;" together with other text,
 *     in descending String order;
 *  2. "%WSP+;" by itself, then "%WSP*;" by itself;
 *  3. the remaining delimiters longer than one character, in descending String order;
 *  4. the remaining delimiters (length of at most one), in the set's iteration order.
 *
 * Descending order places a string ahead of any of its own prefixes.
 *
 * @param delimList the delimiters to order
 * @return every element of delimList exactly once, in the order described above
 */
def sortDelims(delimList: Set[String]): Seq[String] = {
  val star = "%WSP*;"
  val plus = "%WSP+;"

  // Elements of a set are distinct, so sort stability is irrelevant here.
  def descending(items: Set[String]): Seq[String] =
    items.toArray[String].sorted.reverse.toSeq

  val loneStar = delimList.filter(_ == star)
  val lonePlus = delimList.filter(_ == plus)
  val others = delimList -- (loneStar union lonePlus)

  val unbounded = others.filter(d => d.contains(star) || d.contains(plus))
  val multi = (others -- unbounded).filter(_.length > 1)
  val single = others -- (multi union unbounded)

  descending(unbounded) ++ lonePlus ++ loneStar ++ descending(multi) ++ single
}
/**
 * Combines the delimiters into a single alternation.
 *
 * The separator regexes come first, followed by the terminator regexes, all
 * joined with "|". Two empty arrays yield the empty string.
 *
 * @param sepsRegex  regex strings for the separators
 * @param termsRegex regex strings for the terminators
 * @return the alternation "s1|s2|...|t1|t2|..."
 */
def combineDelimitersRegex(sepsRegex: Array[String], termsRegex: Array[String]): String =
  (sepsRegex ++ termsRegex).mkString("|")
/**
 * Builds one alternation regex covering every literal in dfdlLiteralList.
 *
 * The literals are compiled with buildDelims; only the regex strings are kept
 * and they are joined by combineDelimitersRegex with no terminators.
 *
 * @param dfdlLiteralList the literals to turn into a regex
 * @return the combined regex string
 */
def getDfdlLiteralRegex(dfdlLiteralList: Set[String]): String = {
  val literalRegexes = this.buildDelims(dfdlLiteralList)._2
  combineDelimitersRegex(literalRegexes, Array.empty[String])
}
// TODO: does this handle %ES; or do we have to have outside separate checks for that?
// There is a separate check right now in LiteralNilDelimitedOrEndOfData.
/**
 * Tests whether the whole of field matches one of the given literals.
 *
 * The literals are turned into a single alternation regex and the field must
 * match it in its entirety; a literal occurring only somewhere inside the field
 * does not count.
 *
 * @param field           the text to test
 * @param dfdlLiteralList the literals to test against
 * @return true if the entire field matches the combined literal regex
 */
def isFieldDfdlLiteral(field: String, dfdlLiteralList: Set[String]): Boolean = {
  val dfdlLiteralRegex = getDfdlLiteralRegex(dfdlLiteralList)
  // matches() alone decides the result; a preceding find() would only be wasted work.
  Pattern.compile(dfdlLiteralRegex).matcher(field).matches()
}
// Parser for end of input: the regex `\z` matches (the empty string) only at the
// very end of the input.
lazy val EOF: Parser[String] = """\z""".r
/**
 * Builds a parser that matches the given regex pattern at the current position.
 *
 * The pattern is prefixed with "(?s)" so that `.` also matches line terminators.
 * After the pattern an optional end-of-input is accepted, and only the text
 * matched by the pattern is returned.
 *
 * The strings passed to `!!!` name the intermediate parsers; `!!!` is not defined
 * in this file (presumably it comes from DebugRegexParsers).
 *
 * @param pattern the regex source to match
 * @return a parser yielding the text matched by the pattern
 */
def generateInputPatternedParser(pattern: String): Parser[String] = {
  val dotAllRegex = ("(?s)" + pattern).r
  val thePattern: Parser[String] = "generateInputPatternedParser.thePattern".!!!(dotAllRegex)
  // `~!` is the non-backtracking sequence; the optional EOF result is discarded.
  "generateInputPatternedParser.entry".!!!(thePattern ~! opt(EOF)) ^^ {
    case matched ~ _ => matched
  }
}
/**
 * Builds a parser that consumes exactly nChars characters, whatever they are.
 *
 * The regex used is "(?s).{nChars}", so line terminators count as characters.
 * Only the first nChars characters are consumed; nothing after them is matched.
 *
 * NOTE(review): a negative nChars produces an invalid regex quantifier; confirm
 * that callers never pass one.
 *
 * @param nChars the number of characters to consume
 * @return a parser yielding the nChars characters it consumed
 */
def generateInputNCharactersParser(nChars: Long): Parser[String] = {
  val rNChars = "(?s).{" + nChars.toString() + "}"
  val firstNChars: Parser[String] = "generateInputNCharactersParser.firstNChars".!!!(rNChars.r)
  firstNChars
}
}