// daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/processors/DFDLDelimiter.scala (daffodil, Git at Google)

 package edu.illinois.ncsa.daffodil.processors

 /* Copyright (c) 2012-2013 Tresys Technology, LLC. All rights reserved.
  *
  * Developed by: Tresys Technology, LLC
  *               http://www.tresys.com
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy of
  * this software and associated documentation files (the "Software"), to deal with
  * the Software without restriction, including without limitation the rights to
  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  * of the Software, and to permit persons to whom the Software is furnished to do
  * so, subject to the following conditions:
  *
  *  1. Redistributions of source code must retain the above copyright notice,
  *     this list of conditions and the following disclaimers.
  *
  *  2. Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimers in the
  *     documentation and/or other materials provided with the distribution.
  *
  *  3. Neither the names of Tresys Technology, nor the names of its contributors
  *     may be used to endorse or promote products derived from this Software
  *     without specific prior written permission.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
  * SOFTWARE.
  */

 import java.util.regex.Pattern
 import java.util.logging.Logging
 import scala.util.control.Breaks
 import java.util.regex.Matcher
 import scala.collection.mutable.Queue
 import edu.illinois.ncsa.daffodil.util.Enum

 // Enumeration of the roles a delimiter can play.
 object DelimiterType extends Enum {
   // Common supertype of the values below; sealed, so the set is closed to this file.
   sealed trait Type extends EnumValueType

   case object Separator extends Type
   case object Terminator extends Type
   // Presumably used when no delimiter applies at all -- confirm against callers.
   case object NotDelimited extends Type
 }

 // Enumeration of where a delimiter comes from.
 // NOTE(review): the exact meaning of Local vs. Remote is defined by the callers,
 // not visible here -- confirm before relying on it.
 object DelimiterLocation extends Enum {
   // Common supertype of the values below; sealed, so the set is closed to this file.
   sealed trait Type extends EnumValueType

   case object Local extends Type
   case object Remote extends Type
 }

 class Delimiter {
   var delimiterStr: String = "" // String representation of delimiter Ex. "%WSP;,%WSP*;"

   var delimBuf: Array[DelimBase] = Array.empty[DelimBase] /* Buffer where each cell (DelimBase) represents a character
 		  												     in the delimiter string */

   var delimRegExParseDelim: String = "" // Regex to actually parse the entire delimiter

   // Pre-compiled RegEx patterns for finding character classes
   lazy val NL = Pattern.compile("%(NL);", Pattern.MULTILINE)
   lazy val WSP = Pattern.compile("%(WSP);", Pattern.MULTILINE)
   lazy val WSP_Plus = Pattern.compile("%(WSP\\+);", Pattern.MULTILINE)
   lazy val WSP_Star = Pattern.compile("%(WSP\\*);", Pattern.MULTILINE)

   // Renders the raw delimiter string wrapped as "Delimiter[...]".
   override def toString(): String = "Delimiter[" + delimiterStr + "]"

   // Must call to create the necessary structures
   //
   // Initialises this instance from pDelimiter: stores the raw string, builds the
   // cell buffer from it and derives the matching regular expression.
   def apply(pDelimiter: String): Unit = {
     delimiterStr = pDelimiter
     val cells = buildDelimBuf(pDelimiter)
     delimBuf = cells
     delimRegExParseDelim = delimRegexParseDelim(cells)
   }

   // Reduces complicated delimiters containing consecutive WSP, WSP* and WSP+
   // character classes.
   //
   // Ex. %WSP;%WSP*;%NL;%WSP+;%WSP*
   // 	can be reduced to: %WSP+;%NL;%WSP+;
   //
   // TODO: Maybe should have an error message for the example.  What did they mean?
   // Problem because NL characters are in WSP.  Possible to consume the expected NL
   // and thus the rest of the delimiter may not match.
   //
   // Here we should note that %WSP;%WSP;%WSP; is NOT equivalent to %WSP+;
   // as WSP+ would imply that %WSP;%WSP;%WSP;%WSP; is also valid when in fact
   // it may not be.
   //
   // Collapses each run of consecutive WSP / WSP+ / WSP* cells into the single
   // equivalent cell chosen by getReducedDelim. Runs made only of two or more
   // plain WSP cells cannot be merged and are re-emitted one-for-one as fresh
   // WSPDelim objects. Every emitted cell has its index set to its position in
   // the returned array (non-whitespace input cells are re-indexed in place).
   def reduceDelimBuf(delims: Array[DelimBase]): Array[DelimBase] = {
     val reduced: Queue[DelimBase] = new Queue[DelimBase]()
     var nextIndex: Int = 0 // slot the next emitted cell will occupy

     // Size of the whitespace run currently being accumulated, per kind
     var plainCount: Int = 0
     var plusCount: Int = 0
     var starCount: Int = 0

     // Appends one cell to the output, stamping it with its output position
     def emit(cell: DelimBase): Unit = {
       cell.index = nextIndex
       reduced += cell
       nextIndex += 1
     }

     // Writes out whatever the pending whitespace run reduces to, then starts a new run
     def flushWhitespaceRun(): Unit = {
       getReducedDelim(plainCount, plusCount, starCount) match {
         case Some(merged) => emit(merged)
         case None =>
           // No single-cell equivalent: keep every plain WSP that was seen
           (0 until plainCount).foreach(_ => emit(new WSPDelim))
       }
       plainCount = 0
       plusCount = 0
       starCount = 0
     }

     for (delim <- delims) {
       delim match {
         case _: WSPDelim => plainCount += 1
         case _: WSPPlusDelim => plusCount += 1
         case _: WSPStarDelim => starCount += 1
         case other =>
           // A non-whitespace cell ends the current run
           flushWhitespaceRun()
           emit(other)
       }
     }

     // The delimiter may end in whitespace classes
     flushWhitespaceRun()

     reduced.toArray[DelimBase]
   }

   // Based upon what WSP delimiters were encountered,
   // determine the equivalent representation (if any) and return it.
   //
   // Maps the composition of a whitespace run to the single cell that replaces it.
   //
   //   WSP   WSP+   WSP*   result
   //   any   >0     any    WSP+
   //   0     0      >0     WSP*
   //   >0    0      >0     WSP+
   //   1     0      0      WSP
   //   otherwise           None (empty run, or several plain WSP only)
   def getReducedDelim(numWSP: Int, numWSP_Plus: Int, numWSP_Star: Int): Option[DelimBase] = {
     (numWSP, numWSP_Plus, numWSP_Star) match {
       case (_, plus, _) if plus != 0 => Some(new WSPPlusDelim())
       case (0, _, star) if star != 0 => Some(new WSPStarDelim())
       case (_, _, star) if star != 0 => Some(new WSPPlusDelim())
       case (1, _, _) => Some(new WSPDelim())
       case _ => None
     }
   }

   // Creates a RegEx representation of the delimiter.
   // Important for comparing the actual delimiter against
   // the data returned.
   // Ex. separator = "%WSP*;,%WSP*;"
   //	delimiter retrieved from data: ", "
   // There is no way that the separator text can equate to the data
   // when character classes are involved, RegEx allows us to determine
   // if the delimiter/data was in the expected format.
   //
   // Builds the regular expression source that matches the whole delimiter:
   // one fragment per cell, concatenated in buffer order. Literal characters
   // that are regex metacharacters are backslash-escaped.
   def delimRegexParseDelim(delimiterBuf: Array[DelimBase] = delimBuf): String = {
     // Atomic group, eliminates needless backtracking
     val newLine = "(?>" +
       "(\\r\\n)|" + // CRLF
       "((?<!\\r)\\n)|" + // LF not preceded by CR
       "(\\r(?!\\n))|" + // CR not followed by LF
       "\\u0085|\\u2028)"

     // Exactly one whitespace character; the quantified forms append + or *
     val whitespace = "(\\s|\\u0020|\\u0009|\\u000A|\\u000B|\\u000C|\\u000D|\\u0085" +
       "|\\u00A0|\\u1680|\\u180E|\\u2000|\\u2001|\\u2002|\\u2003|\\u2004|\\u2005|\\u2006|" +
       "\\u2007|\\u2008|\\u2009|\\u200A|\\u2028|\\u2029|\\u202F|\\u205F|\\u3000)"

     val fragments = delimiterBuf.map {
       case _: NLDelim => newLine
       case _: WSPDelim => whitespace // Single space
       case _: WSPPlusDelim => whitespace + "+" // One or more spaces
       case _: WSPStarDelim => whitespace + "*" // None or more spaces
       case literal: CharDelim =>
         literal.char match {
           case '[' | '\\' | '^' | '$' | '.' | '|' | '?' | '*' | '+' | '(' | ')' | '{' | '}' =>
             "\\" + literal.char
           case plain => plain.toString
         }
     }
     fragments.mkString
   }

   // Returns the first character class in the String
   // or None if one is not found
   //
   // Looks for the character class entities %NL; %WSP; %WSP+; %WSP*; in str and
   // returns (length of the match, cell for it) for the one that starts earliest,
   // or (-1, None) when str contains none of them.
   def findCharClasses(str: String): (Int, Option[DelimBase]) = {
     // Each recognised entity paired with a factory for the cell that represents it
     val recognisers: Seq[(Pattern, () => DelimBase)] = Seq(
       (NL, () => new NLDelim()),
       (WSP, () => new WSPDelim()),
       (WSP_Plus, () => new WSPPlusDelim()),
       (WSP_Star, () => new WSPStarDelim()))

     // (start offset, matched length, factory) for every entity found in str
     val hits = recognisers.flatMap {
       case (pattern, makeDelim) =>
         val m: Matcher = pattern.matcher(str)
         if (m.find()) Some((m.start(), m.end() - m.start(), makeDelim)) else None
     }

     if (hits.isEmpty) {
       (-1, None) // Unrecognized CharClass
     } else {
       val (_, length, makeDelim) = hits.minBy(_._1)
       (length, Some(makeDelim()))
     }
   }

   // Populates the delimBuf object with an object
   // representation of the characters within the delimiter
   // string.
   //
   // Converts delimStr into its cell representation: one cell per literal
   // character and one cell per recognised character class entity
   // (%NL; %WSP; %WSP+; %WSP*;). A '%' that does not start a recognised entity
   // is kept as a literal character. When more than one entity was recognised
   // the buffer is passed through reduceDelimBuf.
   //
   // The candidate entity text is located with indexOf rather than
   // String.split: split drops trailing empty strings, so a remainder made
   // only of '%' (e.g. the delimiter "%%") yields an empty array and indexing
   // element 0 used to throw ArrayIndexOutOfBoundsException.
   def buildDelimBuf(delimStr: String): Array[DelimBase] = {
     val cells: Queue[DelimBase] = new Queue[DelimBase]()
     var newIdx = 0 // index within delimBuf array
     var numCharClass: Int = 0 // recognised character class entities so far
     var pos = 0 // cursor into delimStr

     // Appends one cell, recording its position within the buffer
     def append(cell: DelimBase): Unit = {
       cell.index = newIdx
       cells += cell
       newIdx += 1
     }

     while (pos < delimStr.length()) {
       val c: Char = delimStr.charAt(pos)

       val recognised: Option[(Int, DelimBase)] =
         if (c != '%') {
           None
         } else {
           // Possible character class: examine the text from this '%' up to
           // (not including) the next '%', or to the end of the string.
           val nextPercent = delimStr.indexOf('%', pos + 1)
           val candidate =
             if (nextPercent == -1) delimStr.substring(pos)
             else delimStr.substring(pos, nextPercent)

           findCharClasses(candidate) match {
             case (matchLength, Some(classDelim)) if matchLength != -1 =>
               Some((matchLength, classDelim))
             case _ => None
           }
         }

       recognised match {
         case Some((matchLength, classDelim)) =>
           append(classDelim)
           numCharClass += 1
           pos += matchLength // advance cursor past the Character Class
         case None =>
           // Plain character, or a '%' that is not a recognised entity
           append(new CharDelim(c))
           pos += 1
       }
     }

     val built = cells.toArray[DelimBase]
     // Reduction is only attempted when more than one Char Class was found
     if (numCharClass > 1) reduceDelimBuf(built) else built
   }
 }

 // Common base of every cell type defined in this file (literal characters and
 // character classes).
 abstract class DelimBase extends Base {
   // Short name of the concrete cell type; also used as the default toString.
   def typeName: String
   // Debug output hook; the implementations in this file are currently no-ops.
   def print: Unit
   // Printable description of the cell.
   def printStr: String

   override def toString(): String = typeName
 }

 // Mutable per-cell matching state shared by all delimiter cells.
 trait Base {
   var isMatched: Boolean = false
   var index: Int = -1 // position of the cell within its buffer
   var charPos: Int = -1
   var charPosEnd: Int = -1

   // Resets the match state. `index` is deliberately left untouched.
   def clear: Unit = {
     charPosEnd = -1
     charPos = -1
     isMatched = false
   }

   // True when charIn is acceptable for this cell.
   def checkMatch(charIn: Char): Boolean
 }

 // Cell for one literal character of the delimiter.
 class CharDelim(val char: Char) extends DelimBase {
   lazy val typeName = "CharDelim"

   // Pure comparison against the literal.
   // NOTE(review): unlike NLDelim/WSPBase this does not update isMatched -- confirm intended.
   def checkMatch(charIn: Char): Boolean = char == charIn

   def print: Unit = {
     //log(LogLevel.Debug, "\t\t\t" + typeName + ": '" + char + "' d" + char.toInt + " isMatched: " + isMatched.toString()))
   }

   def printStr = typeName + "(" + char + ")"

   override def toString(): String = typeName + "[" + char + "]"
 }

 // Helper mixed into the character class cells.
 trait CharacterClass {
   // Interprets everything after the first two characters of `unicode` as a
   // hexadecimal number and returns the Char with that code, e.g. the
   // six-character text \u000A yields the line feed character.
   def convertUnicodeToChar(unicode: String): Char = {
     val hexDigits = unicode.substring(2)
     Integer.parseInt(hexDigits, 16).toChar
   }
 }

 // Cell for the %NL; character class: a single newline-type character.
 class NLDelim extends DelimBase with CharacterClass {
   lazy val typeName = "NLDelim"

   lazy val LF: Char = convertUnicodeToChar("\\u000A") // line feed
   lazy val CR: Char = convertUnicodeToChar("\\u000D") // carriage return
   lazy val NEL: Char = convertUnicodeToChar("\\u0085") // next line
   lazy val LS: Char = convertUnicodeToChar("\\u2028") // line separator

   // Records in isMatched whether charIn is one of LF, CR, NEL or LS and returns it.
   def checkMatch(charIn: Char): Boolean = {
     isMatched = charIn == LF || charIn == CR || charIn == NEL || charIn == LS
     isMatched
   }

   def print: Unit = {
     //log(LogLevel.Debug, "\t\t\t" + typeName + ": NL" + " isMatched: " + isMatched.toString()))
   }

   def printStr = typeName
 }

 // The individual characters that make up the whitespace character class.
 trait WSP extends CharacterClass {
   // ASCII control whitespace
   lazy val CTRL0: Char = convertUnicodeToChar("\\u0009") // character tabulation
   lazy val CTRL1: Char = convertUnicodeToChar("\\u000A") // line feed
   lazy val CTRL2: Char = convertUnicodeToChar("\\u000B") // line tabulation
   lazy val CTRL3: Char = convertUnicodeToChar("\\u000C") // form feed
   lazy val CTRL4: Char = convertUnicodeToChar("\\u000D") // carriage return

   lazy val SPACE: Char = convertUnicodeToChar("\\u0020") // space

   lazy val NEL: Char = convertUnicodeToChar("\\u0085") // next line

   lazy val NBSP: Char = convertUnicodeToChar("\\u00A0") // no-break space

   lazy val OGHAM: Char = convertUnicodeToChar("\\u1680") // Ogham space mark
   lazy val MONG: Char = convertUnicodeToChar("\\u180E") // Mongolian vowel separator

   // U+2000 .. U+200A: the fixed-width spaces (en quad through hair space)
   lazy val SP0: Char = convertUnicodeToChar("\\u2000")
   lazy val SP1: Char = convertUnicodeToChar("\\u2001")
   lazy val SP2: Char = convertUnicodeToChar("\\u2002")
   lazy val SP3: Char = convertUnicodeToChar("\\u2003")
   lazy val SP4: Char = convertUnicodeToChar("\\u2004")
   lazy val SP5: Char = convertUnicodeToChar("\\u2005")
   lazy val SP6: Char = convertUnicodeToChar("\\u2006")
   lazy val SP7: Char = convertUnicodeToChar("\\u2007")
   lazy val SP8: Char = convertUnicodeToChar("\\u2008")
   lazy val SP9: Char = convertUnicodeToChar("\\u2009")
   lazy val SP10: Char = convertUnicodeToChar("\\u200A")

   lazy val LSP: Char = convertUnicodeToChar("\\u2028") // line separator
   lazy val PSP: Char = convertUnicodeToChar("\\u2029") // paragraph separator
   lazy val NARROW: Char = convertUnicodeToChar("\\u202F") // narrow no-break space
   lazy val MED: Char = convertUnicodeToChar("\\u205F") // medium mathematical space
   lazy val IDE: Char = convertUnicodeToChar("\\u3000") // ideographic space
 }

 // Shared implementation of the whitespace cells (WSP, WSP+, WSP*).
 class WSPBase extends DelimBase with WSP {
   lazy val typeName = "WSPBase"

   // Records in isMatched whether charIn is one of the WSP characters and returns it.
   def checkMatch(charIn: Char): Boolean = {
     isMatched = charIn match {
       case CTRL0 | CTRL1 | CTRL2 | CTRL3 | CTRL4 => true
       case SPACE | NEL | NBSP | OGHAM | MONG => true
       case SP0 | SP1 | SP2 | SP3 | SP4 | SP5 | SP6 | SP7 | SP8 | SP9 | SP10 => true
       case LSP | PSP | NARROW | MED | IDE => true
       case _ => false
     }
     isMatched
   }

   def print: Unit = {
     //log(LogLevel.Debug, "\t\t\t" + typeName + ": WSPBase" + " isMatched: " + isMatched.toString()))
   }

   def printStr = typeName
 }

 // Cell for %WSP; -- exactly one whitespace character.
 class WSPDelim extends WSPBase {
   override lazy val typeName = "WSPDelim"

   override def print: Unit = {
     //log(LogLevel.Debug, "\t\t\t" + typeName + ": WSP" + " isMatched: " + isMatched.toString()))
   }

   override def printStr = typeName
 }

 // Cell for %WSP+; -- one or more whitespace characters.
 class WSPPlusDelim extends WSPBase {
   override lazy val typeName = "WSP+Delim"

   override def print: Unit = {
     //log(LogLevel.Debug, "\t\t\t" + typeName + ": WSP+" + " isMatched: " + isMatched.toString()))
   }

   override def printStr = typeName
 }

 // Cell for %WSP*; -- zero or more whitespace characters.
 class WSPStarDelim extends WSPBase {
   override lazy val typeName = "WSP*Delim"

   override def print: Unit = {
     //log(LogLevel.Debug, "\t\t\t" + typeName + ": WSP*" + " isMatched: " + isMatched.toString()))
   }

   override def printStr = typeName
 }
	package edu.illinois.ncsa.daffodil.processors

	/* Copyright (c) 2012-2013 Tresys Technology, LLC. All rights reserved.
	*
	* Developed by: Tresys Technology, LLC
	* http://www.tresys.com
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy of
	* this software and associated documentation files (the "Software"), to deal with
	* the Software without restriction, including without limitation the rights to
	* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	* of the Software, and to permit persons to whom the Software is furnished to do
	* so, subject to the following conditions:
	*
	* 1. Redistributions of source code must retain the above copyright notice,
	* this list of conditions and the following disclaimers.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimers in the
	* documentation and/or other materials provided with the distribution.
	*
	* 3. Neither the names of Tresys Technology, nor the names of its contributors
	* may be used to endorse or promote products derived from this Software
	* without specific prior written permission.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
	* SOFTWARE.
	*/

	import java.util.regex.Pattern
	import java.util.logging.Logging
	import scala.util.control.Breaks
	import java.util.regex.Matcher
	import scala.collection.mutable.Queue
	import edu.illinois.ncsa.daffodil.util.Enum

	// Enumeration of the roles a delimiter can play.
	object DelimiterType extends Enum {
	  // Common supertype of the values below; sealed, so the set is closed to this file.
	  sealed trait Type extends EnumValueType

	  case object Separator extends Type
	  case object Terminator extends Type
	  // Presumably used when no delimiter applies at all -- confirm against callers.
	  case object NotDelimited extends Type
	}

	// Enumeration of where a delimiter comes from.
	// NOTE(review): the exact meaning of Local vs. Remote is defined by the callers,
	// not visible here -- confirm before relying on it.
	object DelimiterLocation extends Enum {
	  // Common supertype of the values below; sealed, so the set is closed to this file.
	  sealed trait Type extends EnumValueType

	  case object Local extends Type
	  case object Remote extends Type
	}

	class Delimiter {
	var delimiterStr: String = "" // String representation of delimiter Ex. "%WSP;,%WSP*;"

	var delimBuf: Array[DelimBase] = Array.empty[DelimBase] /* Buffer where each cell (DelimBase) represents a character
	in the delimiter string */

	var delimRegExParseDelim: String = "" // Regex to actually parse the entire delimiter

	// Pre-compiled RegEx patterns for finding character classes
	lazy val NL = Pattern.compile("%(NL);", Pattern.MULTILINE)
	lazy val WSP = Pattern.compile("%(WSP);", Pattern.MULTILINE)
	lazy val WSP_Plus = Pattern.compile("%(WSP\\+);", Pattern.MULTILINE)
	lazy val WSP_Star = Pattern.compile("%(WSP\\*);", Pattern.MULTILINE)

	// Renders the raw delimiter string wrapped as "Delimiter[...]".
	override def toString(): String = "Delimiter[" + delimiterStr + "]"

	// Must call to create the necessary structures
	//
	// Initialises this instance from pDelimiter: stores the raw string, builds the
	// cell buffer from it and derives the matching regular expression.
	def apply(pDelimiter: String): Unit = {
	  delimiterStr = pDelimiter
	  val cells = buildDelimBuf(pDelimiter)
	  delimBuf = cells
	  delimRegExParseDelim = delimRegexParseDelim(cells)
	}

	// Reduces complicated delimiters containing consecutive WSP, WSP* and WSP+
	// character classes.
	//
	// Ex. %WSP;%WSP*;%NL;%WSP+;%WSP*
	// can be reduced to: %WSP+;%NL;%WSP+;
	//
	// TODO: Maybe should have an error message for the example. What did they mean?
	// Problem because NL characters are in WSP. Possible to consume the expected NL
	// and thus the rest of the delimiter may not match.
	//
	// Here we should note that %WSP;%WSP;%WSP; is NOT equivalent to %WSP+;
	// as WSP+ would imply that %WSP;%WSP;%WSP;%WSP; is also valid when in fact
	// it may not be.
	//
	// Collapses each run of consecutive WSP / WSP+ / WSP* cells into the single
	// equivalent cell chosen by getReducedDelim. Runs made only of two or more
	// plain WSP cells cannot be merged and are re-emitted one-for-one as fresh
	// WSPDelim objects. Every emitted cell has its index set to its position in
	// the returned array (non-whitespace input cells are re-indexed in place).
	def reduceDelimBuf(delims: Array[DelimBase]): Array[DelimBase] = {
	  val reduced: Queue[DelimBase] = new Queue[DelimBase]()
	  var nextIndex: Int = 0 // slot the next emitted cell will occupy

	  // Size of the whitespace run currently being accumulated, per kind
	  var plainCount: Int = 0
	  var plusCount: Int = 0
	  var starCount: Int = 0

	  // Appends one cell to the output, stamping it with its output position
	  def emit(cell: DelimBase): Unit = {
	    cell.index = nextIndex
	    reduced += cell
	    nextIndex += 1
	  }

	  // Writes out whatever the pending whitespace run reduces to, then starts a new run
	  def flushWhitespaceRun(): Unit = {
	    getReducedDelim(plainCount, plusCount, starCount) match {
	      case Some(merged) => emit(merged)
	      case None =>
	        // No single-cell equivalent: keep every plain WSP that was seen
	        (0 until plainCount).foreach(_ => emit(new WSPDelim))
	    }
	    plainCount = 0
	    plusCount = 0
	    starCount = 0
	  }

	  for (delim <- delims) {
	    delim match {
	      case _: WSPDelim => plainCount += 1
	      case _: WSPPlusDelim => plusCount += 1
	      case _: WSPStarDelim => starCount += 1
	      case other =>
	        // A non-whitespace cell ends the current run
	        flushWhitespaceRun()
	        emit(other)
	    }
	  }

	  // The delimiter may end in whitespace classes
	  flushWhitespaceRun()

	  reduced.toArray[DelimBase]
	}

	// Based upon what WSP delimiters were encountered,
	// determine the equivalent representation (if any) and return it.
	//
	// Maps the composition of a whitespace run to the single cell that replaces it.
	//
	//   WSP   WSP+   WSP*   result
	//   any   >0     any    WSP+
	//   0     0      >0     WSP*
	//   >0    0      >0     WSP+
	//   1     0      0      WSP
	//   otherwise           None (empty run, or several plain WSP only)
	def getReducedDelim(numWSP: Int, numWSP_Plus: Int, numWSP_Star: Int): Option[DelimBase] = {
	  (numWSP, numWSP_Plus, numWSP_Star) match {
	    case (_, plus, _) if plus != 0 => Some(new WSPPlusDelim())
	    case (0, _, star) if star != 0 => Some(new WSPStarDelim())
	    case (_, _, star) if star != 0 => Some(new WSPPlusDelim())
	    case (1, _, _) => Some(new WSPDelim())
	    case _ => None
	  }
	}

	// Creates a RegEx representation of the delimiter.
	// Important for comparing the actual delimiter against
	// the data returned.
	// Ex. separator = "%WSP*;,%WSP*;"
	// delimiter retrieved from data: ", "
	// There is no way that the separator text can equate to the data
	// when character classes are involved, RegEx allows us to determine
	// if the delimiter/data was in the expected format.
	//
	// Builds the regular expression source that matches the whole delimiter:
	// one fragment per cell, concatenated in buffer order. Literal characters
	// that are regex metacharacters are backslash-escaped.
	//
	// The alternation bars and the '*' literal are written as plain Scala
	// literals: a backslash followed by '|' is not a valid Scala escape, and
	// an empty character literal does not compile.
	def delimRegexParseDelim(delimiterBuf: Array[DelimBase] = delimBuf): String = {
	  // Atomic group, eliminates needless backtracking
	  val newLine = "(?>" +
	    "(\\r\\n)|" + // CRLF
	    "((?<!\\r)\\n)|" + // LF not preceded by CR
	    "(\\r(?!\\n))|" + // CR not followed by LF
	    "\\u0085|\\u2028)"

	  // Exactly one whitespace character; the quantified forms append + or *
	  val whitespace = "(\\s|\\u0020|\\u0009|\\u000A|\\u000B|\\u000C|\\u000D|\\u0085" +
	    "|\\u00A0|\\u1680|\\u180E|\\u2000|\\u2001|\\u2002|\\u2003|\\u2004|\\u2005|\\u2006|" +
	    "\\u2007|\\u2008|\\u2009|\\u200A|\\u2028|\\u2029|\\u202F|\\u205F|\\u3000)"

	  val fragments = delimiterBuf.map {
	    case _: NLDelim => newLine
	    case _: WSPDelim => whitespace // Single space
	    case _: WSPPlusDelim => whitespace + "+" // One or more spaces
	    case _: WSPStarDelim => whitespace + "*" // None or more spaces
	    case literal: CharDelim =>
	      literal.char match {
	        case '[' | '\\' | '^' | '$' | '.' | '|' | '?' | '*' | '+' | '(' | ')' | '{' | '}' =>
	          "\\" + literal.char
	        case plain => plain.toString
	      }
	  }
	  fragments.mkString
	}

	// Returns the first character class in the String
	// or None if one is not found
	//
	// Looks for the character class entities %NL; %WSP; %WSP+; %WSP*; in str and
	// returns (length of the match, cell for it) for the one that starts earliest,
	// or (-1, None) when str contains none of them.
	def findCharClasses(str: String): (Int, Option[DelimBase]) = {
	  // Each recognised entity paired with a factory for the cell that represents it
	  val recognisers: Seq[(Pattern, () => DelimBase)] = Seq(
	    (NL, () => new NLDelim()),
	    (WSP, () => new WSPDelim()),
	    (WSP_Plus, () => new WSPPlusDelim()),
	    (WSP_Star, () => new WSPStarDelim()))

	  // (start offset, matched length, factory) for every entity found in str
	  val hits = recognisers.flatMap {
	    case (pattern, makeDelim) =>
	      val m: Matcher = pattern.matcher(str)
	      if (m.find()) Some((m.start(), m.end() - m.start(), makeDelim)) else None
	  }

	  if (hits.isEmpty) {
	    (-1, None) // Unrecognized CharClass
	  } else {
	    val (_, length, makeDelim) = hits.minBy(_._1)
	    (length, Some(makeDelim()))
	  }
	}

	// Populates the delimBuf object with an object
	// representation of the characters within the delimiter
	// string.
	//
	// Converts delimStr into its cell representation: one cell per literal
	// character and one cell per recognised character class entity
	// (%NL; %WSP; %WSP+; %WSP*;). A '%' that does not start a recognised entity
	// is kept as a literal character. When more than one entity was recognised
	// the buffer is passed through reduceDelimBuf.
	//
	// The candidate entity text is located with indexOf rather than
	// String.split: split drops trailing empty strings, so a remainder made
	// only of '%' (e.g. the delimiter "%%") yields an empty array and indexing
	// element 0 used to throw ArrayIndexOutOfBoundsException.
	def buildDelimBuf(delimStr: String): Array[DelimBase] = {
	  val cells: Queue[DelimBase] = new Queue[DelimBase]()
	  var newIdx = 0 // index within delimBuf array
	  var numCharClass: Int = 0 // recognised character class entities so far
	  var pos = 0 // cursor into delimStr

	  // Appends one cell, recording its position within the buffer
	  def append(cell: DelimBase): Unit = {
	    cell.index = newIdx
	    cells += cell
	    newIdx += 1
	  }

	  while (pos < delimStr.length()) {
	    val c: Char = delimStr.charAt(pos)

	    val recognised: Option[(Int, DelimBase)] =
	      if (c != '%') {
	        None
	      } else {
	        // Possible character class: examine the text from this '%' up to
	        // (not including) the next '%', or to the end of the string.
	        val nextPercent = delimStr.indexOf('%', pos + 1)
	        val candidate =
	          if (nextPercent == -1) delimStr.substring(pos)
	          else delimStr.substring(pos, nextPercent)

	        findCharClasses(candidate) match {
	          case (matchLength, Some(classDelim)) if matchLength != -1 =>
	            Some((matchLength, classDelim))
	          case _ => None
	        }
	      }

	    recognised match {
	      case Some((matchLength, classDelim)) =>
	        append(classDelim)
	        numCharClass += 1
	        pos += matchLength // advance cursor past the Character Class
	      case None =>
	        // Plain character, or a '%' that is not a recognised entity
	        append(new CharDelim(c))
	        pos += 1
	    }
	  }

	  val built = cells.toArray[DelimBase]
	  // Reduction is only attempted when more than one Char Class was found
	  if (numCharClass > 1) reduceDelimBuf(built) else built
	}
	}

	// Common base of every cell type defined in this file (literal characters and
	// character classes).
	abstract class DelimBase extends Base {
	  // Short name of the concrete cell type; also used as the default toString.
	  def typeName: String
	  // Debug output hook; the implementations in this file are currently no-ops.
	  def print: Unit
	  // Printable description of the cell.
	  def printStr: String

	  override def toString(): String = typeName
	}

	// Mutable per-cell matching state shared by all delimiter cells.
	trait Base {
	  var isMatched: Boolean = false
	  var index: Int = -1 // position of the cell within its buffer
	  var charPos: Int = -1
	  var charPosEnd: Int = -1

	  // Resets the match state. `index` is deliberately left untouched.
	  def clear: Unit = {
	    charPosEnd = -1
	    charPos = -1
	    isMatched = false
	  }

	  // True when charIn is acceptable for this cell.
	  def checkMatch(charIn: Char): Boolean
	}

	// Cell for one literal character of the delimiter.
	class CharDelim(val char: Char) extends DelimBase {
	  lazy val typeName = "CharDelim"

	  // Pure comparison against the literal.
	  // NOTE(review): unlike NLDelim/WSPBase this does not update isMatched -- confirm intended.
	  def checkMatch(charIn: Char): Boolean = char == charIn

	  def print: Unit = {
	    //log(LogLevel.Debug, "\t\t\t" + typeName + ": '" + char + "' d" + char.toInt + " isMatched: " + isMatched.toString()))
	  }

	  def printStr = typeName + "(" + char + ")"

	  override def toString(): String = typeName + "[" + char + "]"
	}

	// Helper mixed into the character class cells.
	trait CharacterClass {
	  // Interprets everything after the first two characters of `unicode` as a
	  // hexadecimal number and returns the Char with that code, e.g. the
	  // six-character text \u000A yields the line feed character.
	  def convertUnicodeToChar(unicode: String): Char = {
	    val hexDigits = unicode.substring(2)
	    Integer.parseInt(hexDigits, 16).toChar
	  }
	}

	// Cell for the %NL; character class: a single newline-type character.
	class NLDelim extends DelimBase with CharacterClass {
	  lazy val typeName = "NLDelim"

	  lazy val LF: Char = convertUnicodeToChar("\\u000A") // line feed
	  lazy val CR: Char = convertUnicodeToChar("\\u000D") // carriage return
	  lazy val NEL: Char = convertUnicodeToChar("\\u0085") // next line
	  lazy val LS: Char = convertUnicodeToChar("\\u2028") // line separator

	  // Records in isMatched whether charIn is one of LF, CR, NEL or LS and returns it.
	  // (Pattern alternatives are separated by a bare '|'; a backslash before it
	  // does not compile.)
	  def checkMatch(charIn: Char): Boolean = {
	    charIn match {
	      case LF | CR | NEL | LS => isMatched = true
	      case _ => isMatched = false
	    }
	    isMatched
	  }

	  def print: Unit = {
	    //log(LogLevel.Debug, "\t\t\t" + typeName + ": NL" + " isMatched: " + isMatched.toString()))
	  }

	  def printStr = typeName
	}

	// The individual characters that make up the whitespace character class.
	trait WSP extends CharacterClass {
	  // ASCII control whitespace
	  lazy val CTRL0: Char = convertUnicodeToChar("\\u0009") // character tabulation
	  lazy val CTRL1: Char = convertUnicodeToChar("\\u000A") // line feed
	  lazy val CTRL2: Char = convertUnicodeToChar("\\u000B") // line tabulation
	  lazy val CTRL3: Char = convertUnicodeToChar("\\u000C") // form feed
	  lazy val CTRL4: Char = convertUnicodeToChar("\\u000D") // carriage return

	  lazy val SPACE: Char = convertUnicodeToChar("\\u0020") // space

	  lazy val NEL: Char = convertUnicodeToChar("\\u0085") // next line

	  lazy val NBSP: Char = convertUnicodeToChar("\\u00A0") // no-break space

	  lazy val OGHAM: Char = convertUnicodeToChar("\\u1680") // Ogham space mark
	  lazy val MONG: Char = convertUnicodeToChar("\\u180E") // Mongolian vowel separator

	  // U+2000 .. U+200A: the fixed-width spaces (en quad through hair space)
	  lazy val SP0: Char = convertUnicodeToChar("\\u2000")
	  lazy val SP1: Char = convertUnicodeToChar("\\u2001")
	  lazy val SP2: Char = convertUnicodeToChar("\\u2002")
	  lazy val SP3: Char = convertUnicodeToChar("\\u2003")
	  lazy val SP4: Char = convertUnicodeToChar("\\u2004")
	  lazy val SP5: Char = convertUnicodeToChar("\\u2005")
	  lazy val SP6: Char = convertUnicodeToChar("\\u2006")
	  lazy val SP7: Char = convertUnicodeToChar("\\u2007")
	  lazy val SP8: Char = convertUnicodeToChar("\\u2008")
	  lazy val SP9: Char = convertUnicodeToChar("\\u2009")
	  lazy val SP10: Char = convertUnicodeToChar("\\u200A")

	  lazy val LSP: Char = convertUnicodeToChar("\\u2028") // line separator
	  lazy val PSP: Char = convertUnicodeToChar("\\u2029") // paragraph separator
	  lazy val NARROW: Char = convertUnicodeToChar("\\u202F") // narrow no-break space
	  lazy val MED: Char = convertUnicodeToChar("\\u205F") // medium mathematical space
	  lazy val IDE: Char = convertUnicodeToChar("\\u3000") // ideographic space
	}

	// Shared implementation of the whitespace cells (WSP, WSP+, WSP*).
	class WSPBase extends DelimBase with WSP {
	  lazy val typeName = "WSPBase"

	  // Records in isMatched whether charIn is one of the WSP characters and returns it.
	  // (Pattern alternatives are separated by a bare '|'; a backslash before it
	  // does not compile.)
	  def checkMatch(charIn: Char): Boolean = {
	    charIn match {
	      case CTRL0 | CTRL1 | CTRL2 | CTRL3 | CTRL4 => isMatched = true
	      case SPACE | NEL | NBSP | OGHAM | MONG => isMatched = true
	      case SP0 | SP1 | SP2 | SP3 | SP4 | SP5 | SP6 | SP7 | SP8 | SP9 | SP10 => isMatched = true
	      case LSP | PSP | NARROW | MED | IDE => isMatched = true
	      case _ => isMatched = false
	    }
	    isMatched
	  }

	  def print: Unit = {
	    //log(LogLevel.Debug, "\t\t\t" + typeName + ": WSPBase" + " isMatched: " + isMatched.toString()))
	  }

	  def printStr = typeName
	}

	// Cell for %WSP; -- exactly one whitespace character.
	class WSPDelim extends WSPBase {
	  override lazy val typeName = "WSPDelim"

	  override def print: Unit = {
	    //log(LogLevel.Debug, "\t\t\t" + typeName + ": WSP" + " isMatched: " + isMatched.toString()))
	  }

	  override def printStr = typeName
	}

	// Cell for %WSP+; -- one or more whitespace characters.
	class WSPPlusDelim extends WSPBase {
	  override lazy val typeName = "WSP+Delim"

	  override def print: Unit = {
	    //log(LogLevel.Debug, "\t\t\t" + typeName + ": WSP+" + " isMatched: " + isMatched.toString()))
	  }

	  override def printStr = typeName
	}

	// Cell for %WSP*; -- zero or more whitespace characters.
	class WSPStarDelim extends WSPBase {
	  override lazy val typeName = "WSP*Delim"

	  override def print: Unit = {
	    //log(LogLevel.Debug, "\t\t\t" + typeName + ": WSP*" + " isMatched: " + isMatched.toString()))
	  }

	  override def printStr = typeName
	}