blob: 486926b9f7cf496aa4f150669fbda1a969ef1b25 [file] [log] [blame]
package edu.illinois.ncsa.daffodil.io
import java.nio.ByteBuffer
/**
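* Highly optimized converter from ASCII bytes to Unicode characters.
* Bytes with the most significant bit set are not legal ASCII and are
* converted to the Unicode replacement character (U+FFFD).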
*/
object FastAsciiToUnicodeConverter {

  /**
   * Converts the ASCII bytes of `bb` into a buffer of Unicode characters.
   *
   * Every byte from index 0 up to (but excluding) `bb.limit` is converted,
   * independent of the current position and byte order of `bb`. The state
   * of `bb` (position, limit, mark, order) is not modified.
   *
   * @param bb buffer holding the ASCII bytes to convert
   * @return a new character buffer with position 0 and limit `bb.limit`,
   *         holding one character per input byte; bytes with the high bit
   *         set become [[UnicodeReplacementCharacter]]
   */
  def convert(bb: ByteBuffer): java.nio.CharBuffer = {
    val numBytes = bb.limit()

    // Read through an independent view that is forced to big-endian and
    // rewound to 0. asLongBuffer() starts at the *current position* and
    // honours the buffer's byte order, so reading the caller's buffer
    // directly would misalign the output whenever position != 0 and would
    // reverse each group of 8 characters for a little-endian buffer.
    val src = bb.duplicate().order(java.nio.ByteOrder.BIG_ENDIAN)
    src.position(0)

    // Freshly allocated buffers are big-endian, which matches the char
    // packing order produced by convertLong.
    val dst = ByteBuffer.allocate(2 * numBytes)
    val dstChars = dst.asCharBuffer()

    //
    // Go after data in the largest chunks we can (Long)
    // so as to eliminate per-byte/char bounds checks
    //
    val srcLongs = src.asLongBuffer()
    val dstLongs = dst.asLongBuffer()
    var wordsLeft = srcLongs.limit()
    while (wordsLeft > 0) {
      val word = srcLongs.get()
      // Each input long (8 bytes) expands to two output longs (4 chars each):
      // first the high 4 bytes, then the low 4 bytes.
      dstLongs.put(convertLong((word >>> 32).toInt))
      dstLongs.put(convertLong(word.toInt))
      wordsLeft -= 1
    }

    // Whatever does not fill a whole long (at most 7 bytes) is converted
    // one byte at a time using absolute indexes.
    var i = numBytes & ~7
    while (i < numBytes) {
      dstChars.put(i, convertByte(src.get(i)))
      i += 1
    }

    // A fresh view over the output: position 0, limit == numBytes chars.
    dst.asCharBuffer()
  }

  /** Character substituted for any byte that is not a legal ASCII code. */
  val UnicodeReplacementCharacter: Char = 0xFFFD.toChar

  /**
   * Convert a single byte of ascii to unicode.
   * If the MSBit is set (negative byte) then that's
   * not a legal character code so produce the unicode
   * replacement character.
   */
  @inline
  def convertByte(byte: Byte): Char = {
    if (byte < 0) UnicodeReplacementCharacter
    else byte.toChar
  }

  /**
   * Convert the low 8 bits of `int`, treated as one ASCII byte, to unicode.
   * The upper 24 bits are ignored. Values above 127 produce the unicode
   * replacement character.
   */
  @inline
  def convertInt(int: Int): Char = {
    val low = int & 0xFF
    if (low > 127) UnicodeReplacementCharacter
    else low.toChar
  }

  /**
   * Convert 4 ASCII bytes packed into an Int into 4 unicode characters
   * packed into a Long, preserving their order: the most significant byte
   * of `bytes` becomes the most significant 16 bits of the result.
   */
  @inline
  def convertLong(bytes: Int): Long = {
    // convertInt masks to the low 8 bits, so plain shifts are sufficient.
    // Char.toLong zero-extends, so 0xFFFD never smears into higher lanes.
    val c1 = convertInt(bytes >>> 24).toLong
    val c2 = convertInt(bytes >>> 16).toLong
    val c3 = convertInt(bytes >>> 8).toLong
    val c4 = convertInt(bytes).toLong
    (c1 << 48) | (c2 << 32) | (c3 << 16) | c4
  }
}