blob: a2a24cef4a60c2c9f289bc9e8cf5d0fd51c2e9e3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hessian
import (
"bufio"
"bytes"
"fmt"
"io"
"reflect"
"strconv"
"unicode/utf8"
"unsafe"
)
import (
gxbytes "github.com/dubbogo/gost/bytes"
perrors "github.com/pkg/errors"
)
/////////////////////////////////////////
// String
/////////////////////////////////////////
// Slice returns a byte slice that views the bytes of s without copying them.
//
// The slice header is filled in from the string header, so the result has
// len == cap == len(s) and aliases the string's storage. Treat the result as
// read-only: Go strings are immutable, and writing through the slice would
// modify the string's backing memory.
func Slice(s string) (b []byte) {
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	dst.Data, dst.Len, dst.Cap = src.Data, src.Len, src.Len
	return b
}
// encodeUcs4Rune writes r into b the way hessian expects string data and
// returns (bytes written, UTF-16 code units produced).
//
// Hessian string lengths count 16-bit characters, not bytes, and a Go rune may
// not fit in 16 bits. A rune in [0x10000, 0x10FFFF] is therefore split into a
// UTF-16 surrogate pair, and each half is written separately with
// encodeUcs2Rune; that case reports 2 code units. Every other rune is written
// with utf8.EncodeRune and reports 1 code unit. Runes above 0x10FFFF cannot be
// represented by hessian and get no special treatment here.
//
// ref:
// - https://en.wikipedia.org/wiki/UTF-16
// - https://en.wikipedia.org/wiki/UCS-4
// - http://www.unicode.org/glossary/#code_point
func encodeUcs4Rune(b []byte, r rune) (int, int) {
	if r < 0x10000 || r > 0x10FFFF {
		return utf8.EncodeRune(b, r), 1
	}

	// Split the 20-bit offset above the BMP into its high and low 10 bits.
	offset := uint32(r) - 0x10000
	highSurrogate := (offset >> 10) + 0xD800
	lowSurrogate := (offset & 0x3FF) + 0xDC00

	written := encodeUcs2Rune(b, highSurrogate)
	written += encodeUcs2Rune(b[written:], lowSurrogate)
	return written, 2
}
// encodeUcs2Rune writes the 16-bit code unit ch into b using the 1-, 2- or
// 3-byte UTF-8 bit layout and returns the number of bytes written.
//
// Surrogate values are written with the plain 3-byte layout like any other
// value at or above 0x800. b must be long enough for the form that is
// selected; no bounds are checked beyond Go's own slice indexing.
func encodeUcs2Rune(b []byte, ch uint32) int {
	switch {
	case ch < 0x80:
		// 0xxxxxxx
		b[0] = byte(ch)
		return 1
	case ch < 0x800:
		// 110xxxxx 10xxxxxx
		b[0] = byte(0xc0 | ((ch >> 6) & 0x1f))
		b[1] = byte(0x80 | (ch & 0x3f))
		return 2
	default:
		// 1110xxxx 10xxxxxx 10xxxxxx
		b[0] = byte(0xe0 | ((ch >> 12) & 0x0f))
		b[1] = byte(0x80 | ((ch >> 6) & 0x3f))
		b[2] = byte(0x80 | (ch & 0x3f))
		return 3
	}
}
// decodeUcs4Rune reads one character from r, joining a UTF-16 surrogate pair
// into a single rune when the first code unit is a high surrogate.
//
// It returns the decoded rune c, the number of 16-bit code units it covers
// (cLen: 1, or 2 for a pair), and the number of bytes reported by
// decodeUcs2Rune for the units read (bLen). When a read fails, cLen is 0 and
// the rune, byte count and error of the failing decodeUcs2Rune call are
// returned unchanged. The second unit of a pair is not checked to be a low
// surrogate.
func decodeUcs4Rune(r *bufio.Reader) (c rune, cLen, bLen int, err error) {
	first, firstLen, err := decodeUcs2Rune(r)
	if err != nil {
		return first, 0, firstLen, err
	}

	// Anything that is not a high surrogate stands on its own.
	if first < 0xD800 || first > 0xDBFF {
		return first, 1, firstLen, nil
	}

	second, secondLen, err := decodeUcs2Rune(r)
	if err != nil {
		return second, 0, secondLen, err
	}

	joined := (first-0xD800)<<10 + (second - 0xDC00) + 0x10000
	return joined, 2, firstLen + secondLen, nil
}
// decodeUcs2Rune reads a single 16-bit code unit from r, stored in the 1-, 2-
// or 3-byte UTF-8 bit layout, and returns (value, byte count, error).
//
// Continuation bytes are not validated; only their low six bits are used.
// On failure the rune is utf8.RuneError and the count is:
//   - 1 when the lead byte could not be read,
//   - 2 or 3 when that continuation byte could not be read,
//   - 0 when the lead byte does not start a 1-, 2- or 3-byte sequence.
func decodeUcs2Rune(r *bufio.Reader) (rune, int, error) {
	lead, err := r.ReadByte()
	if err != nil {
		return utf8.RuneError, 1, err
	}

	switch {
	case lead < 0x80:
		// 0xxxxxxx
		return rune(lead), 1, nil

	case lead&0xe0 == 0xc0:
		// 110xxxxx 10xxxxxx
		tail, err := r.ReadByte()
		if err != nil {
			return utf8.RuneError, 2, err
		}
		return rune(uint32(lead&0x1f)<<6 | uint32(tail&0x3f)), 2, nil

	case lead&0xf0 == 0xe0:
		// 1110xxxx 10xxxxxx 10xxxxxx
		mid, err := r.ReadByte()
		if err != nil {
			return utf8.RuneError, 2, err
		}
		tail, err := r.ReadByte()
		if err != nil {
			return utf8.RuneError, 3, err
		}
		code := uint32(lead&0x0f)<<12 | uint32(mid&0x3f)<<6 | uint32(tail&0x3f)
		return rune(code), 3, nil

	default:
		return utf8.RuneError, 0, fmt.Errorf("bad utf-8 encoding at %x", lead)
	}
}
// encString appends the hessian encoding of v to b and returns the extended
// slice.
//
//	# UTF-8 encoded character string split into chunks
//	::= x52 b1 b0 <utf8-data> string  # non-final chunk
//	::= 'S' b1 b0 <utf8-data>         # final chunk, two-byte length
//	::= [x00-x1f] <utf8-data>         # final chunk, length 0-31
//	::= [x30-x33] b0 <utf8-data>      # final chunk, length 0-1023
//
// Lengths are counted in 16-bit code units: a rune above 0xFFFF is written as
// a surrogate pair and counts as two (see encodeUcs4Rune). The input is cut
// into chunks of at least CHUNK_SIZE code units; every chunk except the last
// is tagged BC_STRING_CHUNK, and the last one uses the shortest form that can
// hold its length. The empty string is encoded as the single byte
// BC_STRING_DIRECT.
func encString(b []byte, v string) []byte {
	if v == "" {
		return encByte(b, BC_STRING_DIRECT)
	}

	vBuf := bytes.NewBufferString(v)

	// The inner loop only checks the unit count before reading a rune, and a
	// supplementary rune adds two units (six bytes) at once. A chunk can
	// therefore hold CHUNK_SIZE+1 units, each at most three bytes long, so the
	// scratch buffer is sized for that worst case rather than CHUNK_SIZE*3.
	bufp := gxbytes.AcquireBytes((CHUNK_SIZE + 1) * 3)
	defer gxbytes.ReleaseBytes(bufp)
	buf := *bufp

	for vBuf.Len() > 0 {
		charCount, byteCount := 0, 0
		for charCount < CHUNK_SIZE {
			r, _, err := vBuf.ReadRune()
			if err != nil {
				// input exhausted
				break
			}

			byteLen, charLen := encodeUcs4Rune(buf[byteCount:], r)
			charCount += charLen
			byteCount += byteLen
		}

		if charCount == 0 {
			break
		}

		switch {
		case vBuf.Len() > 0 && charCount >= CHUNK_SIZE:
			// more input follows: non-final chunk
			b = encByte(b, BC_STRING_CHUNK)
			b = encByte(b, PackUint16(uint16(charCount))...)
		case charCount <= int(STRING_DIRECT_MAX):
			// length fits in the tag byte itself
			b = encByte(b, byte(charCount+int(BC_STRING_DIRECT)))
		case charCount <= STRING_SHORT_MAX:
			// high bits of the length in the tag, low byte follows
			b = encByte(b, byte((charCount>>8)+int(BC_STRING_SHORT)), byte(charCount))
		default:
			b = encByte(b, BC_STRING)
			b = encByte(b, PackUint16(uint16(charCount))...)
		}

		b = append(b, buf[:byteCount]...)
	}

	return b
}
/////////////////////////////////////////
// String
/////////////////////////////////////////
// getStringLength returns the length, in 16-bit code units, announced by the
// string chunk tag, reading any extra length bytes from the stream.
//
//	# UTF-8 encoded character string split into chunks
//	::= x52 b1 b0 <utf8-data> string  # non-final chunk, two-byte length
//	::= 'S' b1 b0 <utf8-data>         # final chunk, two-byte length
//	::= [x00-x1f] <utf8-data>         # length is the tag itself
//	::= [x30-x33] b0 <utf8-data>      # (tag-0x30)<<8 + b0
//
// Two-byte lengths are read high-order byte first. On a read failure or an
// unknown tag the returned length is -1 together with the error.
func (d *Decoder) getStringLength(tag byte) (int, error) {
	// compact form: the tag is the length
	if tag >= BC_STRING_DIRECT && tag <= STRING_DIRECT_MAX {
		return int(tag), nil
	}

	// short form: two length bits in the tag, eight more in the next byte
	if tag >= 0x30 && tag <= 0x33 {
		low, err := d.readByte()
		if err != nil {
			return -1, perrors.WithStack(err)
		}
		return int(tag-0x30)<<8 + int(low), nil
	}

	// chunk / final chunk: 16-bit length follows
	if tag == BC_STRING_CHUNK || tag == BC_STRING {
		high, err := d.readByte()
		if err != nil {
			return -1, perrors.WithStack(err)
		}
		low, err := d.readByte()
		if err != nil {
			return -1, perrors.WithStack(err)
		}
		return int(high)<<8 + int(low), nil
	}

	return -1, perrors.Errorf("string decode: unknown tag %b", tag)
}
// decString reads one hessian value from the stream and returns it as a Go
// string.
//
// flag is either TAG_READ, meaning the tag byte has not been consumed yet and
// is read here, or the tag byte itself.
//
// Besides string tags, a few scalar tags are accepted and rendered as text:
// null, true and false map to STRING_NIL, STRING_TRUE and STRING_FALSE; the
// int/long tags are decoded with decInt64 and printed in base 10; the double
// tags BC_DOUBLE_ZERO/BC_DOUBLE_ONE map to STRING_ZERO/STRING_ONE, and
// BC_DOUBLE_BYTE/BC_DOUBLE_SHORT are decoded with decDouble and printed in 'E'
// format. Any other non-string tag yields an error.
//
// A string is a sequence of chunks; only BC_STRING_CHUNK announces that
// another chunk follows. Each chunk length counts 16-bit code units, every
// unit being stored in the 1-, 2- or 3-byte UTF-8 bit layout. Units are copied
// to the result byte for byte, except that a high surrogate immediately
// followed by a low surrogate is replaced by the 4-byte UTF-8 encoding of the
// rune they form. Unpaired surrogates are left as they appear on the wire.
//
// If the stream ends at a character boundary (while a lead byte or the tag of
// a continuation chunk is expected), the text decoded so far is returned with
// a nil error. Any other read failure, including EOF in the middle of a
// multi-byte unit, is returned as an error.
func (d *Decoder) decString(flag int32) (string, error) {
	var tag byte
	if flag != TAG_READ {
		tag = byte(flag)
	} else {
		// NOTE(review): the read error has always been ignored here, so the
		// tag is whatever readByte hands back on failure. Kept as is so
		// callers see no change; confirm whether it should be reported.
		tag, _ = d.readByte()
	}

	switch {
	case tag == BC_NULL:
		return STRING_NIL, nil

	case tag == BC_TRUE:
		return STRING_TRUE, nil

	case tag == BC_FALSE:
		return STRING_FALSE, nil

	case (0x80 <= tag && tag <= 0xbf) || (0xc0 <= tag && tag <= 0xcf) ||
		(0xd0 <= tag && tag <= 0xd7) || tag == BC_INT ||
		(tag >= 0xd8 && tag <= 0xef) || (tag >= 0xf0 && tag <= 0xff) ||
		(tag >= 0x38 && tag <= 0x3f) || (tag == BC_LONG_INT) || (tag == BC_LONG):
		i64, err := d.decInt64(int32(tag))
		if err != nil {
			return "", perrors.Wrapf(err, "tag:%+v", tag)
		}
		// FormatInt keeps all 64 bits even where int is 32 bits wide.
		return strconv.FormatInt(int64(i64), 10), nil

	case tag == BC_DOUBLE_ZERO:
		return STRING_ZERO, nil

	case tag == BC_DOUBLE_ONE:
		return STRING_ONE, nil

	case tag == BC_DOUBLE_BYTE || tag == BC_DOUBLE_SHORT:
		f, err := d.decDouble(int32(tag))
		if err != nil {
			return "", perrors.Wrapf(err, "tag:%+v", tag)
		}
		return strconv.FormatFloat(f.(float64), 'E', -1, 64), nil
	}

	isStringTag := (tag >= BC_STRING_DIRECT && tag <= STRING_DIRECT_MAX) ||
		(tag >= 0x30 && tag <= 0x33) ||
		tag == BC_STRING_CHUNK || tag == BC_STRING
	if !isStringTag {
		return "", perrors.Errorf("unknown string tag %#x\n", tag)
	}

	chunkLen, err := d.getStringLength(tag)
	if err != nil {
		return "", perrors.WithStack(err)
	}
	last := tag != BC_STRING_CHUNK

	var (
		out      []byte             // decoded UTF-8 text
		high     uint32             // previous unit, valid only when haveHigh is set
		haveHigh bool               // the previous unit was a high surrogate
		scratch  [utf8.UTFMax]byte  // staging area for a merged surrogate pair
	)

chunks:
	for {
		// A code unit never needs more than three bytes, so one reservation
		// per chunk is enough.
		if need := len(out) + chunkLen*3; need > cap(out) {
			grown := make([]byte, len(out), need)
			copy(grown, out)
			out = grown
		}

		for ; chunkLen > 0; chunkLen-- {
			lead, err := d.readByte()
			if err != nil {
				if err == io.EOF {
					// truncated at a character boundary: keep what we have
					break chunks
				}
				return "", perrors.WithStack(err)
			}

			// Copy the raw bytes of the unit and work out its 16-bit value.
			var unit uint32
			switch {
			case lead < 0x80:
				unit = uint32(lead)
				out = append(out, lead)

			case lead&0xe0 == 0xc0:
				b1, err := d.readByte()
				if err != nil {
					return "", perrors.WithStack(err)
				}
				unit = uint32(lead&0x1f)<<6 | uint32(b1&0x3f)
				out = append(out, lead, b1)

			case lead&0xf0 == 0xe0:
				b1, err := d.readByte()
				if err != nil {
					return "", perrors.WithStack(err)
				}
				b2, err := d.readByte()
				if err != nil {
					return "", perrors.WithStack(err)
				}
				unit = uint32(lead&0x0f)<<12 | uint32(b1&0x3f)<<6 | uint32(b2&0x3f)
				out = append(out, lead, b1, b2)

			default:
				return "", perrors.Errorf("bad utf-8 encoding, offset=%d\n", len(out))
			}

			switch {
			case haveHigh && unit >= 0xDC00 && unit <= 0xDFFF:
				// Both halves of the pair were just copied as three raw bytes
				// each; swap those six bytes for the real rune.
				r := rune(high-0xD800)<<10 + rune(unit-0xDC00) + 0x10000
				n := utf8.EncodeRune(scratch[:], r)
				out = append(out[:len(out)-6], scratch[:n]...)
				haveHigh = false

			case unit >= 0xD800 && unit <= 0xDBFF:
				high, haveHigh = unit, true

			default:
				haveHigh = false
			}
		}

		if last {
			break
		}

		// A non-final chunk must be followed by the tag of the next chunk.
		next, err := d.readByte()
		if err != nil {
			if err == io.EOF {
				// Same leniency as for truncated chunk data.
				break
			}
			return "", perrors.WithStack(err)
		}
		// getStringLength rejects anything that is not a string tag.
		chunkLen, err = d.getStringLength(next)
		if err != nil {
			return "", perrors.WithStack(err)
		}
		last = next != BC_STRING_CHUNK
	}

	// out is not touched again, so it can back the string without a copy.
	return *(*string)(unsafe.Pointer(&out)), nil
}