完善字符编码,添加对emoji的支持🤤 😒 😓 🙄😕🤔
diff --git a/README.md b/README.md
index ae6d2aa..c58b193 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,6 @@
* Python字节相关的转化操作:<https://docs.python.org/2/library/struct.html>
* Hessian2的编码规则:<http://hessian.caucho.com/doc/hessian-serialization.html>
* 实现Hessian2编码时的参考:[参考1](https://github.com/WKPlus/pyhessian2/blob/3.1.5/pyhessian2/encoder.py),[参考2](https://github.com/zhouyougit/PyDubbo/blob/master/dubbo/hessian2.py)
-* Dubbo相关的编码规则:[参考1](http://fe.58qf.com/2017/11/07/node-dubbo/) 和 [参考2](http://cxis.me/2017/03/19/Dubbo%E4%B8%AD%E7%BC%96%E7%A0%81%E5%92%8C%E8%A7%A3%E7%A0%81%E7%9A%84%E8%A7%A3%E6%9E%90/)
+* Dubbo相关的编码规则:[参考1](http://fe.58qf.com/2017/11/07/node-dubbo/),[参考2](http://cxis.me/2017/03/19/Dubbo%E4%B8%AD%E7%BC%96%E7%A0%81%E5%92%8C%E8%A7%A3%E7%A0%81%E7%9A%84%E8%A7%A3%E6%9E%90/)
* Dubbo的心跳机制:<http://www.cnblogs.com/java-zhao/p/8539046.html>
-* 部分实现参考了dubbo的Java源码的实现
+* 部分实现参考了dubbo的Java源码中的实现
diff --git a/dubbo/codec/decoder.py b/dubbo/codec/decoder.py
index 885cc5f..16fc9e6 100644
--- a/dubbo/codec/decoder.py
+++ b/dubbo/codec/decoder.py
@@ -161,19 +161,20 @@
:param length:
:return:
"""
- value = []
- while length > 0:
- c = self.read_byte()
- value.append(c)
- if c < 0x80:
- pass
- elif (c & 0xe0) == 0xc0:
- value.extend(self.read_bytes(1))
- elif (c & 0xf0) == 0xe0:
- value.extend(self.read_bytes(2))
- elif (c & 0xf8) == 0xf0:
- value.extend(self.read_bytes(3))
- length -= 1
+ value = ''
+ for i in range(length):
+ ch = self.read_byte()
+ if ch < 0x80:
+ value += unichr(ch)
+ elif (ch & 0xe0) == 0xc0:
+ ch1 = self.read_byte()
+ value += unichr(((ch & 0x1f) << 6) + (ch1 & 0x3f))
+ elif (ch & 0xf0) == 0xe0:
+ ch1 = self.read_byte()
+ ch2 = self.read_byte()
+ value += unichr(((ch & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f))
+ else:
+ raise ValueError('Can\'t parse utf-8 char {}'.format(ch))
return value
@ranges((0x00, 0x1f), (0x30, 0x33), 0x52, ord('S'))
@@ -183,10 +184,10 @@
:return:
"""
value = self.read_byte()
- buf = []
+ string = ''
while value == 0x52:
length = unpack('!h', self.read_bytes(2))[0]
- buf.extend(self._read_utf(length))
+ string += self._read_utf(length)
value = self.read_byte()
if value == ord('S'):
@@ -196,8 +197,8 @@
else:
length = (value - 0x30) << 8 | self.read_byte()
- buf.extend(self._read_utf(length))
- return str(bytearray(buf))
+ string += self._read_utf(length)
+ return string.encode('utf-8') # 将unicode转化为str类型
@ranges((0x60, 0x6f), ord('O'))
def read_object(self):
diff --git a/dubbo/codec/encoder.py b/dubbo/codec/encoder.py
index 4f1ee86..0652224 100644
--- a/dubbo/codec/encoder.py
+++ b/dubbo/codec/encoder.py
@@ -216,10 +216,14 @@
result.append(bits >> 8)
result.append(bits)
return result
+ # 如果是unicode则先编码(utf-8)为str类型后再进行操作
+ elif isinstance(value, unicode):
+ return self._encode_single_value(value.encode('utf-8'))
# 字符串类型
elif isinstance(value, str):
- # 根据hessian协议这里的长度必须是字符串长度而不是字节长度,所以需要Unicode类型
- length = len(value.decode('utf-8'))
+ # 根据hessian协议这里的长度必须是字符(char)长度而不是字节(byte)长度,所以需要Unicode类型
+ value = value.decode('utf-8')
+ length = len(value)
if length <= STRING_DIRECT_MAX:
result.append(BC_STRING_DIRECT + length)
elif length <= STRING_SHORT_MAX:
@@ -229,7 +233,20 @@
result.append(ord('S'))
result.append(length >> 8)
result.append(length)
- result.extend(list(bytearray(value))) # 加上变量数组
+
+ # 对字符串进行编码,编码格式utf-8
+ # 参见方法:com.alibaba.com.caucho.hessian.io.Hessian2Output#printString
+ for v in value:
+ ch = ord(v)
+ if ch < 0x80:
+ result.append(ch & 0xff)
+ elif ch < 0x800:
+ result.append((0xc0 + ((ch >> 6) & 0x1f)) & 0xff)
+ result.append((0x80 + (ch & 0x3f)) & 0xff)
+ else:
+ result.append((0xe0 + ((ch >> 12) & 0xf)) & 0xff)
+ result.append((0x80 + ((ch >> 6) & 0x3f)) & 0xff)
+ result.append((0x80 + (ch & 0x3f)) & 0xff)
return result
# 对象类型
elif isinstance(value, Object):
diff --git a/dubbo/connection/connections.py b/dubbo/connection/connections.py
index ca9aa36..45b8618 100644
--- a/dubbo/connection/connections.py
+++ b/dubbo/connection/connections.py
@@ -4,6 +4,7 @@
import socket
import threading
import time
+import traceback
from struct import unpack
from dubbo.codec.encoder import Request
@@ -145,6 +146,7 @@
else:
raise DubboResponseException("Unknown result flag, expect '0' '1' '2', get " + flag)
except Exception as e:
+ traceback.print_exc()
self.results[host] = e
finally:
conn.notify() # 唤醒请求线程
diff --git a/tests/dubbo_test.py b/tests/dubbo_test.py
index 5312348..067c807 100644
--- a/tests/dubbo_test.py
+++ b/tests/dubbo_test.py
@@ -148,7 +148,7 @@
employee['name'] = '我勒个去居然不能用emoji啊'
lock = Object('me.hourui.echo.bean.retail.Lock')
- lock['lockReason'] = '加锁的原因是什么呢?'
+ lock['lockReason'] = '加锁的原因是什么呢?🤔'
lock['employee'] = employee
lock['locked'] = True