完善对unicode和str的支持
diff --git a/README.md b/README.md
index c58b193..31937c6 100644
--- a/README.md
+++ b/README.md
@@ -17,3 +17,6 @@
* Dubbo相关的编码规则:[参考1](http://fe.58qf.com/2017/11/07/node-dubbo/),[参考2](http://cxis.me/2017/03/19/Dubbo%E4%B8%AD%E7%BC%96%E7%A0%81%E5%92%8C%E8%A7%A3%E7%A0%81%E7%9A%84%E8%A7%A3%E6%9E%90/)
* Dubbo的心跳机制:<http://www.cnblogs.com/java-zhao/p/8539046.html>
* 部分实现参考了dubbo的Java源码中的实现
+* 对于所有的字符串,在网络传输前进行编码,编码一律使用unicode来完成,如果一个字符串是str则先将其decode为unicode之后再进行操作;
+* 对于所有的字符串,从网络上获取到数据之后进行解码,解码得到的字符串是unicode,之后将其encode为str再交给客户程序;
+* 支持传输UTF-8编码的字符串和Emoji🧐
\ No newline at end of file
diff --git a/dubbo/codec/decoder.py b/dubbo/codec/decoder.py
index 16fc9e6..cd9ba0f 100644
--- a/dubbo/codec/decoder.py
+++ b/dubbo/codec/decoder.py
@@ -161,7 +161,7 @@
:param length:
:return:
"""
- value = ''
+ value = u''
for i in range(length):
ch = self.read_byte()
if ch < 0x80:
@@ -175,7 +175,7 @@
value += unichr(((ch & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f))
else:
raise ValueError('Can\'t parse utf-8 char {}'.format(ch))
- return value
+ return value.encode('utf-8') # 将unicode转化为str类型
@ranges((0x00, 0x1f), (0x30, 0x33), 0x52, ord('S'))
def read_string(self):
@@ -185,7 +185,7 @@
"""
value = self.read_byte()
string = ''
- while value == 0x52:
+ while value == ord('R'):
length = unpack('!h', self.read_bytes(2))[0]
string += self._read_utf(length)
value = self.read_byte()
@@ -198,7 +198,7 @@
length = (value - 0x30) << 8 | self.read_byte()
string += self._read_utf(length)
- return string.encode('utf-8') # 将unicode转化为str类型
+ return string
@ranges((0x60, 0x6f), ord('O'))
def read_object(self):
diff --git a/dubbo/codec/encoder.py b/dubbo/codec/encoder.py
index 0652224..4c9b1e5 100644
--- a/dubbo/codec/encoder.py
+++ b/dubbo/codec/encoder.py
@@ -91,7 +91,7 @@
parameter_types += 'J'
elif isinstance(argument, float):
parameter_types += 'D'
- elif isinstance(argument, str):
+ elif isinstance(argument, (str, unicode)):
parameter_types += 'Ljava/lang/String;'
elif isinstance(argument, Object):
path = argument.get_path()
@@ -157,7 +157,7 @@
elif isinstance(value, int):
if value > MAX_INT_32 or value < MIN_INT_32:
result.append(ord('L'))
- result.extend(list(bytearray(struct.pack('>q', value))))
+ result.extend(list(bytearray(struct.pack('!q', value))))
return result
if INT_DIRECT_MIN <= value <= INT_DIRECT_MAX:
@@ -216,13 +216,11 @@
result.append(bits >> 8)
result.append(bits)
return result
- # 如果是unicode则转义为str类型后再进行操作
- elif isinstance(value, unicode):
- return self._encode_single_value(value.encode('utf-8'))
# 字符串类型
- elif isinstance(value, str):
- # 根据hessian协议这里的长度必须是字符(char)长度而不是字节(byte)长度,所以需要Unicode类型
- value = value.decode('utf-8')
+ elif isinstance(value, (str, unicode)):
+ # 在进行网络传输操作时一律使用unicode进行操作
+ if isinstance(value, str):
+ value = value.decode('utf-8')
length = len(value)
if length <= STRING_DIRECT_MAX:
result.append(BC_STRING_DIRECT + length)
diff --git a/tests/run_test.py b/tests/run_test.py
index 99fefa4..9629e19 100644
--- a/tests/run_test.py
+++ b/tests/run_test.py
@@ -16,7 +16,7 @@
self.dubbo = DubboClient('me.hourui.echo.provider.Echo', host='127.0.0.1:20880')
def test_run(self):
- print self.dubbo.call('test2', '🐶')
+ print self.dubbo.call('test2', u'🐶🐶🐶111🐶🐶🐶你好啊啊🐶🐶🐶🐶の🐶🐶🐶🐶')
# print dubbo.call('echo', ['张老师', '三', 19, 2000.0, True])
#