Fix uescapes for surrogate pairs (characters above U+FFFF)
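I had a silly direction mistake in a bit shift (`<<` instead of `>>`)
that was causing the high surrogate of every character outside the
Basic Multilingual Plane (code points above U+FFFF, which are escaped
as UTF-16 surrogate pairs) to be printed as \uD800, which is obviously
wrong. This bug only affects people using the non-default uescape
option during encoding.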
diff --git a/c_src/utf8.c b/c_src/utf8.c
index 53fc1b5..3ac65cb 100644
--- a/c_src/utf8.c
+++ b/c_src/utf8.c
@@ -234,7 +234,7 @@
n = val - 0x10000;
p[0] = '\\';
p[1] = 'u';
- if(int_to_hex((0xD800 | ((n << 10) & 0x03FF)), p+2) < 0) {
+ if(int_to_hex((0xD800 | ((n >> 10) & 0x03FF)), p+2) < 0) {
return -1;
}
p[6] = '\\';
diff --git a/test/004-strings.t b/test/004-strings.t
index 8396bbd..99852a3 100755
--- a/test/004-strings.t
+++ b/test/004-strings.t
@@ -6,7 +6,7 @@
code:add_pathz("ebin"),
code:add_pathz("test"),
- etap:plan(83),
+ etap:plan(87),
util:test_good(good()),
util:test_good(uescaped(), [uescape]),
util:test_errors(errors()),
@@ -36,6 +36,14 @@
{
<<"\"\\u8CA8\\u5481\\u3002\\u0091\\u0091\"">>,
<<232,178,168,229,146,129,227,128,130,194,145,194,145>>
+ },
+ {
+ <<"\"\\uD834\\uDD1E\"">>,
+ <<240, 157, 132, 158>>
+ },
+ {
+ <<"\"\\uD83D\\uDE0A\"">>,
+ <<240, 159, 152, 138>>
}
].