Fix wrong JSON encoding/decoding of Unicode code points above 0xffff (use UTF-16 surrogate pairs in \u escapes)
diff --git a/lang/c++/impl/json/JsonIO.cc b/lang/c++/impl/json/JsonIO.cc
index 6254948..da2d85f 100644
--- a/lang/c++/impl/json/JsonIO.cc
+++ b/lang/c++/impl/json/JsonIO.cc
@@ -314,11 +314,37 @@
     }
 }
 
+// Reads the four hex digits following the 'u'/'U' at b and accumulates them into n (the caller passes
+// n == 0). Returns an iterator to the last digit; throws Exception on early end of input or a non-hex char.
+static string::const_iterator unicodeParse(string::const_iterator b, string::const_iterator e, uint32_t &n) {
+    string::const_iterator start = b;
+    for (int i = 0; i < 4; i++) {
+        if (++b == e) {
+            throw Exception(boost::format("Invalid unicode escape: %1%") % string(start, b));
+        }
+        n *= 16;
+        char c = *b;
+        if (c >= '0' && c <= '9') { // explicit range: isdigit() is undefined for negative char values
+            n += c - '0';
+        } else if (c >= 'a' && c <= 'f') {
+            n += c - 'a' + 10;
+        } else if (c >= 'A' && c <= 'F') {
+            n += c - 'A' + 10;
+        } else {
+            throw Exception(boost::format("Invalid hex character: %1%") % c);
+        }
+    }
+    return b;
+}
+
+// Decode the given string and return contents as UTF8-encoded bytes.
+// The input does not have the enclosing double-quotes.
 string JsonParser::decodeString(const string &s, bool binary) {
     string result;
     for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
         char ch = *it;
         if (ch == '\\') {
+            string::const_iterator startSeq = it;
             ch = *++it;
             switch (ch) {
                 case '"':
@@ -344,29 +370,49 @@
                 case 'u':
                 case 'U': {
                     uint32_t n = 0;
-                    char e[4];
-                    for (char &i : e) {
-                        n *= 16;
-                        char c = *++it;
-                        i = c;
-                        if (isdigit(c)) {
-                            n += c - '0';
-                        } else if (c >= 'a' && c <= 'f') {
-                            n += c - 'a' + 10;
-                        } else if (c >= 'A' && c <= 'F') {
-                            n += c - 'A' + 10;
-                        }
-                    }
+                    it = unicodeParse(it, s.end(), n);
                     if (binary) {
                         if (n > 0xff) {
                             throw Exception(boost::format(
                                                 "Invalid byte for binary: %1%%2%")
-                                            % ch % string(e, 4));
+                                            % ch % string(startSeq, ++it));
                         } else {
                             result.push_back(n);
                             continue;
                         }
                     }
+                    // A UTF-16 surrogate (0xd800-0xdfff) must arrive as a high/low pair of \u escapes;
+                    // 0xe000-0xffff are ordinary BMP code points and go straight to UTF-8 encoding.
+                    if (n >= 0xd800 && n <= 0xdfff) {
+                        if (++it == s.end() || n > 0xdbff) { // input ended, or lone low surrogate
+                            throw Exception(boost::format(
+                                                "Invalid unicode sequence: %1%")
+                                            % string(startSeq, it));
+                        }
+                        if (*it != '\\') {
+                            throw Exception(boost::format(
+                                                "Invalid unicode sequence: %1%")
+                                            % string(startSeq, ++it));
+                        }
+                        if (++it == s.end()) {
+                            throw Exception(boost::format(
+                                                "Invalid unicode sequence: %1%")
+                                            % string(startSeq, it));
+                        }
+                        if (*it != 'u' && *it != 'U') {
+                            throw Exception(boost::format(
+                                                "Invalid unicode sequence: %1%")
+                                            % string(startSeq, ++it));
+                        }
+                        uint32_t m = 0;
+                        it = unicodeParse(it, s.end(), m);
+                        if (m < 0xdc00 || m > 0xdfff) { // second escape must be a low surrogate
+                            throw Exception(boost::format(
+                                                "Invalid unicode sequence: %1%")
+                                            % string(startSeq, ++it));
+                        }
+                        n = 0x10000 + (((n - 0xd800) << 10) | (m - 0xdc00));
+                    }
                     if (n < 0x80) {
                         result.push_back(n);
                     } else if (n < 0x800) {
@@ -376,15 +422,15 @@
                         result.push_back((n >> 12) | 0xe0);
                         result.push_back(((n >> 6) & 0x3f) | 0x80);
                         result.push_back((n & 0x3f) | 0x80);
-                    } else if (n < 110000) {
+                    } else if (n < 0x110000) {
                         result.push_back((n >> 18) | 0xf0);
                         result.push_back(((n >> 12) & 0x3f) | 0x80);
                         result.push_back(((n >> 6) & 0x3f) | 0x80);
                         result.push_back((n & 0x3f) | 0x80);
                     } else {
                         throw Exception(boost::format(
-                                            "Invalid unicode value: %1%i%2%")
-                                        % ch % string(e, 4));
+                                            "Invalid unicode value: %1%%2%")
+                                        % n % string(startSeq, ++it));
                     }
                 }
                     continue;
diff --git a/lang/c++/impl/json/JsonIO.hh b/lang/c++/impl/json/JsonIO.hh
index 94889e5..447c0b0 100644
--- a/lang/c++/impl/json/JsonIO.hh
+++ b/lang/c++/impl/json/JsonIO.hh
@@ -263,12 +263,23 @@
         out_.write(toHex((static_cast<unsigned char>(c)) % 16));
     }
 
-    void escapeUnicode(uint32_t c) {
+    void escapeUnicode16(uint32_t c) { // emits "\u" then writeHex of bits 8-15 and bits 0-7; higher bits are dropped
         out_.write('\\');
         out_.write('u');
         writeHex((c >> 8) & 0xff);
         writeHex(c & 0xff);
     }
+    void escapeUnicode(uint32_t c) { // escapes code point c; throws Exception when c >= 0x110000
+        if (c < 0x10000) {
+            escapeUnicode16(c); // fits in a single 16-bit escape
+        } else if (c < 0x110000) {
+            c -= 0x10000; // leaves a 20-bit offset to split across a surrogate pair
+            escapeUnicode16(((c >> 10) & 0x3ff) | 0xd800); // high surrogate: top 10 bits
+            escapeUnicode16((c & 0x3ff) | 0xdc00); // low surrogate: bottom 10 bits
+        } else {
+            throw Exception(boost::format("Invalid code-point: %1%") % c);
+        }
+    }
     void doEncodeString(const char *b, size_t len, bool binary) {
         const char *e = b + len;
         out_.write('"');
diff --git a/lang/c++/test/JsonTests.cc b/lang/c++/test/JsonTests.cc
index da9722f..125b6d6 100644
--- a/lang/c++/test/JsonTests.cc
+++ b/lang/c++/test/JsonTests.cc
@@ -68,6 +68,7 @@
     {R"("\/")", EntityType::String, "/", R"("\/")"},
     {R"("\u20ac")", EntityType::String, "\xe2\x82\xac", R"("\u20ac")"},
     {R"("\u03c0")", EntityType::String, "\xcf\x80", R"("\u03c0")"},
+    {R"("\Ud8ab\udccd")", EntityType::String, "\xf0\xba\xb3\x8d", R"("\ud8ab\udccd")"},
 };
 
 void testBool(const TestData<bool> &d) {