| %% @author Bob Ippolito <bob@mochimedia.com> |
| %% @copyright 2007 Mochi Media, Inc. |
| %% |
| %% Permission is hereby granted, free of charge, to any person obtaining a |
| %% copy of this software and associated documentation files (the "Software"), |
| %% to deal in the Software without restriction, including without limitation |
| %% the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| %% and/or sell copies of the Software, and to permit persons to whom the |
| %% Software is furnished to do so, subject to the following conditions: |
| %% |
| %% The above copyright notice and this permission notice shall be included in |
| %% all copies or substantial portions of the Software. |
| %% |
| %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| %% THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| %% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| %% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| %% DEALINGS IN THE SOFTWARE. |
| |
| %% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
| %% with binaries as strings, arrays as lists (without an {array, _}
| %% wrapper), and it only knows how to decode UTF-8 (and ASCII).
| %% |
| %% JSON terms are decoded as follows (JavaScript -> Erlang):
| %% <ul> |
| %% <li>{"key": "value"} -> |
| %% {struct, [{<<"key">>, <<"value">>}]}</li> |
| %% <li>["array", 123, 12.34, true, false, null] -> |
| %% [<<"array">>, 123, 12.34, true, false, null] |
| %% </li> |
| %% </ul> |
| %% <ul> |
| %% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li> |
| %% <li>Objects decode to {struct, PropList}</li> |
| %% <li>Numbers decode to integer or float</li> |
| %% <li>true, false, null decode to their respective terms.</li> |
| %% </ul> |
| %% The encoder will accept the same format that the decoder will produce, |
| %% but will also allow additional cases for leniency: |
| %% <ul> |
| %% <li>atoms other than true, false, null will be considered UTF-8 |
| %% strings (even as a proplist key) |
| %% </li> |
| %% <li>{json, IoList} will insert IoList directly into the output |
| %% with no validation |
| %% </li> |
| %% <li>{array, Array} will be encoded as Array |
| %% (legacy mochijson style) |
| %% </li> |
| %% <li>A non-empty raw proplist will be encoded as an object as long |
| %% as the first pair does not have an atom key of json, struct, |
| %% or array |
| %% </li> |
| %% </ul> |
| |
| -module(mochijson2). |
| -author('bob@mochimedia.com'). |
| -export([encoder/1, encode/1]). |
| -export([decoder/1, decode/1, decode/2]). |
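| 
| %% An illustrative usage sketch (shell session; mirrors the mappings and
| %% leniency rules documented above, with made-up values):
| %%
| %%   1> mochijson2:decode(<<"{\"key\": \"value\"}">>).
| %%   {struct,[{<<"key">>,<<"value">>}]}
| %%   2> mochijson2:decode(<<"[\"array\", 123, 12.34, true, false, null]">>).
| %%   [<<"array">>,123,12.34,true,false,null]
| %%   3> iolist_to_binary(mochijson2:encode({struct, [{key, <<"value">>}]})).
| %%   <<"{\"key\":\"value\"}">>
| %%   4> iolist_to_binary(mochijson2:encode([{count, 1}, {raw, {json, <<"[1,2,3]">>}}])).
| %%   <<"{\"count\":1,\"raw\":[1,2,3]}">>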
| |
| %% This is a macro to placate syntax highlighters.
| -define(Q, $\"). |
| -define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, |
| column=N+S#decoder.column}). |
| -define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, |
| column=1+S#decoder.column}). |
| -define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, |
| column=1, |
| line=1+S#decoder.line}). |
| -define(INC_CHAR(S, C), |
| case C of |
| $\n -> |
| S#decoder{column=1, |
| line=1+S#decoder.line, |
| offset=1+S#decoder.offset}; |
| _ -> |
| S#decoder{column=1+S#decoder.column, |
| offset=1+S#decoder.offset} |
| end). |
| -define(IS_WHITESPACE(C), |
| (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). |
| |
| -ifdef(map_unavailable). |
| -define(IS_MAP(_), false). |
| -else. |
| -define(IS_MAP(X), is_map(X)). |
| -endif. |
| |
| %% @type json_string() = atom() | binary()
| %% @type json_number() = integer() | float() |
| %% @type json_array() = [json_term()] |
| %% @type json_object() = {struct, [{json_string(), json_term()}]} |
| %% @type json_eep18_object() = {[{json_string(), json_term()}]} |
| %% @type json_iolist() = {json, iolist()} |
| %% @type json_term() = json_string() | json_number() | json_array() | |
| %% json_object() | json_eep18_object() | json_iolist() |
| |
| -record(encoder, {handler=null, |
| utf8=false}). |
| |
| -record(decoder, {object_hook=null, |
| offset=0, |
| line=1, |
| column=1, |
| state=null}). |
| |
| %% @spec encoder([encoder_option()]) -> function() |
| %% @doc Create an encoder/1 with the given options. |
| %% @type encoder_option() = handler_option() | utf8_option() |
| %% @type utf8_option() = boolean(). Emit unicode as raw UTF-8 (default: false)
| encoder(Options) -> |
| State = parse_encoder_options(Options, #encoder{}), |
| fun (O) -> json_encode(O, State) end. |
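| 
| %% A brief usage sketch (illustrative values): the handler fun is only
| %% consulted for terms the encoder does not already understand, and
| %% {utf8, true} emits non-ASCII characters as raw UTF-8 bytes instead of
| %% \uXXXX escapes.
| %%
| %%   Enc = mochijson2:encoder([{utf8, true},
| %%                             {handler, fun ({Mega, Secs, _Micro}) ->
| %%                                               Mega * 1000000 + Secs
| %%                                       end}]),
| %%   iolist_to_binary(Enc({struct, [{now, os:timestamp()}]})).
| %%   %% => e.g. <<"{\"now\":1199344435}">>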
| |
| %% @spec encode(json_term()) -> iolist() |
| %% @doc Encode the given term as JSON to an iolist.
| encode(Any) -> |
| json_encode(Any, #encoder{}). |
| |
| %% @spec decoder([decoder_option()]) -> function() |
| %% @doc Create a decoder/1 with the given options. |
| decoder(Options) -> |
| State = parse_decoder_options(Options, #decoder{}), |
| fun (O) -> json_decode(O, State) end. |
| |
| %% @spec decode(iolist(), [{format, proplist | eep18 | struct | map}]) -> json_term()
| %% @doc Decode the given iolist to Erlang terms using the given object format
| %% for decoding, where proplist returns JSON objects as [{binary(), json_term()}]
| %% proplists, eep18 returns JSON objects as {[{binary(), json_term()}]},
| %% map returns JSON objects as #{binary() => json_term()}, and struct
| %% returns them as {struct, [{binary(), json_term()}]} (the default).
| decode(S, Options) -> |
| json_decode(S, parse_decoder_options(Options, #decoder{})). |
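| 
| %% An illustrative comparison of the formats (shell results shown as
| %% comments; the map format requires an OTP release with map support):
| %%
| %%   mochijson2:decode("{\"k\":1}", [{format, struct}]).   %% {struct,[{<<"k">>,1}]}
| %%   mochijson2:decode("{\"k\":1}", [{format, eep18}]).    %% {[{<<"k">>,1}]}
| %%   mochijson2:decode("{\"k\":1}", [{format, proplist}]). %% [{<<"k">>,1}]
| %%   mochijson2:decode("{\"k\":1}", [{format, map}]).      %% #{<<"k">> => 1}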
| |
| %% @spec decode(iolist()) -> json_term() |
| %% @doc Decode the given iolist to Erlang terms. |
| decode(S) -> |
| json_decode(S, #decoder{}). |
| |
| %% Internal API |
| |
| parse_encoder_options([], State) -> |
| State; |
| parse_encoder_options([{handler, Handler} | Rest], State) -> |
| parse_encoder_options(Rest, State#encoder{handler=Handler}); |
| parse_encoder_options([{utf8, Switch} | Rest], State) -> |
| parse_encoder_options(Rest, State#encoder{utf8=Switch}). |
| |
| parse_decoder_options([], State) -> |
| State; |
| parse_decoder_options([{object_hook, Hook} | Rest], State) -> |
| parse_decoder_options(Rest, State#decoder{object_hook=Hook}); |
| parse_decoder_options([{format, map} | Rest], State) -> |
| Hook = make_object_hook_for_map(), |
| parse_decoder_options(Rest, State#decoder{object_hook=Hook}); |
| parse_decoder_options([{format, Format} | Rest], State) |
| when Format =:= struct orelse Format =:= eep18 orelse Format =:= proplist -> |
| parse_decoder_options(Rest, State#decoder{object_hook=Format}). |
| |
| -ifdef(map_unavailable). |
| make_object_hook_for_map() -> |
| exit({json_decode, {bad_format, map_unavailable}}). |
| -else. |
| make_object_hook_for_map() -> |
| fun ({struct, P}) -> maps:from_list(P) end. |
| -endif. |
| |
| |
| json_encode(true, _State) -> |
| <<"true">>; |
| json_encode(false, _State) -> |
| <<"false">>; |
| json_encode(null, _State) -> |
| <<"null">>; |
| json_encode(I, _State) when is_integer(I) -> |
| integer_to_list(I); |
| json_encode(F, _State) when is_float(F) -> |
| mochinum:digits(F); |
| json_encode(S, State) when is_binary(S); is_atom(S) -> |
| json_encode_string(S, State); |
| json_encode([{K, _}|_] = Props, State) when (K =/= struct andalso |
| K =/= array andalso |
| K =/= json) -> |
| json_encode_proplist(Props, State); |
| json_encode({struct, Props}, State) when is_list(Props) -> |
| json_encode_proplist(Props, State); |
| json_encode({Props}, State) when is_list(Props) -> |
| json_encode_proplist(Props, State); |
| json_encode({}, State) -> |
| json_encode_proplist([], State); |
| json_encode(Array, State) when is_list(Array) -> |
| json_encode_array(Array, State); |
| json_encode({array, Array}, State) when is_list(Array) -> |
| json_encode_array(Array, State); |
| json_encode(M, State) when ?IS_MAP(M) -> |
| json_encode_map(M, State); |
| json_encode({json, IoList}, _State) -> |
| IoList; |
| json_encode(Bad, #encoder{handler=null}) -> |
| exit({json_encode, {bad_term, Bad}}); |
| json_encode(Bad, State=#encoder{handler=Handler}) -> |
| json_encode(Handler(Bad), State). |
| |
| json_encode_array([], _State) -> |
| <<"[]">>; |
| json_encode_array(L, State) -> |
| F = fun (O, Acc) -> |
| [$,, json_encode(O, State) | Acc] |
| end, |
| [$, | Acc1] = lists:foldl(F, "[", L), |
| lists:reverse([$\] | Acc1]). |
| |
| json_encode_proplist([], _State) -> |
| <<"{}">>; |
| json_encode_proplist(Props, State) -> |
| F = fun ({K, V}, Acc) -> |
| KS = json_encode_string(K, State), |
| VS = json_encode(V, State), |
| [$,, VS, $:, KS | Acc] |
| end, |
| [$, | Acc1] = lists:foldl(F, "{", Props), |
| lists:reverse([$\} | Acc1]). |
| |
| -ifdef(map_unavailable). |
| json_encode_map(Bad, _State) -> |
| %% IS_MAP definition guarantees that this branch is dead |
| exit({json_encode, {bad_term, Bad}}). |
| -else. |
| json_encode_map(Map, _State) when map_size(Map) =:= 0 -> |
| <<"{}">>; |
| json_encode_map(Map, State) -> |
| F = fun(K, V, Acc) -> |
| KS = json_encode_string(K, State), |
| VS = json_encode(V, State), |
| [$,, VS, $:, KS | Acc] |
| end, |
| [$, | Acc1] = maps:fold(F, "{", Map), |
| lists:reverse([$\} | Acc1]). |
| -endif. |
| |
| json_encode_string(A, State) when is_atom(A) -> |
| json_encode_string(atom_to_binary(A, latin1), State); |
| json_encode_string(B, State) when is_binary(B) -> |
| case json_bin_is_safe(B) of |
| true -> |
| [?Q, B, ?Q]; |
| false -> |
| json_encode_string_unicode(unicode:characters_to_list(B), State, [?Q]) |
| end; |
| json_encode_string(I, _State) when is_integer(I) -> |
| [?Q, integer_to_list(I), ?Q]; |
| json_encode_string(L, State) when is_list(L) -> |
| case json_string_is_safe(L) of |
| true -> |
| [?Q, L, ?Q]; |
| false -> |
| json_encode_string_unicode(L, State, [?Q]) |
| end. |
| |
| json_string_is_safe([]) -> |
| true; |
| json_string_is_safe([C | Rest]) -> |
| case C of |
| ?Q -> |
| false; |
| $\\ -> |
| false; |
| $\b -> |
| false; |
| $\f -> |
| false; |
| $\n -> |
| false; |
| $\r -> |
| false; |
| $\t -> |
| false; |
| C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> |
| false; |
| C when C < 16#7f -> |
| json_string_is_safe(Rest); |
| _ -> |
| exit({json_encode, {bad_char, C}}) |
| end. |
| |
| json_bin_is_safe(<<>>) -> |
| true; |
| json_bin_is_safe(<<C, Rest/binary>>) -> |
| case C of |
| ?Q -> |
| false; |
| $\\ -> |
| false; |
| $\b -> |
| false; |
| $\f -> |
| false; |
| $\n -> |
| false; |
| $\r -> |
| false; |
| $\t -> |
| false; |
| C when C >= 0, C < $\s; C >= 16#7f -> |
| false; |
| C when C < 16#7f -> |
| json_bin_is_safe(Rest) |
| end. |
| |
| json_encode_string_unicode([], _State, Acc) -> |
| lists:reverse([$\" | Acc]); |
| json_encode_string_unicode([C | Cs], State, Acc) -> |
| Acc1 = case C of |
| ?Q -> |
| [?Q, $\\ | Acc]; |
| %% Escaping the solidus is only useful when trying to protect
| %% against "</script>" injection attacks, which are only
| %% possible when JSON is inserted into an HTML document
| %% in-line. mochijson2 does not protect you from this, so
| %% if you do insert directly into HTML then you need to
| %% uncomment the following case or escape the output of encode.
| %% |
| %% $/ -> |
| %% [$/, $\\ | Acc]; |
| %% |
| $\\ -> |
| [$\\, $\\ | Acc]; |
| $\b -> |
| [$b, $\\ | Acc]; |
| $\f -> |
| [$f, $\\ | Acc]; |
| $\n -> |
| [$n, $\\ | Acc]; |
| $\r -> |
| [$r, $\\ | Acc]; |
| $\t -> |
| [$t, $\\ | Acc]; |
| C when C >= 0, C < $\s -> |
| [unihex(C) | Acc]; |
| C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> |
| [unicode:characters_to_binary([C]) | Acc]; |
| C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> |
| [unihex(C) | Acc]; |
| C when C < 16#7f -> |
| [C | Acc]; |
| _ -> |
| %% json_string_is_safe guarantees that this branch is dead |
| exit({json_encode, {bad_char, C}}) |
| end, |
| json_encode_string_unicode(Cs, State, Acc1). |
| |
| hexdigit(C) when C >= 0, C =< 9 -> |
| C + $0; |
| hexdigit(C) when C =< 15 -> |
| C + $a - 10. |
| |
| unihex(C) when C < 16#10000 -> |
| <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>, |
| Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], |
| [$\\, $u | Digits]; |
| unihex(C) when C =< 16#10FFFF -> |
| N = C - 16#10000, |
| S1 = 16#d800 bor ((N bsr 10) band 16#3ff), |
| S2 = 16#dc00 bor (N band 16#3ff), |
| [unihex(S1), unihex(S2)]. |
| |
| json_decode(L, S) when is_list(L) -> |
| json_decode(iolist_to_binary(L), S); |
| json_decode(B, S) -> |
| {Res, S1} = decode1(B, S), |
| {eof, _} = tokenize(B, S1#decoder{state=trim}), |
| Res. |
| |
| decode1(B, S=#decoder{state=null}) -> |
| case tokenize(B, S#decoder{state=any}) of |
| {{const, C}, S1} -> |
| {C, S1}; |
| {start_array, S1} -> |
| decode_array(B, S1); |
| {start_object, S1} -> |
| decode_object(B, S1) |
| end. |
| |
| make_object(V, #decoder{object_hook=N}) when N =:= null orelse N =:= struct -> |
| V; |
| make_object({struct, P}, #decoder{object_hook=eep18}) -> |
| {P}; |
| make_object({struct, P}, #decoder{object_hook=proplist}) -> |
| P; |
| make_object(V, #decoder{object_hook=Hook}) -> |
| Hook(V). |
| |
| decode_object(B, S) -> |
| decode_object(B, S#decoder{state=key}, []). |
| |
| decode_object(B, S=#decoder{state=key}, Acc) -> |
| case tokenize(B, S) of |
| {end_object, S1} -> |
| V = make_object({struct, lists:reverse(Acc)}, S1), |
| {V, S1#decoder{state=null}}; |
| {{const, K}, S1} -> |
| {colon, S2} = tokenize(B, S1), |
| {V, S3} = decode1(B, S2#decoder{state=null}), |
| decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc]) |
| end; |
| decode_object(B, S=#decoder{state=comma}, Acc) -> |
| case tokenize(B, S) of |
| {end_object, S1} -> |
| V = make_object({struct, lists:reverse(Acc)}, S1), |
| {V, S1#decoder{state=null}}; |
| {comma, S1} -> |
| decode_object(B, S1#decoder{state=key}, Acc) |
| end. |
| |
| decode_array(B, S) -> |
| decode_array(B, S#decoder{state=any}, []). |
| |
| decode_array(B, S=#decoder{state=any}, Acc) -> |
| case tokenize(B, S) of |
| {end_array, S1} -> |
| {lists:reverse(Acc), S1#decoder{state=null}}; |
| {start_array, S1} -> |
| {Array, S2} = decode_array(B, S1), |
| decode_array(B, S2#decoder{state=comma}, [Array | Acc]); |
| {start_object, S1} -> |
| {Array, S2} = decode_object(B, S1), |
| decode_array(B, S2#decoder{state=comma}, [Array | Acc]); |
| {{const, Const}, S1} -> |
| decode_array(B, S1#decoder{state=comma}, [Const | Acc]) |
| end; |
| decode_array(B, S=#decoder{state=comma}, Acc) -> |
| case tokenize(B, S) of |
| {end_array, S1} -> |
| {lists:reverse(Acc), S1#decoder{state=null}}; |
| {comma, S1} -> |
| decode_array(B, S1#decoder{state=any}, Acc) |
| end. |
| |
| tokenize_string(B, S=#decoder{offset=O}) -> |
| case tokenize_string_fast(B, O) of |
| {escape, O1} -> |
| Length = O1 - O, |
| S1 = ?ADV_COL(S, Length), |
| <<_:O/binary, Head:Length/binary, _/binary>> = B, |
| tokenize_string(B, S1, lists:reverse(binary_to_list(Head))); |
| O1 -> |
| Length = O1 - O, |
| <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B, |
| {{const, String}, ?ADV_COL(S, Length + 1)} |
| end. |
| |
| tokenize_string_fast(B, O) -> |
| case B of |
| <<_:O/binary, ?Q, _/binary>> -> |
| O; |
| <<_:O/binary, $\\, _/binary>> -> |
| {escape, O}; |
| <<_:O/binary, C1, _/binary>> when C1 < 128 -> |
| tokenize_string_fast(B, 1 + O); |
| <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, |
| C2 >= 128, C2 =< 191 -> |
| tokenize_string_fast(B, 2 + O); |
| <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, |
| C2 >= 128, C2 =< 191, |
| C3 >= 128, C3 =< 191 -> |
| tokenize_string_fast(B, 3 + O); |
| <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, |
| C2 >= 128, C2 =< 191, |
| C3 >= 128, C3 =< 191, |
| C4 >= 128, C4 =< 191 -> |
| tokenize_string_fast(B, 4 + O); |
| _ -> |
| throw(invalid_utf8) |
| end. |
| |
| tokenize_string(B, S=#decoder{offset=O}, Acc) -> |
| case B of |
| <<_:O/binary, ?Q, _/binary>> -> |
| {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)}; |
| <<_:O/binary, "\\\"", _/binary>> -> |
| tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]); |
| <<_:O/binary, "\\\\", _/binary>> -> |
| tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]); |
| <<_:O/binary, "\\/", _/binary>> -> |
| tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]); |
| <<_:O/binary, "\\b", _/binary>> -> |
| tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]); |
| <<_:O/binary, "\\f", _/binary>> -> |
| tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]); |
| <<_:O/binary, "\\n", _/binary>> -> |
| tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]); |
| <<_:O/binary, "\\r", _/binary>> -> |
| tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]); |
| <<_:O/binary, "\\t", _/binary>> -> |
| tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]); |
| <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> -> |
| C = erlang:list_to_integer([C3, C2, C1, C0], 16), |
| if C > 16#D7FF, C < 16#DC00 -> |
| %% coalesce UTF-16 surrogate pair |
| <<"\\u", D3, D2, D1, D0, _/binary>> = Rest, |
| D = erlang:list_to_integer([D3,D2,D1,D0], 16), |
| Acc1 = [unicode:characters_to_binary( |
| <<C:16/big-unsigned-integer, |
| D:16/big-unsigned-integer>>, |
| utf16) |
| | Acc], |
| tokenize_string(B, ?ADV_COL(S, 12), Acc1); |
| true -> |
| Acc1 = [unicode:characters_to_binary([C]) | Acc], |
| tokenize_string(B, ?ADV_COL(S, 6), Acc1) |
| end; |
| <<_:O/binary, C1, _/binary>> when C1 < 128 -> |
| tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); |
| <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, |
| C2 >= 128, C2 =< 191 -> |
| tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); |
| <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, |
| C2 >= 128, C2 =< 191, |
| C3 >= 128, C3 =< 191 -> |
| tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); |
| <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, |
| C2 >= 128, C2 =< 191, |
| C3 >= 128, C3 =< 191, |
| C4 >= 128, C4 =< 191 -> |
| tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); |
| _ -> |
| throw(invalid_utf8) |
| end. |
| |
| tokenize_number(B, S) -> |
| case tokenize_number(B, sign, S, []) of |
| {{int, Int}, S1} -> |
| {{const, list_to_integer(Int)}, S1}; |
| {{float, Float}, S1} -> |
| {{const, list_to_float(Float)}, S1} |
| end. |
| |
| tokenize_number(B, sign, S=#decoder{offset=O}, []) -> |
| case B of |
| <<_:O/binary, $-, _/binary>> -> |
| tokenize_number(B, int, ?INC_COL(S), [$-]); |
| _ -> |
| tokenize_number(B, int, S, []) |
| end; |
| tokenize_number(B, int, S=#decoder{offset=O}, Acc) -> |
| case B of |
| <<_:O/binary, $0, _/binary>> -> |
| tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]); |
| <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 -> |
| tokenize_number(B, int1, ?INC_COL(S), [C | Acc]) |
| end; |
| tokenize_number(B, int1, S=#decoder{offset=O}, Acc) -> |
| case B of |
| <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> |
| tokenize_number(B, int1, ?INC_COL(S), [C | Acc]); |
| _ -> |
| tokenize_number(B, frac, S, Acc) |
| end; |
| tokenize_number(B, frac, S=#decoder{offset=O}, Acc) -> |
| case B of |
| <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 -> |
| tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); |
| <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> |
| tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]); |
| _ -> |
| {{int, lists:reverse(Acc)}, S} |
| end; |
| tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) -> |
| case B of |
| <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> |
| tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]); |
| <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> |
| tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]); |
| _ -> |
| {{float, lists:reverse(Acc)}, S} |
| end; |
| tokenize_number(B, esign, S=#decoder{offset=O}, Acc) -> |
| case B of |
| <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ -> |
| tokenize_number(B, eint, ?INC_COL(S), [C | Acc]); |
| _ -> |
| tokenize_number(B, eint, S, Acc) |
| end; |
| tokenize_number(B, eint, S=#decoder{offset=O}, Acc) -> |
| case B of |
| <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> |
| tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]) |
| end; |
| tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) -> |
| case B of |
| <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> |
| tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]); |
| _ -> |
| {{float, lists:reverse(Acc)}, S} |
| end. |
| |
| tokenize(B, S=#decoder{offset=O}) -> |
| case B of |
| <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> |
| tokenize(B, ?INC_CHAR(S, C)); |
| <<_:O/binary, "{", _/binary>> -> |
| {start_object, ?INC_COL(S)}; |
| <<_:O/binary, "}", _/binary>> -> |
| {end_object, ?INC_COL(S)}; |
| <<_:O/binary, "[", _/binary>> -> |
| {start_array, ?INC_COL(S)}; |
| <<_:O/binary, "]", _/binary>> -> |
| {end_array, ?INC_COL(S)}; |
| <<_:O/binary, ",", _/binary>> -> |
| {comma, ?INC_COL(S)}; |
| <<_:O/binary, ":", _/binary>> -> |
| {colon, ?INC_COL(S)}; |
| <<_:O/binary, "null", _/binary>> -> |
| {{const, null}, ?ADV_COL(S, 4)}; |
| <<_:O/binary, "true", _/binary>> -> |
| {{const, true}, ?ADV_COL(S, 4)}; |
| <<_:O/binary, "false", _/binary>> -> |
| {{const, false}, ?ADV_COL(S, 5)}; |
| <<_:O/binary, "\"", _/binary>> -> |
| tokenize_string(B, ?INC_COL(S)); |
| <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9) |
| orelse C =:= $- -> |
| tokenize_number(B, S); |
| <<_:O/binary>> -> |
| trim = S#decoder.state, |
| {eof, S} |
| end. |
| %% |
| %% Tests |
| %% |
| -ifdef(TEST). |
| -include_lib("eunit/include/eunit.hrl"). |
| |
| |
| %% testing constructs borrowed from the Yaws JSON implementation. |
| |
| %% Create an object from a list of Key/Value pairs. |
| |
| obj_new() -> |
| {struct, []}. |
| |
| is_obj({struct, Props}) -> |
| F = fun ({K, _}) when is_binary(K) -> true end, |
| lists:all(F, Props). |
| |
| obj_from_list(Props) -> |
| Obj = {struct, Props}, |
| ?assert(is_obj(Obj)), |
| Obj. |
| |
| %% Test for equivalence of Erlang terms. |
| %% Due to arbitrary order of construction, equivalent objects might |
| %% compare unequal as Erlang terms, so we need to carefully recurse
| %% through aggregates (tuples and objects). |
| |
| equiv({struct, Props1}, {struct, Props2}) -> |
| equiv_object(Props1, Props2); |
| equiv(L1, L2) when is_list(L1), is_list(L2) -> |
| equiv_list(L1, L2); |
| equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; |
| equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; |
| equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true. |
| |
| %% Object representation and traversal order are unknown.
| %% Use the sledgehammer and sort property lists. |
| |
| equiv_object(Props1, Props2) -> |
| L1 = lists:keysort(1, Props1), |
| L2 = lists:keysort(1, Props2), |
| Pairs = lists:zip(L1, L2), |
| true = lists:all(fun({{K1, V1}, {K2, V2}}) -> |
| equiv(K1, K2) and equiv(V1, V2) |
| end, Pairs). |
| |
| %% Recursively compare list elements for equivalence.
| |
| equiv_list([], []) -> |
| true; |
| equiv_list([V1 | L1], [V2 | L2]) -> |
| equiv(V1, V2) andalso equiv_list(L1, L2). |
| |
| decode_test() -> |
| [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), |
| <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). |
| |
| e2j_vec_test() -> |
| test_one(e2j_test_vec(utf8), 1). |
| |
| test_one([], _N) -> |
| %% io:format("~p tests passed~n", [N-1]), |
| ok; |
| test_one([{E, J} | Rest], N) -> |
| %% io:format("[~p] ~p ~p~n", [N, E, J]), |
| true = equiv(E, decode(J)), |
| true = equiv(E, decode(encode(E))), |
| test_one(Rest, 1+N). |
| |
| e2j_test_vec(utf8) -> |
| [ |
| {1, "1"}, |
| {3.1416, "3.14160"}, %% text representation may truncate or have trailing zeroes
| {-1, "-1"}, |
| {-3.1416, "-3.14160"}, |
| {12.0e10, "1.20000e+11"}, |
| {1.234E+10, "1.23400e+10"}, |
| {-1.234E-10, "-1.23400e-10"}, |
| {10.0, "1.0e+01"}, |
| {123.456, "1.23456E+2"}, |
| {10.0, "1e1"}, |
| {<<"foo">>, "\"foo\""}, |
| {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""}, |
| {<<"">>, "\"\""}, |
| {<<"\n\n\n">>, "\"\\n\\n\\n\""}, |
| {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, |
| {obj_new(), "{}"}, |
| {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"}, |
| {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]), |
| "{\"foo\":\"bar\",\"baz\":123}"}, |
| {[], "[]"}, |
| {[[]], "[[]]"}, |
| {[1, <<"foo">>], "[1,\"foo\"]"}, |
| |
| %% json array in a json object |
| {obj_from_list([{<<"foo">>, [123]}]), |
| "{\"foo\":[123]}"}, |
| |
| %% json object in a json object |
| {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]), |
| "{\"foo\":{\"bar\":true}}"}, |
| |
| %% fold evaluation order |
| {obj_from_list([{<<"foo">>, []}, |
| {<<"bar">>, obj_from_list([{<<"baz">>, true}])}, |
| {<<"alice">>, <<"bob">>}]), |
| "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, |
| |
| %% json object in a json array |
| {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], |
| "[-123,\"foo\",{\"bar\":[]},null]"} |
| ]. |
| |
| %% test utf8 encoding |
| encoder_utf8_test() -> |
| %% safe conversion case (default) |
| <<"\"\\u0001\\u0442\\u0435\\u0441\\u0442\"">> = |
| iolist_to_binary(encode(<<1,"\321\202\320\265\321\201\321\202">>)), |
| |
| %% raw utf8 output (optional) |
| Enc = mochijson2:encoder([{utf8, true}]), |
| <<34,"\\u0001",209,130,208,181,209,129,209,130,34>> = |
| iolist_to_binary(Enc(<<1,"\321\202\320\265\321\201\321\202">>)). |
| |
| input_validation_test() -> |
| Good = [ |
| {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound |
| {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro |
| {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius |
| ], |
| lists:foreach(fun({CodePoint, UTF8}) -> |
| Expect = unicode:characters_to_binary([CodePoint]), |
| Expect = decode(UTF8) |
| end, Good), |
| |
| Bad = [ |
| %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte |
| <<?Q, 16#80, ?Q>>, |
| %% missing continuations, last byte in each should be 80-BF |
| <<?Q, 16#C2, 16#7F, ?Q>>, |
| <<?Q, 16#E0, 16#80,16#7F, ?Q>>, |
| <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, |
| %% we don't support code points > 10FFFF per RFC 3629 |
| <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>, |
| %% escape characters trigger a different code path |
| <<?Q, $\\, $\n, 16#80, ?Q>> |
| ], |
| lists:foreach( |
| fun(X) -> |
| ok = try decode(X) catch invalid_utf8 -> ok end, |
| %% could be {ucs,{bad_utf8_character_code}} or |
| %% {json_encode,{bad_char,_}} |
| {'EXIT', _} = (catch encode(X)) |
| end, Bad). |
| |
| inline_json_test() -> |
| ?assertEqual(<<"\"iodata iodata\"">>, |
| iolist_to_binary( |
| encode({json, [<<"\"iodata">>, " iodata\""]}))), |
| ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, |
| decode( |
| encode({struct, |
| [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), |
| ok. |
| |
| big_unicode_test() -> |
| UTF8Seq = unicode:characters_to_binary([16#0001d120]), |
| ?assertEqual( |
| <<"\"\\ud834\\udd20\"">>, |
| iolist_to_binary(encode(UTF8Seq))), |
| ?assertEqual( |
| UTF8Seq, |
| decode(iolist_to_binary(encode(UTF8Seq)))), |
| ok. |
| |
| custom_decoder_test() -> |
| ?assertEqual( |
| {struct, [{<<"key">>, <<"value">>}]}, |
| (decoder([]))("{\"key\": \"value\"}")), |
| F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, |
| ?assertEqual( |
| win, |
| (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), |
| ok. |
| |
| atom_test() -> |
| %% JSON native atoms |
| [begin |
| ?assertEqual(A, decode(atom_to_list(A))), |
| ?assertEqual(iolist_to_binary(atom_to_list(A)), |
| iolist_to_binary(encode(A))) |
| end || A <- [true, false, null]], |
| %% Atom to string |
| ?assertEqual( |
| <<"\"foo\"">>, |
| iolist_to_binary(encode(foo))), |
| ?assertEqual( |
| <<"\"\\ud834\\udd20\"">>, |
| iolist_to_binary( |
| encode( |
| binary_to_atom( |
| unicode:characters_to_binary([16#0001d120]), latin1)))), |
| ok. |
| |
| key_encode_test() -> |
| %% Some forms are accepted as keys that would not be strings in other |
| %% cases |
| ?assertEqual( |
| <<"{\"foo\":1}">>, |
| iolist_to_binary(encode({struct, [{foo, 1}]}))), |
| ?assertEqual( |
| <<"{\"foo\":1}">>, |
| iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), |
| ?assertEqual( |
| <<"{\"foo\":1}">>, |
| iolist_to_binary(encode({struct, [{"foo", 1}]}))), |
| ?assertEqual( |
| <<"{\"foo\":1}">>, |
| iolist_to_binary(encode([{foo, 1}]))), |
| ?assertEqual( |
| <<"{\"foo\":1}">>, |
| iolist_to_binary(encode([{<<"foo">>, 1}]))), |
| ?assertEqual( |
| <<"{\"foo\":1}">>, |
| iolist_to_binary(encode([{"foo", 1}]))), |
| ?assertEqual( |
| <<"{\"\\ud834\\udd20\":1}">>, |
| iolist_to_binary( |
| encode({struct, [{[16#0001d120], 1}]}))), |
| ?assertEqual( |
| <<"{\"1\":1}">>, |
| iolist_to_binary(encode({struct, [{1, 1}]}))), |
| ok. |
| |
| unsafe_chars_test() -> |
| Chars = "\"\\\b\f\n\r\t", |
| [begin |
| ?assertEqual(false, json_string_is_safe([C])), |
| ?assertEqual(false, json_bin_is_safe(<<C>>)), |
| ?assertEqual(<<C>>, decode(encode(<<C>>))) |
| end || C <- Chars], |
| ?assertEqual( |
| false, |
| json_string_is_safe([16#0001d120])), |
| ?assertEqual( |
| false, |
| json_bin_is_safe(unicode:characters_to_binary([16#0001d120]))), |
| ?assertEqual( |
| [16#0001d120], |
| unicode:characters_to_list( |
| decode( |
| encode( |
| binary_to_atom( |
| unicode:characters_to_binary([16#0001d120]), |
| latin1))))), |
| ?assertEqual( |
| false, |
| json_string_is_safe([16#10ffff])), |
| ?assertEqual( |
| false, |
| json_bin_is_safe(unicode:characters_to_binary([16#10ffff]))), |
| %% solidus can be escaped but isn't unsafe by default |
| ?assertEqual( |
| <<"/">>, |
| decode(<<"\"\\/\"">>)), |
| ok. |
| |
| int_test() -> |
| ?assertEqual(0, decode("0")), |
| ?assertEqual(1, decode("1")), |
| ?assertEqual(11, decode("11")), |
| ok. |
| |
| large_int_test() -> |
| ?assertEqual(<<"-2147483649214748364921474836492147483649">>, |
| iolist_to_binary(encode(-2147483649214748364921474836492147483649))), |
| ?assertEqual(<<"2147483649214748364921474836492147483649">>, |
| iolist_to_binary(encode(2147483649214748364921474836492147483649))), |
| ok. |
| |
| float_test() -> |
| ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))), |
| ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))), |
| ok. |
| |
| handler_test() -> |
| ?assertEqual( |
| {'EXIT',{json_encode,{bad_term,{x,y}}}}, |
| catch encode({x,y})), |
| F = fun ({x,y}) -> [] end, |
| ?assertEqual( |
| <<"[]">>, |
| iolist_to_binary((encoder([{handler, F}]))({x, y}))), |
| ok. |
| |
| encode_empty_test_() -> |
| [{A, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(B)))} |
| || {A, B} <- [{"eep18 {}", {}}, |
| {"eep18 {[]}", {[]}}, |
| {"{struct, []}", {struct, []}}]]. |
| |
| encode_test_() -> |
| P = [{<<"k">>, <<"v">>}], |
| JSON = iolist_to_binary(encode({struct, P})), |
| [{atom_to_list(F), |
| ?_assertEqual(JSON, iolist_to_binary(encode(decode(JSON, [{format, F}]))))} |
| || F <- [struct, eep18, proplist]]. |
| |
| format_test_() -> |
| P = [{<<"k">>, <<"v">>}], |
| JSON = iolist_to_binary(encode({struct, P})), |
| [{atom_to_list(F), |
| ?_assertEqual(A, decode(JSON, [{format, F}]))} |
| || {F, A} <- [{struct, {struct, P}}, |
| {eep18, {P}}, |
| {proplist, P}]]. |
| |
| array_test() -> |
| A = [<<"hello">>], |
| ?assertEqual(A, decode(encode({array, A}))). |
| |
| bad_char_test() -> |
| ?assertEqual( |
| {'EXIT', {json_encode, {bad_char, 16#110000}}}, |
| catch json_string_is_safe([16#110000])). |
| |
| utf8_roundtrip_test_() -> |
| %% These are the boundary cases for UTF8 encoding |
| Codepoints = [%% 7 bits -> 1 byte |
| 16#00, 16#7f, |
| %% 11 bits -> 2 bytes |
| 16#080, 16#07ff, |
| %% 16 bits -> 3 bytes |
| 16#0800, 16#ffff, |
| 16#d7ff, 16#e000, |
| %% 21 bits -> 4 bytes |
| 16#010000, 16#10ffff], |
| UTF8 = unicode:characters_to_binary(Codepoints), |
| Encode = encoder([{utf8, true}]), |
| [{"roundtrip escaped", |
| ?_assertEqual(UTF8, decode(encode(UTF8)))}, |
| {"roundtrip utf8", |
| ?_assertEqual(UTF8, decode(Encode(UTF8)))}]. |
| |
| utf8_non_character_test_() -> |
| S = unicode:characters_to_binary([16#ffff, 16#fffe]), |
| [{"roundtrip escaped", ?_assertEqual(S, decode(encode(S)))}, |
| {"roundtrip utf8", ?_assertEqual(S, decode((encoder([{utf8, true}]))(S)))}]. |
| |
| -ifndef(map_unavailable). |
| |
| decode_map_test() -> |
| Json = "{\"var1\": 3, \"var2\": {\"var3\": 7}}", |
| M = #{<<"var1">> => 3,<<"var2">> => #{<<"var3">> => 7}}, |
| ?assertEqual(M, decode(Json, [{format, map}])). |
| |
| encode_map_test() -> |
| M = <<"{\"a\":1,\"b\":{\"c\":2}}">>, |
| ?assertEqual(M, iolist_to_binary(encode(#{a => 1, b => #{ c => 2}}))). |
| |
| encode_empty_map_test() -> |
| ?assertEqual(<<"{}">>, encode(#{})). |
| |
| -endif. |
| |
| -endif. |