| % Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| % use this file except in compliance with the License. You may obtain a copy of |
| % the License at |
| % |
| % http://www.apache.org/licenses/LICENSE-2.0 |
| % |
| % Unless required by applicable law or agreed to in writing, software |
| % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| % License for the specific language governing permissions and limitations under |
| % the License. |
| |
| -module(json_stream_parse). |
| |
| |
| -export([events/2, to_ejson/1, collect_object/2]). |
| |
% Single-byte character classes used by the tokenizer. They are built from
% andalso/orelse so that they are usable both in guards and as ordinary
% boolean expressions.
-define(IS_WS(X), (X == $\s orelse X == $\t orelse X == $\n orelse X == $\r)).
-define(IS_DELIM(X), (X == $, orelse X == $] orelse X == $})).
-define(IS_DIGIT(X), ($0 =< X andalso X =< $9)).
| |
| |
| |
| % Parses the json into events. |
| % |
| % The DataFun param is a function that produces the data for parsing. When |
| % called it must yield a tuple, or the atom done. The first element in the |
| % tuple is the data itself, and the second element is a function to be called |
| % next to get the next chunk of data in the stream. |
| % |
% The EventFun is called every time a json element is parsed. It must produce
| % a new function to be called for the next event. |
| % |
| % Events happen each time a new element in the json string is parsed. |
| % For simple value types, the data itself is returned: |
| % Strings |
| % Integers |
| % Floats |
| % true |
| % false |
| % null |
| % |
| % For arrays, the start of the array is signaled by the event array_start |
| % atom. The end is signaled by array_end. The events before the end are the |
| % values, or nested values. |
| % |
| % For objects, the start of the object is signaled by the event object_start |
| % atom. The end is signaled by object_end. Each key is signaled by |
| % {key, KeyString}, and the following event is the value, or start of the |
| % value (array_start, object_start). |
| % |
% Accepts a string, a binary or a data-producing fun. Strings and binaries
% are normalised into a data fun before a single JSON value is parsed.
events(Chars, EventFun) when is_list(Chars) ->
    events(list_to_binary(Chars), EventFun);
events(Bin, EventFun) when is_binary(Bin) ->
    % One-shot producer: yields the whole binary once, then reports done.
    Finished = fun() -> done end,
    OneShot = fun() -> {Bin, Finished} end,
    events(OneShot, EventFun);
events(DataFun, EventFun) ->
    % Start with an empty buffer so the first chunk is pulled from DataFun.
    parse_one(DataFun, EventFun, <<>>).
| |
% Converts the JSON directly to the Erlang representation of JSON.
to_ejson(DF) ->
    % Record every event (newest first) while the input is parsed.
    Collector = fun(Ev) -> collect_events(Ev, []) end,
    {_DF2, EF, _Rest} = events(DF, Collector),
    % Asking the final event fun for get_results hands back the recorded
    % events, which make_ejson/2 folds into a single term.
    RevEvents = EF(get_results),
    [[EJson]] = make_ejson(RevEvents, [[]]),
    EJson.
| |
| |
| % This function is used to return complete objects while parsing streams. |
| % |
| % Return this function from inside an event function right after getting an |
| % object_start event. It then collects the remaining events for that object |
% and converts it to the erlang representation of Json.
| % |
| % It then calls your ReturnControl function with the erlang object. Your |
| % return control function then should yield another event function. |
| % |
| % This example stream parses an array of objects, calling |
| % fun do_something_with_the_object/1 for each object. |
| % |
%    ev_array(array_start) ->
%        fun(Ev) -> ev_object_loop(Ev) end.
%
%    ev_object_loop(object_start) ->
%        fun(Ev) ->
%            json_stream_parse:collect_object(Ev,
%                fun(Obj) ->
%                    do_something_with_the_object(Obj),
%                    fun(Ev2) -> ev_object_loop(Ev2) end
%                end)
%        end;
%    ev_object_loop(array_end) ->
%        ok.
%
%    % invoke the parse
%    main() ->
%        ...
%        events(Data, fun(Ev) -> ev_array(Ev) end).
| |
collect_object(Ev, ReturnControl) ->
    % The event log is kept newest-first and is seeded with object_start;
    % collection begins at nesting depth zero.
    InitialDepth = 0,
    SeenSoFar = [object_start],
    collect_object(Ev, InitialDepth, ReturnControl, SeenSoFar).
| |
| |
| |
| % internal methods |
| |
% Parses exactly one JSON value from the stream.
%
% Returns none when the input is exhausted before any token is found,
% otherwise {DataFun, EventFun, RemainingBytes} after the value's events
% have been delivered. Throws {parse_error, unexpected_token} when the next
% token cannot start a value.
parse_one(DF, EF, Acc) ->
    case toke(DF, Acc) of
        none ->
            none;
        {"{", DF2, Rest} ->
            {DF3, EF2, Rest2} = parse_object(DF2, EF(object_start), Rest),
            {DF3, EF2(object_end), Rest2};
        {"[", DF2, Rest} ->
            {DF3, EF2, Rest2} = parse_array(DF2, EF(array_start), Rest),
            {DF3, EF2(array_end), Rest2};
        {Scalar, DF2, Rest} when is_integer(Scalar);
                                 is_float(Scalar);
                                 is_atom(Scalar);
                                 is_binary(Scalar) ->
            % Numbers, true/false/null and strings are reported as-is.
            {DF2, EF(Scalar), Rest};
        {_OtherToken, _, _} ->
            err(unexpected_token)
    end.
| |
% Like parse_one/3, but running out of input is an error: throws
% {parse_error, Error} instead of returning none.
must_parse_one(DF, EF, Acc, Error) ->
    Outcome = parse_one(DF, EF, Acc),
    if
        Outcome =:= none -> err(Error);
        true -> Outcome
    end.
| |
% Like toke/2, but running out of input is an error: throws
% {parse_error, Error} instead of returning none.
must_toke(DF, Data, Error) ->
    Token = toke(DF, Data),
    if
        Token =:= none -> err(Error);
        true -> Token
    end.
| |
% Reads the next token from the buffer, pulling more data from DF as needed.
%
% Returns none when the input ends before a token starts, otherwise
% {Token, DF, Rest} where Token is:
%   - a one-character string for punctuation ("{", "}", "[", "]", ",", ":"),
%   - a binary for a JSON string,
%   - an integer or float for a number,
%   - the atom true, false or null.
% Throws {parse_error, Reason} on malformed input.
toke(DF, <<>>) ->
    % Buffer exhausted: ask the producer for the next chunk.
    case DF() of
        {Data, DF2} -> toke(DF2, Data);
        done -> none
    end;
toke(DF, <<Ws, Rest/binary>>) when ?IS_WS(Ws) ->
    toke(DF, Rest);
toke(DF, <<$", Rest/binary>>) ->
    toke_string(DF, Rest, []);
toke(DF, <<Punct, Rest/binary>>)
        when Punct == ${ orelse Punct == $} orelse
             Punct == $[ orelse Punct == $] orelse
             Punct == $, orelse Punct == $: ->
    % Punctuation is reported as a one-character string.
    {[Punct], DF, Rest};
toke(DF, <<$-, Rest/binary>>) ->
    % A minus sign must be followed directly by a digit.
    {<<Next, _/binary>> = Data, DF2} = must_df(DF, 1, Rest, expected_number),
    if
        ?IS_DIGIT(Next) -> toke_number_leading(DF2, Data, "-");
        true -> err(expected_number)
    end;
toke(DF, <<Digit, _/binary>> = Data) when ?IS_DIGIT(Digit) ->
    toke_number_leading(DF, Data, []);
toke(DF, <<$t, Rest/binary>>) ->
    toke_keyword(true, <<"rue">>, DF, Rest);
toke(DF, <<$f, Rest/binary>>) ->
    toke_keyword(false, <<"alse">>, DF, Rest);
toke(DF, <<$n, Rest/binary>>) ->
    toke_keyword(null, <<"ull">>, DF, Rest);
toke(_, _) ->
    err(bad_token).

% Finishes a keyword token whose first letter has already been consumed:
% the remaining letters (Tail) must follow immediately in the stream.
toke_keyword(Atom, Tail, DF, Rest) ->
    {Remaining, DF2} = must_match(Tail, DF, Rest),
    {Atom, DF2, Remaining}.
| |
| |
% Requires the stream to continue with exactly the bytes in Pattern.
% Returns {BytesAfterPattern, DF}; throws {parse_error, bad_token} when the
% input ends early or the bytes differ.
must_match(Pattern, DF, Data) ->
    Len = byte_size(Pattern),
    % Buffer at least Len bytes before comparing.
    {Buffered, DF2} = must_df(DF, Len, Data, bad_token),
    case Buffered of
        <<Pattern:Len/binary, Tail/binary>> -> {Tail, DF2};
        _ -> err(bad_token)
    end.
| |
% Pulls the next chunk from the producer. Returns {Data, NextDF}; throws
% {parse_error, Error} when the producer reports done.
must_df(DF, Error) ->
    case DF() of
        {_Data, _DF2} = Chunk -> Chunk;
        done -> err(Error)
    end.
| |
| |
% Grows the buffer Acc until it holds at least NeedLen bytes, pulling chunks
% from the producer as required. Returns {Buffer, DF}; throws
% {parse_error, Error} when the producer is done before enough bytes arrive.
must_df(DF, NeedLen, Acc, _Error) when byte_size(Acc) >= NeedLen ->
    {Acc, DF};
must_df(DF, NeedLen, Acc, Error) ->
    case DF() of
        {Data, DF2} ->
            must_df(DF2, NeedLen, <<Acc/binary, Data/binary>>, Error);
        done ->
            err(Error)
    end.
| |
| |
% Parses the members of an object whose opening brace has already been
% consumed, emitting {key, Name} followed by the value's events for each
% member. Returns {DF, EF, Rest} positioned just after the closing brace.
% The object_end event is left to the caller.
parse_object(DF, EF, Acc) ->
    case must_toke(DF, Acc, unterminated_object) of
        {"}", DF2, Rest} ->
            {DF2, EF, Rest};
        {Key, DF2, Rest} when is_binary(Key) ->
            parse_object_member(DF2, EF({key, Key}), Rest);
        {_, _, _} ->
            err(unexpected_token)
    end.

% After a key: expects a colon, then a value, then a separator.
parse_object_member(DF, EF, Acc) ->
    case must_toke(DF, Acc, unterminated_object) of
        {":", DF2, Rest} ->
            {DF3, EF2, Rest2} = must_parse_one(DF2, EF, Rest, expected_value),
            parse_object_separator(DF3, EF2, Rest2);
        _Else ->
            err(expected_colon)
    end.

% After a member's value: a comma continues the object, a closing brace
% ends it.
parse_object_separator(DF, EF, Acc) ->
    case must_toke(DF, Acc, unterminated_object) of
        {",", DF2, Rest} -> parse_object(DF2, EF, Rest);
        {"}", DF2, Rest} -> {DF2, EF, Rest};
        {_, _, _} -> err(unexpected_token)
    end.
| |
% Called after an array element: a comma continues with the next element,
% a closing bracket ends the array. Returns {DF, EF, Rest} once the array is
% closed.
parse_array0(DF, EF, Acc) ->
    case toke(DF, Acc) of
        {"]", DF2, Rest} -> {DF2, EF, Rest};
        {",", DF2, Rest} -> parse_array(DF2, EF, Rest);
        none -> err(unterminated_array);
        _ -> err(unexpected_token)
    end.
| |
% Parses the elements of an array whose opening bracket has already been
% consumed, emitting the events for each element. Returns {DF, EF, Rest}
% positioned just after the closing bracket. The array_end event is left to
% the caller.
parse_array(DF, EF, Acc) ->
    case toke(DF, Acc) of
        none ->
            err(unterminated_array);
        {"]", DF2, Rest} ->
            {DF2, EF, Rest};
        {"{", DF2, Rest} ->
            {DF3, EF2, Rest2} = parse_object(DF2, EF(object_start), Rest),
            parse_array0(DF3, EF2(object_end), Rest2);
        {"[", DF2, Rest} ->
            {DF3, EF2, Rest2} = parse_array(DF2, EF(array_start), Rest),
            parse_array0(DF3, EF2(array_end), Rest2);
        {Scalar, DF2, Rest} when is_integer(Scalar);
                                 is_float(Scalar);
                                 is_atom(Scalar);
                                 is_binary(Scalar) ->
            % Numbers, true/false/null and strings are reported as-is.
            parse_array0(DF2, EF(Scalar), Rest);
        {_, _, _} ->
            err(unexpected_token)
    end.
| |
| |
% Scans the body of a JSON string; the opening quote has already been
% consumed. Acc holds the decoded bytes so far, newest first.
%
% Returns {Binary, DF, Rest} where Binary is the decoded string and Rest is
% the input following the closing quote.
%
% Throws {parse_error, Reason} with Reason one of:
%   unterminated_string - input ended inside the string
%   missing_hex         - input ended inside a \uXXXX escape
%   invalid_hex         - a \uXXXX escape contains a non-hex character
%   invalid_utf_char    - \uFFFE, \uFFFF, or an unpaired UTF-16 surrogate
%   bad_escape          - backslash followed by an unsupported character
toke_string(DF, <<>>, Acc) ->
    {Data, DF2} = must_df(DF, unterminated_string),
    toke_string(DF2, Data, Acc);
toke_string(DF, <<$\\, $", Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [$" | Acc]);
toke_string(DF, <<$\\, $\\, Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [$\\ | Acc]);
toke_string(DF, <<$\\, $/, Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [$/ | Acc]);
toke_string(DF, <<$\\, $b, Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [$\b | Acc]);
toke_string(DF, <<$\\, $f, Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [$\f | Acc]);
toke_string(DF, <<$\\, $n, Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [$\n | Acc]);
toke_string(DF, <<$\\, $r, Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [$\r | Acc]);
toke_string(DF, <<$\\, $t, Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [$\t | Acc]);
toke_string(DF, <<$\\, $u, Rest/binary>>, Acc) ->
    % The four hex digits may straddle a chunk boundary, so buffer them first.
    {<<A, B, C, D, Data/binary>>, DF2} = must_df(DF, 4, Rest, missing_hex),
    Unit = toke_string_hex4(A, B, C, D),
    {CodePoint, DF3, Data2} = toke_string_code_point(Unit, DF2, Data),
    Utf8 = binary_to_list(<<CodePoint/utf8>>),
    toke_string(DF3, Data2, lists:reverse(Utf8) ++ Acc);
toke_string(DF, <<$\\>>, Acc) ->
    % The chunk ended right after a backslash: fetch more data and retry with
    % the backslash put back in front of it.
    {Data, DF2} = must_df(DF, unterminated_string),
    toke_string(DF2, <<$\\, Data/binary>>, Acc);
toke_string(_DF, <<$\\, _/binary>>, _Acc) ->
    err(bad_escape);
toke_string(DF, <<$", Rest/binary>>, Acc) ->
    {list_to_binary(lists:reverse(Acc)), DF, Rest};
toke_string(DF, <<C, Rest/binary>>, Acc) ->
    toke_string(DF, Rest, [C | Acc]).

% Turns the 16-bit value of a \uXXXX escape into a Unicode code point.
% A high surrogate must be followed immediately by a \uXXXX low surrogate;
% the pair is combined into one supplementary-plane code point. Returns
% {CodePoint, DF, Rest}.
toke_string_code_point(Unit, _DF, _Data)
        when Unit == 16#FFFF orelse Unit == 16#FFFE ->
    err(invalid_utf_char);
toke_string_code_point(High, DF, Data)
        when High >= 16#D800, High =< 16#DBFF ->
    case must_df(DF, 6, Data, invalid_utf_char) of
        {<<$\\, $u, A, B, C, D, Rest/binary>>, DF2} ->
            case toke_string_hex4(A, B, C, D) of
                Low when Low >= 16#DC00, Low =< 16#DFFF ->
                    CodePoint = 16#10000
                        + ((High - 16#D800) bsl 10)
                        + (Low - 16#DC00),
                    {CodePoint, DF2, Rest};
                _NotLowSurrogate ->
                    err(invalid_utf_char)
            end;
        {_, _} ->
            err(invalid_utf_char)
    end;
toke_string_code_point(Low, _DF, _Data)
        when Low >= 16#DC00, Low =< 16#DFFF ->
    % A low surrogate with no preceding high surrogate.
    err(invalid_utf_char);
toke_string_code_point(Unit, DF, Data) ->
    {Unit, DF, Data}.

% Decodes exactly four hex digits. Unlike list_to_integer/2 this rejects a
% leading sign and reports bad input as a parse error.
toke_string_hex4(A, B, C, D) ->
    lists:foldl(
        fun(Digit, Sum) -> (Sum bsl 4) bor toke_string_hex_digit(Digit) end,
        0,
        [A, B, C, D]).

toke_string_hex_digit(C) when C >= $0, C =< $9 -> C - $0;
toke_string_hex_digit(C) when C >= $a, C =< $f -> C - $a + 10;
toke_string_hex_digit(C) when C >= $A, C =< $F -> C - $A + 10;
toke_string_hex_digit(_) -> err(invalid_hex).
| |
| |
% Scans the integer part of a number. Acc holds the characters read so far,
% newest first (it may start with "-").
%
% Returns {Integer, DF, Rest} when the number ends at whitespace, a
% delimiter or end of input; hands over to toke_number_trailing/3 or
% toke_number_exponent/3 when a fraction or exponent follows.
toke_number_leading(DF, <<>>, Acc) ->
    case DF() of
        {Data, DF2} ->
            toke_number_leading(DF2, Data, Acc);
        done ->
            % The producer is finished; hand back a fun that keeps saying so.
            {list_to_integer(lists:reverse(Acc)), fun() -> done end, <<>>}
    end;
toke_number_leading(DF, <<Digit, Rest/binary>>, Acc) when ?IS_DIGIT(Digit) ->
    toke_number_leading(DF, Rest, [Digit | Acc]);
toke_number_leading(DF, <<$., Rest/binary>>, Acc) ->
    toke_number_trailing(DF, Rest, [$. | Acc]);
toke_number_leading(DF, <<Exp, Rest/binary>>, Acc)
        when Exp == $e orelse Exp == $E ->
    % No fraction was given: insert ".0" before the exponent so the text has
    % the shape list_to_float/1 accepts.
    toke_number_exponent(DF, Rest, [$e, $0, $. | Acc]);
toke_number_leading(DF, <<C, _/binary>> = Rest, Acc)
        when ?IS_WS(C) orelse ?IS_DELIM(C) ->
    {list_to_integer(lists:reverse(Acc)), DF, Rest};
toke_number_leading(_, _, _) ->
    err(unexpected_character_in_number).
| |
% Scans the fractional part of a number; the "." has already been pushed on
% Acc (characters read so far, newest first).
%
% Returns {Float, DF, Rest} when the number ends at whitespace, a delimiter
% or end of input, or hands over to toke_number_exponent/3 on "e"/"E".
%
% At least one digit must follow the decimal point. A number that stops
% right after "." (for example "1." or "1.,") throws
% {parse_error, unexpected_character_in_number} instead of letting
% list_to_float/1 fail with badarg.
toke_number_trailing(DF, <<Digit, Rest/binary>>, Acc)
        when ?IS_DIGIT(Digit) ->
    toke_number_trailing(DF, Rest, [Digit | Acc]);
toke_number_trailing(DF, <<C, _/binary>> = Rest, [Last | _] = Acc)
        when Last /= $. andalso (?IS_WS(C) orelse ?IS_DELIM(C)) ->
    {list_to_float(lists:reverse(Acc)), DF, Rest};
toke_number_trailing(DF, <<>>, Acc) ->
    case DF() of
        {Data, DF2} ->
            toke_number_trailing(DF2, Data, Acc);
        done when hd(Acc) /= $. ->
            % The producer is finished; hand back a fun that keeps saying so.
            {list_to_float(lists:reverse(Acc)), fun() -> done end, <<>>};
        done ->
            % Input ended directly after the decimal point.
            err(unexpected_character_in_number)
    end;
toke_number_trailing(DF, <<"e", Rest/binary>>, [C | _] = Acc) when C /= $. ->
    toke_number_exponent(DF, Rest, [$e | Acc]);
toke_number_trailing(DF, <<"E", Rest/binary>>, [C | _] = Acc) when C /= $. ->
    toke_number_exponent(DF, Rest, [$e | Acc]);
toke_number_trailing(_, _, _) ->
    err(unexpected_character_in_number).
| |
| |
% Scans the exponent of a number; the "e" has already been pushed on Acc
% (characters read so far, newest first). One sign is accepted, and only
% directly after the "e".
%
% Returns {Float, DF, Rest} when the number ends at whitespace, a delimiter
% or end of input.
%
% Throws {parse_error, unexpected_character_in_number} when the exponent has
% no digits (for example "1e" or "1e+"), and
% {parse_error, number_out_of_range} when list_to_float/1 rejects the
% finished text (for example "1e999"). Both cases used to escape as a bare
% badarg error.
toke_number_exponent(DF, <<Digit, Rest/binary>>, Acc) when ?IS_DIGIT(Digit) ->
    toke_number_exponent(DF, Rest, [Digit | Acc]);
toke_number_exponent(DF, <<Sign, Rest/binary>>, [$e | _] = Acc)
        when Sign == $+ orelse Sign == $- ->
    toke_number_exponent(DF, Rest, [Sign | Acc]);
toke_number_exponent(DF, <<C, _/binary>> = Rest, Acc)
        when ?IS_WS(C) orelse ?IS_DELIM(C) ->
    {toke_number_exponent_value(Acc), DF, Rest};
toke_number_exponent(DF, <<>>, Acc) ->
    case DF() of
        done ->
            % The producer is finished; hand back a fun that keeps saying so.
            {toke_number_exponent_value(Acc), fun() -> done end, <<>>};
        {Data, DF2} ->
            toke_number_exponent(DF2, Data, Acc)
    end;
toke_number_exponent(_, _, _) ->
    err(unexpected_character_in_number).

% Converts the collected (reversed) characters to a float. The last
% character read must be a digit, which guarantees the exponent is complete.
toke_number_exponent_value([Last | _] = RevChars) when ?IS_DIGIT(Last) ->
    try
        list_to_float(lists:reverse(RevChars))
    catch
        error:badarg -> err(number_out_of_range)
    end;
toke_number_exponent_value(_) ->
    err(unexpected_character_in_number).
| |
| |
% Aborts parsing by throwing {parse_error, Error}.
err(Error) ->
    Reason = {parse_error, Error},
    throw(Reason).
| |
| |
% Folds a list of parse events, given newest-first, into nested Erlang terms.
%
% Stack is a list of frames; each frame is the list of values collected so
% far for one open container. Because the events arrive in reverse, an *_end
% event opens a frame and the matching *_start event closes it, moving the
% finished container into the frame below. Arrays become lists, objects
% become {Members} with Members a list of {Key, Value} pairs.
make_ejson(RevEvents, Stack) ->
    lists:foldl(fun make_ejson_step/2, Stack, RevEvents).

% Applies one event to the stack.
make_ejson_step(array_start, [Items, Parent | Below]) ->
    [[Items | Parent] | Below];
make_ejson_step(array_end, Stack) ->
    [[] | Stack];
make_ejson_step(object_start, [Members, Parent | Below]) ->
    [[{Members} | Parent] | Below];
make_ejson_step(object_end, Stack) ->
    [[] | Stack];
make_ejson_step({key, Name}, [[Value | Members] | Below]) ->
    % The value was seen just before its key, so it sits on top of the frame.
    [[{Name, Value} | Members] | Below];
make_ejson_step(Value, [Frame | Below]) ->
    [[Value | Frame] | Below].
| |
% Event fun that records every event it is given, newest first. Passing
% get_results returns the recorded list instead of another fun.
collect_events(Ev, Acc) when Ev =:= get_results ->
    Acc;
collect_events(Ev, Acc) ->
    Seen = [Ev | Acc],
    fun(NextEv) -> collect_events(NextEv, Seen) end.
| |
| |
% Event fun that gathers the events of one object, newest first in Acc.
% NestCount is the number of nested objects currently open inside it.
%
% When the object's own object_end arrives (NestCount is 0) the events are
% converted with make_ejson/2 and the result is passed to ReturnControl,
% whose return value becomes the next event fun. Every other event is
% recorded and a new collecting fun is returned.
collect_object(object_end, 0, ReturnControl, Acc) ->
    [[Obj]] = make_ejson([object_end | Acc], [[]]),
    ReturnControl(Obj);
collect_object(Ev, NestCount, ReturnControl, Acc) ->
    % Only object boundaries change the nesting depth.
    Delta =
        case Ev of
            object_end -> -1;
            object_start -> 1;
            _ -> 0
        end,
    fun(NextEv) ->
        collect_object(NextEv, NestCount + Delta, ReturnControl, [Ev | Acc])
    end.