| % This file is part of Jiffy released under the MIT license. |
| % See the LICENSE file for more information. |
| |
| -module(jiffy_utf8). |
| -export([fix/1]). |
| |
| |
| fix({Props}) -> |
| fix_props(Props, []); |
| fix(Values) when is_list(Values) -> |
| fix_array(Values, []); |
| fix(Bin) when is_binary(Bin) -> |
| fix_bin(Bin); |
| fix(Val) -> |
| maybe_map(Val). |
| |
| -ifndef(JIFFY_NO_MAPS). |
| maybe_map(Obj) when is_map(Obj) -> |
| maps:fold(fun fix_map/3, maps:new(), Obj); |
| maybe_map(Val) -> |
| Val. |
| |
| fix_map(K, V, Acc) -> |
| maps:put(fix(K), fix(V), Acc). |
| -else. |
| maybe_map(Val) -> |
| Val. |
| -endif. |
| |
| fix_props([], Acc) -> |
| {lists:reverse(Acc)}; |
| fix_props([{K0, V0} | Rest], Acc) -> |
| K = fix(K0), |
| V = fix(V0), |
| fix_props(Rest, [{K, V} | Acc]). |
| |
| |
| fix_array([], Acc) -> |
| lists:reverse(Acc); |
| fix_array([Val | Rest], Acc0) -> |
| Acc = [fix(Val) | Acc0], |
| fix_array(Rest, Acc). |
| |
| |
| fix_bin(Bin) -> |
| Dec0 = loose_decode(Bin, 0, []), |
| Dec1 = try_combining(Dec0, []), |
| Dec2 = replace_garbage(Dec1, []), |
| list_to_binary(xmerl_ucs:to_utf8(Dec2)). |
| |
| |
| loose_decode(Bin, O, Acc) -> |
| case Bin of |
| <<_:O/binary>> -> |
| lists:reverse(Acc); |
| <<_:O/binary, 0:1/integer, V:7/integer, _/binary>> -> |
| loose_decode(Bin, O+1, [V | Acc]); |
| <<_:O/binary, 6:3/integer, V0:5/integer, |
| 2:2/integer, V1:6/integer, _/binary>> -> |
| B = <<0:5/integer, V0:5/integer, V1:6/integer>>, |
| <<V:16/integer>> = B, |
| loose_decode(Bin, O+2, [V | Acc]); |
| <<_:O/binary, 14:4/integer, V0:4/integer, |
| 2:2/integer, V1:6/integer, |
| 2:2/integer, V2:6/integer, _/binary>> -> |
| B = <<V0:4/integer, V1:6/integer, V2:6/integer>>, |
| <<V:16/integer>> = B, |
| loose_decode(Bin, O+3, [V | Acc]); |
| <<_:O/binary, 30:5/integer, V0:3/integer, |
| 2:2/integer, V1:6/integer, |
| 2:2/integer, V2:6/integer, |
| 2:2/integer, V3:6/integer, _/binary>> -> |
| B = <<0:11/integer, V0:3/integer, V1:6/integer, |
| V2:6/integer, V3:6/integer>>, |
| <<V:32/integer>> = B, |
| loose_decode(Bin, O+4, [V | Acc]); |
| <<_:O/binary, _:8/integer, R/binary>> -> |
| % Broken lead or continuation byte. Discard first |
| % byte and all broken continuations. Replace the |
| % whole mess with a replacment code point. |
| T = 1 + count_continuation_bytes(R, 0), |
| loose_decode(Bin, O+T, [16#FFFD | Acc]) |
| end. |
| |
| |
| count_continuation_bytes(R, O) -> |
| case R of |
| <<_:O/binary, 2:2/integer, _:6/integer, _/binary>> -> |
| count_continuation_bytes(R, O+1); |
| _ -> |
| O |
| end. |
| |
| |
| try_combining([], Acc) -> |
| lists:reverse(Acc); |
| try_combining([H, L | Rest], Acc) when H >= 16#D800, H =< 16#DFFF, |
| L >= 16#D800, L =< 16#DFFF -> |
| Bin = <<H:16/big-unsigned-integer, L:16/big-unsigned-integer>>, |
| try |
| [C] = xmerl_ucs:from_utf16be(Bin), |
| try_combining(Rest, [C | Acc]) |
| catch _:_ -> |
| try_combining(Rest, [L, H | Acc]) |
| end; |
| try_combining([C | Rest], Acc) -> |
| try_combining(Rest, [C | Acc]). |
| |
| |
| replace_garbage([], Acc) -> |
| lists:reverse(Acc); |
| replace_garbage([C | Rest], Acc) -> |
| case xmerl_ucs:is_unicode(C) of |
| true -> replace_garbage(Rest, [C | Acc]); |
| false -> replace_garbage(Rest, [16#FFFD | Acc]) |
| end. |