blob: 1bffaf8723f66a60c78f1990cd5170553ca250c9 [file] [log] [blame]
-module(neotoma_parse).
-export([parse/1,file/1]).
-define(p_anything,true).
-define(p_charclass,true).
-define(p_choose,true).
-define(p_label,true).
-define(p_not,true).
-define(p_one_or_more,true).
-define(p_optional,true).
-define(p_scan,true).
-define(p_seq,true).
-define(p_string,true).
-define(p_zero_or_more,true).
% insert escapes into a string
-spec escape_string(string()) -> string().
escape_string(String) -> escape_string(String, []).
-spec escape_string(string(), string()) -> string().
escape_string([], Output) ->
lists:reverse(Output);
escape_string([H|T], Output) ->
escape_string(T,
case H of
$/ -> [$/,$\\|Output];
$\" -> [$\",$\\|Output]; % " comment inserted to help some editors with highlighting the generated parser
$\' -> [$\',$\\|Output]; % ' comment inserted to help some editors with highlighting the generated parser
$\b -> [$b,$\\|Output];
$\d -> [$d,$\\|Output];
$\e -> [$e,$\\|Output];
$\f -> [$f,$\\|Output];
$\n -> [$n,$\\|Output];
$\r -> [$r,$\\|Output];
$\s -> [$s,$\\|Output];
$\t -> [$t,$\\|Output];
$\v -> [$v,$\\|Output];
_ -> [H|Output]
end).
-spec add_lhs(binary(), index()) -> true.
add_lhs(Symbol, Index) ->
case ets:lookup(memo_table_name(), lhs) of
[] ->
ets:insert(memo_table_name(), {lhs, [{Symbol,Index}]});
[{lhs, L}] when is_list(L) ->
ets:insert(memo_table_name(), {lhs, [{Symbol,Index}|L]})
end.
-spec add_nt(binary(), index()) -> true | ok.
add_nt(Symbol, Index) ->
case ets:lookup(memo_table_name(), nts) of
[] ->
ets:insert(memo_table_name(), {nts, [{Symbol,Index}]});
[{nts, L}] when is_list(L) ->
case proplists:is_defined(Symbol, L) of
true ->
ok;
_ ->
ets:insert(memo_table_name(), {nts, [{Symbol,Index}|L]})
end
end.
-spec verify_rules() -> ok | no_return().
verify_rules() ->
[{lhs, LHS}] = ets:lookup(memo_table_name(), lhs),
[{nts, NTs}] = ets:lookup(memo_table_name(), nts),
[Root|NonRoots] = lists:reverse(LHS),
lists:foreach(fun({Sym,Idx}) ->
case proplists:is_defined(Sym, NTs) of
true ->
ok;
_ ->
io:format("neotoma warning: rule '~s' is unused. ~p~n", [Sym,Idx])
end
end, NonRoots),
lists:foreach(fun({S,I}) ->
case proplists:is_defined(S, LHS) of
true ->
ok;
_ ->
io:format("neotoma error: nonterminal '~s' has no reduction. (found at ~p) No parser will be generated!~n", [S,I]),
exit({neotoma, {no_reduction, list_to_atom(binary_to_list(S))}})
end
end, NTs),
Root.
-spec used_combinator(atom()) -> true.
used_combinator(C) ->
case ets:lookup(memo_table_name(), combinators) of
[] ->
ets:insert(memo_table_name(), {combinators, ordsets:from_list([C])});
[{combinators, Cs}] ->
ets:insert(memo_table_name(), {combinators, ordsets:add_element(C, Cs)})
end.
-spec used_transform_variables(binary()) -> [ 'Node' | 'Idx' ].
used_transform_variables(Transform) ->
Code = unicode:characters_to_list(Transform),
{ok, Tokens, _} = erl_scan:string(Code),
used_transform_variables(Tokens, []).
used_transform_variables([{var, _, Name}|Tokens], Acc) ->
used_transform_variables(Tokens, case Name of
'Node' -> [Name | Acc];
'Idx' -> [Name | Acc];
_ -> Acc
end);
used_transform_variables([_|Tokens], Acc) ->
used_transform_variables(Tokens, Acc);
used_transform_variables([], Acc) ->
lists:usort(Acc).
-spec file(file:name()) -> any().
file(Filename) -> case file:read_file(Filename) of {ok,Bin} -> parse(Bin); Err -> Err end.
-spec parse(binary() | list()) -> any().
parse(List) when is_list(List) -> parse(unicode:characters_to_binary(List));
parse(Input) when is_binary(Input) ->
_ = setup_memo(),
Result = case 'rules'(Input,{{line,1},{column,1}}) of
{AST, <<>>, _Index} -> AST;
Any -> Any
end,
release_memo(), Result.
-spec 'rules'(input(), index()) -> parse_result().
'rules'(Input, Index) ->
p(Input, Index, 'rules', fun(I,D) -> (p_seq([p_optional(fun 'space'/2), fun 'declaration_sequence'/2, p_optional(fun 'space'/2), p_optional(fun 'code_block'/2), p_optional(fun 'space'/2)]))(I,D) end, fun(Node, _Idx) ->
RootRule = verify_rules(),
Rules = unicode:characters_to_binary(lists:map(fun(R) -> [R, "\n\n"] end, lists:nth(2, Node))),
Code = case lists:nth(4, Node) of
{code, Block} -> Block;
_ -> []
end,
[{rules, Rules},
{code, Code},
{root, RootRule},
{transform, ets:lookup(memo_table_name(),gen_transform)},
{combinators, ets:lookup_element(memo_table_name(), combinators, 2)}]
end).
-spec 'declaration_sequence'(input(), index()) -> parse_result().
'declaration_sequence'(Input, Index) ->
p(Input, Index, 'declaration_sequence', fun(I,D) -> (p_seq([p_label('head', fun 'declaration'/2), p_label('tail', p_zero_or_more(p_seq([fun 'space'/2, fun 'declaration'/2])))]))(I,D) end, fun(Node, _Idx) ->
FirstRule = proplists:get_value(head, Node),
OtherRules = [I || [_,I] <- proplists:get_value(tail, Node, [])],
[FirstRule|OtherRules]
end).
-spec 'declaration'(input(), index()) -> parse_result().
'declaration'(Input, Index) ->
p(Input, Index, 'declaration', fun(I,D) -> (p_seq([fun 'nonterminal'/2, p_zero_or_more(fun 'space'/2), p_string(<<"<-">>), p_zero_or_more(fun 'space'/2), fun 'parsing_expression'/2, p_optional(fun 'space'/2), p_optional(fun 'code_block'/2), p_optional(fun 'space'/2), p_string(<<";">>)]))(I,D) end, fun(Node, _Idx) ->
[{nonterminal,Symbol}|Tail] = Node,
add_lhs(Symbol, Index),
Transform = case lists:nth(6,Tail) of
{code, CodeBlock} -> CodeBlock;
_ ->
ets:insert_new(memo_table_name(),{gen_transform, true}),
["transform('",Symbol,"', Node, Idx)"]
end,
TransformArgs = case used_transform_variables(Transform) of
[] -> "_Node, _Idx";
['Idx'] -> "_Node, Idx";
['Node'] -> "Node, _Idx";
['Idx', 'Node'] -> "Node, Idx"
end,
["-spec '", Symbol, "'(input(), index()) -> parse_result().\n",
"'",Symbol,"'","(Input, Index) ->\n ",
"p(Input, Index, '",Symbol,"', fun(I,D) -> (",
lists:nth(4, Tail),
")(I,D) end, fun(", TransformArgs, ") ->",Transform," end)."]
end).
-spec 'parsing_expression'(input(), index()) -> parse_result().
'parsing_expression'(Input, Index) ->
p(Input, Index, 'parsing_expression', fun(I,D) -> (p_choose([fun 'choice'/2, fun 'sequence'/2, fun 'primary'/2]))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'choice'(input(), index()) -> parse_result().
'choice'(Input, Index) ->
p(Input, Index, 'choice', fun(I,D) -> (p_seq([p_label('head', fun 'alternative'/2), p_label('tail', p_one_or_more(p_seq([fun 'space'/2, p_string(<<"\/">>), fun 'space'/2, fun 'alternative'/2])))]))(I,D) end, fun(Node, _Idx) ->
Tail = [lists:last(S) || S <- proplists:get_value(tail, Node)],
Head = proplists:get_value(head, Node),
Statements = [[", ", TS] || TS <- Tail],
used_combinator(p_choose),
["p_choose([", Head, Statements, "])"]
end).
-spec 'alternative'(input(), index()) -> parse_result().
'alternative'(Input, Index) ->
p(Input, Index, 'alternative', fun(I,D) -> (p_choose([fun 'sequence'/2, fun 'labeled_primary'/2]))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'primary'(input(), index()) -> parse_result().
'primary'(Input, Index) ->
p(Input, Index, 'primary', fun(I,D) -> (p_choose([p_seq([fun 'prefix'/2, fun 'atomic'/2]), p_seq([fun 'atomic'/2, fun 'suffix'/2]), fun 'atomic'/2]))(I,D) end, fun(Node, _Idx) ->
case Node of
[Atomic, one_or_more] ->
used_combinator(p_one_or_more),
used_combinator(p_scan),
["p_one_or_more(", Atomic, ")"];
[Atomic, zero_or_more] ->
used_combinator(p_zero_or_more),
used_combinator(p_scan),
["p_zero_or_more(", Atomic, ")"];
[Atomic, optional] ->
used_combinator(p_optional),
["p_optional(", Atomic, ")"];
[assert, Atomic] ->
used_combinator(p_assert),
["p_assert(", Atomic, ")"];
[not_, Atomic] ->
used_combinator(p_not),
["p_not(", Atomic, ")"];
_ -> Node
end
end).
-spec 'sequence'(input(), index()) -> parse_result().
'sequence'(Input, Index) ->
p(Input, Index, 'sequence', fun(I,D) -> (p_seq([p_label('head', fun 'labeled_primary'/2), p_label('tail', p_one_or_more(p_seq([fun 'space'/2, fun 'labeled_primary'/2])))]))(I,D) end, fun(Node, _Idx) ->
Tail = [lists:nth(2, S) || S <- proplists:get_value(tail, Node)],
Head = proplists:get_value(head, Node),
Statements = [[", ", TS] || TS <- Tail],
used_combinator(p_seq),
["p_seq([", Head, Statements, "])"]
end).
-spec 'labeled_primary'(input(), index()) -> parse_result().
'labeled_primary'(Input, Index) ->
p(Input, Index, 'labeled_primary', fun(I,D) -> (p_seq([p_optional(fun 'label'/2), fun 'primary'/2]))(I,D) end, fun(Node, _Idx) ->
case hd(Node) of
[] -> lists:nth(2, Node);
Label ->
used_combinator(p_label),
["p_label('", Label, "', ", lists:nth(2, Node), ")"]
end
end).
-spec 'label'(input(), index()) -> parse_result().
'label'(Input, Index) ->
p(Input, Index, 'label', fun(I,D) -> (p_seq([fun 'alpha_char'/2, p_zero_or_more(fun 'alphanumeric_char'/2), p_string(<<":">>)]))(I,D) end, fun(Node, _Idx) ->
lists:sublist(Node, length(Node)-1)
end).
-spec 'suffix'(input(), index()) -> parse_result().
'suffix'(Input, Index) ->
p(Input, Index, 'suffix', fun(I,D) -> (p_choose([fun 'repetition_suffix'/2, fun 'optional_suffix'/2]))(I,D) end, fun(Node, _Idx) ->
case Node of
<<"*">> -> zero_or_more;
<<"+">> -> one_or_more;
<<"?">> -> optional
end
end).
-spec 'optional_suffix'(input(), index()) -> parse_result().
'optional_suffix'(Input, Index) ->
p(Input, Index, 'optional_suffix', fun(I,D) -> (p_string(<<"?">>))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'repetition_suffix'(input(), index()) -> parse_result().
'repetition_suffix'(Input, Index) ->
p(Input, Index, 'repetition_suffix', fun(I,D) -> (p_choose([p_string(<<"+">>), p_string(<<"*">>)]))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'prefix'(input(), index()) -> parse_result().
'prefix'(Input, Index) ->
p(Input, Index, 'prefix', fun(I,D) -> (p_choose([p_string(<<"&">>), p_string(<<"!">>)]))(I,D) end, fun(Node, _Idx) ->
case Node of
<<"&">> -> assert;
<<"!">> -> not_
end
end).
-spec 'atomic'(input(), index()) -> parse_result().
'atomic'(Input, Index) ->
p(Input, Index, 'atomic', fun(I,D) -> (p_choose([fun 'terminal'/2, fun 'nonterminal'/2, fun 'parenthesized_expression'/2]))(I,D) end, fun(Node, _Idx) ->
case Node of
{nonterminal, Symbol} ->
[<<"fun '">>, Symbol, <<"'/2">>];
_ -> Node
end
end).
-spec 'parenthesized_expression'(input(), index()) -> parse_result().
'parenthesized_expression'(Input, Index) ->
p(Input, Index, 'parenthesized_expression', fun(I,D) -> (p_seq([p_string(<<"(">>), p_optional(fun 'space'/2), fun 'parsing_expression'/2, p_optional(fun 'space'/2), p_string(<<")">>)]))(I,D) end, fun(Node, _Idx) ->lists:nth(3, Node) end).
-spec 'nonterminal'(input(), index()) -> parse_result().
'nonterminal'(Input, Index) ->
p(Input, Index, 'nonterminal', fun(I,D) -> (p_seq([fun 'alpha_char'/2, p_zero_or_more(fun 'alphanumeric_char'/2)]))(I,D) end, fun(Node, Idx) ->
Symbol = unicode:characters_to_binary(Node),
add_nt(Symbol, Idx),
{nonterminal, Symbol}
end).
-spec 'terminal'(input(), index()) -> parse_result().
'terminal'(Input, Index) ->
p(Input, Index, 'terminal', fun(I,D) -> (p_choose([fun 'regexp_string'/2, fun 'quoted_string'/2, fun 'character_class'/2, fun 'anything_symbol'/2]))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'regexp_string'(input(), index()) -> parse_result().
'regexp_string'(Input, Index) ->
p(Input, Index, 'regexp_string', fun(I,D) -> (p_seq([p_string(<<"#">>), p_label('string', p_one_or_more(p_seq([p_not(p_string(<<"#">>)), p_choose([p_string(<<"\\#">>), p_anything()])]))), p_string(<<"#">>)]))(I,D) end, fun(Node, _Idx) ->
used_combinator(p_regexp),
["p_regexp(<<\"",
% Escape \ and " as they are used in erlang string. Other sumbol stay as is.
% \ -> \\
% " -> \"
re:replace(proplists:get_value(string, Node), "\"|\\\\", "\\\\&", [{return, binary}, global]),
"\">>)"]
end).
-spec 'quoted_string'(input(), index()) -> parse_result().
'quoted_string'(Input, Index) ->
p(Input, Index, 'quoted_string', fun(I,D) -> (p_choose([fun 'single_quoted_string'/2, fun 'double_quoted_string'/2]))(I,D) end, fun(Node, _Idx) ->
used_combinator(p_string),
lists:flatten(["p_string(<<\"",
escape_string(unicode:characters_to_list(proplists:get_value(string, Node))),
"\">>)"])
end).
-spec 'double_quoted_string'(input(), index()) -> parse_result().
'double_quoted_string'(Input, Index) ->
p(Input, Index, 'double_quoted_string', fun(I,D) -> (p_seq([p_string(<<"\"">>), p_label('string', p_zero_or_more(p_seq([p_not(p_string(<<"\"">>)), p_choose([p_string(<<"\\\\">>), p_string(<<"\\\"">>), p_anything()])]))), p_string(<<"\"">>)]))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'single_quoted_string'(input(), index()) -> parse_result().
'single_quoted_string'(Input, Index) ->
p(Input, Index, 'single_quoted_string', fun(I,D) -> (p_seq([p_string(<<"\'">>), p_label('string', p_zero_or_more(p_seq([p_not(p_string(<<"\'">>)), p_choose([p_string(<<"\\\\">>), p_string(<<"\\\'">>), p_anything()])]))), p_string(<<"\'">>)]))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'character_class'(input(), index()) -> parse_result().
'character_class'(Input, Index) ->
p(Input, Index, 'character_class', fun(I,D) -> (p_seq([p_string(<<"[">>), p_label('characters', p_one_or_more(p_seq([p_not(p_string(<<"]">>)), p_choose([p_seq([p_string(<<"\\\\">>), p_anything()]), p_seq([p_not(p_string(<<"\\\\">>)), p_anything()])])]))), p_string(<<"]">>)]))(I,D) end, fun(Node, _Idx) ->
used_combinator(p_charclass),
["p_charclass(<<\"[",
escape_string(unicode:characters_to_list(proplists:get_value(characters, Node))),
"]\">>)"]
end).
-spec 'anything_symbol'(input(), index()) -> parse_result().
'anything_symbol'(Input, Index) ->
p(Input, Index, 'anything_symbol', fun(I,D) -> (p_string(<<".">>))(I,D) end, fun(_Node, _Idx) -> used_combinator(p_anything), <<"p_anything()">> end).
-spec 'alpha_char'(input(), index()) -> parse_result().
'alpha_char'(Input, Index) ->
p(Input, Index, 'alpha_char', fun(I,D) -> (p_charclass(<<"[A-Za-z_]">>))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'alphanumeric_char'(input(), index()) -> parse_result().
'alphanumeric_char'(Input, Index) ->
p(Input, Index, 'alphanumeric_char', fun(I,D) -> (p_choose([fun 'alpha_char'/2, p_charclass(<<"[0-9]">>)]))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'space'(input(), index()) -> parse_result().
'space'(Input, Index) ->
p(Input, Index, 'space', fun(I,D) -> (p_one_or_more(p_choose([fun 'white'/2, fun 'comment_to_eol'/2])))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'comment_to_eol'(input(), index()) -> parse_result().
'comment_to_eol'(Input, Index) ->
p(Input, Index, 'comment_to_eol', fun(I,D) -> (p_seq([p_not(p_string(<<"%{">>)), p_string(<<"%">>), p_zero_or_more(p_seq([p_not(p_string(<<"\n">>)), p_anything()]))]))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'white'(input(), index()) -> parse_result().
'white'(Input, Index) ->
p(Input, Index, 'white', fun(I,D) -> (p_charclass(<<"[\s\t\n\r]">>))(I,D) end, fun(Node, _Idx) ->Node end).
-spec 'code_block'(input(), index()) -> parse_result().
'code_block'(Input, Index) ->
p(Input, Index, 'code_block', fun(I,D) -> (p_choose([p_seq([p_string(<<"%{">>), p_label('code', p_one_or_more(p_choose([p_string(<<"\\%">>), p_string(<<"$%">>), p_seq([p_not(p_string(<<"%}">>)), p_anything()])]))), p_string(<<"%}">>)]), p_seq([p_string(<<"`">>), p_label('code', p_one_or_more(p_choose([p_string(<<"\\`">>), p_string(<<"$`">>), p_seq([p_not(p_string(<<"`">>)), p_anything()])]))), p_string(<<"`">>)]), p_string(<<"~">>)]))(I,D) end, fun(Node, _Idx) ->
case Node of
<<"~">> -> {code, <<"Node">>};
_ -> {code, proplists:get_value('code', Node)}
end
end).
-file("peg_includes.hrl", 1).
-type index() :: {{line, pos_integer()}, {column, pos_integer()}}.
-type input() :: binary().
-type parse_failure() :: {fail, term()}.
-type parse_success() :: {term(), input(), index()}.
-type parse_result() :: parse_failure() | parse_success().
-type parse_fun() :: fun((input(), index()) -> parse_result()).
-type xform_fun() :: fun((input(), index()) -> term()).
-spec p(input(), index(), atom(), parse_fun(), xform_fun()) -> parse_result().
p(Inp, StartIndex, Name, ParseFun, TransformFun) ->
case get_memo(StartIndex, Name) of % See if the current reduction is memoized
{ok, Memo} -> %Memo; % If it is, return the stored result
Memo;
_ -> % If not, attempt to parse
Result = case ParseFun(Inp, StartIndex) of
{fail,_} = Failure -> % If it fails, memoize the failure
Failure;
{Match, InpRem, NewIndex} -> % If it passes, transform and memoize the result.
Transformed = TransformFun(Match, StartIndex),
{Transformed, InpRem, NewIndex}
end,
memoize(StartIndex, Name, Result),
Result
end.
-spec setup_memo() -> ets:tid().
setup_memo() ->
put({parse_memo_table, ?MODULE}, ets:new(?MODULE, [set])).
-spec release_memo() -> true.
release_memo() ->
ets:delete(memo_table_name()).
-spec memoize(index(), atom(), parse_result()) -> true.
memoize(Index, Name, Result) ->
Memo = case ets:lookup(memo_table_name(), Index) of
[] -> [];
[{Index, Plist}] -> Plist
end,
ets:insert(memo_table_name(), {Index, [{Name, Result}|Memo]}).
-spec get_memo(index(), atom()) -> {ok, term()} | {error, not_found}.
get_memo(Index, Name) ->
case ets:lookup(memo_table_name(), Index) of
[] -> {error, not_found};
[{Index, Plist}] ->
case proplists:lookup(Name, Plist) of
{Name, Result} -> {ok, Result};
_ -> {error, not_found}
end
end.
-spec memo_table_name() -> ets:tid().
memo_table_name() ->
get({parse_memo_table, ?MODULE}).
-ifdef(p_eof).
-spec p_eof() -> parse_fun().
p_eof() ->
fun(<<>>, Index) -> {eof, [], Index};
(_, Index) -> {fail, {expected, eof, Index}} end.
-endif.
-ifdef(p_optional).
-spec p_optional(parse_fun()) -> parse_fun().
p_optional(P) ->
fun(Input, Index) ->
case P(Input, Index) of
{fail,_} -> {[], Input, Index};
{_, _, _} = Success -> Success
end
end.
-endif.
-ifdef(p_not).
-spec p_not(parse_fun()) -> parse_fun().
p_not(P) ->
fun(Input, Index)->
case P(Input,Index) of
{fail,_} ->
{[], Input, Index};
{Result, _, _} -> {fail, {expected, {no_match, Result},Index}}
end
end.
-endif.
-ifdef(p_assert).
-spec p_assert(parse_fun()) -> parse_fun().
p_assert(P) ->
fun(Input,Index) ->
case P(Input,Index) of
{fail,_} = Failure-> Failure;
_ -> {[], Input, Index}
end
end.
-endif.
-ifdef(p_seq).
-spec p_seq([parse_fun()]) -> parse_fun().
p_seq(P) ->
fun(Input, Index) ->
p_all(P, Input, Index, [])
end.
-spec p_all([parse_fun()], input(), index(), [term()]) -> parse_result().
p_all([], Inp, Index, Accum ) -> {lists:reverse( Accum ), Inp, Index};
p_all([P|Parsers], Inp, Index, Accum) ->
case P(Inp, Index) of
{fail, _} = Failure -> Failure;
{Result, InpRem, NewIndex} -> p_all(Parsers, InpRem, NewIndex, [Result|Accum])
end.
-endif.
-ifdef(p_choose).
-spec p_choose([parse_fun()]) -> parse_fun().
p_choose(Parsers) ->
fun(Input, Index) ->
p_attempt(Parsers, Input, Index, none)
end.
-spec p_attempt([parse_fun()], input(), index(), none | parse_failure()) -> parse_result().
p_attempt([], _Input, _Index, Failure) -> Failure;
p_attempt([P|Parsers], Input, Index, FirstFailure)->
case P(Input, Index) of
{fail, _} = Failure ->
case FirstFailure of
none -> p_attempt(Parsers, Input, Index, Failure);
_ -> p_attempt(Parsers, Input, Index, FirstFailure)
end;
Result -> Result
end.
-endif.
-ifdef(p_zero_or_more).
-spec p_zero_or_more(parse_fun()) -> parse_fun().
p_zero_or_more(P) ->
fun(Input, Index) ->
p_scan(P, Input, Index, [])
end.
-endif.
-ifdef(p_one_or_more).
-spec p_one_or_more(parse_fun()) -> parse_fun().
p_one_or_more(P) ->
fun(Input, Index)->
Result = p_scan(P, Input, Index, []),
case Result of
{[_|_], _, _} ->
Result;
_ ->
{fail, {expected, Failure, _}} = P(Input,Index),
{fail, {expected, {at_least_one, Failure}, Index}}
end
end.
-endif.
-ifdef(p_label).
-spec p_label(atom(), parse_fun()) -> parse_fun().
p_label(Tag, P) ->
fun(Input, Index) ->
case P(Input, Index) of
{fail,_} = Failure ->
Failure;
{Result, InpRem, NewIndex} ->
{{Tag, Result}, InpRem, NewIndex}
end
end.
-endif.
-ifdef(p_scan).
-spec p_scan(parse_fun(), input(), index(), [term()]) -> {[term()], input(), index()}.
p_scan(_, <<>>, Index, Accum) -> {lists:reverse(Accum), <<>>, Index};
p_scan(P, Inp, Index, Accum) ->
case P(Inp, Index) of
{fail,_} -> {lists:reverse(Accum), Inp, Index};
{Result, InpRem, NewIndex} -> p_scan(P, InpRem, NewIndex, [Result | Accum])
end.
-endif.
-ifdef(p_string).
-spec p_string(binary()) -> parse_fun().
p_string(S) ->
Length = erlang:byte_size(S),
fun(Input, Index) ->
try
<<S:Length/binary, Rest/binary>> = Input,
{S, Rest, p_advance_index(S, Index)}
catch
error:{badmatch,_} -> {fail, {expected, {string, S}, Index}}
end
end.
-endif.
-ifdef(p_anything).
-spec p_anything() -> parse_fun().
p_anything() ->
fun(<<>>, Index) -> {fail, {expected, any_character, Index}};
(Input, Index) when is_binary(Input) ->
<<C/utf8, Rest/binary>> = Input,
{<<C/utf8>>, Rest, p_advance_index(<<C/utf8>>, Index)}
end.
-endif.
-ifdef(p_charclass).
-spec p_charclass(string() | binary()) -> parse_fun().
p_charclass(Class) ->
{ok, RE} = re:compile(Class, [unicode, dotall]),
fun(Inp, Index) ->
case re:run(Inp, RE, [anchored]) of
{match, [{0, Length}|_]} ->
{Head, Tail} = erlang:split_binary(Inp, Length),
{Head, Tail, p_advance_index(Head, Index)};
_ -> {fail, {expected, {character_class, binary_to_list(Class)}, Index}}
end
end.
-endif.
-ifdef(p_regexp).
-spec p_regexp(binary()) -> parse_fun().
p_regexp(Regexp) ->
{ok, RE} = re:compile(Regexp, [unicode, dotall, anchored]),
fun(Inp, Index) ->
case re:run(Inp, RE) of
{match, [{0, Length}|_]} ->
{Head, Tail} = erlang:split_binary(Inp, Length),
{Head, Tail, p_advance_index(Head, Index)};
_ -> {fail, {expected, {regexp, binary_to_list(Regexp)}, Index}}
end
end.
-endif.
-ifdef(line).
-spec line(index() | term()) -> pos_integer() | undefined.
line({{line,L},_}) -> L;
line(_) -> undefined.
-endif.
-ifdef(column).
-spec column(index() | term()) -> pos_integer() | undefined.
column({_,{column,C}}) -> C;
column(_) -> undefined.
-endif.
-spec p_advance_index(input() | unicode:charlist() | pos_integer(), index()) -> index().
p_advance_index(MatchedInput, Index) when is_list(MatchedInput) orelse is_binary(MatchedInput)-> % strings
lists:foldl(fun p_advance_index/2, Index, unicode:characters_to_list(MatchedInput));
p_advance_index(MatchedInput, Index) when is_integer(MatchedInput) -> % single characters
{{line, Line}, {column, Col}} = Index,
case MatchedInput of
$\n -> {{line, Line+1}, {column, 1}};
_ -> {{line, Line}, {column, Col+1}}
end.