% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.
%
% bind_path is based on bind method from Webmachine


%% @doc Module for URL rewriting by pattern matching.

-module(chttpd_rewrite).
-export([handle_rewrite_req/3]).
-include_lib("couch/include/couch_db.hrl").

-define(SEPARATOR, $\/).
-define(MATCH_ALL, {bind, <<"*">>}).


%% doc The http rewrite handler. All rewriting is done from
%% /dbname/_design/ddocname/_rewrite by default.
%%
%% Each rule must be listed in the "rewrites" member of the design doc.
%% Example of a complete rule:
%%
%%  {
%%      ....
%%      "rewrites": [
%%      {
%%          "from": "",
%%          "to": "index.html",
%%          "method": "GET",
%%          "query": {}
%%      }
%%      ]
%%  }
%%
%%  from: the path pattern used to bind the current URI to the rule. Pattern
%% matching is used for that.
%%
%%  to: the rule used to rewrite the URL. It can contain variables, which are
%% resolved from the bindings discovered during pattern matching and from the
%% query args (the URL query args and the ones defined in the query member).
%%
%%  method: the request method the rule applies to. Defaults to "*", which
%% matches every method.
%%  query: the query args you want to define. Their values can contain dynamic
%% variables that are resolved from the bindings.
%%
%%
%% "to" and "from" are paths with patterns. A pattern is a path segment that
%% either starts with ":" or is "*". ex:
%% /somepath/:var/*
%%
%% The path is converted to an Erlang list by splitting it on "/". Each
%% variable is converted to a {bind, Name} tuple and "*" is converted to
%% {bind, <<"*">>}. Pattern matching is done by splitting the request URL on
%% "/" into a list of tokens. A string pattern matches an equal token. The "*"
%% pattern matches any number of tokens, but may only be present as the last
%% pathterm in a pathspec. If all tokens are matched and all pathterms are
%% used, then the pathspec matches. It works like webmachine. Each identified
%% token can be reused in the "to" rule and in the query.
%%
%% Pattern matching starts by matching the request method against the rule
%% method. By default every method matches a rule (the rule method defaults
%% to "*"). Then the request path is matched against the rule path. If no rule
%% matches, a 404 error is returned.
%%
%% Once a rule is found, the request URL is rewritten using the "to" and
%% "query" members. The identified tokens replace the variables of the rule.
%% If "*" is found in the rule, it is replaced by the remaining part of the
%% path, if there is one.
%%
%% Examples:
%%
%% Dispatch rule            URL             TO                  Tokens
%%
%% {"from": "/a/b",         /a/b?k=v        /some/b?k=v         var =:= b
%% "to": "/some/"}                                              k = v
%%
%% {"from": "/a/b",         /a/b            /some/b?var=b       var =:= b
%% "to": "/some/:var"}
%%
%% {"from": "/a",           /a              /some
%% "to": "/some/*"}
%%
%% {"from": "/a/*",         /a/b/c          /some/b/c
%% "to": "/some/*"}
%%
%% {"from": "/a",           /a              /some
%% "to": "/some/*"}
%%
%% {"from": "/a/:foo/*",    /a/b/c          /some/b/c?foo=b     foo =:= b
%% "to": "/some/:foo/*"}
%%
%% {"from": "/a/:foo",     /a/b             /some/?k=b&foo=b    foo =:= b
%% "to": "/some",
%%  "query": {
%%      "k": ":foo"
%%  }}
%%
%% {"from": "/a",           /a?foo=b        /some/b             foo =:= b
%% "to": "/some/:foo",
%%  }}



%% @doc Rewrite a request sent to a design document's rewrite handler and
%% dispatch the rewritten request through chttpd:handle_request/1.
%%
%% The request path is expected to look like
%% DbName/_design/DesignName/Handler/...; everything after the fourth
%% segment is matched against the "rewrites" rules found in DDoc.
%%
%% Replies with a 404 "rewrite_error" when DDoc has no "rewrites" member and
%% with a 400 "rewrite_error" when that member is a string. Throws not_found
%% when no rule matches the request method and path.
handle_rewrite_req(#httpd{
        path_parts=[DbName, <<"_design">>, DesignName, _Rewrite|PathParts],
        method=Method,
        mochi_req=MochiReq}=Req, _Db, DDoc) ->

    % rewritten paths are anchored at the design document
    Prefix = <<"/", DbName/binary, "/_design/", DesignName/binary>>,
    QueryList = [decode_query_value(Arg) || Arg <- couch_httpd:qs(Req)],

    #doc{body={Props}} = DDoc,

    case couch_util:get_value(<<"rewrites">>, Props) of
        undefined ->
            couch_httpd:send_error(Req, 404, <<"rewrite_error">>,
                <<"Invalid path.">>);
        Rules when is_binary(Rules) ->
            couch_httpd:send_error(Req, 400, <<"rewrite_error">>,
                <<"Rewrite rules are a String. They must be a JSON Array.">>);
        Rules ->
            % only JSON objects are turned into dispatch rules
            DispatchList = [make_rule(Rule) || {Rule} <- Rules],
            ReqMethod = couch_util:to_binary(Method),

            % match the url against the rules; give up when none applies
            {NewPathParts, Bindings} =
                case try_bind_path(DispatchList, ReqMethod, PathParts,
                        QueryList) of
                    no_dispatch_path ->
                        throw(not_found);
                    {_, _} = Bound ->
                        Bound
                end,

            % build the new path and re-encode the query args (JSON-encoding
            % them where needed)
            QuotedParts = [quote_plus(Part) || Part <- NewPathParts],
            Encoded = maybe_encode_bindings(Bindings),
            PathOnly = string:join(QuotedParts, [?SEPARATOR]),
            QueryString = case Encoded of
                [] -> [];
                _ -> ["?", mochiweb_util:urlencode(Encoded)]
            end,
            Path = binary_to_list(iolist_to_binary([PathOnly, QueryString])),

            % use the safe relative form of the path when there is one,
            % otherwise keep the path as built
            Relative = case mochiweb_util:safe_relative_path(Path) of
                undefined -> Path;
                SafePath -> SafePath
            end,
            RawPath = ?b2l(Prefix) ++ "/" ++ Relative,

            % normalize final path (resolve "." and ".." levels)
            RawPath1 = ?b2l(iolist_to_binary(normalize_path(RawPath))),

            couch_log:debug("rewrite to ~p ~n", [RawPath1]),

            % build a new mochiweb request that only differs by its raw path
            MochiReq1 = mochiweb_request:new(MochiReq:get(socket),
                                             MochiReq:get(method),
                                             RawPath1,
                                             MochiReq:get(version),
                                             MochiReq:get(headers)),

            % cleanup, it forces mochiweb to reparse the raw uri
            MochiReq1:cleanup(),

            chttpd:handle_request(MochiReq1)
    end.

%% @doc URL-quote one path segment with mochiweb_util:quote_plus/1. A
%% {bind, Name} tuple is quoted by its name, anything else is quoted as is.
quote_plus(Segment) ->
    Raw = case Segment of
        {bind, Name} -> Name;
        Plain -> Plain
    end,
    mochiweb_util:quote_plus(Raw).

%% @doc Find the first dispatch rule that matches the request method and
%% path, and compute the rewritten path for it.
%%
%% Each element of the dispatch list has the shape
%% [{FromParts, Method}, ToParts, QueryArgs, Formats], as built by
%% make_rule/1. QueryList holds the query args of the incoming request as
%% {Key, Value} pairs.
%%
%% Returns {NewPathParts, FinalBindings} for the first matching rule, or
%% no_dispatch_path when no rule matches. This function does not throw
%% not_found itself; the caller decides what to do with no_dispatch_path.
try_bind_path([], _Method, _PathParts, _QueryList) ->
    no_dispatch_path;
try_bind_path([Dispatch|Rest], Method, PathParts, QueryList) ->
    [{PathParts1, Method1}, RedirectPath, QueryArgs, Formats] = Dispatch,
    case bind_method(Method1, Method) of
        true ->
            case bind_path(PathParts1, PathParts, []) of
                {ok, Remaining, Bindings} ->
                    Bindings1 = Bindings ++ QueryList,
                    % we parse query args from the rule and fill
                    % them with binding vars where needed
                    QueryArgs1 = make_query_list(QueryArgs, Bindings1,
                        Formats, []),
                    % drop the params of Bindings1 that are already defined
                    % in QueryArgs1. A comprehension keeps the original order
                    % without appending to an accumulator on every step; the
                    % single-element generator only names the converted key.
                    Bindings2 = [{K1, V} || {K, V} <- Bindings1,
                        K1 <- [to_binding(K)],
                        couch_util:get_value(K1, QueryArgs1) =:= undefined],

                    FinalBindings = Bindings2 ++ QueryArgs1,
                    NewPathParts = make_new_path(RedirectPath, FinalBindings,
                                    Remaining, []),
                    {NewPathParts, FinalBindings};
                fail ->
                    try_bind_path(Rest, Method, PathParts, QueryList)
            end;
        false ->
            try_bind_path(Rest, Method, PathParts, QueryList)
    end.

%% @doc Build the query bindings declared in the "query" member of a rule.
%%
%% Binary and list values go through replace_var/3, so that variables are
%% replaced by a binding or by an argument passed in the url. Every other
%% value (for instance a JSON object) is kept unchanged. Keys are converted
%% with to_binding/1.
%%
%% Entries are prepended to Acc one by one, so the new entries end up in
%% reverse order of QueryArgs, in front of the initial content of Acc.
make_query_list(QueryArgs, Bindings, Formats, Acc) ->
    lists:foldl(fun({Key, Value}, Built) ->
        Resolved = case is_binary(Value) orelse is_list(Value) of
            true -> replace_var(Value, Bindings, Formats);
            false -> Value
        end,
        [{to_binding(Key), Resolved}|Built]
    end, Acc, QueryArgs).

%% @doc Resolve the variables of one query value.
%%
%% A binary that is "*" or that starts with ":" is looked up with get_var/4;
%% the value itself is used as default when nothing is bound. In a list, only
%% the elements that start with ":" are looked up. Any other value is
%% returned unchanged.
replace_var(Value, Bindings, Formats) when is_binary(Value) ->
    case Value of
        <<"*">> ->
            get_var(Value, Bindings, Value, Formats);
        <<":", Var/binary>> ->
            get_var(Var, Bindings, Value, Formats);
        _ ->
            Value
    end;
replace_var(Value, Bindings, Formats) when is_list(Value) ->
    lists:map(fun
            (<<":", Var/binary>>=Item) ->
                get_var(Var, Bindings, Item, Formats);
            (Item) ->
                Item
        end, Value);
replace_var(Value, _Bindings, _Formats) ->
    Value.

%% @doc JSON-encode Value when Key is one of the key-type query args (key,
%% startkey, start_key, endkey, end_key, keys); return Value unchanged for
%% any other key.
maybe_json(Key, Value) when
        Key =:= <<"key">>;
        Key =:= <<"startkey">>;
        Key =:= <<"start_key">>;
        Key =:= <<"endkey">>;
        Key =:= <<"end_key">>;
        Key =:= <<"keys">> ->
    ?JSON_ENCODE(Value);
maybe_json(_Key, Value) ->
    Value.

%% @doc Look up the value bound to VarName in Props, falling back to Default,
%% then apply the format registered for VarName, if any. The lookup in Props
%% uses the to_binding/1 form of VarName, while the format lookup uses
%% VarName as given.
get_var(VarName, Props, Default, Formats) ->
    Bound = couch_util:get_value(to_binding(VarName), Props, Default),
    maybe_format(VarName, Bound, Formats).

%% @doc Apply the format registered for VarName in Formats to Value. Value is
%% returned unchanged when no format is registered for VarName.
maybe_format(VarName, Value, Formats) ->
    Format = couch_util:get_value(VarName, Formats),
    if
        Format =:= undefined ->
            Value;
        true ->
            format(Format, Value)
    end.

%% @doc Convert Value according to Format.
%%
%% "int" turns a binary or a string holding an integer into that integer;
%% integers are returned as is. "bool" turns a binary or a string equal to
%% "true" or "false" (compared after string:to_lower/1) into the matching
%% boolean.
%%
%% Whenever the conversion is not possible, or Format is unknown, the value
%% is returned exactly as it was given. In particular a binary that cannot
%% be converted stays a binary instead of being handed back as a char list.
format(<<"int">>, Value) when is_integer(Value) ->
    Value;
format(<<"int">>, Value) when is_binary(Value) ->
    case format(<<"int">>, binary_to_list(Value)) of
        IntVal when is_integer(IntVal) ->
            IntVal;
        _NotAnInt ->
            % keep the caller's binary, not the intermediate char list
            Value
    end;
format(<<"int">>, Value) when is_list(Value) ->
    % only the conversion failures of list_to_integer/1 are handled here
    try
        list_to_integer(Value)
    catch
        error:Reason when Reason =:= badarg; Reason =:= system_limit ->
            Value
    end;
format(<<"bool">>, Value) when is_binary(Value) ->
    case format(<<"bool">>, binary_to_list(Value)) of
        BoolVal when is_boolean(BoolVal) ->
            BoolVal;
        _NotABool ->
            % keep the caller's binary, not the intermediate char list
            Value
    end;
format(<<"bool">>, Value) when is_list(Value) ->
    case string:to_lower(Value) of
        "true" -> true;
        "false" -> false;
        _ -> Value
    end;
format(_Format, Value) ->
   Value.

%% @doc Build the new path from the "to" pattern of a rule. Bindings are the
%% query args (plus the rewritten dynamic query args) and the bindings found
%% in the bind_path step.
%%
%% A wildcard pattern is replaced by the Remaining tokens and ends the path:
%% whatever follows the wildcard in the pattern is ignored. A variable is
%% replaced by its bound value, or by "undefined" when nothing is bound to
%% it. Any other segment is copied as is.
make_new_path([], _Bindings, _Remaining, Acc) ->
    lists:reverse(Acc);
make_new_path([?MATCH_ALL|_Ignored], _Bindings, Remaining, Acc) ->
    lists:reverse(Acc, Remaining);
make_new_path([{bind, _}=Var|Rest], Bindings, Remaining, Acc) ->
    Segment = case couch_util:get_value(Var, Bindings) of
        undefined ->
            << "undefined">>;
        Bound ->
            iolist_to_binary(Bound)
    end,
    make_new_path(Rest, Bindings, Remaining, [Segment|Acc]);
make_new_path([Literal|Rest], Bindings, Remaining, Acc) ->
    make_new_path(Rest, Bindings, Remaining, [Literal|Acc]).


%% @doc Tell whether the method of a rule fits the method of the request.
%% The wildcard method, which is the default of a rule, fits every request
%% method. Otherwise the rule method must be the {bind, Method} form of the
%% request method. This allows rules that depend on the HTTP method.
bind_method(RuleMethod, Method) ->
    RuleMethod =:= ?MATCH_ALL orelse RuleMethod =:= {bind, Method}.


%% @doc Bind the "from" pattern of a rule to the tokens of the current url.
%%
%% Returns {ok, Remaining, Bindings} when the pattern matches, fail
%% otherwise. Remaining holds the tokens taken by a trailing wildcard and is
%% empty when the pattern has none. Bindings holds one {{bind, Name}, Token}
%% pair per variable, most recently bound first, in front of the Bindings
%% passed in.
bind_path([?MATCH_ALL], Tokens, Bindings) when is_list(Tokens) ->
    % a trailing wildcard takes everything that is left, even nothing
    {ok, Tokens, Bindings};
bind_path([], [], Bindings) ->
    {ok, [], Bindings};
bind_path([{bind, _}=Var|Pattern], [Token|Tokens], Bindings) ->
    bind_path(Pattern, Tokens, [{Var, Token}|Bindings]);
bind_path([Token|Pattern], [Token|Tokens], Bindings) ->
    % a literal segment only matches an equal token
    bind_path(Pattern, Tokens, Bindings);
bind_path(_, _, _) ->
    fail.


%% @doc Normalize a path: split it on "/", resolve the "." and ".." segments
%% with normalize_path1/2 and join the result again behind a leading "/".
normalize_path(Path)  ->
    Segments = string:tokens(Path, "/"),
    Resolved = normalize_path1(Segments, []),
    "/" ++ string:join(Resolved, [?SEPARATOR]).


%% @doc Resolve the "." and ".." segments of a list of path segments.
%%
%% Acc holds the segments kept so far, most recent first. "." is dropped.
%% ".." removes the most recent kept segment; it is kept itself when there is
%% nothing to remove or when the most recent kept segment is already "..".
%% Returns the kept segments in path order.
normalize_path1(Segments, Acc) ->
    Step = fun
        (".", Kept) ->
            Kept;
        ("..", Kept) ->
            case Kept of
                [] -> [".."];
                [".."|_] -> [".."|Kept];
                [_Parent|Above] -> Above
            end;
        (Segment, Kept) ->
            [Segment|Kept]
    end,
    lists:reverse(lists:foldl(Step, Acc, Segments)).


%% @doc Transform one JSON rule (the property list of a rule object) into the
%% Erlang term used for pattern matching:
%% [{FromParts, Method}, ToParts, QueryArgs, Formats].
%%
%% "method" and "from" default to the wildcard, "query" and "formats" default
%% to an empty list. Throws {error, invalid_rewrite_target} when the rule has
%% no "to" member.
make_rule(Rule) ->
    Lookup = fun(Field) -> couch_util:get_value(Field, Rule) end,
    Method = case Lookup(<<"method">>) of
        undefined -> ?MATCH_ALL;
        Verb -> to_binding(Verb)
    end,
    QueryArgs = case Lookup(<<"query">>) of
        undefined -> [];
        {QueryProps} -> QueryProps
    end,
    FromParts = case Lookup(<<"from">>) of
        undefined -> [?MATCH_ALL];
        FromPath -> parse_path(FromPath)
    end,
    ToParts = case Lookup(<<"to">>) of
        undefined -> throw({error, invalid_rewrite_target});
        ToPath -> parse_path(ToPath)
    end,
    Formats = case Lookup(<<"formats">>) of
        undefined -> [];
        {FormatProps} -> FormatProps
    end,
    [{FromParts, Method}, ToParts, QueryArgs, Formats].

%% @doc Split a rule path ("from" or "to") on "/" and convert the segments
%% with path_to_list/3.
parse_path(Path) ->
    Segments = re:split(Path, <<"\\/">>),
    path_to_list(Segments, [], 0).

%% @doc Convert the segments of a path rule ("from" or "to") to the Erlang
%% list used for pattern matching.
%%
%% Empty segments are dropped. A star segment becomes the wildcard binding
%% and a segment starting with ":" becomes a {bind, Name} tuple. Any other
%% segment is kept as is.
%%
%% DotDotCount is the number of ".." segments seen so far. When a ".."
%% segment is met while the count is 2, it is only accepted if the
%% "secure_rewrites" option of the "httpd" config section is "false";
%% otherwise the rule is logged and {insecure_rewrite_rule, Reason} is
%% thrown.
path_to_list([], Acc, _DotDotCount) ->
    lists:reverse(Acc);
path_to_list([<<>>|Rest], Acc, DotDotCount) ->
    path_to_list(Rest, Acc, DotDotCount);
path_to_list([<<"*">>|Rest], Acc, DotDotCount) ->
    path_to_list(Rest, [?MATCH_ALL|Acc], DotDotCount);
path_to_list([<<"..">>|Rest], Acc, DotDotCount) when DotDotCount == 2 ->
    case config:get("httpd", "secure_rewrites", "true") of
        "false" ->
            path_to_list(Rest, [<<"..">>|Acc], DotDotCount+1);
        _Else ->
            % log the whole rule path, in its original order
            Blocked = lists:reverse(Acc, [<<"..">>|Rest]),
            couch_log:notice("insecure_rewrite_rule ~p blocked", [Blocked]),
            throw({insecure_rewrite_rule, "too many ../.. segments"})
    end;
path_to_list([<<"..">>|Rest], Acc, DotDotCount) ->
    path_to_list(Rest, [<<"..">>|Acc], DotDotCount+1);
path_to_list([<<":", Var/binary>>|Rest], Acc, DotDotCount) ->
    path_to_list(Rest, [to_binding(Var)|Acc], DotDotCount);
path_to_list([Segment|Rest], Acc, DotDotCount) ->
    path_to_list(Rest, [Segment|Acc], DotDotCount).

%% @doc Turn the final bindings into {Name, Binary} pairs for the query
%% string. The wildcard binding is dropped. Every other value goes through
%% maybe_json/2 and is then converted to a binary.
%%
%% The pairs are accumulated by prepending, so they come out in reverse
%% order of Props.
maybe_encode_bindings(Props) ->
    Encode = fun
        ({{bind, <<"*">>}, _Splat}, Encoded) ->
            Encoded;
        ({{bind, Name}, Val}, Encoded) ->
            [{Name, iolist_to_binary(maybe_json(Name, Val))}|Encoded]
    end,
    lists:foldl(Encode, [], Props).

%% @doc Convert one {Key, Value} query arg of the request into a binding.
%% The value of a key-type arg (key, startkey, start_key, endkey, end_key,
%% keys) is JSON-decoded; any other value is converted to a binary.
decode_query_value({K,V}) ->
    JsonKeys = ["key", "startkey", "start_key",
                "endkey", "end_key", "keys"],
    Decoded = case lists:member(K, JsonKeys) of
        true -> ?JSON_DECODE(V);
        false -> ?l2b(V)
    end,
    {to_binding(K), Decoded}.

%% @doc Wrap a name into a {bind, Name} tuple. A name that is already a
%% binding is returned as is, and a list name is converted to a binary first.
to_binding({bind, _}=Binding) ->
    Binding;
to_binding(Name) when is_list(Name) ->
    {bind, ?l2b(Name)};
to_binding(Name) ->
    {bind, Name}.
