| %% @author Bob Ippolito <bob@mochimedia.com> |
| %% @copyright 2007 Mochi Media, Inc. |
| |
| %% @doc Utilities for parsing and quoting. |
| |
| -module(mochiweb_util). |
| -author('bob@mochimedia.com'). |
| -export([join/2, quote_plus/1, urlencode/1, parse_qs/1, unquote/1]). |
| -export([path_split/1]). |
| -export([urlsplit/1, urlsplit_path/1, urlunsplit/1, urlunsplit_path/1]). |
| -export([guess_mime/1, parse_header/1]). |
| -export([shell_quote/1, cmd/1, cmd_string/1, cmd_port/2]). |
| -export([record_to_proplist/2, record_to_proplist/3]). |
| -export([safe_relative_path/1, partition/2]). |
| -export([test/0]). |
| |
%% Numeric character codes for "%" and ".".
-define(PERCENT, 37).  % $\%
-define(FULLSTOP, 46). % $\.

%% True when C is an ASCII hexadecimal digit, in either case.
%% Built only from comparisons, so it can be used in guards.
-define(IS_HEX(C), ((C >= $0 andalso C =< $9) orelse
                    (C >= $A andalso C =< $F) orelse
                    (C >= $a andalso C =< $f))).

%% True when C can be copied into a query string without escaping:
%% ASCII letters, ASCII digits, and the characters "-", "_", "~" and ".".
%% Built only from comparisons, so it can be used in guards.
-define(QS_SAFE(C), ((C >= $0 andalso C =< $9) orelse
                     (C >= $A andalso C =< $Z) orelse
                     (C >= $a andalso C =< $z) orelse
                     (C =:= $- orelse C =:= $_ orelse C =:= $~ orelse
                      C =:= ?FULLSTOP))).
| |
%% Convert a nibble value to its uppercase ASCII hex digit:
%% values below 10 map onto "0".."9", values 10..15 onto "A".."F".
%% Values of 16 or more match no clause.
hexdigit(Nibble) when Nibble < 10 ->
    Nibble + $0;
hexdigit(Nibble) when Nibble < 16 ->
    Nibble - 10 + $A.
| |
%% Convert an ASCII hex digit (either case) to its numeric value 0..15.
%% Any other character matches no clause.
unhexdigit(Digit) when $0 =< Digit, Digit =< $9 ->
    Digit - $0;
unhexdigit(Digit) when $A =< Digit, Digit =< $F ->
    10 + (Digit - $A);
unhexdigit(Digit) when $a =< Digit, Digit =< $f ->
    10 + (Digit - $a).
| |
%% @spec partition(String, Sep) -> {String, [], []} | {Prefix, Sep, Postfix}
%% @doc Split String around the first occurrence of Sep, in the manner of
%%      Python 2.5's str.partition:
%%      partition("foo/bar", "/") = {"foo", "/", "bar"},
%%      partition("foo", "/") = {"foo", "", ""}.
partition(String, Sep) ->
    case partition(String, Sep, []) of
        {_Prefix, _Sep, _Postfix} = Found ->
            Found;
        undefined ->
            %% Sep never occurred: everything is prefix.
            {String, "", ""}
    end.

%% Walk the string one character at a time; Seen holds the characters
%% already passed over, in reverse. Returns undefined when the end of the
%% string is reached without finding Sep.
partition("", _Sep, _Seen) ->
    undefined;
partition(Str, Sep, Seen) ->
    case partition2(Str, Sep) of
        undefined ->
            [Char | Tail] = Str,
            partition(Tail, Sep, [Char | Seen]);
        Postfix ->
            {lists:reverse(Seen), Sep, Postfix}
    end.

%% If the second argument is a prefix of the first, return what follows
%% that prefix; otherwise return undefined.
partition2(Postfix, []) ->
    Postfix;
partition2([Same | StrTail], [Same | SepTail]) ->
    partition2(StrTail, SepTail);
partition2(_Str, _Sep) ->
    undefined.
| |
| |
| |
%% @spec safe_relative_path(string()) -> string() | undefined
%% @doc Reduce a relative path by resolving its ".." components, or return
%%      undefined when the path is not safe: it starts with "/", it has an
%%      empty component (a doubled "/"), or a ".." would climb above the
%%      starting directory. A safe result can be joined onto an absolute
%%      path and will name something inside that directory. A trailing "/"
%%      that follows an ordinary component is preserved.
safe_relative_path([$/ | _]) ->
    undefined;
safe_relative_path(Path) ->
    safe_relative_path(Path, []).

%% Kept holds the components accepted so far, most recent first.
safe_relative_path("", []) ->
    "";
safe_relative_path("", Kept) ->
    join(lists:reverse(Kept), "/");
safe_relative_path(Path, Kept) ->
    safe_relative_step(partition(Path, "/"), Kept).

%% Decide what to do with the next component, given as the result of
%% partition(Path, "/").
safe_relative_step({"", "/", _}, _Kept) ->
    %% /foo or foo//bar
    undefined;
safe_relative_step({"..", _, _}, []) ->
    %% Nothing left to climb out of.
    undefined;
safe_relative_step({"..", _, Rest}, [_Dropped | Kept]) ->
    safe_relative_path(Rest, Kept);
safe_relative_step({Part, "/", ""}, Kept) ->
    %% Trailing slash: the empty component makes join/2 emit a final "/".
    safe_relative_path("", ["", Part | Kept]);
safe_relative_step({Part, _, Rest}, Kept) ->
    safe_relative_path(Rest, [Part | Kept]).
| |
%% @spec shell_quote(string()) -> string()
%% @doc Quote a string following UNIX shell quoting rules. The result is
%%      wrapped in double quotes.
shell_quote(String) ->
    %% The accumulator starts out holding the opening double quote.
    shell_quote(String, "\"").
| |
%% @spec cmd_port([string()], Options) -> port()
%% @doc Open a port that spawns the command described by Argv. Equivalent to
%%      open_port({spawn, mochiweb_util:cmd_string(Argv)}, Options).
cmd_port(Argv, Options) ->
    Command = cmd_string(Argv),
    open_port({spawn, Command}, Options).
| |
%% @spec cmd([string()]) -> string()
%% @doc Run the command described by Argv and return its output.
%%      Equivalent to os:cmd(cmd_string(Argv)).
cmd(Argv) ->
    CommandLine = cmd_string(Argv),
    os:cmd(CommandLine).
| |
%% @spec cmd_string([string()]) -> string()
%% @doc Build a command string from a list of arguments: each argument is
%%      shell quoted and the results are separated by single spaces.
cmd_string(Argv) ->
    Quoted = lists:map(fun shell_quote/1, Argv),
    join(Quoted, " ").
| |
%% @spec join([string()], Separator) -> string()
%% @doc Concatenate a list of strings, placing Separator (a string or a
%%      single character) between consecutive elements. The result is
%%      always a flat string.
join([], _Separator) ->
    [];
join([Only], _Separator) ->
    lists:flatten(Only);
join(Strings, Separator) ->
    Reversed = lists:reverse(Strings),
    lists:flatten(revjoin(Reversed, Separator, [])).

%% Take strings in reverse order and build a nested list in forward order
%% with Separator between the elements. No separator is emitted while the
%% accumulator is still empty.
revjoin(RevStrings, Separator, Acc0) ->
    lists:foldl(fun (S, []) -> [S];
                    (S, Acc) -> [S, Separator | Acc]
                end,
                Acc0,
                RevStrings).
| |
%% @spec quote_plus(atom() | integer() | float() | string() | binary()) -> string()
%% @doc URL safe encoding of the given term. Atoms, integers, floats and
%%      binaries are converted to a string first. Characters accepted by
%%      ?QS_SAFE are copied unchanged, a space becomes "+", and every other
%%      character becomes "%" followed by two uppercase hex digits.
quote_plus(Binary) when is_binary(Binary) ->
    quote_plus(binary_to_list(Binary));
quote_plus(Int) when is_integer(Int) ->
    quote_plus(integer_to_list(Int));
quote_plus(Float) when is_float(Float) ->
    quote_plus(mochinum:digits(Float));
quote_plus(Atom) when is_atom(Atom) ->
    quote_plus(atom_to_list(Atom));
quote_plus(String) ->
    quote_plus(String, []).

%% Quoted accumulates the encoded output in reverse.
quote_plus([], Quoted) ->
    lists:reverse(Quoted);
quote_plus([$\s | Rest], Quoted) ->
    quote_plus(Rest, [$+ | Quoted]);
quote_plus([C | Rest], Quoted) when ?QS_SAFE(C) ->
    quote_plus(Rest, [C | Quoted]);
quote_plus([C | Rest], Quoted) ->
    %% NOTE: <<C>> keeps only the low 8 bits of C, so a character above 255
    %% is encoded as its low byte only.
    <<High:4, Low:4>> = <<C>>,
    %% Pushed low digit first because Quoted is reversed at the end.
    quote_plus(Rest, [hexdigit(Low), hexdigit(High), ?PERCENT | Quoted]).
| |
%% @spec urlencode([{Key, Value}]) -> string()
%% @doc URL encode a property list as "k1=v1&k2=v2...". Keys and values are
%%      each passed through quote_plus/1.
urlencode(Props) ->
    Pairs = lists:map(fun ({K, V}) ->
                              [quote_plus(K), $=, quote_plus(V)]
                      end,
                      Props),
    %% revjoin/3 expects its input in reverse order.
    lists:flatten(revjoin(lists:reverse(Pairs), $&, [])).
| |
%% @spec parse_qs(string() | binary()) -> [{Key, Value}]
%% @doc Parse a query string or application/x-www-form-urlencoded body into
%%      a list of decoded {Key, Value} string pairs, in input order.
parse_qs(Binary) when is_binary(Binary) ->
    parse_qs(binary_to_list(Binary));
parse_qs(String) ->
    parse_qs(String, []).

%% Pairs accumulates the parsed pairs in reverse.
parse_qs([], Pairs) ->
    lists:reverse(Pairs);
parse_qs(Query, Pairs) ->
    {Key, AfterKey} = parse_qs_key(Query),
    {Value, AfterValue} = parse_qs_value(AfterKey),
    parse_qs(AfterValue, [{Key, Value} | Pairs]).
| |
%% Read one key from the front of a query string and return
%% {DecodedKey, Remaining}. A "=" ends the key and is consumed. A ";" or
%% "&" also ends the key but is left in Remaining, so the value parser
%% sees it and yields an empty value.
parse_qs_key(String) ->
    parse_qs_key(String, []).

%% RevKey holds the still-encoded key characters in reverse.
parse_qs_key([$= | AfterEquals], RevKey) ->
    {qs_revdecode(RevKey), AfterEquals};
parse_qs_key([Sep | _] = Remaining, RevKey) when Sep =:= $& orelse Sep =:= $; ->
    {qs_revdecode(RevKey), Remaining};
parse_qs_key([C | Tail], RevKey) ->
    parse_qs_key(Tail, [C | RevKey]);
parse_qs_key([], RevKey) ->
    {qs_revdecode(RevKey), ""}.
| |
%% Read one value from the front of a query string and return
%% {DecodedValue, Remaining}. The value runs up to the next ";" or "&",
%% which is consumed, or to the end of the input.
parse_qs_value(String) ->
    parse_qs_value(String, []).

%% RevValue holds the still-encoded value characters in reverse.
parse_qs_value([Sep | AfterSep], RevValue) when Sep =:= $& orelse Sep =:= $; ->
    {qs_revdecode(RevValue), AfterSep};
parse_qs_value([C | Tail], RevValue) ->
    parse_qs_value(Tail, [C | RevValue]);
parse_qs_value([], RevValue) ->
    {qs_revdecode(RevValue), ""}.
| |
%% @spec unquote(string() | binary()) -> string()
%% @doc Decode a URL encoded string. Binaries are converted to a string
%%      first.
unquote(Binary) when is_binary(Binary) ->
    unquote(binary_to_list(Binary));
unquote(String) ->
    %% qs_revdecode/1 consumes its input back to front.
    Reversed = lists:reverse(String),
    qs_revdecode(Reversed).
| |
%% Decode a REVERSED URL-encoded string, returning the decoded string in
%% forward order. "+" becomes a space and "%XY" (seen here as Y, X, "%")
%% becomes the character with hex value XY. A "%" that is not followed by
%% two hex digits is copied through unchanged.
qs_revdecode(Reversed) ->
    qs_revdecode(Reversed, []).

qs_revdecode([Lo, Hi, ?PERCENT | Rest], Decoded) when ?IS_HEX(Lo), ?IS_HEX(Hi) ->
    Byte = unhexdigit(Hi) * 16 + unhexdigit(Lo),
    qs_revdecode(Rest, [Byte | Decoded]);
qs_revdecode([$+ | Rest], Decoded) ->
    qs_revdecode(Rest, [$\s | Decoded]);
qs_revdecode([C | Rest], Decoded) ->
    qs_revdecode(Rest, [C | Decoded]);
qs_revdecode([], Decoded) ->
    Decoded.
| |
%% @spec urlsplit(Url) -> {Scheme, Netloc, Path, Query, Fragment}
%% @doc Break a URL into a 5-tuple of strings. Percent escapes are left
%%      untouched. Only HTTP style URLs are supported.
urlsplit(Url) ->
    {Scheme, AfterScheme} = urlsplit_scheme(Url),
    {Netloc, AfterNetloc} = urlsplit_netloc(AfterScheme),
    {Path, Query, Fragment} = urlsplit_path(AfterNetloc),
    {Scheme, Netloc, Path, Query, Fragment}.
| |
%% Split a leading "scheme:" off Url. Returns {Scheme, Rest} with Scheme
%% lowercased and the ":" consumed, or {"", Url} when Url does not start
%% with a scheme.
%%
%% A scheme is a letter followed by any number of letters, digits, "+",
%% "-" or "." (RFC 3986, section 3.1). Checking this stops a ":" that
%% appears later in the URL -- in a path, query string or fragment -- from
%% being taken as the scheme delimiter: "/foo?next=a:b" has no scheme.
urlsplit_scheme([C | Rest] = Url) when (C >= $a andalso C =< $z) orelse
                                       (C >= $A andalso C =< $Z) ->
    case urlsplit_scheme(Rest, [C]) of
        no_scheme ->
            {"", Url};
        {_Scheme, _Rest} = Split ->
            Split
    end;
urlsplit_scheme(Url) when is_list(Url) ->
    %% Empty, or the first character cannot start a scheme.
    {"", Url}.

%% Acc holds the scheme characters read so far, in reverse. Returns
%% no_scheme as soon as a character that cannot be part of a scheme is seen
%% before any ":", or when the input ends first.
urlsplit_scheme([$: | Rest], Acc) ->
    {string:to_lower(lists:reverse(Acc)), Rest};
urlsplit_scheme([C | Rest], Acc) when (C >= $a andalso C =< $z) orelse
                                      (C >= $A andalso C =< $Z) orelse
                                      (C >= $0 andalso C =< $9) orelse
                                      C =:= $+ orelse C =:= $- orelse
                                      C =:= $. ->
    urlsplit_scheme(Rest, [C | Acc]);
urlsplit_scheme(_Rest, _Acc) ->
    no_scheme.
| |
%% Split the network location off the front of a URL whose scheme has
%% already been removed. When the input starts with "//", the netloc is
%% everything up to (not including) the next "/", "?" or "#", or up to the
%% end of the input. Otherwise the netloc is "" and the input is returned
%% unchanged. Returns {Netloc, Rest}.
urlsplit_netloc("//" ++ Rest) ->
    urlsplit_netloc(Rest, []);
urlsplit_netloc(Path) ->
    {"", Path}.

%% Acc holds the netloc characters read so far, in reverse.
urlsplit_netloc("", Acc) ->
    %% The URL ends right after the host (e.g. "http://host"): without this
    %% clause such input raised function_clause.
    {lists:reverse(Acc), ""};
urlsplit_netloc(Rest=[C | _], Acc) when C =:= $/; C =:= $?; C =:= $# ->
    {lists:reverse(Acc), Rest};
urlsplit_netloc([C | Rest], Acc) ->
    urlsplit_netloc(Rest, [C | Acc]).
| |
| |
%% @spec path_split(string()) -> {Part, Rest}
%% @doc Split a path at its first "/", as done during URL traversal. Part
%%      is everything before that "/", Rest everything after it; Rest is ""
%%      when the path contains no "/".
%%      path_split("foo/bar") = {"foo", "bar"},
%%      path_split("/foo/bar") = {"", "foo/bar"}.
path_split(S) ->
    case lists:splitwith(fun (C) -> C =/= $/ end, S) of
        {Part, [$/ | Rest]} ->
            {Part, Rest};
        {Part, []} ->
            {Part, ""}
    end.
| |
| |
%% @spec urlunsplit({Scheme, Netloc, Path, Query, Fragment}) -> string()
%% @doc Build a URL string from the 5-tuple. Path must be absolute.
%%      "Scheme://" is emitted only when Scheme is not "".
urlunsplit({Scheme, Netloc, Path, Query, Fragment}) ->
    Prefix = case Scheme of
                 "" -> "";
                 _ -> [Scheme, "://"]
             end,
    Tail = urlunsplit_path({Path, Query, Fragment}),
    lists:flatten([Prefix, Netloc, Tail]).
| |
%% @spec urlunsplit_path({Path, Query, Fragment}) -> string()
%% @doc Build a URL path string from the 3-tuple. "?Query" and "#Fragment"
%%      are appended only when the respective part is not "".
urlunsplit_path({Path, Query, Fragment}) ->
    lists:flatten([Path,
                   urlunsplit_prefixed($?, Query),
                   urlunsplit_prefixed($#, Fragment)]).

%% Put Char in front of Part, unless Part is empty.
urlunsplit_prefixed(_Char, "") ->
    "";
urlunsplit_prefixed(Char, Part) ->
    [Char | Part].
| |
%% @spec urlsplit_path(Url) -> {Path, Query, Fragment}
%% @doc Break a URL path into a 3-tuple of strings. Percent escapes are
%%      left untouched. Only HTTP style paths are supported. The path ends
%%      at the first "?" or "#"; after a "#" everything, including any "?",
%%      belongs to the fragment.
urlsplit_path(Path) ->
    NotDelimiter = fun (C) -> C =/= $? andalso C =/= $# end,
    case lists:splitwith(NotDelimiter, Path) of
        {PathPart, [$? | AfterPath]} ->
            {Query, Fragment} = urlsplit_query(AfterPath),
            {PathPart, Query, Fragment};
        {PathPart, [$# | Fragment]} ->
            {PathPart, "", Fragment};
        {PathPart, []} ->
            {PathPart, "", ""}
    end.

%% Split what follows the "?" into {Query, Fragment} at the first "#".
%% Fragment is "" when there is no "#".
urlsplit_query(Query) ->
    case lists:splitwith(fun (C) -> C =/= $# end, Query) of
        {QueryPart, [$# | Fragment]} ->
            {QueryPart, Fragment};
        {QueryPart, []} ->
            {QueryPart, ""}
    end.
| |
%% @spec guess_mime(string()) -> string()
%% @doc Guess the mime type of a file from the extension of its filename.
%%      The comparison is case sensitive, and any extension that is not
%%      listed (including no extension) yields "text/plain".
guess_mime(File) ->
    mime_for_extension(filename:extension(File)).

%% Extension (with its leading dot) to mime type.
mime_for_extension(".html") -> "text/html";
mime_for_extension(".xhtml") -> "application/xhtml+xml";
mime_for_extension(".xml") -> "application/xml";
mime_for_extension(".css") -> "text/css";
mime_for_extension(".js") -> "application/x-javascript";
mime_for_extension(".jpg") -> "image/jpeg";
mime_for_extension(".gif") -> "image/gif";
mime_for_extension(".png") -> "image/png";
mime_for_extension(".swf") -> "application/x-shockwave-flash";
mime_for_extension(".zip") -> "application/zip";
mime_for_extension(".bz2") -> "application/x-bzip2";
mime_for_extension(".gz") -> "application/x-gzip";
mime_for_extension(".tar") -> "application/x-tar";
mime_for_extension(".tgz") -> "application/x-gzip";
mime_for_extension(".txt") -> "text/plain";
mime_for_extension(".doc") -> "application/msword";
mime_for_extension(".pdf") -> "application/pdf";
mime_for_extension(".xls") -> "application/vnd.ms-excel";
mime_for_extension(".rtf") -> "application/rtf";
mime_for_extension(".mov") -> "video/quicktime";
mime_for_extension(".mp3") -> "audio/mpeg";
mime_for_extension(".z") -> "application/x-compress";
mime_for_extension(".wav") -> "audio/x-wav";
mime_for_extension(".ico") -> "image/x-icon";
mime_for_extension(".bmp") -> "image/bmp";
mime_for_extension(".m4a") -> "audio/mpeg";
mime_for_extension(".m3u") -> "audio/x-mpegurl";
mime_for_extension(".exe") -> "application/octet-stream";
mime_for_extension(".csv") -> "text/csv";
mime_for_extension(_Other) -> "text/plain".
| |
%% @spec parse_header(string()) -> {Type, [{K, V}]}
%% @doc Parse a Content-Type like header. Returns the lowercased main value
%%      and a property list of its "name=value" options, in header order.
%%      Option names are lowercased and double-quoted values are unquoted.
%%      Options lacking a name or lacking an "=" are dropped.
parse_header(String) ->
    %% TODO: This is exactly as broken as Python's cgi module.
    %% Should parse properly like mochiweb_cookies.
    [Type | Parts] = [string:strip(S) || S <- string:tokens(String, ";")],
    Options = [Option || Part <- Parts,
                         {ok, Option} <- [parse_header_option(Part)]],
    {string:to_lower(Type), Options}.

%% Parse one "name=value" segment. Returns {ok, {Name, Value}} or skip.
parse_header_option(Part) ->
    case lists:splitwith(fun (C) -> C =/= $= end, Part) of
        {"", _} ->
            %% Skip anything with no name
            skip;
        {_, ""} ->
            %% Skip anything with no value
            skip;
        {Name, [$= | Value]} ->
            {ok, {string:to_lower(string:strip(Name)),
                  unquote_header(string:strip(Value))}}
    end.

%% Remove the surrounding double quotes from a header value and resolve
%% backslash escapes inside it. A value that does not start with a double
%% quote is returned unchanged.
unquote_header([$\" | Quoted]) ->
    unquote_header(Quoted, []);
unquote_header(Plain) ->
    Plain.

%% Unquoted accumulates the result in reverse. Only a double quote that is
%% the very last character ends the value; a missing closing quote is
%% tolerated.
unquote_header([], Unquoted) ->
    lists:reverse(Unquoted);
unquote_header([$\"], Unquoted) ->
    lists:reverse(Unquoted);
unquote_header([$\\, Escaped | Rest], Unquoted) ->
    unquote_header(Rest, [Escaped | Unquoted]);
unquote_header([C | Rest], Unquoted) ->
    unquote_header(Rest, [C | Unquoted]).
| |
%% @spec record_to_proplist(Record, Fields) -> proplist()
%% @doc Same as record_to_proplist/3 with TypeKey set to '__record'.
record_to_proplist(Record, Fields) ->
    record_to_proplist(Record, Fields, '__record').

%% @spec record_to_proplist(Record, Fields, TypeKey) -> proplist()
%% @doc Convert Record to a proplist. The first pair is {TypeKey, Tag},
%%      where Tag is the first element of the record tuple (the record
%%      type); it is followed by one {Field, Value} pair per entry of
%%      Fields, in order. Fields should be the result of
%%      record_info(fields, record_type) for the record type of Record.
%%      The call matches no clause unless Record is a tuple with exactly
%%      one more element than Fields has entries.
record_to_proplist(Record, Fields, TypeKey)
  when is_tuple(Record),
       is_list(Fields),
       tuple_size(Record) =:= length(Fields) + 1 ->
    [Tag | Values] = tuple_to_list(Record),
    [{TypeKey, Tag} | lists:zip(Fields, Values)].
| |
| |
%% Worker for shell_quote/1. Acc holds the output built so far, in reverse.
%% A backslash is placed before each double quote, backquote, backslash
%% and dollar sign, and a closing double quote is appended at the end.
shell_quote([], Acc) ->
    lists:reverse([$\" | Acc]);
shell_quote([C | Rest], Acc) ->
    case lists:member(C, "\"`\\$") of
        true ->
            %% Acc is reversed, so the escaping backslash goes in after C.
            shell_quote(Rest, [C, $\\ | Acc]);
        false ->
            shell_quote(Rest, [C | Acc])
    end.
| |
%% Run every self-test of this module, in order. Returns ok; a failing
%% test raises from within the test function.
test() ->
    Tests = [fun test_join/0,
             fun test_quote_plus/0,
             fun test_unquote/0,
             fun test_urlencode/0,
             fun test_parse_qs/0,
             fun test_urlsplit_path/0,
             fun test_urlunsplit_path/0,
             fun test_urlsplit/0,
             fun test_urlunsplit/0,
             fun test_path_split/0,
             fun test_guess_mime/0,
             fun test_parse_header/0,
             fun test_shell_quote/0,
             fun test_cmd/0,
             fun test_cmd_string/0,
             fun test_partition/0,
             fun test_safe_relative_path/0],
    lists:foreach(fun (Test) -> Test() end, Tests),
    ok.
| |
%% shell_quote/1 escapes "$", double quote and backquote, leaves the single
%% quote alone, and wraps the result in double quotes.
test_shell_quote() ->
    Quoted = shell_quote("foo $bar\"`' baz"),
    "\"foo \\$bar\\\"\\`' baz\"" = Quoted,
    ok.
| |
%% Runs "echo" through cmd/1 and expects the shell metacharacters in the
%% argument to come back literally.
test_cmd() ->
    Output = cmd(["echo", "$bling$ `word`!"]),
    "$bling$ `word`!\n" = Output,
    ok.
| |
%% cmd_string/1 quotes every argument and joins them with spaces.
test_cmd_string() ->
    CommandLine = cmd_string(["echo", "$bling$ `word`!"]),
    "\"echo\" \"\\$bling\\$ \\`word\\`!\"" = CommandLine,
    ok.
| |
%% parse_header/1 splits the main value from a single unquoted option.
test_parse_header() ->
    Parsed = parse_header("multipart/form-data; boundary=AaB03x"),
    {"multipart/form-data", [{"boundary", "AaB03x"}]} = Parsed,
    ok.
| |
%% guess_mime/1 on known, unknown and missing extensions.
test_guess_mime() ->
    Cases = [{"", "text/plain"},
             {".text", "text/plain"},
             {".zip", "application/zip"},
             {"x.zip", "application/zip"},
             {"x.html", "text/html"},
             {"x.xhtml", "application/xhtml+xml"}],
    lists:foreach(fun ({File, Mime}) -> Mime = guess_mime(File) end, Cases),
    ok.
| |
%% path_split/1 with a leading slash, an inner slash and no slash.
test_path_split() ->
    Cases = [{"/foo/bar", {"", "foo/bar"}},
             {"foo/bar", {"foo", "bar"}},
             {"bar", {"bar", ""}}],
    lists:foreach(fun ({Path, Split}) -> Split = path_split(Path) end, Cases),
    ok.
| |
%% urlsplit/1 on a path-only URL and on a full URL with scheme and netloc.
test_urlsplit() ->
    Relative = urlsplit("/foo#bar?baz"),
    {"", "", "/foo", "", "bar?baz"} = Relative,
    Absolute = urlsplit("http://host:port/foo#bar?baz"),
    {"http", "host:port", "/foo", "", "bar?baz"} = Absolute,
    ok.
| |
%% urlsplit_path/1 across the combinations of query and fragment.
test_urlsplit_path() ->
    Cases = [{"/foo/bar", {"/foo/bar", "", ""}},
             {"/foo?baz", {"/foo", "baz", ""}},
             {"/foo#bar?baz", {"/foo", "", "bar?baz"}},
             {"/foo#bar?baz#wibble", {"/foo", "", "bar?baz#wibble"}},
             {"/foo?bar#baz", {"/foo", "bar", "baz"}},
             {"/foo?bar?baz#baz", {"/foo", "bar?baz", "baz"}}],
    lists:foreach(fun ({Url, Split}) -> Split = urlsplit_path(Url) end, Cases),
    ok.
| |
%% urlunsplit/1 with and without a scheme and netloc.
test_urlunsplit() ->
    Relative = urlunsplit({"", "", "/foo", "", "bar?baz"}),
    "/foo#bar?baz" = Relative,
    Absolute = urlunsplit({"http", "host:port", "/foo", "", "bar?baz"}),
    "http://host:port/foo#bar?baz" = Absolute,
    ok.
| |
%% urlunsplit_path/1 across the combinations of query and fragment.
test_urlunsplit_path() ->
    Cases = [{{"/foo/bar", "", ""}, "/foo/bar"},
             {{"/foo", "baz", ""}, "/foo?baz"},
             {{"/foo", "", "bar?baz"}, "/foo#bar?baz"},
             {{"/foo", "", "bar?baz#wibble"}, "/foo#bar?baz#wibble"},
             {{"/foo", "bar", "baz"}, "/foo?bar#baz"},
             {{"/foo", "bar?baz", "baz"}, "/foo?bar?baz#baz"}],
    lists:foreach(fun ({Parts, Url}) -> Url = urlunsplit_path(Parts) end,
                  Cases),
    ok.
| |
%% join/2 with character and string separators, nested strings, a single
%% element and an empty separator.
test_join() ->
    Cases = [{"foo,bar,baz", ["foo", "bar", "baz"], $,},
             {"foo,bar,baz", ["foo", "bar", "baz"], ","},
             {"foo bar", [["foo", " bar"]], ","},
             {"foo bar,baz", [["foo", " bar"], "baz"], ","},
             {"foo", ["foo"], ","},
             {"foobarbaz", ["foo", "bar", "baz"], ""}],
    lists:foreach(fun ({Joined, Strings, Separator}) ->
                          Joined = join(Strings, Separator)
                  end,
                  Cases),
    ok.
| |
%% quote_plus/1 for every accepted input type (atom, integer, float, string
%% and binary) and for the three encoding cases: safe characters, space,
%% and percent-escaped characters.
test_quote_plus() ->
    "foo" = quote_plus(foo),
    "1" = quote_plus(1),
    "1.1" = quote_plus(1.1),
    "foo" = quote_plus("foo"),
    %% Takes the place of a duplicated "foo\n" assertion; the binary clause
    %% was otherwise not exercised.
    "foo" = quote_plus(<<"foo">>),
    "foo+bar" = quote_plus("foo bar"),
    "foo%0A" = quote_plus("foo\n"),
    "foo%3B%26%3D" = quote_plus("foo;&="),
    ok.
| |
%% unquote/1 decodes "+" and percent escapes.
test_unquote() ->
    Cases = [{"foo+bar", "foo bar"},
             {"foo%20bar", "foo bar"},
             {"foo%0D%0A", "foo\r\n"}],
    lists:foreach(fun ({Encoded, Decoded}) -> Decoded = unquote(Encoded) end,
                  Cases),
    ok.
| |
%% urlencode/1 with atom and string keys and string and integer values.
test_urlencode() ->
    Props = [{foo, "bar"},
             {"baz", "wibble \r\n"},
             {z, 1}],
    "foo=bar&baz=wibble+%0D%0A&z=1" = urlencode(Props),
    ok.
| |
%% parse_qs/1 returns decoded pairs in input order.
test_parse_qs() ->
    Parsed = parse_qs("foo=bar&baz=wibble+%0D%0A&z=1"),
    [{"foo", "bar"}, {"baz", "wibble \r\n"}, {"z", "1"}] = Parsed,
    ok.
| |
%% partition/2 with the separator absent, in the middle, at either end,
%% and longer than one character.
test_partition() ->
    Cases = [{"foo", "/", {"foo", "", ""}},
             {"foo/bar", "/", {"foo", "/", "bar"}},
             {"foo/", "/", {"foo", "/", ""}},
             {"/bar", "/", {"", "/", "bar"}},
             {"foo/bar", "oo/ba", {"f", "oo/ba", "r"}}],
    lists:foreach(fun ({String, Sep, Parts}) ->
                          Parts = partition(String, Sep)
                  end,
                  Cases),
    ok.
| |
%% safe_relative_path/1 on safe paths (with their reduced form) and on
%% unsafe paths (which yield undefined).
test_safe_relative_path() ->
    Cases = [{"foo", "foo"},
             {"foo/", "foo/"},
             {"foo/bar/..", "foo"},
             {"foo/../bar", "bar"},
             {"foo/../bar/", "bar/"},
             {"foo/..", ""},
             {"foo/../", ""},
             {"/foo", undefined},
             {"../foo", undefined},
             {"foo/../..", undefined},
             {"foo//", undefined}],
    lists:foreach(fun ({Path, Reduced}) ->
                          Reduced = safe_relative_path(Path)
                  end,
                  Cases),
    ok.