%% blob: d037d2f8c38bd61059f071b53c00199df69438b3 [file] [log] [blame]
%% @author Bob Ippolito <bob@mochimedia.com>
%% @copyright 2007 Mochi Media, Inc.
%% @doc Converts HTML 4 charrefs and entities to codepoints.
-module(mochiweb_charref).
-export([charref/1, test/0]).
%% External API.
%% @spec charref(S) -> integer() | undefined
%% @doc Convert a decimal charref, hex charref, or html entity to a unicode
%%      codepoint, or return undefined on failure.
%%      S may be a string or a binary and should not include the leading
%%      ampersand or the trailing semicolon.
%%      Numeric references must consist of digits only (a leading "+" or "-"
%%      is rejected) and must not exceed 16#10FFFF; anything else yields
%%      undefined.
%%      charref("#38") = 38, charref("#x26") = 38, charref("amp") = 38,
%%      charref("#-38") = undefined.
charref(B) when is_binary(B) ->
    charref(binary_to_list(B));
charref([$#, C | L]) when C =:= $x orelse C =:= $X ->
    numeric_charref(L, 16);
charref([$# | L]) ->
    numeric_charref(L, 10);
charref(L) ->
    entity(L).

%% Parse the digit part of a numeric charref in the given base.
%% list_to_integer/2 accepts an optional leading sign, which would let
%% "#-5" through as a negative "codepoint", so signed input is refused
%% before conversion. Results above the Unicode maximum are refused too.
numeric_charref([C | _] = Digits, Base) when C =/= $+, C =/= $- ->
    try erlang:list_to_integer(Digits, Base) of
        N when N =< 16#10FFFF -> N;
        _ -> undefined
    catch
        error:badarg -> undefined
    end;
numeric_charref(_, _) ->
    %% Empty digit string, signed input, or not a proper string at all.
    undefined.
%% @spec test() -> ok
%% @doc Self-test for mochiweb_charref. Checks charref/1 against a table of
%%      {Input, Expected} pairs and fails with a badmatch error on the first
%%      pair that does not agree.
test() ->
    Expectations = [{"#1234", 1234},
                    {"#xfF", 255},
                    {"#XFf", 255},
                    {"amp", 38},
                    {"not_an_entity", undefined}],
    lists:foreach(fun({Input, Expected}) ->
                          %% Expected is already bound, so this is an assertion.
                          Expected = charref(Input)
                  end,
                  Expectations),
    ok.
%% Internal API.
%% @spec entity(S) -> integer() | undefined
%% @doc Look up a named character entity (without the ampersand and
%%      semicolon) and return its codepoint, or undefined if the name is not
%%      known. Names are case-sensitive. Covers the HTML 4 entity sets plus
%%      "apos", which HTML 4 lacks but XML/XHTML and HTML5 define.

%% Latin-1 characters (160-255).
entity("nbsp") -> 160;
entity("iexcl") -> 161;
entity("cent") -> 162;
entity("pound") -> 163;
entity("curren") -> 164;
entity("yen") -> 165;
entity("brvbar") -> 166;
entity("sect") -> 167;
entity("uml") -> 168;
entity("copy") -> 169;
entity("ordf") -> 170;
entity("laquo") -> 171;
entity("not") -> 172;
entity("shy") -> 173;
entity("reg") -> 174;
entity("macr") -> 175;
entity("deg") -> 176;
entity("plusmn") -> 177;
entity("sup2") -> 178;
entity("sup3") -> 179;
entity("acute") -> 180;
entity("micro") -> 181;
entity("para") -> 182;
entity("middot") -> 183;
entity("cedil") -> 184;
entity("sup1") -> 185;
entity("ordm") -> 186;
entity("raquo") -> 187;
entity("frac14") -> 188;
entity("frac12") -> 189;
entity("frac34") -> 190;
entity("iquest") -> 191;
entity("Agrave") -> 192;
entity("Aacute") -> 193;
entity("Acirc") -> 194;
entity("Atilde") -> 195;
entity("Auml") -> 196;
entity("Aring") -> 197;
entity("AElig") -> 198;
entity("Ccedil") -> 199;
entity("Egrave") -> 200;
entity("Eacute") -> 201;
entity("Ecirc") -> 202;
entity("Euml") -> 203;
entity("Igrave") -> 204;
entity("Iacute") -> 205;
entity("Icirc") -> 206;
entity("Iuml") -> 207;
entity("ETH") -> 208;
entity("Ntilde") -> 209;
entity("Ograve") -> 210;
entity("Oacute") -> 211;
entity("Ocirc") -> 212;
entity("Otilde") -> 213;
entity("Ouml") -> 214;
entity("times") -> 215;
entity("Oslash") -> 216;
entity("Ugrave") -> 217;
entity("Uacute") -> 218;
entity("Ucirc") -> 219;
entity("Uuml") -> 220;
entity("Yacute") -> 221;
entity("THORN") -> 222;
entity("szlig") -> 223;
entity("agrave") -> 224;
entity("aacute") -> 225;
entity("acirc") -> 226;
entity("atilde") -> 227;
entity("auml") -> 228;
entity("aring") -> 229;
entity("aelig") -> 230;
entity("ccedil") -> 231;
entity("egrave") -> 232;
entity("eacute") -> 233;
entity("ecirc") -> 234;
entity("euml") -> 235;
entity("igrave") -> 236;
entity("iacute") -> 237;
entity("icirc") -> 238;
entity("iuml") -> 239;
entity("eth") -> 240;
entity("ntilde") -> 241;
entity("ograve") -> 242;
entity("oacute") -> 243;
entity("ocirc") -> 244;
entity("otilde") -> 245;
entity("ouml") -> 246;
entity("divide") -> 247;
entity("oslash") -> 248;
entity("ugrave") -> 249;
entity("uacute") -> 250;
entity("ucirc") -> 251;
entity("uuml") -> 252;
entity("yacute") -> 253;
entity("thorn") -> 254;
entity("yuml") -> 255;
%% Symbols, mathematical symbols, and Greek letters.
entity("fnof") -> 402;
entity("Alpha") -> 913;
entity("Beta") -> 914;
entity("Gamma") -> 915;
entity("Delta") -> 916;
entity("Epsilon") -> 917;
entity("Zeta") -> 918;
entity("Eta") -> 919;
entity("Theta") -> 920;
entity("Iota") -> 921;
entity("Kappa") -> 922;
entity("Lambda") -> 923;
entity("Mu") -> 924;
entity("Nu") -> 925;
entity("Xi") -> 926;
entity("Omicron") -> 927;
entity("Pi") -> 928;
entity("Rho") -> 929;
entity("Sigma") -> 931;
entity("Tau") -> 932;
entity("Upsilon") -> 933;
entity("Phi") -> 934;
entity("Chi") -> 935;
entity("Psi") -> 936;
entity("Omega") -> 937;
entity("alpha") -> 945;
entity("beta") -> 946;
entity("gamma") -> 947;
entity("delta") -> 948;
entity("epsilon") -> 949;
entity("zeta") -> 950;
entity("eta") -> 951;
entity("theta") -> 952;
entity("iota") -> 953;
entity("kappa") -> 954;
entity("lambda") -> 955;
entity("mu") -> 956;
entity("nu") -> 957;
entity("xi") -> 958;
entity("omicron") -> 959;
entity("pi") -> 960;
entity("rho") -> 961;
entity("sigmaf") -> 962;
entity("sigma") -> 963;
entity("tau") -> 964;
entity("upsilon") -> 965;
entity("phi") -> 966;
entity("chi") -> 967;
entity("psi") -> 968;
entity("omega") -> 969;
entity("thetasym") -> 977;
entity("upsih") -> 978;
entity("piv") -> 982;
entity("bull") -> 8226;
entity("hellip") -> 8230;
entity("prime") -> 8242;
entity("Prime") -> 8243;
entity("oline") -> 8254;
entity("frasl") -> 8260;
entity("weierp") -> 8472;
entity("image") -> 8465;
entity("real") -> 8476;
entity("trade") -> 8482;
entity("alefsym") -> 8501;
entity("larr") -> 8592;
entity("uarr") -> 8593;
entity("rarr") -> 8594;
entity("darr") -> 8595;
entity("harr") -> 8596;
entity("crarr") -> 8629;
entity("lArr") -> 8656;
entity("uArr") -> 8657;
entity("rArr") -> 8658;
entity("dArr") -> 8659;
entity("hArr") -> 8660;
entity("forall") -> 8704;
entity("part") -> 8706;
entity("exist") -> 8707;
entity("empty") -> 8709;
entity("nabla") -> 8711;
entity("isin") -> 8712;
entity("notin") -> 8713;
entity("ni") -> 8715;
entity("prod") -> 8719;
entity("sum") -> 8721;
entity("minus") -> 8722;
entity("lowast") -> 8727;
entity("radic") -> 8730;
entity("prop") -> 8733;
entity("infin") -> 8734;
entity("ang") -> 8736;
entity("and") -> 8743;
entity("or") -> 8744;
entity("cap") -> 8745;
entity("cup") -> 8746;
entity("int") -> 8747;
entity("there4") -> 8756;
entity("sim") -> 8764;
entity("cong") -> 8773;
entity("asymp") -> 8776;
entity("ne") -> 8800;
entity("equiv") -> 8801;
entity("le") -> 8804;
entity("ge") -> 8805;
entity("sub") -> 8834;
entity("sup") -> 8835;
entity("nsub") -> 8836;
entity("sube") -> 8838;
entity("supe") -> 8839;
entity("oplus") -> 8853;
entity("otimes") -> 8855;
entity("perp") -> 8869;
entity("sdot") -> 8901;
entity("lceil") -> 8968;
entity("rceil") -> 8969;
entity("lfloor") -> 8970;
entity("rfloor") -> 8971;
entity("lang") -> 9001;
entity("rang") -> 9002;
entity("loz") -> 9674;
entity("spades") -> 9824;
entity("clubs") -> 9827;
entity("hearts") -> 9829;
entity("diams") -> 9830;
%% Markup-significant and internationalization characters.
entity("quot") -> 34;
entity("amp") -> 38;
%% "apos" is not an HTML 4 entity, but XML 1.0, XHTML 1.0 and HTML5 all
%% define it as U+0027 and it is common in real-world markup.
entity("apos") -> 39;
entity("lt") -> 60;
entity("gt") -> 62;
entity("OElig") -> 338;
entity("oelig") -> 339;
entity("Scaron") -> 352;
entity("scaron") -> 353;
entity("Yuml") -> 376;
entity("circ") -> 710;
entity("tilde") -> 732;
entity("ensp") -> 8194;
entity("emsp") -> 8195;
entity("thinsp") -> 8201;
entity("zwnj") -> 8204;
entity("zwj") -> 8205;
entity("lrm") -> 8206;
entity("rlm") -> 8207;
entity("ndash") -> 8211;
entity("mdash") -> 8212;
entity("lsquo") -> 8216;
entity("rsquo") -> 8217;
entity("sbquo") -> 8218;
entity("ldquo") -> 8220;
entity("rdquo") -> 8221;
entity("bdquo") -> 8222;
entity("dagger") -> 8224;
entity("Dagger") -> 8225;
entity("permil") -> 8240;
entity("lsaquo") -> 8249;
entity("rsaquo") -> 8250;
entity("euro") -> 8364;
%% Anything else (unknown name, or not a string) is not an entity.
entity(_) -> undefined.