# -- package RivetEntities
#
# The code is largely taken from a simple yet clever encoder/decoder of HTML
# entities starting from a utf-8 character string. The original code, written
# by Andy Goth, is at http://wiki.tcl.tk/26403. Package entities extends the
# functionality of the original code by combining it with Tcl's 'encoding'
# command to encode/decode from any supported encoding
#
# $Id$

package provide RivetEntities 1.0

namespace eval ::rivet {
    # namespace ensemble create -subcommands {encode decode}
    namespace export encode decode

    # -- utf8_entities_map
    #
    # Flat list of <character> <entity> pairs in the form expected by
    # 'string map'. The value is brace-quoted, so the \uXXXX and \; escapes
    # are stored verbatim here and are resolved only when Tcl parses the
    # value as a list (on its first use by 'string map' or 'lreverse').
    # Every entity must be spelled out by name: a literal character in the
    # value position would map the character onto itself plus a semicolon,
    # and a bare double quote would make the list unparseable.

    variable utf8_entities_map {
        \u00a0 &nbsp\;  \u00a1 &iexcl\;  \u00a2 &cent\;  \u00a3 &pound\;
        \u00a4 &curren\;  \u00a5 &yen\;  \u00a6 &brvbar\;  \u00a7 &sect\;
        \u00a8 &uml\;  \u00a9 &copy\;  \u00aa &ordf\;  \u00ab &laquo\;
        \u00ac &not\;  \u00ad &shy\;  \u00ae &reg\;  \u00af &macr\;
        \u00b0 &deg\;  \u00b1 &plusmn\;  \u00b2 &sup2\;  \u00b3 &sup3\;
        \u00b4 &acute\;  \u00b5 &micro\;  \u00b6 &para\;  \u00b7 &middot\;
        \u00b8 &cedil\;  \u00b9 &sup1\;  \u00ba &ordm\;  \u00bb &raquo\;
        \u00bc &frac14\;  \u00bd &frac12\;  \u00be &frac34\;  \u00bf &iquest\;
        \u00c0 &Agrave\;  \u00c1 &Aacute\;  \u00c2 &Acirc\;  \u00c3 &Atilde\;
        \u00c4 &Auml\;  \u00c5 &Aring\;  \u00c6 &AElig\;  \u00c7 &Ccedil\;
        \u00c8 &Egrave\;  \u00c9 &Eacute\;  \u00ca &Ecirc\;  \u00cb &Euml\;
        \u00cc &Igrave\;  \u00cd &Iacute\;  \u00ce &Icirc\;  \u00cf &Iuml\;
        \u00d0 &ETH\;  \u00d1 &Ntilde\;  \u00d2 &Ograve\;  \u00d3 &Oacute\;
        \u00d4 &Ocirc\;  \u00d5 &Otilde\;  \u00d6 &Ouml\;  \u00d7 &times\;
        \u00d8 &Oslash\;  \u00d9 &Ugrave\;  \u00da &Uacute\;  \u00db &Ucirc\;
        \u00dc &Uuml\;  \u00dd &Yacute\;  \u00de &THORN\;  \u00df &szlig\;
        \u00e0 &agrave\;  \u00e1 &aacute\;  \u00e2 &acirc\;  \u00e3 &atilde\;
        \u00e4 &auml\;  \u00e5 &aring\;  \u00e6 &aelig\;  \u00e7 &ccedil\;
        \u00e8 &egrave\;  \u00e9 &eacute\;  \u00ea &ecirc\;  \u00eb &euml\;
        \u00ec &igrave\;  \u00ed &iacute\;  \u00ee &icirc\;  \u00ef &iuml\;
        \u00f0 &eth\;  \u00f1 &ntilde\;  \u00f2 &ograve\;  \u00f3 &oacute\;
        \u00f4 &ocirc\;  \u00f5 &otilde\;  \u00f6 &ouml\;  \u00f7 &divide\;
        \u00f8 &oslash\;  \u00f9 &ugrave\;  \u00fa &uacute\;  \u00fb &ucirc\;
        \u00fc &uuml\;  \u00fd &yacute\;  \u00fe &thorn\;  \u00ff &yuml\;
        \u0192 &fnof\;
        \u0391 &Alpha\;  \u0392 &Beta\;  \u0393 &Gamma\;  \u0394 &Delta\;
        \u0395 &Epsilon\;  \u0396 &Zeta\;  \u0397 &Eta\;  \u0398 &Theta\;
        \u0399 &Iota\;  \u039a &Kappa\;  \u039b &Lambda\;  \u039c &Mu\;
        \u039d &Nu\;  \u039e &Xi\;  \u039f &Omicron\;  \u03a0 &Pi\;
        \u03a1 &Rho\;  \u03a3 &Sigma\;  \u03a4 &Tau\;  \u03a5 &Upsilon\;
        \u03a6 &Phi\;  \u03a7 &Chi\;  \u03a8 &Psi\;  \u03a9 &Omega\;
        \u03b1 &alpha\;  \u03b2 &beta\;  \u03b3 &gamma\;  \u03b4 &delta\;
        \u03b5 &epsilon\;  \u03b6 &zeta\;  \u03b7 &eta\;  \u03b8 &theta\;
        \u03b9 &iota\;  \u03ba &kappa\;  \u03bb &lambda\;  \u03bc &mu\;
        \u03bd &nu\;  \u03be &xi\;  \u03bf &omicron\;  \u03c0 &pi\;
        \u03c1 &rho\;  \u03c2 &sigmaf\;  \u03c3 &sigma\;  \u03c4 &tau\;
        \u03c5 &upsilon\;  \u03c6 &phi\;  \u03c7 &chi\;  \u03c8 &psi\;
        \u03c9 &omega\;  \u03d1 &thetasym\;  \u03d2 &upsih\;  \u03d6 &piv\;
        \u2022 &bull\;  \u2026 &hellip\;  \u2032 &prime\;  \u2033 &Prime\;
        \u203e &oline\;  \u2044 &frasl\;  \u2118 &weierp\;  \u2111 &image\;
        \u211c &real\;  \u2122 &trade\;  \u2135 &alefsym\;  \u2190 &larr\;
        \u2191 &uarr\;  \u2192 &rarr\;  \u2193 &darr\;  \u2194 &harr\;
        \u21b5 &crarr\;  \u21d0 &lArr\;  \u21d1 &uArr\;  \u21d2 &rArr\;
        \u21d3 &dArr\;  \u21d4 &hArr\;  \u2200 &forall\;  \u2202 &part\;
        \u2203 &exist\;  \u2205 &empty\;  \u2207 &nabla\;  \u2208 &isin\;
        \u2209 &notin\;  \u220b &ni\;  \u220f &prod\;  \u2211 &sum\;
        \u2212 &minus\;  \u2217 &lowast\;  \u221a &radic\;  \u221d &prop\;
        \u221e &infin\;  \u2220 &ang\;  \u2227 &and\;  \u2228 &or\;
        \u2229 &cap\;  \u222a &cup\;  \u222b &int\;  \u2234 &there4\;
        \u223c &sim\;  \u2245 &cong\;  \u2248 &asymp\;  \u2260 &ne\;
        \u2261 &equiv\;  \u2264 &le\;  \u2265 &ge\;  \u2282 &sub\;
        \u2283 &sup\;  \u2284 &nsub\;  \u2286 &sube\;  \u2287 &supe\;
        \u2295 &oplus\;  \u2297 &otimes\;  \u22a5 &perp\;  \u22c5 &sdot\;
        \u2308 &lceil\;  \u2309 &rceil\;  \u230a &lfloor\;  \u230b &rfloor\;
        \u2329 &lang\;  \u232a &rang\;  \u25ca &loz\;  \u2660 &spades\;
        \u2663 &clubs\;  \u2665 &hearts\;  \u2666 &diams\;
        \u0022 &quot\;  \u0026 &amp\;  \u003c &lt\;  \u003e &gt\;
        \u0152 &OElig\;  \u0153 &oelig\;  \u0160 &Scaron\;  \u0161 &scaron\;
        \u0178 &Yuml\;  \u02c6 &circ\;  \u02dc &tilde\;  \u2002 &ensp\;
        \u2003 &emsp\;  \u2009 &thinsp\;  \u200c &zwnj\;  \u200d &zwj\;
        \u200e &lrm\;  \u200f &rlm\;  \u2013 &ndash\;  \u2014 &mdash\;
        \u2018 &lsquo\;  \u2019 &rsquo\;  \u201a &sbquo\;  \u201c &ldquo\;
        \u201d &rdquo\;  \u201e &bdquo\;  \u2020 &dagger\;  \u2021 &Dagger\;
        \u2030 &permil\;  \u2039 &lsaquo\;  \u203a &rsaquo\;  \u20ac &euro\;
    }

    # -- entitities_utf8_map
    #
    # Inverse map used by 'decode'. Reversing the whole flat list swaps the
    # two members of every pair, which yields <entity> <character> pairs.
    # The (misspelled) variable name is kept unchanged for compatibility.

    variable entitities_utf8_map [lreverse $utf8_entities_map]

    # -- args_processing
    #
    # Helper shared by 'encode' and 'decode': applies Tcl's 'encoding'
    # command to a string when an encoding other than utf-8 was requested.
    #
    # Arguments:
    #
    #   * string:  text to be converted
    #   * mode:    'encoding' subcommand to run; the callers in this file
    #              pass either convertfrom or convertto
    #   * arglist: option/value list received by the caller; only the
    #              -encoding option is looked up
    #
    # Returned value:
    #
    #   - the converted string, or the unchanged input when -encoding is
    #     missing or equal to "utf-8"
    #

    proc args_processing {string mode arglist} {
        if {[dict exists $arglist -encoding]} {
            set requested [dict get $arglist -encoding]
            if {$requested ne "utf-8"} {
                return [encoding $mode $requested $string]
            }
        }
        return $string
    }

    # -- encode
    #
    # The input string is processed and the characters that have a named
    # HTML entity are replaced with the corresponding entity
    #
    # If the input string encoding is different from utf-8 the string is
    # first converted with 'encoding convertfrom' and then processed for
    # entity substitution
    #
    # ::rivet::encode <input_string> ?-encoding <encoding>?
    #
    # Arguments:
    #
    #   * <input_string>: string whose characters must go through HTML
    #                     entities expansion
    #   * <encoding>:     input string character encoding (utf-8 when omitted)
    #
    # Returned value:
    #
    #   - expanded string
    #

    proc encode {string args} {
        variable utf8_entities_map
        return [string map $utf8_entities_map \
                    [args_processing $string convertfrom $args]]
    }

    # -- decode
    #
    # The entities in the input string are replaced with the characters they
    # stand for; when the final string has to be in some other encoding Tcl's
    # command 'encoding' is invoked for the final conversion
    #
    # ::rivet::decode <input_string> ?-encoding <encoding>?
    #
    # Arguments:
    #
    #   * <input_string>: string whose HTML entities have to be converted
    #                     back into characters
    #   * <encoding>:     output string character encoding (utf-8 when omitted)
    #
    # Returned value:
    #
    #   - converted string
    #

    proc decode {string args} {
        variable entitities_utf8_map
        return [args_processing [string map $entitities_utf8_map $string] \
                    convertto $args]
    }
}