/**
 * @fileoverview
 * Registers a language handler for Haskell.
 *
 *
 * To use, include prettify.js and this file in your HTML page.
 * Then put your code in an HTML tag like
 *      <pre class="prettyprint lang-hs">(my Haskell code)</pre>
 * The lang-hs class identifies the language as Haskell.
 * This file supports the following language extensions:
 *     lang-hs - Haskell
 *
 *
 * I used http://www.informatik.uni-freiburg.de/~thiemann/haskell/haskell98-report-html/syntax-iso.html
 * as the basis, but ignore the way the ncomment production nests since this
 * makes the lexical grammar irregular.  It might be possible to support
 * ncomments using the lookbehind filter.
 *
 *
 * @author mikesamuel@gmail.com
 */
// Registers a simple (regular-expression based) lexer for Haskell source
// under the 'hs' extension.  The first array passed to createSimpleLexer
// holds patterns keyed by the set of characters a token may start with (the
// fourth element of each entry); the second array holds patterns without such
// a key, listed in the order keyword, identifier, punctuation so that the more
// specific rule precedes the more general one.
PR.registerLangHandler(
    PR.createSimpleLexer(
        [
         // Whitespace
         // whitechar    ->    newline | vertab | space | tab | uniWhite
         // newline      ->    return linefeed | return | linefeed | formfeed
         [PR.PR_PLAIN,       /^[\t\n\x0B\x0C\r ]+/, null, '\t\n\x0B\x0C\r '],
         // Single line double and single-quoted strings.
         // char         ->    ' (graphic<' | \> | space | escape<\&>) '
         // string       ->    " {graphic<" | \> | space | escape | gap}"
         // escape       ->    \ ( charesc | ascii | decimal | o octal
         //                        | x hexadecimal )
         // charesc      ->    a | b | f | n | r | t | v | \ | " | ' | &
         // An unterminated string is still styled as a string up to the end
         // of the input (the trailing `$` alternative).
         [PR.PR_STRING,      /^\"(?:[^\"\\\n\x0C\r]|\\[\s\S])*(?:\"|$)/,
          null, '"'],
         // The closing quote is optional so that a half-typed character
         // literal is still styled as a string.
         [PR.PR_STRING,      /^\'(?:[^\'\\\n\x0C\r]|\\[^&])\'?/,
          null, "'"],
         // decimal      ->    digit{digit}
         // octal        ->    octit{octit}
         // hexadecimal  ->    hexit{hexit}
         // integer      ->    decimal
         //               |    0o octal | 0O octal
         //               |    0x hexadecimal | 0X hexadecimal
         // float        ->    decimal . decimal [exponent]
         //               |    decimal exponent
         // exponent     ->    (e | E) [+ | -] decimal
         // The /i flag covers the upper-case 0O, 0X and E spellings.
         [PR.PR_LITERAL,
          /^(?:0o[0-7]+|0x[\da-f]+|\d+(?:\.\d+)?(?:e[+\-]?\d+)?)/i,
          null, '0123456789']
        ],
        [
         // Haskell does not have a regular lexical grammar due to the nested
         // ncomment.
         // comment      ->    dashes [ any<symbol> {any}] newline
         // ncomment     ->    opencom ANYseq {ncomment ANYseq}closecom
         // dashes       ->    '--' {'-'}
         // opencom      ->    '{-'
         // closecom     ->    '-}'
         // The block-comment alternative ends with `-+\}` rather than `-\}`
         // so that a comment closed with extra dashes, e.g. `{- note --}`,
         // is still recognised.  Nesting is not tracked.
         [PR.PR_COMMENT,     /^(?:(?:--+(?:[^\r\n\x0C]*)?)|(?:\{-(?:[^-]|-+[^-\}])*-+\}))/],
         // reservedid   ->    case | class | data | default | deriving | do
         //               |    else | if | import | in | infix | infixl | infixr
         //               |    instance | let | module | newtype | of | then
         //               |    type | where | _
         // The lookahead rejects any following identifier character,
         // including `_`, so that `case_x` or `where'` is not split into a
         // keyword plus a suffix.
         [PR.PR_KEYWORD,     /^(?:case|class|data|default|deriving|do|else|if|import|in|infix|infixl|infixr|instance|let|module|newtype|of|then|type|where|_)(?=[^\w\']|$)/, null],
         // qvarid       ->    [ modid . ] varid
         // qconid       ->    [ modid . ] conid
         // varid        ->    (small {small | large | digit | ' })<reservedid>
         // conid        ->    large {small | large | digit | ' }
         // modid        ->    conid
         // small        ->    ascSmall | uniSmall | _
         // ascSmall     ->    a | b | ... | z
         // uniSmall     ->    any Unicode lowercase letter
         // large        ->    ascLarge | uniLarge
         // ascLarge     ->    A | B | ... | Z
         // uniLarge     ->    any uppercase or titlecase Unicode letter
         // Only ASCII letters are handled.  `_` is accepted as a leading
         // character because the grammar counts it as `small`.
         [PR.PR_PLAIN,       /^(?:[A-Z][\w\']*\.)*[a-zA-Z_][\w\']*/],
         // matches the symbol production
         [PR.PR_PUNCTUATION, /^[^\t\n\x0B\x0C\r a-zA-Z0-9\'\"]+/]
        ]),
    ['hs']);