| // Copyright (C) 2006 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| |
| /** |
| * @fileoverview |
| * some functions for browser-side pretty printing of code contained in html. |
| * |
| * <p> |
| * For a fairly comprehensive set of languages see the |
| * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> |
| * file that came with this source. At a minimum, the lexer should work on a |
| * number of languages including C and friends, Java, Python, Bash, SQL, HTML, |
| * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk |
| * and a subset of Perl, but, because of commenting conventions, doesn't work on |
| * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. |
| * <p> |
| * Usage: <ol> |
| * <li> include this source file in an html page via |
| * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} |
| * <li> define style rules. See the example page for examples. |
| * <li> mark the {@code <pre>} and {@code <code>} tags in your source with |
| * {@code class=prettyprint.} |
| * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty |
| * printer needs to do more substantial DOM manipulations to support that, so |
| * some css styles may not be preserved. |
| * </ol> |
| * That's it. I wanted to keep the API as simple as possible, so there's no |
| * need to specify which language the code is in, but if you wish, you can add |
| * another class to the {@code <pre>} or {@code <code>} element to specify the |
| * language, as in {@code <pre class="prettyprint lang-java">}. Any class that |
| * starts with "lang-" followed by a file extension, specifies the file type. |
| * See the "lang-*.js" files in this directory for code that implements |
| * per-language file handlers. |
| * <p> |
| * Change log:<br> |
| * cbeust, 2006/08/22 |
| * <blockquote> |
| * Java annotations (start with "@") are now captured as literals ("lit") |
| * </blockquote> |
| * @requires console |
| */ |
| |
| // JSLint declarations |
| /*global console, document, navigator, setTimeout, window */ |
| |
| /** |
| * Global switch, published as a property of {@code window} under a quoted |
| * name, that controls how {@code prettyPrint} schedules its work. |
| * When {@code true} (the default assigned here), {@code prettyPrint} is split |
| * into multiple timeouts so as not to interfere with UI events. |
| * If set to {@code false}, {@code prettyPrint()} is synchronous. |
| */ |
| window['PR_SHOULD_USE_CONTINUATION'] = true; |
| |
| (function () { |
// Keyword lists for various languages.
// We use things that coerce to strings to make them compact when minified
// and to defeat aggressive optimizers that fold large string constants.
//
// Each list is either a comma-separated string or an array whose elements are
// such strings or other lists.  Coercing an array to a string joins its
// elements with commas, so a nested list flattens to one comma-separated
// string when it is finally used to build a keyword pattern.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS,"auto,case,char,const,default," +
    "double,enum,extern,float,goto,int,long,register,short,signed,sizeof," +
    "static,struct,switch,typedef,union,unsigned,void,volatile"];
var COMMON_KEYWORDS = [C_KEYWORDS,"catch,class,delete,false,import," +
    "new,operator,private,protected,public,this,throw,true,try,typeof"];
var CPP_KEYWORDS = [COMMON_KEYWORDS,"alignof,align_union,asm,axiom,bool," +
    "concept,concept_map,const_cast,constexpr,decltype," +
    "dynamic_cast,explicit,export,friend,inline,late_check," +
    "mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast," +
    "template,typeid,typename,using,virtual,where"];
var JAVA_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,boolean,byte,extends,final,finally,implements,import," +
    "instanceof,null,native,package,strictfp,super,synchronized,throws," +
    "transient"];
var CSHARP_KEYWORDS = [JAVA_KEYWORDS,
    "as,base,by,checked,decimal,delegate,descending,dynamic,event," +
    "fixed,foreach,from,group,implicit,in,interface,internal,into,is,lock," +
    "object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed," +
    "stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var"];
var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
    "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
    "true,try,unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
    "debugger,eval,export,function,get,null,set,undefined,var,with," +
    "Infinity,NaN"];
var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
    "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
    "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
    "elif,except,exec,finally,from,global,import,in,is,lambda," +
    "nonlocal,not,or,pass,print,raise,try,with,yield," +
    "False,True,None"];
var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
    "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
    "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
    "BEGIN,END"];
var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
    "function,in,local,set,then,until"];
// Union of the per-language lists.  The Perl and Python lists must be
// separate array elements: concatenating them with "+" glues the last Perl
// keyword onto the first Python keyword ("END" + "break" -> "ENDbreak").
var ALL_KEYWORDS = [
    CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
    PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
// Prefix pattern for a handful of well-known C/C++ type names.
var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)/;
| |
| // token style names. correspond to css classes |
| /** |
| * token style for a string literal |
| * @const |
| */ |
| var PR_STRING = 'str'; |
| /** |
| * token style for a keyword |
| * @const |
| */ |
| var PR_KEYWORD = 'kwd'; |
| /** |
| * token style for a comment |
| * @const |
| */ |
| var PR_COMMENT = 'com'; |
| /** |
| * token style for a type |
| * @const |
| */ |
| var PR_TYPE = 'typ'; |
| /** |
| * token style for a literal value. e.g. 1, null, true. |
| * @const |
| */ |
| var PR_LITERAL = 'lit'; |
| /** |
| * token style for a punctuation string. |
| * @const |
| */ |
| var PR_PUNCTUATION = 'pun'; |
| /** |
| * token style for plain text, i.e. source that no more specific style |
| * applies to (whitespace, ordinary identifiers). |
| * @const |
| */ |
| var PR_PLAIN = 'pln'; |
| |
| /** |
| * token style for an sgml tag. |
| * @const |
| */ |
| var PR_TAG = 'tag'; |
| /** |
| * token style for a markup declaration such as a DOCTYPE. |
| * @const |
| */ |
| var PR_DECLARATION = 'dec'; |
| /** |
| * token style for embedded source. |
| * @const |
| */ |
| var PR_SOURCE = 'src'; |
| /** |
| * token style for an sgml attribute name. |
| * @const |
| */ |
| var PR_ATTRIB_NAME = 'atn'; |
| /** |
| * token style for an sgml attribute value. |
| * @const |
| */ |
| var PR_ATTRIB_VALUE = 'atv'; |
| |
| /** |
| * A class that indicates a section of markup that is not code, e.g. to allow |
| * embedding of line numbers within code listings. |
| * @const |
| */ |
| var PR_NOCODE = 'nocode'; |
| |
| |
| |
| /** |
| * A set of tokens that can precede a regular expression literal in |
| * javascript |
| * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html |
| * has the full list, but I've removed ones that might be problematic when |
| * seen in languages that don't support regular expression literals. |
| * |
| * <p>Specifically, I've removed any keywords that can't precede a regexp |
| * literal in a syntactically legal javascript program, and I've removed the |
| * "in" keyword since it's not a keyword in many languages, and might be used |
| * as a count of inches. |
| * |
| * <p>The link above does not accurately describe EcmaScript rules since |
| * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works |
| * very well in practice. |
| * |
| * <p>The value is regular expression source text (a string, not a RegExp): |
| * one non-capturing alternation of preceder tokens followed by optional |
| * whitespace. It is meant to be concatenated into a larger pattern. |
| * |
| * @private |
| * @const |
| */ |
| var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|\\!|\\!=|\\!==|\\#|\\%|\\%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|\\,|\\-=|\\->|\\/|\\/=|:|::|\\;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\@|\\[|\\^|\\^=|\\^\\^|\\^\\^=|\\{|\\||\\|=|\\|\\||\\|\\|=|\\~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*'; |
| |
| // CAVEAT: this does not properly handle the case where a regular |
| // expression immediately follows another since a regular expression may |
| // have flags for case-sensitivity and the like. Having regexp tokens |
| // adjacent is not valid in any language I'm aware of, so I'm punting. |
| // TODO: maybe style special characters inside a regexp as punctuation. |
| |
| |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * <p>Each input is rewritten before being joined with "|":
 * <ul>
 * <li>single "^" anchors are dropped ("^^" is kept as one real anchor),
 * <li>capturing groups that no back-reference uses become non-capturing,
 * <li>the remaining capturing groups and their back-references are
 *     renumbered so that numbering is consistent across the whole union,
 * <li>when case-insensitive and case-sensitive inputs are mixed, the letters
 *     of the case-insensitive inputs are expanded to explicit two-case
 *     character sets (Latin letters only) and the result is case-sensitive.
 * </ul>
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Count of capturing groups emitted so far.  Shared by all inputs so that
  // group numbers stay unique across the joined pattern.
  var capturedGroupIndex = 0;

  // The union can only carry the "i" flag if every input is either
  // case-insensitive or contains no letters at all.  As soon as a
  // case-sensitive input with letters is seen, fall back to folding case by
  // hand inside the case-insensitive inputs.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
        /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  var escapeCharToCodeUnit = {
    'b': 8,
    't': 9,
    'n': 0xa,
    'v': 0xb,
    'f': 0xc,
    'r': 0xd
  };

  /**
   * Returns the code unit denoted by one element of a character set: either
   * a raw character or an escape sequence (\n, \012, \x41, \u0041, \-).
   */
  function decodeEscape(charsetPart) {
    var cc0 = charsetPart.charCodeAt(0);
    if (cc0 !== 92 /* \\ */) {
      return cc0;
    }
    var c1 = charsetPart.charAt(1);
    cc0 = escapeCharToCodeUnit[c1];
    if (cc0) {
      return cc0;
    } else if ('0' <= c1 && c1 <= '7') {
      return parseInt(charsetPart.substring(1), 8);
    } else if (c1 === 'u' || c1 === 'x') {
      return parseInt(charsetPart.substring(2), 16);
    } else {
      return charsetPart.charCodeAt(1);
    }
  }

  /**
   * Returns text that denotes the given code unit inside a character set,
   * escaping control characters and the characters special to sets.
   */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set such as "[a-c_]" so that it matches both cases
   * of every Latin letter it contains, e.g. "[A-C_a-c]".
   */
  function caseFoldCharset(charSet) {
    // match() yields null for an empty set ("[]"); treat that as no parts.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
        groups.push(p);
      } else {
        var start = decodeEscape(p);
        var end;
        if (i + 2 < n && '-' === charsetParts[i + 1]) {
          end = decodeEscape(charsetParts[i + 2]);
          i += 2;
        } else {
          end = start;
        }
        ranges.push([start, end]);
        // If the range might intersect letters, then expand it.
        // This case handling is too simplistic.
        // It does not deal with non-latin case folding.
        // It works for latin source code identifiers though.
        if (!(end < 65 || start > 122)) {
          if (!(end < 65 || start > 90)) {
            ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
          }
          if (!(end < 97 || start > 122)) {
            ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
          }
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        // A multi-character range is always written as "lo-hi".
        out.push('-');
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Rewrites one input regex's source as described in the outer function's
   * documentation and returns the new source text.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look the new number up by the group the back-reference names,
          // not by how many groups have been opened so far.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
| |
| |
/**
 * Flattens a DOM subtree into one string of source code plus a table that
 * records which node each piece of that string came from.
 *
 * <p>
 * For the markup {@code <p><b>print </b>'Hello '<br> + 'World';</p>}, i.e.
 * the tree</p>
 * <pre>
 * (Element "p"
 *   (Element "b"
 *     (Text "print "))       ; #1
 *   (Text "'Hello '")        ; #2
 *   (Element "br")           ; #3
 *   (Text " + 'World';"))    ; #4
 * </pre>
 * <p>
 * the result is</p>
 * <pre>
 * {
 *   sourceCode: "print 'Hello '\n + 'World';",
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 *
 * <p>
 * {@code spans} is a flat list of pairs: each even slot holds the index in
 * {@code sourceCode} at which a piece starts and the following odd slot holds
 * the node (text node, or BR/LI element standing for a newline) that supplied
 * it.  A piece runs up to the next start index or the end of the source.
 * Elements whose class list contains "nocode" are skipped with all of their
 * content.  Unless the root's white-space style starts with "pre", runs of
 * whitespace in text are collapsed to one space; otherwise only line endings
 * are normalized to "\n".  One trailing newline is removed from
 * {@code sourceCode}.
 * </p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @return {Object} source code and the text nodes in which they occur.
 */
function extractSourceSpans(node) {
  var nocodePattern = /(?:^|\s)nocode(?:\s|$)/;

  var pieces = [];   // text of each piece, in document order
  var offset = 0;    // combined length of everything in pieces
  var spans = [];    // [start, node, start, node, ...]

  var whiteSpaceStyle;
  if (node.currentStyle) {
    whiteSpaceStyle = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    whiteSpaceStyle = document.defaultView.getComputedStyle(node, null)
        .getPropertyValue('white-space');
  }
  var keepWhitespace =
      whiteSpaceStyle && 'pre' === whiteSpaceStyle.substring(0, 3);

  // Appends one piece of text and remembers which node it belongs to.
  function record(text, owner) {
    pieces.push(text);
    spans.push(offset, owner);
    offset += text.length;
  }

  function visit(current) {
    var type = current.nodeType;
    if (type === 1) {  // Element
      if (nocodePattern.test(current.className)) { return; }
      for (var kid = current.firstChild; kid; kid = kid.nextSibling) {
        visit(kid);
      }
      var tag = current.nodeName;
      // A line break or the end of a list item contributes a newline.
      if (tag === 'BR' || tag === 'LI') { record('\n', current); }
    } else if (type === 3 || type === 4) {  // Text or CDATA
      var text = current.nodeValue;
      if (!text.length) { return; }
      text = keepWhitespace
          ? text.replace(/\r\n?/g, '\n')  // Normalize newlines.
          : text.replace(/[ \t\r\n]+/g, ' ');
      // TODO: handle tabs here?
      record(text, current);
    }
  }

  visit(node);

  return {
    sourceCode: pieces.join('').replace(/\n$/, ''),
    spans: spans
  };
}
| |
| |
/**
 * Runs a language handler over a piece of source code and appends the
 * decorations it produces to an existing decoration list.
 * The handler receives a job object carrying {@code sourceCode} and
 * {@code basePos} and is expected to store its result on
 * {@code job.decorations}.  Nothing happens, and the handler is not called,
 * when sourceCode is empty.
 *
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with the job object.
 * @param {Array} out decoration list that is extended in place.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (sourceCode) {
    var request = { sourceCode: sourceCode, basePos: basePos };
    langHandler(request);
    var produced = request.decorations;
    out.push.apply(out, produced);
  }
}
| |
var notWs = /\S/;

/**
 * Finds the single element that wraps all of an element's real content.
 * <p>
 * Returns the only child element of {@code element} provided that every text
 * node directly under {@code element} consists solely of space characters.
 * Returns undefined when there is no child element, when there is more than
 * one, or when any direct text child has non-space content.
 * <p>
 * Intended for markup such as {@code <pre><code ...>}, where the CODE element
 * holds everything; it deliberately yields nothing for
 * {@code <pre><code>...</code><code>...</code></pre>} or when the PRE has
 * text of its own.
 */
function childContentWrapper(element) {
  var soleChild;
  for (var kid = element.firstChild; kid; kid = kid.nextSibling) {
    switch (kid.nodeType) {
      case 1:  // Element Node
        // A second element means there is no single wrapper.
        if (soleChild) { return undefined; }
        soleChild = kid;
        break;
      case 3:  // Text Node
        // Real text beside the element disqualifies it as a wrapper.
        if (notWs.test(kid.nodeValue)) { return undefined; }
        break;
    }
  }
  return soleChild;
}
| |
| /** Given triples of [style, pattern, context] returns a lexing function. |
| * The lexing function interprets the patterns to find token boundaries and |
| * returns a decoration list of the form |
| * [index_0, style_0, index_1, style_1, ..., index_n, style_n] |
| * where index_n is an index into the sourceCode, and style_n is a style |
| * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to |
| * all characters in sourceCode[index_n-1:index_n]. |
| * |
| * The stylePatterns is a list whose elements have the form |
| * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. |
| * |
| * Style is a style constant like PR_PLAIN, or can be a string of the |
| * form 'lang-FOO', where FOO is a language extension describing the |
| * language of the portion of the token in $1 after pattern executes. |
| * E.g., if style is 'lang-lisp', and group 1 contains the text |
| * '(hello (world))', then that portion of the token will be passed to the |
| * registered lisp handler for formatting. |
| * The text before and after group 1 will be restyled using this decorator |
| * so decorators should take care that this doesn't result in infinite |
| * recursion. For example, the HTML lexer rule for SCRIPT elements looks |
| * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match |
| * '<script>foo()<\/script>', which would cause the current decorator to |
| * be called with '<script>' which would not match the same rule since |
| * group 1 must not be empty, so it would be instead styled as PR_TAG by |
| * the generic tag rule. The handler registered for the 'js' extension would |
| * then be called with 'foo()', and finally, the current decorator would |
| * be called with '<\/script>' which would not match the original rule and |
| * so the generic tag rule would identify it as a tag. |
| * |
| * Pattern must only match prefixes, and if it matches a prefix, then that |
| * match is considered a token with the same style. |
| * |
| * Context is applied to the last non-whitespace, non-comment token |
| * recognized. |
| * |
| * Shortcut is an optional string of characters, any of which, if the first |
| * character, guarantee that this pattern and only this pattern matches. |
| * |
| * Implementation outline: every distinct pattern (distinct by its string |
| * form) from both lists, plus a final catch-all that matches any single |
| * character, is merged by combinePrefixPatterns into one global tokenizer. |
| * The returned function splits the source with that tokenizer and then |
| * classifies each token, first by the shortcut registered for its first |
| * character and otherwise by trying the fallthrough patterns in order. |
| * The style found for a token's text is cached for the duration of one |
| * call, except for embedded-language ('lang-*') tokens. |
| * |
| * @param {Array} shortcutStylePatterns patterns that always start with |
| * a known character. Must have a shortcut string. |
| * @param {Array} fallthroughStylePatterns patterns that will be tried in |
| * order if the shortcut ones fail. May have shortcuts. |
| * |
| * @return {function (Object)} a |
| * function that takes source code and returns a list of decorations. |
| */ |
| function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) { |
| var shortcuts = {}; |
| var tokenizer; |
| (function () { |
| var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns); |
| var allRegexs = []; |
| var regexKeys = {}; |
| for (var i = 0, n = allPatterns.length; i < n; ++i) { |
| var patternParts = allPatterns[i]; |
| var shortcutChars = patternParts[3]; |
| if (shortcutChars) { |
| for (var c = shortcutChars.length; --c >= 0;) { |
| shortcuts[shortcutChars.charAt(c)] = patternParts; |
| } |
| } |
| var regex = patternParts[1]; |
| var k = '' + regex; |
| if (!regexKeys.hasOwnProperty(k)) { |
| allRegexs.push(regex); |
| regexKeys[k] = null; |
| } |
| } |
| allRegexs.push(/[\0-\uffff]/); |
| tokenizer = combinePrefixPatterns(allRegexs); |
| })(); |
| |
| var nPatterns = fallthroughStylePatterns.length; |
| |
| /** |
| * Lexes job.sourceCode and produces an output array job.decorations of |
| * style classes preceded by the position at which they start in |
| * job.sourceCode in order. |
| * The result is stored on job.decorations; nothing is returned. |
| * |
| * @param {Object} job an object like <pre>{ |
| * sourceCode: {string} sourceText plain text, |
| * basePos: {int} position of job.sourceCode in the larger chunk of |
| * sourceCode. |
| * }</pre> |
| */ |
| var decorate = function (job) { |
| var sourceCode = job.sourceCode, basePos = job.basePos; |
| /** Even entries are positions in source in ascending order. Odd entries |
| * are style markers (e.g., PR_COMMENT) that run from that position until |
| * the end. |
| * @type {Array.<number|string>} |
| */ |
| var decorations = [basePos, PR_PLAIN]; |
| var pos = 0; // index into sourceCode |
| var tokens = sourceCode.match(tokenizer) || []; |
| var styleCache = {}; |
| |
| for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) { |
| var token = tokens[ti]; |
| var style = styleCache[token]; |
| var match = void 0; |
| |
| var isEmbedded; |
| if (typeof style === 'string') { |
| isEmbedded = false; |
| } else { |
| var patternParts = shortcuts[token.charAt(0)]; |
| if (patternParts) { |
| match = token.match(patternParts[1]); |
| style = patternParts[0]; |
| } else { |
| for (var i = 0; i < nPatterns; ++i) { |
| patternParts = fallthroughStylePatterns[i]; |
| match = token.match(patternParts[1]); |
| if (match) { |
| style = patternParts[0]; |
| break; |
| } |
| } |
| |
| if (!match) { // make sure that we make progress |
| style = PR_PLAIN; |
| } |
| } |
| |
| isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5); |
| if (isEmbedded && !(match && typeof match[1] === 'string')) { |
| isEmbedded = false; |
| style = PR_SOURCE; |
| } |
| |
| if (!isEmbedded) { styleCache[token] = style; } |
| } |
| |
| var tokenStart = pos; |
| pos += token.length; |
| |
| if (!isEmbedded) { |
| decorations.push(basePos + tokenStart, style); |
| } else { // Treat group 1 as an embedded block of source code. |
| var embeddedSource = match[1]; |
| var embeddedSourceStart = token.indexOf(embeddedSource); |
| var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length; |
| if (match[2]) { |
| // If embeddedSource can be blank, then it would match at the |
| // beginning which would cause us to infinitely recurse on the |
| // entire token, so we catch the right context in match[2]. |
| embeddedSourceEnd = token.length - match[2].length; |
| embeddedSourceStart = embeddedSourceEnd - embeddedSource.length; |
| } |
| var lang = style.substring(5); |
| // Decorate the left of the embedded source |
| appendDecorations( |
| basePos + tokenStart, |
| token.substring(0, embeddedSourceStart), |
| decorate, decorations); |
| // Decorate the embedded source |
| appendDecorations( |
| basePos + tokenStart + embeddedSourceStart, |
| embeddedSource, |
| langHandlerForExtension(lang, embeddedSource), |
| decorations); |
| // Decorate the right of the embedded section |
| appendDecorations( |
| basePos + tokenStart + embeddedSourceEnd, |
| token.substring(embeddedSourceEnd), |
| decorate, decorations); |
| } |
| } |
| job.decorations = decorations; |
| }; |
| return decorate; |
| } |
| |
/** returns a function that produces a list of decorations from source text.
 *
 * This code treats ", ', and ` as string delimiters, and \ as a string
 * escape.  It does not recognize perl's qq() style strings.
 * It has no special handling for double delimiter escapes as in basic, or
 * the tripled delimiters used in python, but should work on those regardless
 * although in those cases a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * Options read (all optional):
 * <ul>
 * <li>{@code tripleQuotedStrings}, {@code multiLineStrings}: pick the string
 *     literal pattern; without either, strings end at a line break.
 * <li>{@code verbatimStrings}: also recognize C# {@code @"..."} strings.
 * <li>{@code hashComments}: "#" starts a comment.  Combined with
 *     {@code cStyleComments}, a value greater than 1 enables "###" multi-line
 *     comments; otherwise "#" lines are treated like preprocessor lines and
 *     {@code <header.h>} is styled as a string.
 * <li>{@code cStyleComments}: recognize "//" and slash-star comments.
 * <li>{@code regexLiterals}: recognize /regex/ literals after a token from
 *     REGEXP_PRECEDER_PATTERN and hand them to the 'regex' handler.
 * <li>{@code types}: a RegExp whose matches are styled as types.
 * <li>{@code keywords}: a comma or space separated string (or anything that
 *     coerces to one) of keywords.  When absent, no keyword pattern is added.
 * </ul>
 *
 * @param {Object} options a set of optional parameters.
 * @return {function (Object)} a function that examines the source code
 *     in the input job and builds the decoration list.
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  // A missing keyword list must yield no keyword pattern at all.  Coercing
  // undefined with "" + ... would produce the text "undefined" and turn that
  // word into a keyword.
  var keywordOption = options['keywords'];
  var keywords = (keywordOption == null ? '' : '' + keywordOption)
      .replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);
  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, where a decimal may also start
           // with the point, as in ".5" (one or more digits after the dot),
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.  See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
| |
// Catch-all decorator for source whose language is unknown. It is built from
// the combined keyword list with hash comments, C-style comments, multi-line
// strings and regex literals all enabled.
var decorateSource = sourceDecorator({
  'regexLiterals': true,
  'multiLineStrings': true,
  'cStyleComments': true,
  'hashComments': true,
  'keywords': ALL_KEYWORDS
});
| |
/**
 * Moves the content of a DOM subtree into an ordered list with one list item
 * per source line.
 *
 * @param {Node} node modified in place.  Its children are pulled into an
 *     HTMLOListElement (class {@code linenums}) which is appended back to
 *     node, and each line ends up in its own list item.  Elements that span a
 *     line break are shallow-cloned onto both lines, so the input might not
 *     have unique IDs after numbering.
 * @param {number|boolean} opt_startLineNum when this is an integer it is
 *     written to the {@code value} attribute of the first list item and
 *     shifts the {@code L0}..{@code L9} row classes; any other value leaves
 *     the numbering at its default.
 */
function numberLines(node, opt_startLineNum) {
  var nocodeClassRe = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreakRe = /\r\n?|\n/;

  var doc = node.ownerDocument;

  // Prefer IE's currentStyle, otherwise ask the computed style.
  var whiteSpaceStyle;
  if (node.currentStyle) {
    whiteSpaceStyle = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    whiteSpaceStyle = doc.defaultView.getComputedStyle(node, null)
        .getPropertyValue('white-space');
  }
  // Preformatted content has to be split on textual line breaks as well as
  // on <BR> elements.
  var isPreformatted =
      whiteSpaceStyle && 'pre' === whiteSpaceStyle.substring(0, 3);

  // Everything starts out on a single, not yet split, line.
  var firstItem = doc.createElement('LI');
  for (var kid; (kid = node.firstChild);) {
    firstItem.appendChild(kid);
  }
  // Grows while splitting; each entry is a detached <LI>.
  var listItems = [firstItem];

  /**
   * Detaches limit and all of its following siblings into a fresh copy of the
   * ancestor chain, so styling elements that cross the break exist on both
   * sides.  E.g. <i>Foo<br>Bar</i> becomes
   * <li><i>Foo</i></li><li><i>Bar</i></li>.
   *
   * @param {Node} limit first node that belongs to the right of the break.
   * @param {number} copy truthy to shallow-clone limit instead of moving it.
   * @return {Node} the node now standing in for limit on the right side.
   */
  function detachRightOf(limit, copy) {
    var rightSide = copy ? limit.cloneNode(false) : limit;
    var parent = limit.parentNode;
    if (!parent) { return rightSide; }

    var parentClone = detachRightOf(parent, 1);
    // Remember the follower before limit itself may be moved.
    var sibling = limit.nextSibling;
    parentClone.appendChild(rightSide);
    while (sibling) {
      var following = sibling.nextSibling;
      parentClone.appendChild(sibling);
      sibling = following;
    }
    return rightSide;
  }

  /** Ends the current line directly after lineEnd. */
  function startNewLineAfter(lineEnd) {
    // With nothing to the right there is no content to carry over, so climb
    // towards the root rather than creating a chain of empty copies.
    while (!lineEnd.nextSibling) {
      lineEnd = lineEnd.parentNode;
      if (!lineEnd) { return; }
    }

    var top = detachRightOf(lineEnd.nextSibling, 0);
    // Climb to the unattached <LI>.  The nodeType check is needed because IE
    // invents document fragments.
    for (var up; (up = top.parentNode) && up.nodeType === 1;) {
      top = up;
    }
    // Queue the new line so that it gets split in turn.
    listItems.push(top);
  }

  /** Splits the subtree under current at the first line break it holds. */
  function splitLines(current) {
    var type = current.nodeType;
    if (type === 1) {  // Element
      if (nocodeClassRe.test(current.className)) { return; }
      if ('BR' === current.nodeName) {
        startNewLineAfter(current);
        // The <BR> now sits flush against a </LI>, so it can be dropped.
        if (current.parentNode) {
          current.parentNode.removeChild(current);
        }
        return;
      }
      for (var child = current.firstChild; child; child = child.nextSibling) {
        splitLines(child);
      }
    } else if (type === 3 || type === 4) {  // Text or CDATA
      if (!isPreformatted) { return; }
      var text = current.nodeValue;
      var hit = text.match(lineBreakRe);
      if (!hit) { return; }

      var head = text.substring(0, hit.index);
      current.nodeValue = head;
      var rest = text.substring(hit.index + hit[0].length);
      if (rest) {
        current.parentNode.insertBefore(
            doc.createTextNode(rest), current.nextSibling);
      }
      startNewLineAfter(current);
      if (!head) {
        // Don't leave blank text nodes in the DOM.
        current.parentNode.removeChild(current);
      }
    }
  }

  // Keep splitting until no queued line contains a break.  The length is
  // re-read each time around because startNewLineAfter appends to the list.
  for (var lineIdx = 0; lineIdx < listItems.length; ++lineIdx) {
    splitLines(listItems[lineIdx]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum | 0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var list = doc.createElement('OL');
  list.className = 'linenums';
  var offset = Math.max(0, (opt_startLineNum - 1 /* zero index */) | 0) || 0;
  for (var k = 0, count = listItems.length; k < count; ++k) {
    var item = listItems[k];
    // Class the rows L0..L9 so that stylesheets can color odd/even rows, or
    // any other row pattern that is co-prime with 10.
    item.className = 'L' + ((k + offset) % 10);
    if (!item.firstChild) {
      item.appendChild(doc.createTextNode('\xA0'));
    }
    list.appendChild(item);
  }

  node.appendChild(list);
}
| |
/**
 * Breaks {@code job.sourceCode} around style boundaries in
 * {@code job.decorations} and wraps each styled run of text in a
 * {@code <SPAN>} carrying the style class, modifying the DOM nodes listed in
 * {@code job.spans} in place.
 *
 * <p>{@code job.decorations} is normalized in place first: zero-length
 * decorations are dropped and adjacent decorations with the same style are
 * merged.
 *
 * @param {Object} job like <pre>{
 *    sourceCode: {string} source as plain text,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.sourceCode in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  var isIE = /\bMSIE\b/.test(navigator.userAgent);
  var newlineRe = /\n/g;

  var source = job.sourceCode;
  var sourceLength = source.length;
  // Index into source after the last code-unit recombined.
  var sourceIndex = 0;

  var spans = job.spans;
  var nSpans = spans.length;
  // Index into spans after the last span which ends at or before sourceIndex.
  var spanIndex = 0;

  var decorations = job.decorations;
  var nDecorations = decorations.length;
  // Index into decorations after the last decoration which ends at or before
  // sourceIndex.
  var decorationIndex = 0;

  // Remove all zero-length decorations.  The sentinel lets the last real
  // decoration be compared against the end of the source.
  decorations[nDecorations] = sourceLength;
  var decPos, i;
  for (i = decPos = 0; i < nDecorations;) {
    if (decorations[i] !== decorations[i + 2]) {
      decorations[decPos++] = decorations[i++];
      decorations[decPos++] = decorations[i++];
    } else {
      i += 2;
    }
  }
  nDecorations = decPos;

  // Simplify decorations by conflating all adjacent decorations that use the
  // same style.
  for (i = decPos = 0; i < nDecorations;) {
    var startPos = decorations[i];
    var startDec = decorations[i + 1];
    var runEnd = i + 2;
    while (runEnd + 2 <= nDecorations
           && decorations[runEnd + 1] === startDec) {
      runEnd += 2;
    }
    decorations[decPos++] = startPos;
    decorations[decPos++] = startDec;
    i = runEnd;
  }

  // Truncating also discards the sentinel written above.
  nDecorations = decorations.length = decPos;

  while (spanIndex < nSpans) {
    var spanEnd = spans[spanIndex + 2] || sourceLength;
    var decEnd = decorations[decorationIndex + 2] || sourceLength;

    // The next chunk stops at whichever boundary comes first.
    var end = Math.min(spanEnd, decEnd);

    var textNode = spans[spanIndex + 1];
    var styledText;
    if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
        // Don't introduce spans around empty text nodes.
        && (styledText = source.substring(sourceIndex, end))) {
      // This may seem bizarre, and it is.  Emitting LF on IE causes the
      // code to display with spaces instead of line breaks.
      // Emitting Windows standard issue linebreaks (CRLF) causes a blank
      // space to appear at the beginning of every line but the first.
      // Emitting an old Mac OS 9 line separator makes everything spiffy.
      if (isIE) { styledText = styledText.replace(newlineRe, '\r'); }
      textNode.nodeValue = styledText;
      var doc = textNode.ownerDocument;
      var span = doc.createElement('SPAN');
      span.className = decorations[decorationIndex + 1];
      var parentNode = textNode.parentNode;
      parentNode.replaceChild(span, textNode);
      span.appendChild(textNode);
      // Split off a text node for the rest of the span, but only when some
      // of the span is actually left; otherwise an empty text node would be
      // added to the DOM after every fully consumed span.
      if (end < spanEnd) {
        spans[spanIndex + 1] = textNode
            // TODO: Possibly optimize by using '' if there's no flicker.
            = doc.createTextNode(source.substring(end, spanEnd));
        parentNode.insertBefore(textNode, span.nextSibling);
      }
    }

    sourceIndex = end;

    if (sourceIndex >= spanEnd) {
      spanIndex += 2;
    }
    if (sourceIndex >= decEnd) {
      decorationIndex += 2;
    }
  }
}
| |
| |
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/** Register a language handler for the given file extensions.
  * An extension that already has a handler keeps it; the attempt is reported
  * through {@code console.warn} when a console is available.
  *
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        sourceCode: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.sourceCode in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  // Borrow the method from Object.prototype: the registry is keyed by
  // arbitrary extension names, so an entry called "hasOwnProperty" would
  // otherwise shadow the method and be invoked in its place.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!hasOwn.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if (window['console']) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
/**
 * Looks up the handler registered for a language extension, sniffing the
 * source when the extension is missing or unknown.
 *
 * @param {string} extension language extension such as 'cpp'; may be falsy.
 * @param {string} source the plain text to decorate; only consulted when no
 *     handler is registered under extension.
 * @return {function (Object)} the handler stored in the registry under the
 *     extension, or under 'default-markup' / 'default-code' as a fallback.
 */
function langHandlerForExtension(extension, source) {
  // Call the method through Object.prototype so that a registry entry named
  // "hasOwnProperty" cannot hijack the lookup, and so that inherited names
  // such as "toString" are never mistaken for registered languages.
  var hasOwn = Object.prototype.hasOwnProperty;
  if (!(extension && hasOwn.call(langHandlerRegistry, extension))) {
    // Treat it as markup if the first non-whitespace character is a <.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}
// Built-in language handlers.  registerLangHandler never overrides an
// existing entry, so each extension below keeps the first handler it gets.

// Fallback for code in an unrecognized language.
registerLangHandler(decorateSource, ['default-code']);
// Markup: splits a document into text, declarations, comments, embedded
// foreign-language sections and tags.  Tags are delegated to 'in.tag'.
registerLangHandler(
    createSimpleLexer(
        [],
        [
         [PR_PLAIN,       /^[^<?]+/],
         [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
         [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
         // Unescaped content in an unknown language
         ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
         ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
         [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
         ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
         // Unescaped content in javascript.  (Or possibly vbscript).
         ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
         // Contains unescaped stylesheet content
         ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
         ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
        ]),
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
// The inside of a single tag: tag name, attribute names and values, with
// on* handlers and style attributes handed to the js and css handlers.
registerLangHandler(
    createSimpleLexer(
        [
         [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
         [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
         ],
        [
         [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
         [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
         ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
         [PR_PUNCTUATION,  /^[=<>\/]+/],
         ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
         ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
         ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
         ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
         ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
         ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
         ]),
    ['in.tag']);
// An unquoted attribute value is styled as one attribute-value token.
registerLangHandler(
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);
registerLangHandler(sourceDecorator({
        'keywords': CPP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'types': C_TYPES
      }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
        'keywords': 'null,true,false'
      }), ['json']);
registerLangHandler(sourceDecorator({
        'keywords': CSHARP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'verbatimStrings': true,
        'types': C_TYPES
      }), ['cs']);
registerLangHandler(sourceDecorator({
        'keywords': JAVA_KEYWORDS,
        'cStyleComments': true
      }), ['java']);
registerLangHandler(sourceDecorator({
        'keywords': SH_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true
      }), ['bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
        'keywords': PYTHON_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true
      }), ['cv', 'py']);
registerLangHandler(sourceDecorator({
        'keywords': PERL_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
        'keywords': RUBY_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['rb']);
registerLangHandler(sourceDecorator({
        'keywords': JSCRIPT_KEYWORDS,
        'cStyleComments': true,
        'regexLiterals': true
      }), ['js']);
registerLangHandler(sourceDecorator({
        'keywords': COFFEE_KEYWORDS,
        'hashComments': 3,  // ### style block comments
        'cStyleComments': true,
        // Spelled like every other registration; the option key used to be
        // written 'multilineStrings'.
        'multiLineStrings': true,
        'tripleQuotedStrings': true,
        'regexLiterals': true
      }), ['coffee']);
// A regex literal handed over by the 'lang-regex' pattern is styled as one
// string token.
registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
| |
/**
 * Runs one pretty-printing job: extracts plain text from
 * {@code job.sourceNode}, lets the matching language handler decorate it and
 * folds the decorations back into the DOM.
 *
 * Anything thrown along the way is caught and written to the console (when
 * one exists) instead of being propagated to the caller.
 *
 * @param {Object} job reads {@code langExtension} and {@code sourceNode};
 *     {@code sourceCode}, {@code spans} and {@code basePos} are filled in
 *     here before the language handler is invoked.
 */
function applyDecorator(job) {
  var requestedLang = job.langExtension;

  try {
    // Strip the tags, keeping track of which node each stretch came from.
    var extracted = extractSourceSpans(job.sourceNode);
    /** Plain text. @type {string} */
    var plainText = extracted.sourceCode;
    job.sourceCode = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Pick the language handler and let it decorate the job.
    var handler = langHandlerForExtension(requestedLang, plainText);
    handler(job);

    // Merge decorations and tags back together, modifying the sourceNode in
    // place.
    recombineTagsAndDecorations(job);
  } catch (err) {
    if ('console' in window) {
      console['log'](err && err['stack'] ? err['stack'] : err);
    }
  }
}
| |
/**
 * Pretty prints a chunk of code given as HTML.
 *
 * @param {string} sourceCodeHtml The HTML to pretty print.
 * @param {string} opt_langExtension The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param {number|boolean} opt_numberLines True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the decorated code as HTML.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  var pre = document.createElement('PRE');
  // Parsing the markup could cause images to load and onload listeners to
  // fire, e.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // The inner HTML is assumed to come from a trusted source.
  pre.innerHTML = sourceCodeHtml;

  if (opt_numberLines) { numberLines(pre, opt_numberLines); }

  applyDecorator({
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: pre
  });

  return pre.innerHTML;
}
| |
/**
 * Finds every {@code <pre>}, {@code <code>} and {@code <xmp>} element in the
 * document whose class list contains {@code prettyprint} and decorates it.
 * Elements nested inside another such element are skipped so that content is
 * not decorated twice.
 *
 * @param {Function?} opt_whenDone if specified, called once the last element
 *     has been processed.
 */
function prettyPrint(opt_whenDone) {
  function byTagName(tn) { return document.getElementsByTagName(tn); }
  // Fetch a list of nodes to rewrite.  The live collections are copied into
  // a plain array because the DOM is modified while iterating.
  var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return +(new Date); } };
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;

  var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
  // Matches prettyprint as a whole word, so unrelated class names that merely
  // contain it (e.g. "noprettyprint") are not picked up.
  var prettyPrintRe = /\bprettyprint\b/;
  // HTML documents report tagName in upper case and XML documents preserve
  // the source case, hence the case-insensitive match.
  var preCodeXmpRe = /^(?:pre|code|xmp)$/i;
  // A class like linenums or linenums:<n> where <n> is the 1-indexed number
  // of the first line.
  var lineNumsRe = /\blinenums\b(?::(\d+))?/;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock['now']() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock['now']() < endTime; k++) {
      var cs = elements[k];
      var className = cs.className;
      if (prettyPrintRe.test(className)) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to PR.registerLangHandler.
        // HTML5 recommends that a language be specified using "language-"
        // as the prefix instead.  Google Code Prettify supports both.
        // http://dev.w3.org/html5/spec-author-view/the-code-element.html
        var langExtension = className.match(langExtensionRe);
        // Support <pre class="prettyprint"><code class="language-c">
        var wrapper;
        if (!langExtension && (wrapper = childContentWrapper(cs))
            && "CODE" === wrapper.tagName) {
          langExtension = wrapper.className.match(langExtensionRe);
        }

        if (langExtension) {
          langExtension = langExtension[1];
        }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (preCodeXmpRe.test(p.tagName) &&
              p.className && prettyPrintRe.test(p.className)) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          var lineNums = className.match(lineNumsRe);
          lineNums = lineNums
                ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
                : false;
          if (lineNums) { numberLines(cs, lineNums); }

          // do the pretty printing
          applyDecorator({
            langExtension: langExtension,
            sourceNode: cs,
            numberLines: lineNums
          });
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
| |
/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @param {string} opt_langExtension the language name to use, typically a
 *     filename extension like 'cpp' or 'java'.
 * @param {number|boolean} opt_numberLines true to number lines, or the
 *     1-indexed number of the first line in sourceCodeHtml.
 * @return {string} code as html, but prettier
 */
window['prettyPrintOne'] = prettyPrintOne;
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
window['prettyPrint'] = prettyPrint;
/**
 * Contains functions for creating and registering new language handlers,
 * together with the style class constants those handlers emit.
 * @type {Object}
 */
window['PR'] = {
      'createSimpleLexer': createSimpleLexer,
      'registerLangHandler': registerLangHandler,
      'sourceDecorator': sourceDecorator,
      'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
      'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
      'PR_COMMENT': PR_COMMENT,
      'PR_DECLARATION': PR_DECLARATION,
      'PR_KEYWORD': PR_KEYWORD,
      'PR_LITERAL': PR_LITERAL,
      'PR_NOCODE': PR_NOCODE,
      'PR_PLAIN': PR_PLAIN,
      'PR_PUNCTUATION': PR_PUNCTUATION,
      'PR_SOURCE': PR_SOURCE,
      'PR_STRING': PR_STRING,
      'PR_TAG': PR_TAG,
      'PR_TYPE': PR_TYPE
    };
| })(); |
| |