| // Copyright (C) 2006 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| |
| /** |
| * @fileoverview |
| * some functions for browser-side pretty printing of code contained in html. |
| * <p> |
| * |
| * For a fairly comprehensive set of languages see the |
| * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> |
| * file that came with this source. At a minimum, the lexer should work on a |
| * number of languages including C and friends, Java, Python, Bash, SQL, HTML, |
| * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk |
| * and a subset of Perl, but, because of commenting conventions, doesn't work on |
| * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. |
| * <p> |
| * Usage: <ol> |
| * <li> include this source file in an html page via |
| * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} |
| * <li> define style rules. See the example page for examples. |
| * <li> mark the {@code <pre>} and {@code <code>} tags in your source with |
| * {@code class=prettyprint.} |
| * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty |
| * printer needs to do more substantial DOM manipulations to support that, so |
| * some css styles may not be preserved. |
| * </ol> |
| * That's it. I wanted to keep the API as simple as possible, so there's no |
| * need to specify which language the code is in, but if you wish, you can add |
| * another class to the {@code <pre>} or {@code <code>} element to specify the |
| * language, as in {@code <pre class="prettyprint lang-java">}. Any class that |
| * starts with "lang-" followed by a file extension, specifies the file type. |
| * See the "lang-*.js" files in this directory for code that implements |
| * per-language file handlers. |
| * <p> |
| * Change log:<br> |
| * cbeust, 2006/08/22 |
| * <blockquote> |
| * Java annotations (start with "@") are now captured as literals ("lit") |
| * </blockquote> |
| * @requires console |
| * @overrides window |
| */ |
| |
| // JSLint declarations |
| /*global console, document, navigator, setTimeout, window */ |
| |
/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/** The number of characters between tab columns. */
window['PR_TAB_WIDTH'] = 8;
| |
// The four public entry points below are cleared to undefined in one chained
// assignment; each keeps its own doc comment so that tools can pick it up.
// NOTE(review): presumably the real values are assigned further down in this
// file -- confirm against the part of the file not shown here.

/**
 * Walks the DOM returning a properly escaped version of innerHTML.
 * @param {Node} node
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
window['PR_normalizedHtml']

/**
 * Contains functions for creating and registering new language handlers.
 * @type {Object}
 */
  = window['PR']

/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
  = window['prettyPrintOne']

/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
  = window['prettyPrint'] = void 0;
| |
/**
 * Browser detection.  Despite the name, this recognizes MSIE 6, 7 and 8.
 * The answer is computed once; the function then replaces itself on
 * {@code window} with a version that returns the remembered value.
 * @extern
 * @return {number|boolean} false if the user agent is not MSIE 6-8, otherwise
 *     the major version number.
 */
window['_pr_isIE6'] = function () {
  // typeof guard: a bare reference to an undeclared navigator would throw.
  var userAgent =
      (typeof navigator !== 'undefined' && navigator && navigator.userAgent) ||
      '';
  var versionMatch = userAgent.match(/\bMSIE ([678])\./);
  var ieVersion = versionMatch ? +versionMatch[1] : false;
  window['_pr_isIE6'] = function () { return ieVersion; };
  return ieVersion;
};
| |
| |
| (function () { |
// Keyword lists for various languages.
// Each list is a string of keywords, every keyword followed by a single
// space, so that lists can be concatenated directly.
var FLOW_CONTROL_KEYWORDS =
    "break continue do else for if return while ";
var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " +
    "double enum extern float goto int long register short signed sizeof " +
    "static struct switch typedef union unsigned void volatile ";
var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " +
    "new operator private protected public this throw true try typeof ";
var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " +
    "concept concept_map const_cast constexpr decltype " +
    "dynamic_cast explicit export friend inline late_check " +
    "mutable namespace nullptr reinterpret_cast static_assert static_cast " +
    "template typeid typename using virtual wchar_t where ";
var JAVA_KEYWORDS = COMMON_KEYWORDS +
    "abstract boolean byte extends final finally implements import " +
    "instanceof null native package strictfp super synchronized throws " +
    "transient ";
var CSHARP_KEYWORDS = JAVA_KEYWORDS +
    "as base by checked decimal delegate descending event " +
    "fixed foreach from group implicit in interface internal into is lock " +
    "object out override orderby params partial readonly ref sbyte sealed " +
    "stackalloc string select uint ulong unchecked unsafe ushort var ";
var JSCRIPT_KEYWORDS = COMMON_KEYWORDS +
    "debugger eval export function get null set undefined var with " +
    "Infinity NaN ";
var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " +
    "goto if import last local my next no our print package redo require " +
    "sub undef unless until use wantarray while BEGIN END ";
var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " +
    "elif except exec finally from global import in is lambda " +
    "nonlocal not or pass print raise try with yield " +
    "False True None ";
var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" +
    " defined elsif end ensure false in module next nil not or redo rescue " +
    "retry self super then true undef unless until when yield BEGIN END ";
var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " +
    "function in local set then until ";
// Union of every list above (duplicates are not removed).
var ALL_KEYWORDS = (
    CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS +
    PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS);
| |
// Token style names.  They correspond to CSS classes.
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';
| |
/**
 * Regular-expression source text matching a token that can precede a regular
 * expression literal in javascript, followed by optional whitespace.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 * list, but I've removed ones that might be problematic when seen in
 * languages that don't support regular expression literals.
 *
 * <p>Specifically, I've removed any keywords that can't precede a regexp
 * literal in a syntactically legal javascript program, and I've removed the
 * "in" keyword since it's not a keyword in many languages, and might be used
 * as a count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
 * very well in practice.
 *
 * <p>CAVEAT: this does not properly handle the case where a regular
 * expression immediately follows another since a regular expression may
 * have flags for case-sensitivity and the like.  Having regexp tokens
 * adjacent is not valid in any language I'm aware of, so I'm punting.
 * TODO: maybe style special characters inside a regexp as punctuation.
 *
 * @type {string}
 * @private
 */
var REGEXP_PRECEDER_PATTERN = function () {
  var preceders = [
      "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
      "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
      "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
      "<", "<<", "<<=", "<=", "=", "==", "===", ">",
      ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
      "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
      "||=", "~" /* handles =~ and !~ */,
      "break", "case", "continue", "delete",
      "do", "else", "finally", "instanceof",
      "return", "throw", "try", "typeof"
      ];
  // "^^" is a start anchor; "[+-]" stands in for the "+" and "-" entries that
  // are commented out of the list above.
  var alternatives = ['(?:^^', '[+-]'];
  for (var i = 0; i < preceders.length; ++i) {
    // Backslash-escape everything except lower-case letters and the few
    // punctuation characters that are never special in a regexp.
    alternatives.push(preceders[i].replace(/([^=<>:&a-z])/g, '\\$1'));
  }
  // matches at end, and matches empty string
  return alternatives.join('|') + ')\\s*';
}();
| |
// Define regexps here so that the interpreter doesn't have to create an
// object each time the function containing them is called.
// The language spec requires a new object created even if you don't access
// the $1 members.
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;

/**
 * Like textToHtml but also escapes double quotes so that the result is safe
 * inside a double-quoted attribute value.
 * @param {string} str plain text.
 * @return {string} str with &, <, > and " replaced by character entities.
 */
function attribToHtml(str) {
  // The ampersand must be handled first so that the entities produced by the
  // later replacements are not escaped a second time.
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;')
      .replace(pr_quot, '&quot;');
}

/**
 * Escapes html special characters to html.
 * @param {string} str plain text.
 * @return {string} str with &, < and > replaced by character entities.
 */
function textToHtml(str) {
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;');
}
| |
| |
var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;
var pr_nbspEnt = /&nbsp;/g;
// Body of a numeric character reference, i.e. the text between "&#" and ";".
var pr_numericEntityBody = /^(?:[0-9]+|[xX][0-9A-Fa-f]+)$/;

/**
 * Unescapes html to plain text.
 * Decodes decimal and hexadecimal numeric character references and the named
 * entities lt, gt, apos, quot, nbsp (to an ordinary space) and amp.
 * @param {string} html
 * @return {string} html with the recognized entities replaced by the
 *     characters they stand for.  Anything unrecognized is left untouched.
 */
function htmlToText(html) {
  var pos = html.indexOf('&');
  if (pos < 0) { return html; }
  // Handle numeric entities specially.  We can't use functional substitution
  // since that doesn't work in older versions of Safari.
  // These should be rare since most browsers convert them to normal chars.
  for (--pos; (pos = html.indexOf('&#', pos + 1)) >= 0;) {
    var end = html.indexOf(';', pos);
    // No terminator after this point means none after any later "&#" either.
    if (end < 0) { break; }
    // The digits start right after the two characters "&#".
    var body = html.substring(pos + 2, end);
    if (!pr_numericEntityBody.test(body)) { continue; }
    var first = body.charAt(0);
    var codePoint = (first === 'x' || first === 'X')
        ? parseInt(body.substring(1), 16)
        : parseInt(body, 10);
    if (codePoint > 0x10FFFF) { continue; }  // not a Unicode code point
    var decoded;
    if (codePoint > 0xFFFF) {
      // Outside the BMP: String.fromCharCode needs a surrogate pair.
      var offset = codePoint - 0x10000;
      decoded = String.fromCharCode(
          0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    } else {
      decoded = String.fromCharCode(codePoint);
    }
    // Scanning resumes just after pos, so the decoded character itself is
    // never re-examined as the start of another numeric reference.
    html = html.substring(0, pos) + decoded + html.substring(end + 1);
  }

  // &amp; goes last so that "&amp;lt;" yields "&lt;" rather than "<".
  return html.replace(pr_ltEnt, '<')
      .replace(pr_gtEnt, '>')
      .replace(pr_aposEnt, "'")
      .replace(pr_quotEnt, '"')
      .replace(pr_nbspEnt, ' ')
      .replace(pr_ampEnt, '&');
}
| |
/**
 * Is the given node's innerHTML normally unescaped?
 * @param {Node} node
 * @return {boolean} true only for XMP elements.
 */
function isRawContent(node) {
  return 'XMP' === node.tagName;
}
| |
// Deliberately NOT global: a /g regexp keeps lastIndex between test() calls,
// which makes a later call start searching part-way into an unrelated string.
var newlineRe = /[\r\n]/;
/**
 * Are newlines and adjacent spaces significant in the given node's innerHTML?
 * @param {Node} node the element whose content is being examined.
 * @param {string} content the node's innerHTML.
 * @return {boolean} true if the content must not be whitespace-normalized.
 */
function isPreformatted(node, content) {
  // PRE means preformatted, and is a very common case, so don't create
  // unnecessary computed style objects.
  if ('PRE' === node.tagName) { return true; }
  if (!newlineRe.test(content)) { return true; }  // Don't care
  var whitespace = '';
  // For disconnected nodes, IE has no currentStyle.
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    // Firefox makes a best guess if node is disconnected whereas Safari
    // returns the empty string.
    whitespace = window.getComputedStyle(node, null).whiteSpace;
  }
  // An unknown style is treated as preformatted so that nothing is collapsed.
  return !whitespace || whitespace === 'pre';
}
| |
/**
 * Walks the DOM appending a properly escaped serialization of node to out.
 * Elements, attributes, text and CDATA nodes are handled; other node types
 * produce no output.
 * @param {Node} node the node to serialize.
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
function normalizedHtml(node, out) {
  switch (node.nodeType) {
    case 1:  // an element
      var name = node.tagName.toLowerCase();
      out.push('<', name);
      for (var i = 0; i < node.attributes.length; ++i) {
        var attr = node.attributes[i];
        // Skip attributes that were not explicitly specified.
        if (!attr.specified) { continue; }
        out.push(' ');
        normalizedHtml(attr, out);
      }
      out.push('>');
      for (var child = node.firstChild; child; child = child.nextSibling) {
        normalizedHtml(child, out);
      }
      // HTML void elements take no end tag; only emit one for them if they
      // unexpectedly have children.
      if (node.firstChild ||
          !/^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/
              .test(name)) {
        out.push('<\/', name, '>');
      }
      break;
    case 2:  // an attribute
      out.push(node.name.toLowerCase(), '="', attribToHtml(node.value), '"');
      break;
    case 3: case 4:  // text
      out.push(textToHtml(node.nodeValue));
      break;
  }
}
| |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExps.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 * Capturing groups that are never back-referenced become non-capturing, and
 * the remaining groups are renumbered so that they stay unique across the
 * combined pattern.
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups kept so far, counted across ALL inputs.
  var capturedGroupIndex = 0;

  // The combined regex can carry the "i" flag only if no case-sensitive input
  // contains a letter; otherwise case-insensitive inputs are expanded by hand.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  /** Returns the character code denoted by one element of a charset. */
  function decodeEscape(charsetPart) {
    if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); }
    switch (charsetPart.charAt(1)) {
      case 'b': return 8;
      case 't': return 9;
      case 'n': return 0xa;
      case 'v': return 0xb;
      case 'f': return 0xc;
      case 'r': return 0xd;
      case 'u': case 'x':
        // A bare "\u" or "\x" without digits denotes the letter itself.
        // Test with isNaN rather than truthiness so that \x00 decodes to 0.
        var hexValue = parseInt(charsetPart.substring(2), 16);
        return isNaN(hexValue) ? charsetPart.charCodeAt(1) : hexValue;
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7':
        return parseInt(charsetPart.substring(1), 8);
      default: return charsetPart.charCodeAt(1);
    }
  }

  /** Returns regexp source for charCode that is safe inside a charset. */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    // '^' is escaped too: emitted first in a set it would otherwise negate it.
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']' || ch === '^') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a charset such as "[a-c]" so that it matches both cases of every
   * letter it contains, e.g. "[A-Ca-c]".
   */
  function caseFoldCharset(charSet) {
    // "|| []" covers the empty set "[]", for which match() returns null.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      switch (p) {
        case '\\B': case '\\b':
        case '\\D': case '\\d':
        case '\\S': case '\\s':
        case '\\W': case '\\w':
          // Character classes are passed through untouched.
          groups.push(p);
          continue;
      }
      var start = decodeEscape(p);
      var end;
      if (i + 2 < n && '-' === charsetParts[i + 1]) {
        end = decodeEscape(charsetParts[i + 2]);
        i += 2;
      } else {
        end = start;
      }
      ranges.push([start, end]);
      // If the range might intersect letters, then expand it.
      if (!(end < 65 || start > 122)) {
        if (!(end < 65 || start > 90)) {
          // Upper-case portion: add the lower-case equivalent.
          ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
        }
        if (!(end < 97 || start > 122)) {
          // Lower-case portion: add the upper-case equivalent.
          ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        out.push('-', encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Returns the source of regex rewritten so that it can be one alternative
   * of the combined pattern: unanchored, case-folded if required, and with
   * its capturing groups renumbered.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the group the back-reference NAMES, not the most recently
          // opened group, which may have been made non-capturing.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
| |
// Tri-state cache for the innerHTML feature test below: null until the first
// call to getInnerHtml, then true or false.
var PR_innerHtmlWorks = null;
/**
 * Returns the HTML content of node, working around a broken innerHTML.
 * @param {Node} node
 * @return {string} the node's content as html.  XMP content is escaped, and
 *     content whose whitespace is not significant has its line breaks
 *     collapsed.
 */
function getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
  // tag, so we detect that case and emulate innerHTML.
  if (null === PR_innerHtmlWorks) {
    var testNode = document.createElement('PRE');
    testNode.appendChild(
        document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />'));
    // A working innerHTML escapes the text node, leaving no raw '<'.
    PR_innerHtmlWorks = !/</.test(testNode.innerHTML);
  }

  if (PR_innerHtmlWorks) {
    var content = node.innerHTML;
    // XMP tags contain unescaped entities so require special handling.
    if (isRawContent(node)) {
      content = textToHtml(content);
    } else if (!isPreformatted(node, content)) {
      // Drop line breaks that directly follow a <br>, then collapse the
      // remaining line breaks (with following indentation) to one space.
      // Case-insensitive so that an upper-case <BR> is recognized too.
      content = content.replace(/(<br\s*\/?>)[\r\n]+/gi, '$1')
          .replace(/(?:[\r\n]+[ \t]*)+/g, ' ');
    }
    return content;
  }

  // Fallback: serialize the children by hand.
  var out = [];
  for (var child = node.firstChild; child; child = child.nextSibling) {
    normalizedHtml(child, out);
  }
  return out.join('');
}
| |
/**
 * Returns a function that expands tabs to spaces.  This function can be fed
 * successive chunks of text, and will maintain its own internal state to
 * keep track of how tabs are expanded.
 * @param {number} tabWidth the number of characters between tab columns.
 * @return {function (string) : string} a function that takes
 *     plain text and returns the text with tabs expanded.
 * @private
 */
function makeTabExpander(tabWidth) {
  // Column of the next character on the current line; persists across calls.
  var charInLine = 0;

  return function (plainText) {
    // walk over each character looking for tabs and newlines.
    // On tabs, expand them.  On newlines, reset charInLine.
    // Otherwise increment charInLine
    var out = null;  // allocated lazily: most chunks contain no tab
    var pos = 0;     // start of the text not yet copied to out
    for (var i = 0, n = plainText.length; i < n; ++i) {
      var ch = plainText.charAt(i);

      switch (ch) {
        case '\t':
          if (!out) { out = []; }
          out.push(plainText.substring(pos, i));
          // nSpaces is the amount of padding -- the number of spaces needed
          // to move us to the next column, where columns occur at multiples
          // of tabWidth.
          var nSpaces = tabWidth - (charInLine % tabWidth);
          charInLine += nSpaces;
          // Build the padding from its length instead of slicing a fixed
          // string, so any tab width works.
          out.push(new Array(nSpaces + 1).join(' '));
          pos = i + 1;
          break;
        case '\n':
          charInLine = 0;
          break;
        default:
          ++charInLine;
      }
    }
    if (!out) { return plainText; }
    out.push(plainText.substring(pos));
    return out.join('');
  };
}
| |
// Splits markup into chunks: text runs, comments, CDATA sections, tags, and
// stray '<' characters.  Global and without capturing groups, so
// String.match returns the list of all chunks.
var pr_chunkPattern = new RegExp(
    '[^<]+'  // A run of characters other than '<'
    + '|<\!--[\\s\\S]*?--\>'  // an HTML comment
    + '|<!\\[CDATA\\[[\\s\\S]*?\\]\\]>'  // a CDATA section
    // a probable tag that should not be highlighted
    + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>'
    + '|<',  // A '<' that does not begin a larger chunk
    'g');
// Matches the start of an HTML comment chunk.
var pr_commentPrefix = /^<\!--/;
// Matches the start of a CDATA section chunk.
var pr_cdataPrefix = /^<!\[CDATA\[/;
// Matches the start of a <br> tag chunk, in any letter case.
var pr_brPrefix = /^<br\b/i;
// Captures the optional '/' of an end tag and the tag name.
var pr_tagNameRe = /^<(\/?)([a-zA-Z][a-zA-Z0-9]*)/;
| |
/**
 * Splits markup into plain source text and the html tags that were embedded
 * in it, converting tags which are significant for tokenization (<br>) into
 * their textual equivalent.  HTML comments are dropped.
 *
 * @param {string} s html where whitespace is considered significant.
 * @return {Object} an object with two members: {@code source}, the plain
 *     text of the code, and {@code tags}, a flat array of alternating
 *     positions in {@code source} and the markup removed at that position.
 * @private
 */
function extractTags(s) {
  // since the pattern has the 'g' modifier and defines no capturing groups,
  // this will return a list of all chunks which we then classify.
  var matches = s.match(pr_chunkPattern);
  var sourceBuf = [];
  // Length of the text in sourceBuf, i.e. the position in the plain source
  // at which the next chunk falls.
  var sourceBufLen = 0;
  var extractedTags = [];
  if (matches) {
    for (var i = 0, n = matches.length; i < n; ++i) {
      var match = matches[i];
      if (match.length > 1 && match.charAt(0) === '<') {
        if (pr_commentPrefix.test(match)) { continue; }
        if (pr_cdataPrefix.test(match)) {
          // strip CDATA prefix (9 chars) and suffix (3 chars).  Don't
          // unescape since it's CDATA
          sourceBuf.push(match.substring(9, match.length - 3));
          sourceBufLen += match.length - 12;
        } else if (pr_brPrefix.test(match)) {
          // <br> tags are lexically significant so convert them to text.
          // This is undone later.
          sourceBuf.push('\n');
          ++sourceBufLen;
        } else if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) {
          // A <span class="nocode"> will start a section that should be
          // ignored.  Continue walking the list until we see a matching end
          // tag, tracking nesting of same-named tags.
          var name = match.match(pr_tagNameRe)[2];
          var depth = 1;
          var j;
          for (j = i + 1; j < n; ++j) {
            var name2 = matches[j].match(pr_tagNameRe);
            if (name2 && name2[2] === name) {
              if (name2[1] === '/') {
                if (--depth === 0) { break; }
              } else {
                ++depth;
              }
            }
          }
          if (j < n) {
            // Keep the whole section, markup and text, as one extracted tag.
            extractedTags.push(
                sourceBufLen, matches.slice(i, j + 1).join(''));
            i = j;
          } else {  // Ignore unclosed sections.
            extractedTags.push(sourceBufLen, match);
          }
        } else {
          extractedTags.push(sourceBufLen, match);
        }
      } else {
        var literalText = htmlToText(match);
        sourceBuf.push(literalText);
        sourceBufLen += literalText.length;
      }
    }
  }
  return { source: sourceBuf.join(''), tags: extractedTags };
}
| |
/**
 * True if the given tag contains a class attribute with the nocode class.
 * @param {string} tag the text of a single start tag.
 * @return {boolean}
 */
function isNoCodeTag(tag) {
  // First canonicalize the representation of attributes so that every value
  // is double-quoted, whichever quoting style the tag used.
  var canonical = tag.replace(
      /\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g,
      ' $1="$2$3$4"');
  // Then look for the attribute we want.
  return !!canonical.match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/);
}
| |
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.  Does nothing when sourceCode is empty.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object that has
 *    {@code source} and {@code basePos} members; whatever it leaves in
 *    {@code job.decorations} is appended to out.
 * @param {Array} out the decoration list to append to.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  var job = {
    source: sourceCode,
    basePos: basePos
  };
  langHandler(job);
  out.push.apply(out, job.decorations);
}
| |
/**
 * Given triples of [style, pattern, context] returns a lexing function.
 * The lexing function interprets the patterns to find token boundaries and
 * returns a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * The stylePatterns is a list whose elements have the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN, or can be a string of the
 * form 'lang-FOO', where FOO is a language extension describing the
 * language of the portion of the token in $1 after pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token will be passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 will be restyled using this decorator
 * so decorators should take care that this doesn't result in infinite
 * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
 * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
 * '<script>foo()<\/script>', which would cause the current decorator to
 * be called with '<script>' which would not match the same rule since
 * group 1 must not be empty, so it would be instead styled as PR_TAG by
 * the generic tag rule.  The handler registered for the 'js' extension would
 * then be called with 'foo()', and finally, the current decorator would
 * be called with '<\/script>' which would not match the original rule and
 * so the generic tag rule would identify it as a tag.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a function that takes a job object, lexes
 *   {@code job.source}, and stores the list of decorations in
 *   {@code job.decorations}.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a first character to the one pattern that handles tokens starting
  // with it.
  var shortcuts = {};
  // One global regex, the union of all patterns, that splits source text
  // into tokens.
  var tokenizer;
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var allRegexs = [];
    var regexKeys = {};  // string forms of the regexes already collected
    for (var i = 0, n = allPatterns.length; i < n; ++i) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
      var regex = patternParts[1];
      var k = '' + regex;
      if (!regexKeys.hasOwnProperty(k)) {
        allRegexs.push(regex);
        regexKeys[k] = null;
      }
    }
    // Catch-all so that a character no pattern recognizes still becomes a
    // one-character token.
    allRegexs.push(/[\0-\uffff]/);
    tokenizer = combinePrefixPatterns(allRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.source and produces an output array job.decorations of style
   * classes preceded by the position at which they start in job.source in
   * order.
   *
   * @param {Object} job an object like {@code
   *    source: {string} sourceText plain text,
   *    basePos: {int} position of job.source in the larger chunk of
   *        sourceCode.
   * }
   */
  var decorate = function (job) {
    var sourceCode = job.source, basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var pos = 0;  // index into sourceCode
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style chosen for a token's text so that repeated tokens
    // skip the pattern search.  Embedded-language tokens are never cached.
    var styleCache = {};

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var style = styleCache[token];
      var match = void 0;

      var isEmbedded;
      if (typeof style === 'string') {
        isEmbedded = false;
      } else {
        var patternParts = shortcuts[token.charAt(0)];
        if (patternParts) {
          match = token.match(patternParts[1]);
          style = patternParts[0];
        } else {
          for (var i = 0; i < nPatterns; ++i) {
            patternParts = fallthroughStylePatterns[i];
            match = token.match(patternParts[1]);
            if (match) {
              style = patternParts[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-' style without a usable group 1 has nothing to hand to
          // an embedded handler, so style the token as generic source.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      var tokenStart = pos;
      pos += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
      } else {  // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        var lang = style.substring(5);
        // Decorate the left of the embedded source
        appendDecorations(
            basePos + tokenStart,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        // Decorate the embedded source
        appendDecorations(
            basePos + tokenStart + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            basePos + tokenStart + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }
    }
    job.decorations = decorations;
  };
  return decorate;
}
| |
| /** returns a function that produces a list of decorations from source text. |
| * |
| * This code treats ", ', and ` as string delimiters, and \ as a string |
| * escape. It does not recognize perl's qq() style strings. |
| * It has no special handling for double delimiter escapes as in basic, or |
| * the tripled delimiters used in python, but should work on those regardless |
| * although in those cases a single string literal may be broken up into |
| * multiple adjacent string literals. |
| * |
| * It recognizes C, C++, and shell style comments. |
| * |
| * @param {Object} options a set of optional parameters. |
| * @return {function (Object)} a function that examines the source code |
| * in the input job and builds the decoration list. |
| */ |
| function sourceDecorator(options) { |
| var shortcutStylePatterns = [], fallthroughStylePatterns = []; |
| if (options['tripleQuotedStrings']) { |
| // '''multi-line-string''', 'single-line-string', and double-quoted |
| shortcutStylePatterns.push( |
| [PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/, |
| null, '\'"']); |
| } else if (options['multiLineStrings']) { |
| // 'multi-line-string', "multi-line-string" |
| shortcutStylePatterns.push( |
| [PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/, |
| null, '\'"`']); |
| } else { |
| // 'single-line-string', "single-line-string" |
| shortcutStylePatterns.push( |
| [PR_STRING, |
| /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/, |
| null, '"\'']); |
| } |
| if (options['verbatimStrings']) { |
| // verbatim-string-literal production from the C# grammar. See issue 93. |
| fallthroughStylePatterns.push( |
| [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]); |
| } |
| if (options['hashComments']) { |
| if (options['cStyleComments']) { |
| // Stop C preprocessor declarations at an unclosed open comment |
| shortcutStylePatterns.push( |
| [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/, |
| null, '#']); |
| fallthroughStylePatterns.push( |
| [PR_STRING, |
| /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/, |
| null]); |
| } else { |
| shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']); |
| } |
| } |
| if (options['cStyleComments']) { |
| fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]); |
| fallthroughStylePatterns.push( |
| [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]); |
| } |
| if (options['regexLiterals']) { |
| var REGEX_LITERAL = ( |
| // A regular expression literal starts with a slash that is |
| // not followed by * or / so that it is not confused with |
| // comments. |
| '/(?=[^/*])' |
| // and then contains any number of raw characters, |
| + '(?:[^/\\x5B\\x5C]' |
| // escape sequences (\x5C), |
| + '|\\x5C[\\s\\S]' |
| // or non-nesting character sets (\x5B\x5D); |
| + '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+' |
| // finally closed by a /. |
| + '/'); |
| fallthroughStylePatterns.push( |
| ['lang-regex', |
| new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')') |
| ]); |
| } |
| |
| var keywords = options['keywords'].replace(/^\s+|\s+$/g, ''); |
| if (keywords.length) { |
| fallthroughStylePatterns.push( |
| [PR_KEYWORD, |
| new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null]); |
| } |
| |
| shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']); |
| fallthroughStylePatterns.push( |
| // TODO(mikesamuel): recognize non-latin letters and numerals in idents |
| [PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null], |
| [PR_TYPE, /^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/, null], |
| [PR_PLAIN, /^[a-z_$][a-z_$@0-9]*/i, null], |
| [PR_LITERAL, |
| new RegExp( |
| '^(?:' |
| // A hex number |
| + '0x[a-f0-9]+' |
| // or an octal or decimal number, |
| + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)' |
| // possibly in scientific notation |
| + '(?:e[+\\-]?\\d+)?' |
| + ')' |
| // with an optional modifier like UL for unsigned long |
| + '[a-z]*', 'i'), |
| null, '0123456789'], |
| [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#]*/, null]); |
| |
| return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns); |
| } |
| |
| var decorateSource = sourceDecorator({ |
| 'keywords': ALL_KEYWORDS, |
| 'hashComments': true, |
| 'cStyleComments': true, |
| 'multiLineStrings': true, |
| 'regexLiterals': true |
| }); |
| |
| /** Breaks {@code job.source} around style boundaries in |
| * {@code job.decorations} while re-interleaving {@code job.extractedTags}, |
| * and leaves the result in {@code job.prettyPrintedHtml}. |
| * @param {Object} job like { |
| * source: {string} source as plain text, |
| * extractedTags: {Array.<number|string>} extractedTags chunks of raw |
| * html preceded by their position in {@code job.source} |
| * in order |
| * decorations: {Array.<number|string} an array of style classes preceded |
| * by the position at which they start in job.source in order |
| * } |
| * @private |
| */ |
| function recombineTagsAndDecorations(job) { |
| var sourceText = job.source; |
| var extractedTags = job.extractedTags; |
| var decorations = job.decorations; |
| |
| var html = []; |
| // index past the last char in sourceText written to html |
| var outputIdx = 0; |
| |
| var openDecoration = null; |
| var currentDecoration = null; |
| var tagPos = 0; // index into extractedTags |
| var decPos = 0; // index into decorations |
| var tabExpander = makeTabExpander(window['PR_TAB_WIDTH']); |
| |
| var adjacentSpaceRe = /([\r\n ]) /g; |
| var startOrSpaceRe = /(^| ) /gm; |
| var newlineRe = /\r\n?|\n/g; |
| var trailingSpaceRe = /[ \r\n]$/; |
| var lastWasSpace = true; // the last text chunk emitted ended with a space. |
| |
| // A helper function that is responsible for opening sections of decoration |
| // and outputing properly escaped chunks of source |
| function emitTextUpTo(sourceIdx) { |
| if (sourceIdx > outputIdx) { |
| if (openDecoration && openDecoration !== currentDecoration) { |
| // Close the current decoration |
| html.push('</span>'); |
| openDecoration = null; |
| } |
| if (!openDecoration && currentDecoration) { |
| openDecoration = currentDecoration; |
| html.push('<span class="', openDecoration, '">'); |
| } |
| // This interacts badly with some wikis which introduces paragraph tags |
| // into pre blocks for some strange reason. |
| // It's necessary for IE though which seems to lose the preformattedness |
| // of <pre> tags when their innerHTML is assigned. |
| // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html |
| // and it serves to undo the conversion of <br>s to newlines done in |
| // chunkify. |
| var htmlChunk = textToHtml( |
| tabExpander(sourceText.substring(outputIdx, sourceIdx))) |
| .replace(lastWasSpace |
| ? startOrSpaceRe |
| : adjacentSpaceRe, '$1 '); |
| // Keep track of whether we need to escape space at the beginning of the |
| // next chunk. |
| lastWasSpace = trailingSpaceRe.test(htmlChunk); |
| // IE collapses multiple adjacient <br>s into 1 line break. |
| // Prefix every <br> with ' ' can prevent such IE's behavior. |
| var lineBreakHtml = window['_pr_isIE6']() ? ' <br />' : '<br />'; |
| html.push(htmlChunk.replace(newlineRe, lineBreakHtml)); |
| outputIdx = sourceIdx; |
| } |
| } |
| |
| while (true) { |
| // Determine if we're going to consume a tag this time around. Otherwise |
| // we consume a decoration or exit. |
| var outputTag; |
| if (tagPos < extractedTags.length) { |
| if (decPos < decorations.length) { |
| // Pick one giving preference to extractedTags since we shouldn't open |
| // a new style that we're going to have to immediately close in order |
| // to output a tag. |
| outputTag = extractedTags[tagPos] <= decorations[decPos]; |
| } else { |
| outputTag = true; |
| } |
| } else { |
| outputTag = false; |
| } |
| // Consume either a decoration or a tag or exit. |
| if (outputTag) { |
| emitTextUpTo(extractedTags[tagPos]); |
| if (openDecoration) { |
| // Close the current decoration |
| html.push('</span>'); |
| openDecoration = null; |
| } |
| html.push(extractedTags[tagPos + 1]); |
| tagPos += 2; |
| } else if (decPos < decorations.length) { |
| emitTextUpTo(decorations[decPos]); |
| currentDecoration = decorations[decPos + 1]; |
| decPos += 2; |
| } else { |
| break; |
| } |
| } |
| emitTextUpTo(sourceText.length); |
| if (openDecoration) { |
| html.push('</span>'); |
| } |
| job.prettyPrintedHtml = html.join(''); |
| } |
| |
| /** Maps language-specific file extensions to handlers. |
| * Written by registerLangHandler and read by langHandlerForExtension. |
| * @type {Object.<string, function (Object)>} |
| */ |
| var langHandlerRegistry = {}; |
| /** Register a language handler for the given file extensions. |
| * @param {function (Object)} handler a function from source code to a list |
| * of decorations. Takes a single argument job which describes the |
| * state of the computation. The single parameter has the form |
| * {@code { |
| * source: {string} as plain text. |
| * decorations: {Array.<number|string>} an array of style classes |
| * preceded by the position at which they start in |
| * job.source in order. |
| * The language handler should assigned this field. |
| * basePos: {int} the position of source in the larger source chunk. |
| * All positions in the output decorations array are relative |
| * to the larger source chunk. |
| * } } |
| * @param {Array.<string>} fileExtensions |
| */ |
| function registerLangHandler(handler, fileExtensions) { |
| for (var i = fileExtensions.length; --i >= 0;) { |
| var ext = fileExtensions[i]; |
| if (!langHandlerRegistry.hasOwnProperty(ext)) { |
| langHandlerRegistry[ext] = handler; |
| } else if ('console' in window) { |
| console.warn('cannot override language handler %s', ext); |
| } |
| } |
| } |
| function langHandlerForExtension(extension, source) { |
| if (!(extension && langHandlerRegistry.hasOwnProperty(extension))) { |
| // Treat it as markup if the first non whitespace character is a < and |
| // the last non-whitespace character is a >. |
| extension = /^\s*</.test(source) |
| ? 'default-markup' |
| : 'default-code'; |
| } |
| return langHandlerRegistry[extension]; |
| } |
| registerLangHandler(decorateSource, ['default-code']); |
| registerLangHandler( |
| createSimpleLexer( |
| [], |
| [ |
| [PR_PLAIN, /^[^<?]+/], |
| [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/], |
| [PR_COMMENT, /^<\!--[\s\S]*?(?:-\->|$)/], |
| // Unescaped content in an unknown language |
| ['lang-', /^<\?([\s\S]+?)(?:\?>|$)/], |
| ['lang-', /^<%([\s\S]+?)(?:%>|$)/], |
| [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/], |
| ['lang-', /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i], |
| // Unescaped content in javascript. (Or possibly vbscript). |
| ['lang-js', /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i], |
| // Contains unescaped stylesheet content |
| ['lang-css', /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i], |
| ['lang-in.tag', /^(<\/?[a-z][^<>]*>)/i] |
| ]), |
| ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']); |
| registerLangHandler( |
| createSimpleLexer( |
| [ |
| [PR_PLAIN, /^[\s]+/, null, ' \t\r\n'], |
| [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\''] |
| ], |
| [ |
| [PR_TAG, /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i], |
| [PR_ATTRIB_NAME, /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i], |
| ['lang-uq.val', /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/], |
| [PR_PUNCTUATION, /^[=<>\/]+/], |
| ['lang-js', /^on\w+\s*=\s*\"([^\"]+)\"/i], |
| ['lang-js', /^on\w+\s*=\s*\'([^\']+)\'/i], |
| ['lang-js', /^on\w+\s*=\s*([^\"\'>\s]+)/i], |
| ['lang-css', /^style\s*=\s*\"([^\"]+)\"/i], |
| ['lang-css', /^style\s*=\s*\'([^\']+)\'/i], |
| ['lang-css', /^style\s*=\s*([^\"\'>\s]+)/i] |
| ]), |
| ['in.tag']); |
| registerLangHandler( |
| createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': CPP_KEYWORDS, |
| 'hashComments': true, |
| 'cStyleComments': true |
| }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': 'null true false' |
| }), ['json']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': CSHARP_KEYWORDS, |
| 'hashComments': true, |
| 'cStyleComments': true, |
| 'verbatimStrings': true |
| }), ['cs']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': JAVA_KEYWORDS, |
| 'cStyleComments': true |
| }), ['java']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': SH_KEYWORDS, |
| 'hashComments': true, |
| 'multiLineStrings': true |
| }), ['bsh', 'csh', 'sh']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': PYTHON_KEYWORDS, |
| 'hashComments': true, |
| 'multiLineStrings': true, |
| 'tripleQuotedStrings': true |
| }), ['cv', 'py']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': PERL_KEYWORDS, |
| 'hashComments': true, |
| 'multiLineStrings': true, |
| 'regexLiterals': true |
| }), ['perl', 'pl', 'pm']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': RUBY_KEYWORDS, |
| 'hashComments': true, |
| 'multiLineStrings': true, |
| 'regexLiterals': true |
| }), ['rb']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': JSCRIPT_KEYWORDS, |
| 'cStyleComments': true, |
| 'regexLiterals': true |
| }), ['js']); |
| registerLangHandler( |
| createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']); |
| |
| function applyDecorator(job) { |
| var sourceCodeHtml = job.sourceCodeHtml; |
| var opt_langExtension = job.langExtension; |
| |
| // Prepopulate output in case processing fails with an exception. |
| job.prettyPrintedHtml = sourceCodeHtml; |
| |
| try { |
| // Extract tags, and convert the source code to plain text. |
| var sourceAndExtractedTags = extractTags(sourceCodeHtml); |
| /** Plain text. @type {string} */ |
| var source = sourceAndExtractedTags.source; |
| job.source = source; |
| job.basePos = 0; |
| |
| /** Even entries are positions in source in ascending order. Odd entries |
| * are tags that were extracted at that position. |
| * @type {Array.<number|string>} |
| */ |
| job.extractedTags = sourceAndExtractedTags.tags; |
| |
| // Apply the appropriate language handler |
| langHandlerForExtension(opt_langExtension, source)(job); |
| // Integrate the decorations and tags back into the source code to produce |
| // a decorated html string which is left in job.prettyPrintedHtml. |
| recombineTagsAndDecorations(job); |
| } catch (e) { |
| if ('console' in window) { |
| console.log(e); |
| console.trace(); |
| } |
| } |
| } |
| |
| function prettyPrintOne(sourceCodeHtml, opt_langExtension) { |
| var job = { |
| sourceCodeHtml: sourceCodeHtml, |
| langExtension: opt_langExtension |
| }; |
| applyDecorator(job); |
| return job.prettyPrintedHtml; |
| } |
| |
| function prettyPrint(opt_whenDone) { |
| var isIE678 = window['_pr_isIE6'](); |
| var ieNewline = isIE678 === 6 ? '\r\n' : '\r'; |
| // See bug 71 and http://stackoverflow.com/questions/136443/why-doesnt-ie7- |
| |
| // fetch a list of nodes to rewrite |
| var codeSegments = [ |
| document.getElementsByTagName('pre'), |
| document.getElementsByTagName('code'), |
| document.getElementsByTagName('xmp') ]; |
| var elements = []; |
| for (var i = 0; i < codeSegments.length; ++i) { |
| for (var j = 0, n = codeSegments[i].length; j < n; ++j) { |
| elements.push(codeSegments[i][j]); |
| } |
| } |
| codeSegments = null; |
| |
| var clock = Date; |
| if (!clock['now']) { |
| clock = { 'now': function () { return (new Date).getTime(); } }; |
| } |
| |
| // The loop is broken into a series of continuations to make sure that we |
| // don't make the browser unresponsive when rewriting a large page. |
| var k = 0; |
| var prettyPrintingJob; |
| |
| function doWork() { |
| var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ? |
| clock.now() + 250 /* ms */ : |
| Infinity); |
| for (; k < elements.length && clock.now() < endTime; k++) { |
| var cs = elements[k]; |
| if (cs.className && cs.className.indexOf('prettyprint') >= 0) { |
| // If the classes includes a language extensions, use it. |
| // Language extensions can be specified like |
| // <pre class="prettyprint lang-cpp"> |
| // the language extension "cpp" is used to find a language handler as |
| // passed to PR_registerLangHandler. |
| var langExtension = cs.className.match(/\blang-(\w+)\b/); |
| if (langExtension) { langExtension = langExtension[1]; } |
| |
| // make sure this is not nested in an already prettified element |
| var nested = false; |
| for (var p = cs.parentNode; p; p = p.parentNode) { |
| if ((p.tagName === 'pre' || p.tagName === 'code' || |
| p.tagName === 'xmp') && |
| p.className && p.className.indexOf('prettyprint') >= 0) { |
| nested = true; |
| break; |
| } |
| } |
| if (!nested) { |
| // fetch the content as a snippet of properly escaped HTML. |
| // Firefox adds newlines at the end. |
| var content = getInnerHtml(cs); |
| content = content.replace(/(?:\r\n?|\n)$/, ''); |
| |
| // do the pretty printing |
| prettyPrintingJob = { |
| sourceCodeHtml: content, |
| langExtension: langExtension, |
| sourceNode: cs |
| }; |
| applyDecorator(prettyPrintingJob); |
| replaceWithPrettyPrintedHtml(); |
| } |
| } |
| } |
| if (k < elements.length) { |
| // finish up in a continuation |
| setTimeout(doWork, 250); |
| } else if (opt_whenDone) { |
| opt_whenDone(); |
| } |
| } |
| |
| function replaceWithPrettyPrintedHtml() { |
| var newContent = prettyPrintingJob.prettyPrintedHtml; |
| if (!newContent) { return; } |
| var cs = prettyPrintingJob.sourceNode; |
| |
| // push the prettified html back into the tag. |
| if (!isRawContent(cs)) { |
| // just replace the old html with the new |
| cs.innerHTML = newContent; |
| } else { |
| // we need to change the tag to a <pre> since <xmp>s do not allow |
| // embedded tags such as the span tags used to attach styles to |
| // sections of source code. |
| var pre = document.createElement('PRE'); |
| for (var i = 0; i < cs.attributes.length; ++i) { |
| var a = cs.attributes[i]; |
| if (a.specified) { |
| var aname = a.name.toLowerCase(); |
| if (aname === 'class') { |
| pre.className = a.value; // For IE 6 |
| } else { |
| pre.setAttribute(a.name, a.value); |
| } |
| } |
| } |
| pre.innerHTML = newContent; |
| |
| // remove the old |
| cs.parentNode.replaceChild(pre, cs); |
| cs = pre; |
| } |
| |
| // Replace <br>s with line-feeds so that copying and pasting works |
| // on IE 6. |
| // Doing this on other browsers breaks lots of stuff since \r\n is |
| // treated as two newlines on Firefox, and doing this also slows |
| // down rendering. |
| if (isIE678 && cs.tagName === 'PRE') { |
| var lineBreaks = cs.getElementsByTagName('br'); |
| for (var j = lineBreaks.length; --j >= 0;) { |
| var lineBreak = lineBreaks[j]; |
| lineBreak.parentNode.replaceChild( |
| document.createTextNode(ieNewline), lineBreak); |
| } |
| } |
| } |
| |
| doWork(); |
| } |
| |
| window['PR_normalizedHtml'] = normalizedHtml; |
| window['prettyPrintOne'] = prettyPrintOne; |
| window['prettyPrint'] = prettyPrint; |
| window['PR'] = { |
| 'combinePrefixPatterns': combinePrefixPatterns, |
| 'createSimpleLexer': createSimpleLexer, |
| 'registerLangHandler': registerLangHandler, |
| 'sourceDecorator': sourceDecorator, |
| 'PR_ATTRIB_NAME': PR_ATTRIB_NAME, |
| 'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE, |
| 'PR_COMMENT': PR_COMMENT, |
| 'PR_DECLARATION': PR_DECLARATION, |
| 'PR_KEYWORD': PR_KEYWORD, |
| 'PR_LITERAL': PR_LITERAL, |
| 'PR_NOCODE': PR_NOCODE, |
| 'PR_PLAIN': PR_PLAIN, |
| 'PR_PUNCTUATION': PR_PUNCTUATION, |
| 'PR_SOURCE': PR_SOURCE, |
| 'PR_STRING': PR_STRING, |
| 'PR_TAG': PR_TAG, |
| 'PR_TYPE': PR_TYPE |
| }; |
| })(); |