/**
 * @fileoverview
 * some functions for browser-side pretty printing of code contained in html.
 * <p>
 *
 * For a fairly comprehensive set of languages see the
 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a>
 * file that came with this source.  At a minimum, the lexer should work on a
 * number of languages including C and friends, Java, Python, Bash, SQL, HTML,
 * XML, CSS, Javascript, and Makefiles.  It works passably on Ruby, PHP and Awk
 * and a subset of Perl, but, because of commenting conventions, doesn't work on
 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.
 * <p>
 * Usage: <ol>
 * <li> include this source file in an html page via
 *   {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}
 * <li> define style rules.  See the example page for examples.
 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with
 *    {@code class=prettyprint.}
 *    You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
 *    printer needs to do more substantial DOM manipulations to support that, so
 *    some css styles may not be preserved.
 * </ol>
 * That's it.  I wanted to keep the API as simple as possible, so there's no
 * need to specify which language the code is in, but if you wish, you can add
 * another class to the {@code <pre>} or {@code <code>} element to specify the
 * language, as in {@code <pre class="prettyprint lang-java">}.  Any class that
 * starts with "lang-" followed by a file extension, specifies the file type.
 * See the "lang-*.js" files in this directory for code that implements
 * per-language file handlers.
 * <p>
 * Change log:<br>
 * cbeust, 2006/08/22
 * <blockquote>
 *   Java annotations (start with "@") are now captured as literals ("lit")
 * </blockquote>
 * @requires console
 */
// JSLint declarations
/*global console, document, navigator, setTimeout, window */
/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/** the number of characters between tab columns */
window['PR_TAB_WIDTH'] = 8;
// Placeholders for the public entry points.  Each doc comment below documents
// the property named on the line that follows it; all four are initialised to
// undefined here by one chained assignment.

/** Walks the DOM returning a properly escaped version of innerHTML.
  * @param {Node} node
  * @param {Array.<string>} out output buffer that receives chunks of HTML.
  */
window['PR_normalizedHtml']

/** Contains functions for creating and registering new language handlers.
  * @type {Object}
  */
  = window['PR']

/** Pretty print a chunk of code.
  *
  * @param {string} sourceCodeHtml code as html
  * @return {string} code as html, but prettier
  */
  = window['prettyPrintOne']

/** Find all the {@code <pre>} and {@code <code>} tags in the DOM with
  * {@code class=prettyprint} and prettify them.
  * @param {Function?} opt_whenDone if specified, called when the last entry
  *     has been finished.
  */
  = window['prettyPrint'] = void 0;
/**
 * Browser detection.  Despite the name, this recognises MSIE 6, 7 and 8.
 * @extern
 * @returns false if the user agent does not report MSIE 6, 7 or 8, otherwise
 *     the major version as a number.
 */
window['_pr_isIE6'] = function () {
  var ieVersion = navigator && navigator.userAgent &&
      navigator.userAgent.match(/\bMSIE ([678])\./);
  ieVersion = ieVersion ? +ieVersion[1] : false;
  // Replace this function with one that returns the cached answer so the
  // user agent string is only inspected once.
  window['_pr_isIE6'] = function () { return ieVersion; };
  return ieVersion;
};
(function () { | |
// Keyword lists for various languages.
// Each list is a space separated string that ends with a trailing space so
// that lists can be concatenated directly.
var FLOW_CONTROL_KEYWORDS =
    "break continue do else for if return while ";
var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " +
    "double enum extern float goto int long register short signed sizeof " +
    "static struct switch typedef union unsigned void volatile ";
var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " +
    "new operator private protected public this throw true try typeof ";
var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " +
    "concept concept_map const_cast constexpr decltype " +
    "dynamic_cast explicit export friend inline late_check " +
    "mutable namespace nullptr reinterpret_cast static_assert static_cast " +
    "template typeid typename using virtual wchar_t where ";
var JAVA_KEYWORDS = COMMON_KEYWORDS +
    "abstract boolean byte extends final finally implements import " +
    "instanceof null native package strictfp super synchronized throws " +
    "transient ";
var CSHARP_KEYWORDS = JAVA_KEYWORDS +
    "as base by checked decimal delegate descending event " +
    "fixed foreach from group implicit in interface internal into is lock " +
    "object out override orderby params partial readonly ref sbyte sealed " +
    "stackalloc string select uint ulong unchecked unsafe ushort var ";
var JSCRIPT_KEYWORDS = COMMON_KEYWORDS +
    "debugger eval export function get null set undefined var with " +
    "Infinity NaN ";
var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " +
    "goto if import last local my next no our print package redo require " +
    "sub undef unless until use wantarray while BEGIN END ";
var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " +
    "elif except exec finally from global import in is lambda " +
    "nonlocal not or pass print raise try with yield " +
    "False True None ";
var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" +
    " defined elsif end ensure false in module next nil not or redo rescue " +
    "retry self super then true undef unless until when yield BEGIN END ";
var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " +
    "function in local set then until ";
// The union of all of the lists above (duplicates are not removed).
var ALL_KEYWORDS = (
    CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS +
    PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS);
// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text, i.e. text with no more specific style. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';
/** A set of tokens that can precede a regular expression literal in
  * javascript.
  * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
  * list, but I've removed ones that might be problematic when seen in
  * languages that don't support regular expression literals.
  *
  * <p>Specifically, I've removed any keywords that can't precede a regexp
  * literal in a syntactically legal javascript program, and I've removed the
  * "in" keyword since it's not a keyword in many languages, and might be used
  * as a count of inches.
  *
  * <p>The link above does not accurately describe EcmaScript rules since
  * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
  * very well in practice.
  *
  * <p>The value is the source text of a regular expression (a string, not a
  * RegExp): a non-capturing alternation of the preceders followed by
  * {@code \s*}.
  *
  * @private
  */
var REGEXP_PRECEDER_PATTERN = function () {
  // CAVEAT: this does not properly handle the case where a regular
  // expression immediately follows another since a regular expression may
  // have flags for case-sensitivity and the like.  Having regexp tokens
  // adjacent is not valid in any language I'm aware of, so I'm punting.
  // TODO: maybe style special characters inside a regexp as punctuation.
  var preceders = [
      "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
      "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
      "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
      "<", "<<", "<<=", "<=", "=", "==", "===", ">",
      ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
      "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
      "||=", "~" /* handles =~ and !~ */,
      "break", "case", "continue", "delete",
      "do", "else", "finally", "instanceof",
      "return", "throw", "try", "typeof"
      ];
  // "^^" is this file's marker for a true start-of-input anchor (see
  // combinePrefixPatterns); "+" and "-" are covered by the character set.
  var pattern = '(?:^^|[+-]';
  for (var i = 0; i < preceders.length; ++i) {
    // Backslash-escape every character other than =, <, >, :, & and
    // lowercase letters so punctuation is matched literally.
    pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1');
  }
  pattern += ')\\s*';  // matches at end, and matches empty string
  return pattern;
}();
// Define regexps here so that the interpreter doesn't have to create an
// object each time the function containing them is called.
// The language spec requires a new object created even if you don't access
// the $1 members.
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;

/**
 * Like textToHtml but also escapes double quotes so the result is safe inside
 * a double-quoted attribute value.
 * @param {string} str plain text.
 * @return {string} str with &, <, > and " replaced by character entities.
 */
function attribToHtml(str) {
  // '&' must be escaped first so the entities produced by the later steps
  // are not themselves re-escaped.
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;')
      .replace(pr_quot, '&quot;');
}

/**
 * Escapes html special characters to html.
 * @param {string} str plain text.
 * @return {string} str with &, < and > replaced by character entities.
 */
function textToHtml(str) {
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;');
}
var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;
var pr_nbspEnt = /&nbsp;/g;
// The text between "&#" and ";" of a well formed numeric character reference.
// Deliberately not global: it is used with test(), which is stateful on
// global regexps.
var pr_numericEntityBody = /^(?:[0-9]+|[xX][0-9A-Fa-f]+)$/;

/**
 * Unescapes html to plain text.
 * Decodes decimal and hexadecimal numeric character references and the named
 * entities lt, gt, apos, quot, nbsp (to a plain space) and amp.  Anything
 * else, including malformed numeric references, is left untouched.
 * @param {string} html
 * @return {string} the plain text equivalent.
 */
function htmlToText(html) {
  var pos = html.indexOf('&');
  if (pos < 0) { return html; }
  // Handle numeric entities specially.  We can't use functional substitution
  // since that doesn't work in older versions of Safari.
  // These should be rare since most browsers convert them to normal chars.
  for (--pos; (pos = html.indexOf('&#', pos + 1)) >= 0;) {
    var end = html.indexOf(';', pos);
    if (end < 0) { continue; }
    // The digits start right after the two characters "&#".
    var body = html.substring(pos + 2, end);
    if (!pr_numericEntityBody.test(body)) { continue; }
    var lead = body.charAt(0);
    var isHex = lead === 'x' || lead === 'X';
    var codePoint = parseInt(isHex ? body.substring(1) : body, isHex ? 16 : 10);
    if (codePoint > 0x10FFFF) { continue; }  // not a Unicode code point
    var decoded;
    if (codePoint > 0xFFFF) {
      // String.fromCharCode truncates to 16 bits, so build the surrogate
      // pair for supplementary characters by hand.
      var offset = codePoint - 0x10000;
      decoded = String.fromCharCode(
          0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    } else {
      decoded = String.fromCharCode(codePoint);
    }
    // The search resumes after pos, so a decoded '&' is never re-examined.
    html = html.substring(0, pos) + decoded + html.substring(end + 1);
  }

  // '&amp;' is handled last so that e.g. '&amp;lt;' decodes to '&lt;' rather
  // than to '<'.
  return html.replace(pr_ltEnt, '<')
      .replace(pr_gtEnt, '>')
      .replace(pr_aposEnt, "'")
      .replace(pr_quotEnt, '"')
      .replace(pr_nbspEnt, ' ')
      .replace(pr_ampEnt, '&');
}
/**
 * Is the given node's innerHTML normally unescaped?
 * @param {Node} node
 * @return {boolean} true only when the node's tagName is exactly 'XMP'.
 */
function isRawContent(node) {
  return 'XMP' === node.tagName;
}
// Not global on purpose: test() on a global regexp advances lastIndex, which
// would make the answer for one string depend on the previous call.
var newlineRe = /[\r\n]/;

/**
 * Are newlines and adjacent spaces significant in the given node's innerHTML?
 * @param {Node} node
 * @param {string} content the node's innerHTML.
 * @return {boolean} true if node is a PRE, if content contains no newline
 *     (so the answer does not matter), if no white-space style could be
 *     determined, or if the white-space style is 'pre' or 'pre-wrap'.
 */
function isPreformatted(node, content) {
  // PRE means preformatted, and is a very common case, so don't create
  // unnecessary computed style objects.
  if ('PRE' === node.tagName) { return true; }
  if (!newlineRe.test(content)) { return true; }  // Don't care
  var whitespace = '';
  // For disconnected nodes, IE has no currentStyle.
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    // Firefox makes a best guess if node is disconnected whereas Safari
    // returns the empty string.
    whitespace = window.getComputedStyle(node, null).whiteSpace;
  }
  // 'pre-wrap' preserves both newlines and runs of spaces, just like 'pre'.
  return !whitespace || whitespace === 'pre' || whitespace === 'pre-wrap';
}
/**
 * Walks the DOM rooted at node, appending a properly escaped serialization of
 * it (the equivalent of its outer HTML) to out.  Element and attribute names
 * are lower-cased; only element, text and CDATA nodes produce output.
 * @param {Node} node
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 * @param {boolean=} opt_sortAttrs if true, attributes are emitted sorted by
 *     name so that the output is stable.
 */
function normalizedHtml(node, out, opt_sortAttrs) {
  var i;
  switch (node.nodeType) {
    case 1:  // an element
      var name = node.tagName.toLowerCase();
      out.push('<', name);
      var attrs = node.attributes;
      var n = attrs.length;
      if (n) {
        if (opt_sortAttrs) {
          // Copy into a real array since node.attributes cannot be sorted in
          // place.
          var sortedAttrs = [];
          for (i = n; --i >= 0;) { sortedAttrs[i] = attrs[i]; }
          sortedAttrs.sort(function (a, b) {
              return (a.name < b.name) ? -1 : a.name === b.name ? 0 : 1;
            });
          attrs = sortedAttrs;
        }
        for (i = 0; i < n; ++i) {
          var attr = attrs[i];
          // Skip attributes that were not explicitly specified in the markup.
          if (!attr.specified) { continue; }
          out.push(' ', attr.name.toLowerCase(),
                   '="', attribToHtml(attr.value), '"');
        }
      }
      out.push('>');
      for (var child = node.firstChild; child; child = child.nextSibling) {
        normalizedHtml(child, out, opt_sortAttrs);
      }
      // br, link and img get no end tag unless they unexpectedly have
      // children.
      if (node.firstChild || !/^(?:br|link|img)$/.test(name)) {
        out.push('<\/', name, '>');
      }
      break;
    case 3: case 4:  // text
      out.push(textToHtml(node.nodeValue));
      break;
  }
}
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExps.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 * Capturing groups that are never back-referenced are turned into
 * non-capturing groups, and the remaining ones are renumbered so that
 * back-references still point at the right group in the combined pattern.
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far, counted across all inputs.
  var capturedGroupIndex = 0;

  // The output can carry the 'i' flag only if every input is either case
  // insensitive or contains no letters.  Otherwise the case-insensitive
  // inputs are rewritten with explicit [Aa] style sets ("case folded").
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  /** Maps one element of a character set body to its character code. */
  function decodeEscape(charsetPart) {
    if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); }
    switch (charsetPart.charAt(1)) {
      case 'b': return 8;
      case 't': return 9;
      case 'n': return 0xa;
      case 'v': return 0xb;
      case 'f': return 0xc;
      case 'r': return 0xd;
      case 'u': case 'x':
        return parseInt(charsetPart.substring(2), 16)
            || charsetPart.charCodeAt(1);
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7':
        return parseInt(charsetPart.substring(1), 8);
      default: return charsetPart.charCodeAt(1);
    }
  }

  /** Renders a character code so that it is literal inside a [...] set. */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    // '^' must be escaped too: ranges are emitted in sorted order, so an
    // unescaped '^' could land first and turn the set into a negated one.
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']' || ch === '^') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set such as [a-c] so that it matches both cases of
   * every ASCII letter it contains, e.g. [A-Ca-c].
   */
  function caseFoldCharset(charSet) {
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g'));
    // An empty set "[]" has nothing to fold.
    if (!charsetParts) { return charSet; }
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      switch (p) {
        case '\\B': case '\\b':
        case '\\D': case '\\d':
        case '\\S': case '\\s':
        case '\\W': case '\\w':
          // Character classes are copied through unchanged.
          groups.push(p);
          continue;
      }
      var start = decodeEscape(p);
      var end;
      if (i + 2 < n && '-' === charsetParts[i + 1]) {
        end = decodeEscape(charsetParts[i + 2]);
        i += 2;
      } else {
        end = start;
      }
      ranges.push([start, end]);
      // If the range might intersect letters, then expand it.
      if (!(end < 65 || start > 122)) {
        if (!(end < 65 || start > 90)) {
          // Add the lowercase counterpart of the uppercase portion.
          ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
        }
        if (!(end < 97 || start > 122)) {
          // Add the uppercase counterpart of the lowercase portion.
          ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        // Two adjacent characters need no dash between them.
        if (range[0] + 1 < range[1]) { out.push('-'); }
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Returns the source of regex rewritten so that it can be used as one
   * alternative of the combined pattern: leading anchors removed, groups
   * renumbered, and case folded if required.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the group the back-reference names, not the group that
          // was opened most recently.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
// Tri-state cache: null until the browser has been probed, then a boolean.
var PR_innerHtmlWorks = null;

/**
 * Returns the html content of node with whitespace normalized according to
 * whether the node is preformatted.
 * @param {Node} node
 * @return {string} html in which any significant whitespace is preserved.
 */
function getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
  // tag, so we detect that case and emulate innerHTML.
  if (null === PR_innerHtmlWorks) {
    var testNode = document.createElement('PRE');
    testNode.appendChild(
        document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />'));
    // A working innerHTML escapes the text node, leaving no raw '<'.
    PR_innerHtmlWorks = !/</.test(testNode.innerHTML);
  }

  if (PR_innerHtmlWorks) {
    var content = node.innerHTML;
    // XMP tags contain unescaped entities so require special handling.
    if (isRawContent(node)) {
      content = textToHtml(content);
    } else if (!isPreformatted(node, content)) {
      // Newlines are not significant here: drop those that directly follow a
      // <br> and collapse the rest (with any following indentation) to a
      // single space.
      content = content.replace(/(<br\s*\/?>)[\r\n]+/g, '$1')
          .replace(/(?:[\r\n]+[ \t]*)+/g, ' ');
    }
    return content;
  }

  // innerHTML is unreliable, so serialize the children by hand.
  var out = [];
  for (var child = node.firstChild; child; child = child.nextSibling) {
    normalizedHtml(child, out);
  }
  return out.join('');
}
/** returns a function that expand tabs to spaces.  This function can be fed
  * successive chunks of text, and will maintain its own internal state to
  * keep track of how tabs are expanded.
  * @param {number} tabWidth the number of characters between tab columns.
  * @return {function (string) : string} a function that takes
  *   plain text and return the text with tabs expanded.
  * @private
  */
function makeTabExpander(tabWidth) {
  // Padding is copied out of this string in chunks of up to 16 spaces.
  var SPACES = '                ';
  // Column of the next character on the current line.  Shared by every call
  // to the returned function so that chunks can be fed in one at a time.
  var charInLine = 0;

  return function (plainText) {
    // walk over each character looking for tabs and newlines.
    // On tabs, expand them.  On newlines, reset charInLine.
    // Otherwise increment charInLine
    var out = null;  // allocated lazily: most chunks contain no tabs
    var pos = 0;     // start of the text not yet copied to out
    for (var i = 0, n = plainText.length; i < n; ++i) {
      var ch = plainText.charAt(i);

      switch (ch) {
        case '\t':
          if (!out) { out = []; }
          out.push(plainText.substring(pos, i));
          // calculate how much space we need in front of this part
          // nSpaces is the amount of padding -- the number of spaces needed
          // to move us to the next column, where columns occur at factors of
          // tabWidth.
          var nSpaces = tabWidth - (charInLine % tabWidth);
          charInLine += nSpaces;
          for (; nSpaces >= 0; nSpaces -= SPACES.length) {
            out.push(SPACES.substring(0, nSpaces));
          }
          pos = i + 1;
          break;
        case '\n':
          charInLine = 0;
          break;
        default:
          ++charInLine;
      }
    }
    if (!out) { return plainText; }
    out.push(plainText.substring(pos));
    return out.join('');
  };
}
// Splits html into chunks.  Used with String.match, which, given the 'g'
// flag and no capturing groups, returns every chunk in order.
var pr_chunkPattern = new RegExp(
    '[^<]+'  // A run of characters other than '<'
    + '|<\!--[\\s\\S]*?--\>'  // an HTML comment
    + '|<!\\[CDATA\\[[\\s\\S]*?\\]\\]>'  // a CDATA section
    // a probable tag that should not be highlighted
    + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>'
    + '|<',  // A '<' that does not begin a larger chunk
    'g');
// Matches a chunk that is an HTML comment.
var pr_commentPrefix = /^<\!--/;
// Matches a chunk that is a CDATA section.
var pr_cdataPrefix = /^<!\[CDATA\[/;
// Matches a chunk that is a <br> tag, in any letter case.
var pr_brPrefix = /^<br\b/i;
// Captures (1) the '/' of an end tag, if any, and (2) the tag name.
var pr_tagNameRe = /^<(\/?)([a-zA-Z][a-zA-Z0-9]*)/;
/** split markup into chunks of html tags (style null) and
  * plain text (style {@link #PR_PLAIN}), converting tags which are
  * significant for tokenization (<br>) into their textual equivalent.
  *
  * @param {string} s html where whitespace is considered significant.
  * @return {Object} source code and extracted tags.  {@code source} is the
  *     plain text of s.  {@code tags} is a flat list of alternating
  *     positions in source and the markup that was removed at that position.
  * @private
  */
function extractTags(s) {
  // since the pattern has the 'g' modifier and defines no capturing groups,
  // this will return a list of all chunks which we then classify and wrap as
  // PR_Tokens
  var matches = s.match(pr_chunkPattern);
  var sourceBuf = [];
  var sourceBufLen = 0;   // total length of the strings in sourceBuf
  var extractedTags = [];
  if (matches) {
    for (var i = 0, n = matches.length; i < n; ++i) {
      var match = matches[i];
      if (match.length > 1 && match.charAt(0) === '<') {
        // Comments are dropped entirely.
        if (pr_commentPrefix.test(match)) { continue; }
        if (pr_cdataPrefix.test(match)) {
          // strip CDATA prefix and suffix.  Don't unescape since it's CDATA
          sourceBuf.push(match.substring(9, match.length - 3));
          sourceBufLen += match.length - 12;
        } else if (pr_brPrefix.test(match)) {
          // <br> tags are lexically significant so convert them to text.
          // This is undone later.
          sourceBuf.push('\n');
          ++sourceBufLen;
        } else {
          if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) {
            // A <span class="nocode"> will start a section that should be
            // ignored.  Continue walking the list until we see a matching end
            // tag.  Tag names are compared without regard to case since HTML
            // tag names are case-insensitive.
            var name = match.match(pr_tagNameRe)[2].toLowerCase();
            var depth = 1;
            var j;
            end_tag_loop:
            for (j = i + 1; j < n; ++j) {
              var name2 = matches[j].match(pr_tagNameRe);
              if (name2 && name2[2].toLowerCase() === name) {
                if (name2[1] === '/') {
                  if (--depth === 0) { break end_tag_loop; }
                } else {
                  ++depth;
                }
              }
            }
            if (j < n) {
              // Keep the whole nocode section, end tag included, as a single
              // extracted chunk and resume after it.
              extractedTags.push(
                  sourceBufLen, matches.slice(i, j + 1).join(''));
              i = j;
            } else {  // Ignore unclosed sections.
              extractedTags.push(sourceBufLen, match);
            }
          } else {
            extractedTags.push(sourceBufLen, match);
          }
        }
      } else {
        var literalText = htmlToText(match);
        sourceBuf.push(literalText);
        sourceBufLen += literalText.length;
      }
    }
  }
  return { source: sourceBuf.join(''), tags: extractedTags };
}
/**
 * True if the given tag contains a class attribute with the nocode class.
 * @param {string} tag the markup of a single start tag.
 * @return {boolean}
 */
function isNoCodeTag(tag) {
  return !!tag
      // First canonicalize the representation of attributes
      .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g,
               ' $1="$2$3$4"')
      // Then look for the attribute we want.
      .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/);
}
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.  Does nothing when sourceCode is empty.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object
 *    {source, basePos}; the decorations are read from the job's
 *    {@code decorations} property after the call.
 * @param {Array} out the decoration list to append to.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  var job = {
    source: sourceCode,
    basePos: basePos
  };
  langHandler(job);
  out.push.apply(out, job.decorations);
}
/** Given triples of [style, pattern, context] returns a lexing function,
  * The lexing function interprets the patterns to find token boundaries and
  * returns a decoration list of the form
  * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
  * where index_n is an index into the sourceCode, and style_n is a style
  * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
  * all characters in sourceCode[index_n-1:index_n].
  *
  * The stylePatterns is a list whose elements have the form
  * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
  *
  * Style is a style constant like PR_PLAIN, or can be a string of the
  * form 'lang-FOO', where FOO is a language extension describing the
  * language of the portion of the token in $1 after pattern executes.
  * E.g., if style is 'lang-lisp', and group 1 contains the text
  * '(hello (world))', then that portion of the token will be passed to the
  * registered lisp handler for formatting.
  * The text before and after group 1 will be restyled using this decorator
  * so decorators should take care that this doesn't result in infinite
  * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
  * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
  * '<script>foo()<\/script>', which would cause the current decorator to
  * be called with '<script>' which would not match the same rule since
  * group 1 must not be empty, so it would be instead styled as PR_TAG by
  * the generic tag rule.  The handler registered for the 'js' extension would
  * then be called with 'foo()', and finally, the current decorator would
  * be called with '<\/script>' which would not match the original rule and
  * so the generic tag rule would identify it as a tag.
  *
  * Pattern must only match prefixes, and if it matches a prefix, then that
  * match is considered a token with the same style.
  *
  * Context is applied to the last non-whitespace, non-comment token
  * recognized.
  *
  * Shortcut is an optional string of characters, any of which, if the first
  * character, guarantee that this pattern and only this pattern matches.
  *
  * @param {Array} shortcutStylePatterns patterns that always start with
  *   a known character.  Must have a shortcut string.
  * @param {Array} fallthroughStylePatterns patterns that will be tried in
  *   order if the shortcut ones fail.  May have shortcuts.
  *
  * @return {function (Object)} a function that takes a job object and stores
  *   the list of decorations for job.source in job.decorations.
  */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps the first character of a token to the only pattern that can match
  // a token starting with that character.
  var shortcuts = {};
  // A single global regexp that splits source into tokens.
  var tokenizer;
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var allRegexs = [];
    // Set of the string forms of the regexps already in allRegexs, so each
    // distinct pattern is only added to the tokenizer once.
    var regexKeys = {};
    for (var i = 0, n = allPatterns.length; i < n; ++i) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
      var regex = patternParts[1];
      var k = '' + regex;
      if (!regexKeys.hasOwnProperty(k)) {
        allRegexs.push(regex);
        regexKeys[k] = null;
      }
    }
    // Catch-all so that a character matched by no pattern still becomes a
    // token of its own and the tokenizer always makes progress.
    allRegexs.push(/[\0-\uffff]/);
    tokenizer = combinePrefixPatterns(allRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;
  var notWs = /\S/;

  /**
   * Lexes job.source and produces an output array job.decorations of style
   * classes preceded by the position at which they start in job.source in
   * order.
   *
   * @param {Object} job an object like {@code
   *    source: {string} sourceText plain text,
   *    basePos: {int} position of job.source in the larger chunk of
   *        sourceCode.
   * }
   */
  var decorate = function (job) {
    var sourceCode = job.source, basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var pos = 0;  // index into sourceCode
    var tokens = sourceCode.match(tokenizer) || [];
    // Maps token text to the style chosen for it so that repeated tokens are
    // classified only once.  Embedded-language tokens are never cached.
    var styleCache = {};

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var style = styleCache[token];
      var match = void 0;

      var isEmbedded;
      if (typeof style === 'string') {
        isEmbedded = false;
      } else {
        var patternParts = shortcuts[token.charAt(0)];
        if (patternParts) {
          match = token.match(patternParts[1]);
          style = patternParts[0];
        } else {
          for (var i = 0; i < nPatterns; ++i) {
            patternParts = fallthroughStylePatterns[i];
            match = token.match(patternParts[1]);
            if (match) {
              style = patternParts[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-*' rule whose group 1 did not participate has no
          // embedded source to hand off, so style the token as source.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      var tokenStart = pos;
      pos += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
      } else {  // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        var lang = style.substring(5);
        // Decorate the left of the embedded source
        appendDecorations(
            basePos + tokenStart,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        // Decorate the embedded source
        appendDecorations(
            basePos + tokenStart + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            basePos + tokenStart + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }
    }
    job.decorations = decorations;
  };
  return decorate;
}
/** Returns a function that produces a list of decorations from source text.
  *
  * This code treats ", ', and ` as string delimiters, and \ as a string
  * escape.  It does not recognize perl's qq() style strings.
  * It has no special handling for double delimiter escapes as in basic, or
  * the tripled delimiters used in python, but should work on those regardless
  * although in those cases a single string literal may be broken up into
  * multiple adjacent string literals.
  *
  * It recognizes C, C++, and shell style comments.
  *
  * @param {Object} options a set of optional parameters.  The keys read are
  *     'tripleQuotedStrings', 'multiLineStrings', 'verbatimStrings',
  *     'hashComments', 'cStyleComments' and 'regexLiterals' (all tested for
  *     truthiness) and 'keywords', a whitespace separated list of keywords
  *     which may be omitted.
  * @return {function (Object)} a function that examines the source code
  *     in the input job and builds the decoration list.
  */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  if (options['hashComments']) {
    if (options['cStyleComments']) {
      // Stop C preprocessor declarations at an unclosed open comment
      shortcutStylePatterns.push(
          [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
           null, '#']);
      // The target of an #include such as <stdio.h> or <vector>.
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  // The keyword list is optional; a missing one behaves like an empty one.
  var keywords = (options['keywords'] || '').replace(/^\s+|\s+$/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null]);
  }

  shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);
  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE, /^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/, null],
      [PR_PLAIN, /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, including one that starts with
           // the decimal point such as .5
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
// Lexer built from every known keyword with hash comments, C style comments,
// multi-line strings and regex literals all enabled.
var decorateSourceOptions = {};
decorateSourceOptions['keywords'] = ALL_KEYWORDS;
decorateSourceOptions['hashComments'] = true;
decorateSourceOptions['cStyleComments'] = true;
decorateSourceOptions['multiLineStrings'] = true;
decorateSourceOptions['regexLiterals'] = true;
var decorateSource = sourceDecorator(decorateSourceOptions);
/** Breaks {@code job.source} around style boundaries in
  * {@code job.decorations} while re-interleaving {@code job.extractedTags},
  * and leaves the result in {@code job.prettyPrintedHtml}.
  * @param {Object} job like {
  *    source: {string} source as plain text,
  *    extractedTags: {Array.<number|string>} extractedTags chunks of raw
  *                   html preceded by their position in {@code job.source}
  *                   in order
  *    decorations: {Array.<number|string>} an array of style classes preceded
  *                 by the position at which they start in job.source in order
  *    sourceNode: {Node|undefined} the element being rewritten.  Optional:
  *                 jobs built by prettyPrintOne have none, in which case no
  *                 line numbering is done.
  * }
  * @private
  */
function recombineTagsAndDecorations(job) {
  var sourceText = job.source;
  var extractedTags = job.extractedTags;
  var decorations = job.decorations;
  // prettyPrintOne works from an HTML string and supplies no DOM node, so
  // every use of sourceNode below must tolerate its absence.
  var sourceNode = job.sourceNode || null;

  var html = [];
  // index past the last char in sourceText written to html
  var outputIdx = 0;

  var openDecoration = null;
  var currentDecoration = null;
  var tagPos = 0;  // index into extractedTags
  var decPos = 0;  // index into decorations
  var tabExpander = makeTabExpander(window['PR_TAB_WIDTH']);

  // &#160; is the same as &nbsp; but works in XML as well as HTML.  It is
  // written as an entity so that it survives any re-encoding of this file.
  var nbsp = '&#160;';
  var adjacentSpaceRe = /([\r\n ]) /g;
  var startOrSpaceRe = /(^| ) /gm;
  var newlineRe = /\r\n?|\n/g;
  var trailingSpaceRe = /[ \r\n]$/;
  var lastWasSpace = true;  // the last text chunk emitted ended with a space.

  // See bug 71 and http://stackoverflow.com/questions/136443/why-doesnt-ie7-
  var isIE678 = window['_pr_isIE6']();
  var lineBreakHtml;
  if (!isIE678) {
    lineBreakHtml = '<br />';
  } else if (sourceNode && sourceNode.tagName === 'PRE') {
    // Use line feeds instead of <br>s so that copying and pasting works
    // on IE.
    // Doing this on other browsers breaks lots of stuff since \r\n is
    // treated as two newlines on Firefox.
    lineBreakHtml = isIE678 === 6
        ? nbsp + '\r\n'
        : (isIE678 === 7 ? nbsp + '<br>\r' : nbsp + '\r');
  } else {
    // IE collapses multiple adjacent <br>s into 1 line break.
    // Prefix every newline with a non-breaking space to prevent that.
    lineBreakHtml = nbsp + '<br />';
  }

  // Look for a class like linenums or linenums:<n> where <n> is the 1-indexed
  // number of the first line.
  var sourceClassName = sourceNode ? sourceNode.className : null;
  var numberLines = typeof sourceClassName === 'string'
      ? sourceClassName.match(/\blinenums\b(?::(\d+))?/)
      : null;
  var lineBreaker;
  if (numberLines) {
    // One precomputed list-item boundary per value of (line number mod 10).
    var lineBreaks = [];
    for (var i = 0; i < 10; ++i) {
      lineBreaks[i] = lineBreakHtml + '</li><li class="L' + i + '">';
    }
    var lineNum = numberLines[1] && numberLines[1].length
        ? numberLines[1] - 1 : 0;  // Lines are 1-indexed
    html.push('<ol class="linenums"><li class="L', (lineNum) % 10, '"');
    if (lineNum) {
      html.push(' value="', lineNum + 1, '"');
    }
    html.push('>');
    lineBreaker = function () {
      var lb = lineBreaks[++lineNum % 10];
      // If a decoration is open, we need to close it before closing a list-item
      // and reopen it on the other side of the list item.
      return openDecoration
          ? ('</span>' + lb + '<span class="' + openDecoration + '">') : lb;
    };
  } else {
    lineBreaker = lineBreakHtml;
  }

  // A helper function that is responsible for opening sections of decoration
  // and outputting properly escaped chunks of source
  function emitTextUpTo(sourceIdx) {
    if (sourceIdx > outputIdx) {
      if (openDecoration && openDecoration !== currentDecoration) {
        // Close the current decoration
        html.push('</span>');
        openDecoration = null;
      }
      if (!openDecoration && currentDecoration) {
        openDecoration = currentDecoration;
        html.push('<span class="', openDecoration, '">');
      }
      // This interacts badly with some wikis which introduces paragraph tags
      // into pre blocks for some strange reason.
      // It's necessary for IE though which seems to lose the preformattedness
      // of <pre> tags when their innerHTML is assigned.
      // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html
      // and it serves to undo the conversion of <br>s to newlines done in
      // chunkify.
      var htmlChunk = textToHtml(
          tabExpander(sourceText.substring(outputIdx, sourceIdx)))
          .replace(lastWasSpace
                   ? startOrSpaceRe
                   : adjacentSpaceRe, '$1' + nbsp);
      // Keep track of whether we need to escape space at the beginning of the
      // next chunk.
      lastWasSpace = trailingSpaceRe.test(htmlChunk);
      html.push(htmlChunk.replace(newlineRe, lineBreaker));
      outputIdx = sourceIdx;
    }
  }

  while (true) {
    // Determine if we're going to consume a tag this time around.  Otherwise
    // we consume a decoration or exit.
    var outputTag;
    if (tagPos < extractedTags.length) {
      if (decPos < decorations.length) {
        // Pick one giving preference to extractedTags since we shouldn't open
        // a new style that we're going to have to immediately close in order
        // to output a tag.
        outputTag = extractedTags[tagPos] <= decorations[decPos];
      } else {
        outputTag = true;
      }
    } else {
      outputTag = false;
    }
    // Consume either a decoration or a tag or exit.
    if (outputTag) {
      emitTextUpTo(extractedTags[tagPos]);
      if (openDecoration) {
        // Close the current decoration
        html.push('</span>');
        openDecoration = null;
      }
      html.push(extractedTags[tagPos + 1]);
      tagPos += 2;
    } else if (decPos < decorations.length) {
      emitTextUpTo(decorations[decPos]);
      currentDecoration = decorations[decPos + 1];
      decPos += 2;
    } else {
      break;
    }
  }
  emitTextUpTo(sourceText.length);
  if (openDecoration) {
    html.push('</span>');
  }
  if (numberLines) { html.push('</li></ol>'); }
  job.prettyPrintedHtml = html.join('');
}
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/** Register a language handler for the given file extensions.
  * The first handler registered for an extension wins; a later attempt to
  * register the same extension is ignored and, when a console is available,
  * reported with a warning.
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        source: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.source in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  // Borrow hasOwnProperty from Object.prototype: the registry is keyed by
  // arbitrary extension names, one of which could be "hasOwnProperty" and
  // would otherwise shadow the method.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!hasOwn.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if ('console' in window) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
/**
 * Looks up the handler registered for a language extension, falling back to
 * one of the two default handlers when the extension is missing or unknown.
 * @param {string|null|undefined} extension a key such as 'js' or 'cpp'.
 * @param {string} source the plain text to be decorated; only inspected when
 *     a default has to be chosen.
 * @return {function (Object)} the value stored in langHandlerRegistry for
 *     the chosen key.
 */
function langHandlerForExtension(extension, source) {
  // Use Object.prototype's hasOwnProperty so that a registered extension
  // named "hasOwnProperty" cannot shadow the membership test.
  var isRegistered = !!extension
      && Object.prototype.hasOwnProperty.call(langHandlerRegistry, extension);
  if (!isRegistered) {
    // Treat it as markup if the first non-whitespace character is a <.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}
// Source that does not look like markup is handled by the generic lexer.
registerLangHandler(decorateSource, ['default-code']);

// Markup: splits a document into text, declarations, comments, embedded
// blocks in other languages, and tags which are handed to 'in.tag'.
var markupFallthroughPatterns = [
  [PR_PLAIN,       /^[^<?]+/],
  [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
  [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
  // Unescaped content in an unknown language
  ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
  ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
  [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
  ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
  // Unescaped content in javascript.  (Or possibly vbscript).
  ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
  // Contains unescaped stylesheet content
  ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
  ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
];
registerLangHandler(
    createSimpleLexer([], markupFallthroughPatterns),
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);

// The inside of a single tag: tag name, attribute names and values, with
// event handler attributes treated as javascript and style attributes as css.
var tagShortcutPatterns = [
  [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
  [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
];
var tagFallthroughPatterns = [
  [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
  [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
  ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
  [PR_PUNCTUATION,  /^[=<>\/]+/],
  ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
  ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
  ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
  ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
  ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
  ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
];
registerLangHandler(
    createSimpleLexer(tagShortcutPatterns, tagFallthroughPatterns),
    ['in.tag']);

// An unquoted attribute value is styled as a whole.
var unquotedValuePatterns = [[PR_ATTRIB_VALUE, /^[\s\S]+/]];
registerLangHandler(
    createSimpleLexer([], unquotedValuePatterns), ['uq.val']);
// Registers the built-in handlers for programming languages.  Each table row
// pairs the sourceDecorator options for a language family with the file
// extensions it is registered under; rows are registered in order.
(function () {
  var languageTable = [
    [{ 'keywords': CPP_KEYWORDS,
       'hashComments': true,
       'cStyleComments': true },
     ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']],
    [{ 'keywords': 'null true false' },
     ['json']],
    [{ 'keywords': CSHARP_KEYWORDS,
       'hashComments': true,
       'cStyleComments': true,
       'verbatimStrings': true },
     ['cs']],
    [{ 'keywords': JAVA_KEYWORDS,
       'cStyleComments': true },
     ['java']],
    [{ 'keywords': SH_KEYWORDS,
       'hashComments': true,
       'multiLineStrings': true },
     ['bsh', 'csh', 'sh']],
    [{ 'keywords': PYTHON_KEYWORDS,
       'hashComments': true,
       'multiLineStrings': true,
       'tripleQuotedStrings': true },
     ['cv', 'py']],
    [{ 'keywords': PERL_KEYWORDS,
       'hashComments': true,
       'multiLineStrings': true,
       'regexLiterals': true },
     ['perl', 'pl', 'pm']],
    [{ 'keywords': RUBY_KEYWORDS,
       'hashComments': true,
       'multiLineStrings': true,
       'regexLiterals': true },
     ['rb']],
    [{ 'keywords': JSCRIPT_KEYWORDS,
       'cStyleComments': true,
       'regexLiterals': true },
     ['js']]
  ];
  for (var row = 0; row < languageTable.length; ++row) {
    var lexerOptions = languageTable[row][0];
    var extensions = languageTable[row][1];
    registerLangHandler(sourceDecorator(lexerOptions), extensions);
  }
  // The body of a regular expression literal is styled as one string.
  registerLangHandler(
      createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
})();
/**
 * Runs the whole pipeline for one job: extracts tags from
 * job.sourceCodeHtml, lets the language handler chosen from
 * job.langExtension compute decorations, and recombines everything into
 * job.prettyPrintedHtml.  If any step throws, the error is logged when a
 * console exists and job.prettyPrintedHtml keeps the unmodified input html.
 * @param {Object} job reads sourceCodeHtml and langExtension; writes source,
 *     basePos, extractedTags and prettyPrintedHtml.
 */
function applyDecorator(job) {
  var inputHtml = job.sourceCodeHtml;
  var requestedLang = job.langExtension;

  // Fall back to the untouched input should anything below throw.
  job.prettyPrintedHtml = inputHtml;

  try {
    // Separate the markup from the plain text of the code.
    var extraction = extractTags(inputHtml);
    /** Plain text. @type {string} */
    var plainText = extraction.source;
    job.source = plainText;
    job.basePos = 0;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are tags that were extracted at that position.
      * @type {Array.<number|string>}
      */
    job.extractedTags = extraction.tags;

    // Let the handler for the language fill in job.decorations ...
    var handler = langHandlerForExtension(requestedLang, plainText);
    handler(job);
    // ... then weave decorations and tags back into one html string.
    recombineTagsAndDecorations(job);
  } catch (e) {
    if ('console' in window) {
      var report = e && e['stack'] ? e['stack'] : e;
      console['log'](report);
    }
  }
}
/**
 * Pretty prints a chunk of html-escaped source code.
 * @param {string} sourceCodeHtml the code as html.
 * @param {string=} opt_langExtension the language extension used to pick a
 *     handler; passed through as job.langExtension.
 * @return {string} whatever applyDecorator leaves in job.prettyPrintedHtml.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension) {
  var request = {};
  request.sourceCodeHtml = sourceCodeHtml;
  request.langExtension = opt_langExtension;
  applyDecorator(request);
  return request.prettyPrintedHtml;
}
/**
 * Finds every pre, code and xmp element whose class contains "prettyprint"
 * and replaces its content with the pretty printed html.  Elements nested
 * inside another prettyprint pre/code/xmp are skipped.
 * @param {Function=} opt_whenDone called with no arguments once all elements
 *     have been processed.
 */
function prettyPrint(opt_whenDone) {
  function byTagName(tn) { return document.getElementsByTagName(tn); }

  // True if node is a pre, code or xmp element marked prettyprint.  tagName
  // is upper case in HTML documents and keeps its source case in XML ones,
  // so it is normalized before comparing.
  function isPrettyPrintContainer(node) {
    var tagName = node.tagName;
    if (typeof tagName !== 'string') { return false; }
    tagName = tagName.toLowerCase();
    if (tagName !== 'pre' && tagName !== 'code' && tagName !== 'xmp') {
      return false;
    }
    return !!node.className && node.className.indexOf('prettyprint') >= 0;
  }

  // fetch a list of nodes to rewrite
  var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return (new Date).getTime(); } };
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;
  var prettyPrintingJob;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock.now() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock.now() < endTime; k++) {
      var cs = elements[k];
      if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to PR_registerLangHandler.
        var langExtension = cs.className.match(/\blang-(\w+)\b/);
        if (langExtension) { langExtension = langExtension[1]; }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (isPrettyPrintContainer(p)) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // fetch the content as a snippet of properly escaped HTML.
          // Firefox adds newlines at the end.
          var content = getInnerHtml(cs);
          content = content.replace(/(?:\r\n?|\n)$/, '');

          // do the pretty printing
          prettyPrintingJob = {
            sourceCodeHtml: content,
            langExtension: langExtension,
            sourceNode: cs
          };
          applyDecorator(prettyPrintingJob);
          replaceWithPrettyPrintedHtml();
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  function replaceWithPrettyPrintedHtml() {
    var newContent = prettyPrintingJob.prettyPrintedHtml;
    if (!newContent) { return; }
    var cs = prettyPrintingJob.sourceNode;

    // push the prettified html back into the tag.
    if (!isRawContent(cs)) {
      // just replace the old html with the new
      cs.innerHTML = newContent;
    } else {
      // we need to change the tag to a <pre> since <xmp>s do not allow
      // embedded tags such as the span tags used to attach styles to
      // sections of source code.
      var pre = document.createElement('PRE');
      for (var i = 0; i < cs.attributes.length; ++i) {
        var a = cs.attributes[i];
        if (a.specified) {
          var aname = a.name.toLowerCase();
          if (aname === 'class') {
            pre.className = a.value;  // For IE 6
          } else {
            pre.setAttribute(a.name, a.value);
          }
        }
      }
      pre.innerHTML = newContent;

      // remove the old
      cs.parentNode.replaceChild(pre, cs);
    }
  }

  doWork();
}
// Public entry points, attached to window under quoted names.
window['PR_normalizedHtml'] = normalizedHtml;
window['prettyPrintOne'] = prettyPrintOne;
window['prettyPrint'] = prettyPrint;

// The PR namespace exposes the lexer building blocks and the style class
// constants.
var prNamespace = {};
prNamespace['combinePrefixPatterns'] = combinePrefixPatterns;
prNamespace['createSimpleLexer'] = createSimpleLexer;
prNamespace['registerLangHandler'] = registerLangHandler;
prNamespace['sourceDecorator'] = sourceDecorator;
prNamespace['PR_ATTRIB_NAME'] = PR_ATTRIB_NAME;
prNamespace['PR_ATTRIB_VALUE'] = PR_ATTRIB_VALUE;
prNamespace['PR_COMMENT'] = PR_COMMENT;
prNamespace['PR_DECLARATION'] = PR_DECLARATION;
prNamespace['PR_KEYWORD'] = PR_KEYWORD;
prNamespace['PR_LITERAL'] = PR_LITERAL;
prNamespace['PR_NOCODE'] = PR_NOCODE;
prNamespace['PR_PLAIN'] = PR_PLAIN;
prNamespace['PR_PUNCTUATION'] = PR_PUNCTUATION;
prNamespace['PR_SOURCE'] = PR_SOURCE;
prNamespace['PR_STRING'] = PR_STRING;
prNamespace['PR_TAG'] = PR_TAG;
prNamespace['PR_TYPE'] = PR_TYPE;
window['PR'] = prNamespace;
})(); |