// Copyright (C) 2006 Google Inc. | |
// | |
// Licensed under the Apache License, Version 2.0 (the "License"); | |
// you may not use this file except in compliance with the License. | |
// You may obtain a copy of the License at | |
// | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// | |
// Unless required by applicable law or agreed to in writing, software | |
// distributed under the License is distributed on an "AS IS" BASIS, | |
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
// See the License for the specific language governing permissions and | |
// limitations under the License. | |
/** | |
* @fileoverview | |
* some functions for browser-side pretty printing of code contained in html. | |
* <p> | |
* | |
* For a fairly comprehensive set of languages see the | |
* <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> | |
* file that came with this source. At a minimum, the lexer should work on a | |
* number of languages including C and friends, Java, Python, Bash, SQL, HTML, | |
* XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk | |
* and a subset of Perl, but, because of commenting conventions, doesn't work on | |
* Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. | |
* <p> | |
* Usage: <ol> | |
* <li> include this source file in an html page via | |
* {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} | |
* <li> define style rules. See the example page for examples. | |
* <li> mark the {@code <pre>} and {@code <code>} tags in your source with | |
* {@code class=prettyprint.} | |
* You can also use the (html deprecated) {@code <xmp>} tag, but the pretty | |
* printer needs to do more substantial DOM manipulations to support that, so | |
* some css styles may not be preserved. | |
* </ol> | |
* That's it. I wanted to keep the API as simple as possible, so there's no | |
* need to specify which language the code is in, but if you wish, you can add | |
* another class to the {@code <pre>} or {@code <code>} element to specify the | |
* language, as in {@code <pre class="prettyprint lang-java">}. Any class that | |
* starts with "lang-" followed by a file extension, specifies the file type. | |
* See the "lang-*.js" files in this directory for code that implements | |
* per-language file handlers. | |
* <p> | |
* Change log:<br> | |
* cbeust, 2006/08/22 | |
* <blockquote> | |
* Java annotations (start with "@") are now captured as literals ("lit") | |
* </blockquote> | |
* @requires console | |
* @overrides window | |
*/ | |
// JSLint declarations | |
/*global console, document, navigator, setTimeout, window */ | |
/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events. If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
// NOTE(review): the quoted property names look deliberate (presumably so a
// minifier keeps the public names intact) -- keep the bracket form.
window['PR_SHOULD_USE_CONTINUATION'] = true;
/** The number of characters between tab columns. */
window['PR_TAB_WIDTH'] = 8;
// The four public entry points are declared here, in a single chained
// assignment, as undefined placeholders. Each doc comment below describes the
// property that follows it.
// NOTE(review): the real values are presumably assigned later in this file.
/**
 * Walks the DOM returning a properly escaped version of innerHTML.
 *
 * @param {Node} node
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
window['PR_normalizedHtml']
/**
 * Contains functions for creating and registering new language handlers.
 *
 * @type {Object}
 */
= window['PR']
/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
= window['prettyPrintOne']
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
= window['prettyPrint'] = void 0;
/**
 * Browser detection. Despite the name, this reports IE 6, 7 and 8.
 *
 * The first call sniffs the user agent and then replaces itself with a
 * function that returns the cached answer.
 *
 * @extern
 * @returns false if not IE 6-8, otherwise the major version as a number.
 */
window['_pr_isIE6'] = function() {
  // Use typeof: merely referencing an undeclared `navigator` would throw a
  // ReferenceError in hosts that do not define it.
  var userAgent = (typeof navigator !== 'undefined' && navigator
      && navigator.userAgent) || '';
  var versionMatch = userAgent.match(/\bMSIE ([678])\./);
  var ieVersion = versionMatch ? +versionMatch[1] : false;
  // Memoize: later calls skip the user-agent parsing.
  window['_pr_isIE6'] = function() {
    return ieVersion;
  };
  return ieVersion;
};
(function() { | |
// Keyword lists for various languages.
// Every list is a string of space-terminated words so that lists can be
// concatenated directly to build larger ones.
var FLOW_CONTROL_KEYWORDS = "break continue do else for if return while ";
var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default "
    + "double enum extern float goto int long register short signed sizeof "
    + "static struct switch typedef union unsigned void volatile ";
var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import "
    + "new operator private protected public this throw true try typeof ";
var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool "
    + "concept concept_map const_cast constexpr decltype "
    + "dynamic_cast explicit export friend inline late_check "
    + "mutable namespace nullptr reinterpret_cast static_assert static_cast "
    + "template typeid typename using virtual wchar_t where ";
var JAVA_KEYWORDS = COMMON_KEYWORDS
    + "abstract boolean byte extends final finally implements import "
    + "instanceof null native package strictfp super synchronized throws "
    + "transient ";
var CSHARP_KEYWORDS = JAVA_KEYWORDS
    + "as base by checked decimal delegate descending event "
    + "fixed foreach from group implicit in interface internal into is lock "
    + "object out override orderby params partial readonly ref sbyte sealed "
    + "stackalloc string select uint ulong unchecked unsafe ushort var ";
var JSCRIPT_KEYWORDS = COMMON_KEYWORDS
    + "debugger eval export function get null set undefined var with "
    + "Infinity NaN ";
var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for "
    + "goto if import last local my next no our print package redo require "
    + "sub undef unless until use wantarray while BEGIN END ";
var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del "
    + "elif except exec finally from global import in is lambda "
    + "nonlocal not or pass print raise try with yield "
    + "False True None ";
var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS
    + "alias and begin case class def"
    + " defined elsif end ensure false in module next nil not or redo rescue "
    + "retry self super then true undef unless until when yield BEGIN END ";
var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi "
    + "function in local set then until ";
// Union of every list above (duplicates are not removed).
var ALL_KEYWORDS = (CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS
    + PERL_KEYWORDS + PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS);
// Token style names. These correspond to css classes.
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value. e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text, i.e. anything not otherwise classified. */
var PR_PLAIN = 'pln';
/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';
/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';
/**
 * Source text of a regular expression matching the tokens that can precede a
 * regular expression literal in javascript.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html
 * has the full list, but I've removed ones that might be problematic when
 * seen in languages that don't support regular expression literals.
 *
 * <p>
 * Specifically, I've removed any keywords that can't precede a regexp literal
 * in a syntactically legal javascript program, and I've removed the "in"
 * keyword since it's not a keyword in many languages, and might be used as a
 * count of inches.
 *
 * <p>
 * The link above does not accurately describe EcmaScript rules since it
 * fails to distinguish between (a=++/b/i) and (a++/b/i) but it works very
 * well in practice.
 *
 * <p>
 * CAVEAT: this does not properly handle the case where a regular expression
 * immediately follows another since a regular expression may have flags for
 * case-sensitivity and the like. Having regexp tokens adjacent is not valid
 * in any language I'm aware of, so I'm punting.
 * TODO: maybe style special characters inside a regexp as punctuation.
 *
 * @type {string}
 * @private
 */
var REGEXP_PRECEDER_PATTERN = (function() {
  // "+" and "-" are covered by the [+-] class in the prefix below; ".",
  // ".." and "..." are deliberately absent from this list.
  var preceders = [
    "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=",
    "(", "*", "*=", "+=", ",", "-=", "->",
    "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=", "=", "==", "===",
    ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "^", "^=", "^^",
    "^^=", "{", "|", "|=", "||", "||=", "~" /* handles =~ and !~ */,
    "break", "case", "continue", "delete", "do", "else", "finally",
    "instanceof", "return", "throw", "try", "typeof" ];
  // "^^" is left as a real start-of-input anchor.
  var pattern = '(?:^^|[+-]';
  for (var i = 0; i < preceders.length; ++i) {
    // Backslash-escape everything except =, <, >, :, & and lower-case
    // letters, so that punctuation is matched literally.
    pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1');
  }
  pattern += ')\\s*'; // matches at end, and matches empty string
  return pattern;
})();
// Define regexps here so that the interpreter doesn't have to create an
// object each time the function containing them is called.
// The language spec requires a new object created even if you don't access
// the $1 members.
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;
/**
 * Like textToHtml but also escapes double quotes, so the result is safe
 * inside a double-quoted attribute value.
 *
 * @param {string} str plain text.
 * @return {string} str with &, <, > and " replaced by entities.
 */
function attribToHtml(str) {
  return textToHtml(str).replace(pr_quot, '&quot;');
}
/**
 * Escapes html special characters to html.
 *
 * @param {string} str plain text.
 * @return {string} str with &, < and > replaced by entities.
 */
function textToHtml(str) {
  // '&' must be escaped first so that the ampersands introduced by the
  // other entities are not escaped a second time.
  return str.replace(pr_amp, '&amp;').replace(pr_lt, '&lt;').replace(pr_gt,
      '&gt;');
}
// Named entities understood by htmlToText.
var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;
var pr_nbspEnt = /&nbsp;/g;
// The text between "&#" and ";" of a well formed numeric entity.
var pr_numericEntityBody = /^(?:\d+|[xX][0-9a-fA-F]+)$/;
/**
 * Unescapes html to plain text.
 *
 * Decodes decimal and hexadecimal numeric entities as well as the named
 * entities lt, gt, apos, quot, nbsp (to a plain space) and amp. Every entity
 * is decoded exactly once; anything else is left untouched.
 *
 * @param {string} html escaped text.
 * @return {string} the plain text equivalent.
 */
function htmlToText(html) {
  var pos = html.indexOf('&');
  if (pos < 0) {
    return html;
  }
  // Handle numeric entities specially. We can't use functional substitution
  // since that doesn't work in older versions of Safari.
  // These should be rare since most browsers convert them to normal chars.
  for (--pos; (pos = html.indexOf('&#', pos + 1)) >= 0;) {
    var end = html.indexOf(';', pos);
    if (end < 0) {
      break; // no terminator anywhere after this point
    }
    // The digits start right after the two characters "&#".
    var num = html.substring(pos + 2, end);
    if (!pr_numericEntityBody.test(num)) {
      continue; // not a numeric entity, e.g. "&# and so on;"
    }
    var radix = 10;
    if (num.charAt(0) === 'x' || num.charAt(0) === 'X') {
      num = num.substring(1);
      radix = 16;
    }
    var codePoint = parseInt(num, radix);
    var decoded;
    if (codePoint <= 0xFFFF) {
      decoded = String.fromCharCode(codePoint);
    } else if (codePoint <= 0x10FFFF) {
      // Outside the BMP: String.fromCharCode needs a surrogate pair.
      var offset = codePoint - 0x10000;
      decoded = String.fromCharCode(
          0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    } else {
      continue; // not a valid code point
    }
    if (decoded === '&') {
      // Keep a decoded ampersand escaped until the final pass below so that
      // it cannot combine with the following text into a named entity
      // (e.g. "&#38;lt;" must decode to "&lt;", not "<").
      decoded = '&amp;';
    }
    html = html.substring(0, pos) + decoded + html.substring(end + 1);
  }
  // '&amp;' goes last so that its output is never decoded a second time.
  return html.replace(pr_ltEnt, '<').replace(pr_gtEnt, '>').replace(
      pr_aposEnt, "'").replace(pr_quotEnt, '"').replace(pr_nbspEnt, ' ')
      .replace(pr_ampEnt, '&');
}
/**
 * Is the given node's innerHTML normally unescaped?
 * Only nodes whose tagName is exactly 'XMP' qualify.
 *
 * @param {Node} node
 * @return {boolean}
 */
function isRawContent(node) {
  return 'XMP' === node.tagName;
}
var newlineRe = /[\r\n]/g;
/**
 * Are newlines and adjacent spaces significant in the given node's innerHTML?
 *
 * @param {Node} node the element whose content is being examined.
 * @param {string} content the node's innerHTML.
 * @return {boolean} true if the content must be kept as is: the node is a
 *     PRE, the content has no newlines at all, no white-space style can be
 *     determined, or the white-space style starts with "pre" (pre, pre-wrap,
 *     pre-line).
 */
function isPreformatted(node, content) {
  // PRE means preformatted, and is a very common case, so don't create
  // unnecessary computed style objects.
  if ('PRE' === node.tagName) {
    return true;
  }
  // newlineRe is a global regex, so test() resumes from lastIndex. Reset it
  // around the call; otherwise a hit in one call makes a later call start
  // searching in the middle of an unrelated string.
  newlineRe.lastIndex = 0;
  var hasNewline = newlineRe.test(content);
  newlineRe.lastIndex = 0;
  if (!hasNewline) {
    return true; // Don't care
  }
  var whitespace = '';
  // For disconnected nodes, IE has no currentStyle.
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    // Firefox makes a best guess if node is disconnected whereas Safari
    // returns the empty string.
    whitespace = window.getComputedStyle(node, null).whiteSpace;
  }
  // pre-wrap and pre-line keep newlines significant just like pre does.
  return !whitespace || String(whitespace).substring(0, 3) === 'pre';
}
/**
 * Walks the DOM below node, appending a properly escaped serialization of it
 * to out.
 *
 * Elements are written with lower-case tag names followed by their specified
 * attributes, attributes as name="value", and text/CDATA nodes as escaped
 * text. Any other node type is ignored.
 *
 * @param {Node} node the node to serialize.
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
function normalizedHtml(node, out) {
  switch (node.nodeType) {
    case 1: // an element
      var name = node.tagName.toLowerCase();
      out.push('<', name);
      for (var i = 0; i < node.attributes.length; ++i) {
        var attr = node.attributes[i];
        if (!attr.specified) {
          continue; // skip attributes that were not explicitly set
        }
        out.push(' ');
        normalizedHtml(attr, out);
      }
      out.push('>');
      for (var child = node.firstChild; child; child = child.nextSibling) {
        normalizedHtml(child, out);
      }
      // Void elements take no end tag; emitting one (e.g. "</hr>") would
      // produce invalid markup. An end tag is still written if the DOM
      // nevertheless reports children for such an element.
      if (node.firstChild
          || !/^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/
              .test(name)) {
        out.push('<\/', name, '>');
      }
      break;
    case 2: // an attribute
      out.push(node.name.toLowerCase(), '="', attribToHtml(node.value), '"');
      break;
    case 3:
    case 4: // text
      out.push(textToHtml(node.nodeValue));
      break;
  }
}
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExps.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * Capturing groups that no back-reference uses are turned into non-capturing
 * groups; the remaining ones are renumbered so that back-references stay
 * valid inside the combined expression. When case-sensitive and
 * case-insensitive inputs are mixed, the case-insensitive ones are rewritten
 * with explicit character sets ("a" becomes "[Aa]") and the result is
 * case-sensitive.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any of the inputs is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far, across all input regexs.
  var capturedGroupIndex = 0;
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
        /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      // A case-sensitive pattern contains letters, so the combined regex
      // cannot carry the 'i' flag; fold case by hand instead.
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  /** Returns the char code denoted by one element of a character set. */
  function decodeEscape(charsetPart) {
    if (charsetPart.charAt(0) !== '\\') {
      return charsetPart.charCodeAt(0);
    }
    switch (charsetPart.charAt(1)) {
      case 'b':
        return 8;
      case 't':
        return 9;
      case 'n':
        return 0xa;
      case 'v':
        return 0xb;
      case 'f':
        return 0xc;
      case 'r':
        return 0xd;
      case 'u':
      case 'x':
        var hexValue = parseInt(charsetPart.substring(2), 16);
        // NaN means a bare "\u" / "\x", which stands for the letter itself.
        // Testing for NaN (rather than falsiness) keeps "\x00" decoding to 0.
        return isNaN(hexValue) ? charsetPart.charCodeAt(1) : hexValue;
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        return parseInt(charsetPart.substring(1), 8);
      default:
        return charsetPart.charCodeAt(1);
    }
  }

  /** Returns text denoting charCode that is safe inside a character set. */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    // '^' must be escaped too: emitted first it would invert the set.
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']' || ch === '^') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set such as "[a-c]" so that it matches both cases
   * of every letter it contains, e.g. "[A-Ca-c]".
   */
  function caseFoldCharset(charSet) {
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp('\\\\u[0-9A-Fa-f]{4}' + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}' + '|\\\\[0-7]{1,2}' + '|\\\\[\\s\\S]'
            + '|-' + '|[^-\\\\]', 'g')) || []; // "[]" has no parts at all
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      switch (p) {
        case '\\B':
        case '\\b':
        case '\\D':
        case '\\d':
        case '\\S':
        case '\\s':
        case '\\W':
        case '\\w':
          // Character classes are passed through unchanged.
          groups.push(p);
          continue;
      }
      var start = decodeEscape(p);
      var end;
      if (i + 2 < n && '-' === charsetParts[i + 1]) {
        end = decodeEscape(charsetParts[i + 2]);
        i += 2;
      } else {
        end = start;
      }
      ranges.push([ start, end ]);
      // If the range might intersect letters, then expand it.
      if (!(end < 65 || start > 122)) {
        if (!(end < 65 || start > 90)) {
          // Lower-case counterpart of the upper-case letters in the range.
          ranges.push([ Math.max(65, start) | 32, Math.min(end, 90) | 32 ]);
        }
        if (!(end < 97 || start > 122)) {
          // Upper-case counterpart of the lower-case letters in the range.
          ranges.push(
              [ Math.max(97, start) & ~32, Math.min(end, 122) & ~32 ]);
        }
      }
    }
    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function(a, b) {
      return (a[0] - b[0]) || (b[1] - a[1]);
    });
    var consolidatedRanges = [];
    var lastRange = [ NaN, NaN ];
    for (var r = 0; r < ranges.length; ++r) {
      var range = ranges[r];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }
    var out = [ '[' ];
    if (inverse) {
      out.push('^');
    }
    out.push.apply(out, groups);
    for (var c = 0; c < consolidatedRanges.length; ++c) {
      var merged = consolidatedRanges[c];
      out.push(encodeEscape(merged[0]));
      if (merged[1] > merged[0]) {
        // Two adjacent characters need no dash between them.
        if (merged[1] > merged[0] + 1) {
          out.push('-');
        }
        out.push(encodeEscape(merged[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Returns the source of regex with leading anchors removed, unused
   * capturing groups made non-capturing, back-references renumbered and,
   * if required, letters expanded to match either case.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(new RegExp('(?:'
        + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set
        + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape
        + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape
        + '|\\\\[0-9]+' // a back-reference or octal escape
        + '|\\\\[^ux0-9]' // other escape sequence
        + '|\\(\\?[:!=]' // start of a non-capturing group
        + '|[\\(\\)\\^]' // start/end of a group, or line start
        + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters
        + ')', 'g'));
    var n = parts.length;
    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];
    var i, groupIndex, p, decimalValue;
    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (i = 0, groupIndex = 0; i < n; ++i) {
      p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }
    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (i = 0, groupIndex = 0; i < n; ++i) {
      p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the group the back-reference names, not the group that
          // was opened most recently.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }
    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) {
        parts[i] = '';
      }
    }
    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (i = 0; i < n; ++i) {
        p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(/[a-zA-Z]/g, function(ch) {
            var cc = ch.charCodeAt(0);
            return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
          });
        }
      }
    }
    return parts.join('');
  }

  var rewritten = [];
  for (var k = 0, nRegexs = regexs.length; k < nRegexs; ++k) {
    var input = regexs[k];
    if (input.global || input.multiline) {
      throw new Error('' + input);
    }
    rewritten.push('(?:' + allowAnywhereFoldCaseAndRenumberGroups(input)
        + ')');
  }
  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
// Tri-state cache: null until the innerHTML probe in getInnerHtml has run,
// then true or false.
var PR_innerHtmlWorks = null;
/**
 * Returns the html content of node, normalized for tokenization.
 *
 * @param {Node} node the element whose content is wanted.
 * @return {string} escaped html. Content of raw (XMP) nodes is escaped, and
 *     content whose whitespace is not significant has its line breaks
 *     collapsed into single spaces.
 */
function getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
  // tag, so we detect that case and emulate innerHTML.
  if (null === PR_innerHtmlWorks) {
    var testNode = document.createElement('PRE');
    testNode.appendChild(document
        .createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />'));
    // A working innerHTML escapes every '<' of the text node.
    PR_innerHtmlWorks = !/</.test(testNode.innerHTML);
  }
  if (!PR_innerHtmlWorks) {
    // Emulate innerHTML by serializing the children ourselves.
    var out = [];
    for (var child = node.firstChild; child; child = child.nextSibling) {
      normalizedHtml(child, out);
    }
    return out.join('');
  }
  var content = node.innerHTML;
  // XMP tags contain unescaped entities so require special handling.
  if (isRawContent(node)) {
    content = textToHtml(content);
  } else if (!isPreformatted(node, content)) {
    // Drop line breaks that directly follow a <br> (matched in any letter
    // case, as the br prefix pattern used by extractTags does), then
    // collapse every remaining run of line breaks plus indentation into one
    // space.
    content = content.replace(/(<br\s*\/?>)[\r\n]+/gi, '$1').replace(
        /(?:[\r\n]+[ \t]*)+/g, ' ');
  }
  return content;
}
/**
 * Returns a function that expands tabs to spaces. This function can be fed
 * successive chunks of text, and will maintain its own internal state to keep
 * track of how tabs are expanded.
 *
 * @param {number} tabWidth the number of characters between tab columns.
 * @return {function (string) : string} a function that takes plain text and
 *     returns the text with tabs expanded.
 * @private
 */
function makeTabExpander(tabWidth) {
  // Column of the next character on the current line; survives across calls
  // so that a line may be split over several chunks.
  var charInLine = 0;
  return function(plainText) {
    // Walk over each character looking for tabs and line breaks.
    // On tabs, expand them. On line breaks, reset charInLine.
    // Otherwise increment charInLine.
    var out = null; // allocated lazily: most chunks contain no tab
    var pos = 0; // start of the text not yet copied to out
    for (var i = 0, n = plainText.length; i < n; ++i) {
      switch (plainText.charAt(i)) {
        case '\t':
          if (!out) {
            out = [];
          }
          out.push(plainText.substring(pos, i));
          // nSpaces is the amount of padding -- the number of spaces needed
          // to move us to the next column, where columns occur at multiples
          // of tabWidth.
          var nSpaces = tabWidth - (charInLine % tabWidth);
          charInLine += nSpaces;
          for (var k = 0; k < nSpaces; ++k) {
            out.push(' ');
          }
          pos = i + 1;
          break;
        case '\n':
        case '\r': // a carriage return also moves back to column 0
          charInLine = 0;
          break;
        default:
          ++charInLine;
      }
    }
    if (!out) {
      return plainText;
    }
    out.push(plainText.substring(pos));
    return out.join('');
  };
}
// Splits markup into chunks: text runs, comments, CDATA sections, tags and
// stray '<' characters. Alternatives are tried in the order listed.
var pr_chunkPattern = new RegExp(
    '[^<]+' // A run of characters other than '<'
    + '|<!--[\\s\\S]*?-->' // an HTML comment
    + '|<!\\[CDATA\\[[\\s\\S]*?\\]\\]>' // a CDATA section
    // a probable tag that should not be highlighted
    + '|</?[a-zA-Z](?:[^>"\']|\'[^\']*\'|"[^"]*")*>'
    + '|<', // A '<' that does not begin a larger chunk
    'g');
// Prefix tests used to classify a chunk produced by pr_chunkPattern.
var pr_commentPrefix = /^<\!--/;
var pr_cdataPrefix = /^<!\[CDATA\[/;
var pr_brPrefix = /^<br\b/i;
// Group 1 is '/' for an end tag (else empty), group 2 is the tag name.
var pr_tagNameRe = /^<(\/?)([a-zA-Z][a-zA-Z0-9]*)/;
/**
 * Split markup into chunks of html tags (style null) and plain text (style
 * {@link #PR_PLAIN}), converting tags which are significant for tokenization
 * (<br>) into their textual equivalent.
 *
 * Comments are dropped, CDATA sections contribute their raw content, and a
 * section opened by a tag carrying the nocode class is extracted as a single
 * chunk up to its matching end tag.
 *
 * @param {string} s html where whitespace is considered significant.
 * @return {Object} source code and extracted tags: {@code source} is the
 *     plain text, {@code tags} a flat list of alternating source positions
 *     and the markup that was removed at that position.
 * @private
 */
function extractTags(s) {
  // since the pattern has the 'g' modifier and defines no capturing groups,
  // this will return a list of all chunks which we then classify and wrap as
  // PR_Tokens
  var matches = s.match(pr_chunkPattern);
  var sourceBuf = [];
  var sourceBufLen = 0;
  var extractedTags = [];
  for (var i = 0, n = matches ? matches.length : 0; i < n; ++i) {
    var match = matches[i];
    if (!(match.length > 1 && match.charAt(0) === '<')) {
      // Plain text (including a stray '<'): unescape and keep.
      var literalText = htmlToText(match);
      sourceBuf.push(literalText);
      sourceBufLen += literalText.length;
      continue;
    }
    if (pr_commentPrefix.test(match)) {
      continue; // comments are discarded entirely
    }
    if (pr_cdataPrefix.test(match)) {
      // strip CDATA prefix and suffix. Don't unescape since it's CDATA
      sourceBuf.push(match.substring(9, match.length - 3));
      sourceBufLen += match.length - 12;
      continue;
    }
    if (pr_brPrefix.test(match)) {
      // <br> tags are lexically significant so convert them to text.
      // This is undone later.
      sourceBuf.push('\n');
      ++sourceBufLen;
      continue;
    }
    var extracted = match;
    if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) {
      // A <span class="nocode"> will start a section that should be
      // ignored. Continue walking the list until we see a matching end
      // tag. Tag names are compared case-insensitively, as html does.
      var name = match.match(pr_tagNameRe)[2].toLowerCase();
      var depth = 1;
      var j;
      for (j = i + 1; j < n; ++j) {
        var name2 = matches[j].match(pr_tagNameRe);
        if (name2 && name2[2].toLowerCase() === name) {
          if (name2[1] === '/') {
            if (--depth === 0) {
              break;
            }
          } else {
            ++depth;
          }
        }
      }
      if (j < n) {
        // Extract the whole section, end tag included, as one chunk.
        extracted = matches.slice(i, j + 1).join('');
        i = j;
      } // else: ignore unclosed sections and extract just the start tag.
    }
    extractedTags.push(sourceBufLen, extracted);
  }
  return {
    source : sourceBuf.join(''),
    tags : extractedTags
  };
}
/**
 * True if the given tag contains a class attribute with the nocode class.
 *
 * "nocode" has to be a complete, whitespace separated class name: neither
 * class="nocode-x" nor a different attribute such as data-class counts.
 *
 * @param {string} tag the text of a start tag, e.g. '<span class="nocode">'.
 * @return {boolean}
 */
function isNoCodeTag(tag) {
  // First canonicalize the representation of attributes to  name="value".
  // An unquoted value stops at whitespace or at the closing '>'.
  var canonical = tag.replace(
      /\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|([^\s>]+))/g,
      ' $1="$2$3$4"');
  // Then look for the attribute we want.
  return /\s[cC][lL][aA][sS][sS]=\"(?:[^\"]*\s)?nocode(?:\s[^\"]*)?\"/
      .test(canonical);
}
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out. Does nothing when sourceCode is empty.
 *
 * @param {number} basePos the index of sourceCode within the chunk of source
 *     whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object
 *     {@code {source, basePos}}; the decorations are read from the
 *     {@code decorations} property of that job once the handler returns.
 * @param {Array} out the decoration list to append to.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (!sourceCode) {
    return;
  }
  var job = {
    source : sourceCode,
    basePos : basePos
  };
  langHandler(job);
  var decorations = job.decorations;
  if (!decorations) {
    return; // the handler produced nothing
  }
  // Append one by one: push.apply would pass every decoration as a separate
  // argument, which overflows the engine's argument limit for big inputs.
  for (var i = 0, n = decorations.length; i < n; ++i) {
    out.push(decorations[i]);
  }
}
/** | |
* Given triples of [style, pattern, context] returns a lexing function, The | |
* lexing function interprets the patterns to find token boundaries and | |
* returns a decoration list of the form [index_0, style_0, index_1, style_1, | |
* ..., index_n, style_n] where index_n is an index into the sourceCode, and | |
* style_n is a style constant like PR_PLAIN. index_n-1 <= index_n, and | |
* style_n-1 applies to all characters in sourceCode[index_n-1:index_n]. | |
* | |
* The stylePatterns is a list whose elements have the form [style : string, | |
* pattern : RegExp, DEPRECATED, shortcut : string]. | |
* | |
* Style is a style constant like PR_PLAIN, or can be a string of the form | |
* 'lang-FOO', where FOO is a language extension describing the language of | |
* the portion of the token in $1 after pattern executes. E.g., if style is | |
* 'lang-lisp', and group 1 contains the text '(hello (world))', then that | |
* portion of the token will be passed to the registered lisp handler for | |
* formatting. The text before and after group 1 will be restyled using this | |
* decorator so decorators should take care that this doesn't result in | |
* infinite recursion. For example, the HTML lexer rule for SCRIPT elements | |
* looks something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may | |
* match '<script>foo()<\/script>', which would cause the current decorator | |
* to be called with '<script>' which would not match the same rule since | |
* group 1 must not be empty, so it would be instead styled as PR_TAG by the | |
* generic tag rule. The handler registered for the 'js' extension would then | |
* be called with 'foo()', and finally, the current decorator would be called | |
* with '<\/script>' which would not match the original rule and so the | |
* generic tag rule would identify it as a tag. | |
* | |
* Pattern must only match prefixes, and if it matches a prefix, then that | |
* match is considered a token with the same style. | |
* | |
* Context is applied to the last non-whitespace, non-comment token | |
* recognized. | |
* | |
* Shortcut is an optional string of characters, any of which, if it is the
* first character, guarantees that this pattern and only this pattern matches.
* | |
* @param {Array} | |
* shortcutStylePatterns patterns that always start with a known | |
* character. Must have a shortcut string. | |
* @param {Array} | |
* fallthroughStylePatterns patterns that will be tried in order if | |
* the shortcut ones fail. May have shortcuts. | |
* | |
* @return {function (Object)} a function that takes source code and returns a | |
* list of decorations. | |
*/ | |
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) { | |
var shortcuts = {}; | |
var tokenizer; | |
(function() { | |
var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns); | |
var allRegexs = []; | |
var regexKeys = {}; | |
for ( var i = 0, n = allPatterns.length; i < n; ++i) { | |
var patternParts = allPatterns[i]; | |
var shortcutChars = patternParts[3]; | |
if (shortcutChars) { | |
for ( var c = shortcutChars.length; --c >= 0;) { | |
shortcuts[shortcutChars.charAt(c)] = patternParts; | |
} | |
} | |
var regex = patternParts[1]; | |
var k = '' + regex; | |
if (!regexKeys.hasOwnProperty(k)) { | |
allRegexs.push(regex); | |
regexKeys[k] = null; | |
} | |
} | |
allRegexs.push(/[\0-\uffff]/); | |
tokenizer = combinePrefixPatterns(allRegexs); | |
})(); | |
var nPatterns = fallthroughStylePatterns.length; | |
var notWs = /\S/; | |
/** | |
* Lexes job.source and produces an output array job.decorations of style | |
* classes preceded by the position at which they start in job.source in | |
* order. | |
* | |
* @param {Object} | |
* job an object like {@code source: {string} sourceText plain | |
* text, basePos: {int} position of job.source in the larger chunk | |
* of sourceCode. } | |
*/ | |
var decorate = function(job) { | |
var sourceCode = job.source, basePos = job.basePos; | |
/** | |
* Even entries are positions in source in ascending order. Odd enties are | |
* style markers (e.g., PR_COMMENT) that run from that position until the | |
* end. | |
* | |
* @type {Array.<number|string>} | |
*/ | |
var decorations = [ basePos, PR_PLAIN ]; | |
var pos = 0; // index into sourceCode | |
var tokens = sourceCode.match(tokenizer) || []; | |
var styleCache = {}; | |
for ( var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) { | |
var token = tokens[ti]; | |
var style = styleCache[token]; | |
var match = void 0; | |
var isEmbedded; | |
if (typeof style === 'string') { | |
isEmbedded = false; | |
} else { | |
var patternParts = shortcuts[token.charAt(0)]; | |
if (patternParts) { | |
match = token.match(patternParts[1]); | |
style = patternParts[0]; | |
} else { | |
for ( var i = 0; i < nPatterns; ++i) { | |
patternParts = fallthroughStylePatterns[i]; | |
match = token.match(patternParts[1]); | |
if (match) { | |
style = patternParts[0]; | |
break; | |
} | |
} | |
if (!match) { // make sure that we make progress | |
style = PR_PLAIN; | |
} | |
} | |
isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5); | |
if (isEmbedded && !(match && typeof match[1] === 'string')) { | |
isEmbedded = false; | |
style = PR_SOURCE; | |
} | |
if (!isEmbedded) { | |
styleCache[token] = style; | |
} | |
} | |
var tokenStart = pos; | |
pos += token.length; | |
if (!isEmbedded) { | |
decorations.push(basePos + tokenStart, style); | |
} else { // Treat group 1 as an embedded block of source code. | |
var embeddedSource = match[1]; | |
var embeddedSourceStart = token.indexOf(embeddedSource); | |
var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length; | |
if (match[2]) { | |
// If embeddedSource can be blank, then it would match at the | |
// beginning which would cause us to infinitely recurse on the | |
// entire token, so we catch the right context in match[2]. | |
embeddedSourceEnd = token.length - match[2].length; | |
embeddedSourceStart = embeddedSourceEnd - embeddedSource.length; | |
} | |
var lang = style.substring(5); | |
// Decorate the left of the embedded source | |
appendDecorations(basePos + tokenStart, token.substring(0, | |
embeddedSourceStart), decorate, decorations); | |
// Decorate the embedded source | |
appendDecorations(basePos + tokenStart + embeddedSourceStart, | |
embeddedSource, langHandlerForExtension(lang, embeddedSource), | |
decorations); | |
// Decorate the right of the embedded section | |
appendDecorations(basePos + tokenStart + embeddedSourceEnd, token | |
.substring(embeddedSourceEnd), decorate, decorations); | |
} | |
} | |
job.decorations = decorations; | |
}; | |
return decorate; | |
} | |
/** | |
* returns a function that produces a list of decorations from source text. | |
* | |
* This code treats ", ', and ` as string delimiters, and \ as a string | |
* escape. It does not recognize perl's qq() style strings. It has no special | |
* handling for double delimiter escapes as in basic, or the tripled | |
* delimiters used in python, but should work on those regardless although in | |
* those cases a single string literal may be broken up into multiple adjacent | |
* string literals. | |
* | |
* It recognizes C, C++, and shell style comments. | |
* | |
* @param {Object} | |
* options a set of optional parameters. | |
* @return {function (Object)} a function that examines the source code in the | |
* input job and builds the decoration list. | |
*/ | |
function sourceDecorator(options) { | |
var shortcutStylePatterns = [], fallthroughStylePatterns = []; | |
if (options['tripleQuotedStrings']) { | |
// '''multi-line-string''', 'single-line-string', and double-quoted | |
shortcutStylePatterns | |
.push([ | |
PR_STRING, | |
/^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/, | |
null, '\'"' ]); | |
} else if (options['multiLineStrings']) { | |
// 'multi-line-string', "multi-line-string" | |
shortcutStylePatterns | |
.push([ | |
PR_STRING, | |
/^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/, | |
null, '\'"`' ]); | |
} else { | |
// 'single-line-string', "single-line-string" | |
shortcutStylePatterns.push([ PR_STRING, | |
/^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/, | |
null, '"\'' ]); | |
} | |
if (options['verbatimStrings']) { | |
// verbatim-string-literal production from the C# grammar. See issue 93. | |
fallthroughStylePatterns.push([ PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, | |
null ]); | |
} | |
if (options['hashComments']) { | |
if (options['cStyleComments']) { | |
// Stop C preprocessor declarations at an unclosed open comment | |
shortcutStylePatterns | |
.push([ | |
PR_COMMENT, | |
/^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/, | |
null, '#' ]); | |
fallthroughStylePatterns | |
.push([ | |
PR_STRING, | |
/^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/, | |
null ]); | |
} else { | |
shortcutStylePatterns.push([ PR_COMMENT, /^#[^\r\n]*/, null, '#' ]); | |
} | |
} | |
if (options['cStyleComments']) { | |
fallthroughStylePatterns.push([ PR_COMMENT, /^\/\/[^\r\n]*/, null ]); | |
fallthroughStylePatterns.push([ PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, | |
null ]); | |
} | |
if (options['regexLiterals']) { | |
var REGEX_LITERAL = ( | |
// A regular expression literal starts with a slash that is | |
// not followed by * or / so that it is not confused with | |
// comments. | |
'/(?=[^/*])' | |
// and then contains any number of raw characters, | |
+ '(?:[^/\\x5B\\x5C]' | |
// escape sequences (\x5C), | |
+ '|\\x5C[\\s\\S]' | |
// or non-nesting character sets (\x5B\x5D); | |
+ '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+' | |
// finally closed by a /. | |
+ '/'); | |
fallthroughStylePatterns | |
.push([ | |
'lang-regex', | |
new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL | |
+ ')') ]); | |
} | |
var keywords = options['keywords'].replace(/^\s+|\s+$/g, ''); | |
if (keywords.length) { | |
fallthroughStylePatterns.push([ PR_KEYWORD, | |
new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null ]); | |
} | |
shortcutStylePatterns.push([ PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0' ]); | |
fallthroughStylePatterns.push( | |
// TODO(mikesamuel): recognize non-latin letters and numerals in idents | |
[ PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null ], [ PR_TYPE, | |
/^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/, null ], [ PR_PLAIN, | |
/^[a-z_$][a-z_$@0-9]*/i, null ], [ PR_LITERAL, new RegExp('^(?:' | |
// A hex number | |
+ '0x[a-f0-9]+' | |
// or an octal or decimal number, | |
+ '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)' | |
// possibly in scientific notation | |
+ '(?:e[+\\-]?\\d+)?' + ')' | |
// with an optional modifier like UL for unsigned long | |
+ '[a-z]*', 'i'), null, '0123456789' ], [ PR_PUNCTUATION, | |
/^.[^\s\w\.$@\'\"\`\/\#]*/, null ]); | |
return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns); | |
} | |
var decorateSource = sourceDecorator({ | |
'keywords' : ALL_KEYWORDS, | |
'hashComments' : true, | |
'cStyleComments' : true, | |
'multiLineStrings' : true, | |
'regexLiterals' : true | |
}); | |
/** | |
* Breaks {@code job.source} around style boundaries in | |
* {@code job.decorations} while re-interleaving {@code job.extractedTags}, | |
* and leaves the result in {@code job.prettyPrintedHtml}. | |
* | |
* @param {Object} | |
* job like { source: {string} source as plain text, extractedTags: | |
* {Array.<number|string>} extractedTags chunks of raw html preceded | |
* by their position in {@code job.source} in order decorations: | |
* {Array.<number|string} an array of style classes preceded by the | |
* position at which they start in job.source in order } | |
* @private | |
*/ | |
function recombineTagsAndDecorations(job) { | |
var sourceText = job.source; | |
var extractedTags = job.extractedTags; | |
var decorations = job.decorations; | |
var html = []; | |
// index past the last char in sourceText written to html | |
var outputIdx = 0; | |
var openDecoration = null; | |
var currentDecoration = null; | |
var tagPos = 0; // index into extractedTags | |
var decPos = 0; // index into decorations | |
var tabExpander = makeTabExpander(window['PR_TAB_WIDTH']); | |
var adjacentSpaceRe = /([\r\n ]) /g; | |
var startOrSpaceRe = /(^| ) /gm; | |
var newlineRe = /\r\n?|\n/g; | |
var trailingSpaceRe = /[ \r\n]$/; | |
var lastWasSpace = true; // the last text chunk emitted ended with a space. | |
// A helper function that is responsible for opening sections of decoration | |
// and outputing properly escaped chunks of source | |
function emitTextUpTo(sourceIdx) { | |
if (sourceIdx > outputIdx) { | |
if (openDecoration && openDecoration !== currentDecoration) { | |
// Close the current decoration | |
html.push('</span>'); | |
openDecoration = null; | |
} | |
if (!openDecoration && currentDecoration) { | |
openDecoration = currentDecoration; | |
html.push('<span class="', openDecoration, '">'); | |
} | |
// This interacts badly with some wikis which introduces paragraph tags | |
// into pre blocks for some strange reason. | |
// It's necessary for IE though which seems to lose the preformattedness | |
// of <pre> tags when their innerHTML is assigned. | |
// http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html | |
// and it serves to undo the conversion of <br>s to newlines done in | |
// chunkify. | |
var htmlChunk = textToHtml( | |
tabExpander(sourceText.substring(outputIdx, sourceIdx))).replace( | |
lastWasSpace ? startOrSpaceRe : adjacentSpaceRe, '$1 '); | |
// Keep track of whether we need to escape space at the beginning of the | |
// next chunk. | |
lastWasSpace = trailingSpaceRe.test(htmlChunk); | |
// IE collapses multiple adjacient <br>s into 1 line break. | |
// Prefix every <br> with ' ' can prevent such IE's behavior. | |
var lineBreakHtml = window['_pr_isIE6']() ? ' <br />' : '<br />'; | |
html.push(htmlChunk.replace(newlineRe, lineBreakHtml)); | |
outputIdx = sourceIdx; | |
} | |
} | |
while (true) { | |
// Determine if we're going to consume a tag this time around. Otherwise | |
// we consume a decoration or exit. | |
var outputTag; | |
if (tagPos < extractedTags.length) { | |
if (decPos < decorations.length) { | |
// Pick one giving preference to extractedTags since we shouldn't open | |
// a new style that we're going to have to immediately close in order | |
// to output a tag. | |
outputTag = extractedTags[tagPos] <= decorations[decPos]; | |
} else { | |
outputTag = true; | |
} | |
} else { | |
outputTag = false; | |
} | |
// Consume either a decoration or a tag or exit. | |
if (outputTag) { | |
emitTextUpTo(extractedTags[tagPos]); | |
if (openDecoration) { | |
// Close the current decoration | |
html.push('</span>'); | |
openDecoration = null; | |
} | |
html.push(extractedTags[tagPos + 1]); | |
tagPos += 2; | |
} else if (decPos < decorations.length) { | |
emitTextUpTo(decorations[decPos]); | |
currentDecoration = decorations[decPos + 1]; | |
decPos += 2; | |
} else { | |
break; | |
} | |
} | |
emitTextUpTo(sourceText.length); | |
if (openDecoration) { | |
html.push('</span>'); | |
} | |
job.prettyPrintedHtml = html.join(''); | |
} | |
/** Maps language-specific file extensions to handlers. */ | |
var langHandlerRegistry = {}; | |
/** | |
* Register a language handler for the given file extensions. | |
* | |
* @param {function | |
* (Object)} handler a function from source code to a list of | |
* decorations. Takes a single argument job which describes the state | |
* of the computation. The single parameter has the form {@code { | |
* source: {string} as plain text. decorations: {Array.<number|string>} | |
* an array of style classes preceded by the position at which they | |
* start in job.source in order. The language handler should assigned | |
* this field. basePos: {int} the position of source in the larger | |
* source chunk. All positions in the output decorations array are | |
* relative to the larger source chunk. } } | |
* @param {Array. | |
* <string>} fileExtensions | |
*/ | |
function registerLangHandler(handler, fileExtensions) { | |
for ( var i = fileExtensions.length; --i >= 0;) { | |
var ext = fileExtensions[i]; | |
if (!langHandlerRegistry.hasOwnProperty(ext)) { | |
langHandlerRegistry[ext] = handler; | |
} else if ('console' in window) { | |
console.warn('cannot override language handler %s', ext); | |
} | |
} | |
} | |
function langHandlerForExtension(extension, source) { | |
if (!(extension && langHandlerRegistry.hasOwnProperty(extension))) { | |
// Treat it as markup if the first non whitespace character is a < and | |
// the last non-whitespace character is a >. | |
extension = /^\s*</.test(source) ? 'default-markup' : 'default-code'; | |
} | |
return langHandlerRegistry[extension]; | |
} | |
registerLangHandler(decorateSource, [ 'default-code' ]); | |
registerLangHandler(createSimpleLexer([], [ [ PR_PLAIN, /^[^<?]+/ ], | |
[ PR_DECLARATION, /^<!\w[^>]*(?:>|$)/ ], | |
[ PR_COMMENT, /^<\!--[\s\S]*?(?:-\->|$)/ ], | |
// Unescaped content in an unknown language | |
[ 'lang-', /^<\?([\s\S]+?)(?:\?>|$)/ ], | |
[ 'lang-', /^<%([\s\S]+?)(?:%>|$)/ ], | |
[ PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/ ], | |
[ 'lang-', /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i ], | |
// Unescaped content in javascript. (Or possibly vbscript). | |
[ 'lang-js', /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i ], | |
// Contains unescaped stylesheet content | |
[ 'lang-css', /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i ], | |
[ 'lang-in.tag', /^(<\/?[a-z][^<>]*>)/i ] ]), [ 'default-markup', 'htm', | |
'html', 'mxml', 'xhtml', 'xml', 'xsl' ]); | |
registerLangHandler(createSimpleLexer([ | |
[ PR_PLAIN, /^[\s]+/, null, ' \t\r\n' ], | |
[ PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'' ] ], [ | |
[ PR_TAG, /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i ], | |
[ PR_ATTRIB_NAME, /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i ], | |
[ 'lang-uq.val', /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/ ], | |
[ PR_PUNCTUATION, /^[=<>\/]+/ ], | |
[ 'lang-js', /^on\w+\s*=\s*\"([^\"]+)\"/i ], | |
[ 'lang-js', /^on\w+\s*=\s*\'([^\']+)\'/i ], | |
[ 'lang-js', /^on\w+\s*=\s*([^\"\'>\s]+)/i ], | |
[ 'lang-css', /^style\s*=\s*\"([^\"]+)\"/i ], | |
[ 'lang-css', /^style\s*=\s*\'([^\']+)\'/i ], | |
[ 'lang-css', /^style\s*=\s*([^\"\'>\s]+)/i ] ]), [ 'in.tag' ]); | |
registerLangHandler( | |
createSimpleLexer([], [ [ PR_ATTRIB_VALUE, /^[\s\S]+/ ] ]), [ 'uq.val' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : CPP_KEYWORDS, | |
'hashComments' : true, | |
'cStyleComments' : true | |
}), [ 'c', 'cc', 'cpp', 'cxx', 'cyc', 'm' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : 'null true false' | |
}), [ 'json' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : CSHARP_KEYWORDS, | |
'hashComments' : true, | |
'cStyleComments' : true, | |
'verbatimStrings' : true | |
}), [ 'cs' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : JAVA_KEYWORDS, | |
'cStyleComments' : true | |
}), [ 'java' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : SH_KEYWORDS, | |
'hashComments' : true, | |
'multiLineStrings' : true | |
}), [ 'bsh', 'csh', 'sh' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : PYTHON_KEYWORDS, | |
'hashComments' : true, | |
'multiLineStrings' : true, | |
'tripleQuotedStrings' : true | |
}), [ 'cv', 'py' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : PERL_KEYWORDS, | |
'hashComments' : true, | |
'multiLineStrings' : true, | |
'regexLiterals' : true | |
}), [ 'perl', 'pl', 'pm' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : RUBY_KEYWORDS, | |
'hashComments' : true, | |
'multiLineStrings' : true, | |
'regexLiterals' : true | |
}), [ 'rb' ]); | |
registerLangHandler(sourceDecorator({ | |
'keywords' : JSCRIPT_KEYWORDS, | |
'cStyleComments' : true, | |
'regexLiterals' : true | |
}), [ 'js' ]); | |
registerLangHandler(createSimpleLexer([], [ [ PR_STRING, /^[\s\S]+/ ] ]), | |
[ 'regex' ]); | |
function applyDecorator(job) { | |
var sourceCodeHtml = job.sourceCodeHtml; | |
var opt_langExtension = job.langExtension; | |
// Prepopulate output in case processing fails with an exception. | |
job.prettyPrintedHtml = sourceCodeHtml; | |
try { | |
// Extract tags, and convert the source code to plain text. | |
var sourceAndExtractedTags = extractTags(sourceCodeHtml); | |
/** | |
* Plain text. | |
* | |
* @type {string} | |
*/ | |
var source = sourceAndExtractedTags.source; | |
job.source = source; | |
job.basePos = 0; | |
/** | |
* Even entries are positions in source in ascending order. Odd entries | |
* are tags that were extracted at that position. | |
* | |
* @type {Array.<number|string>} | |
*/ | |
job.extractedTags = sourceAndExtractedTags.tags; | |
// Apply the appropriate language handler | |
langHandlerForExtension(opt_langExtension, source)(job); | |
// Integrate the decorations and tags back into the source code to produce | |
// a decorated html string which is left in job.prettyPrintedHtml. | |
recombineTagsAndDecorations(job); | |
} catch (e) { | |
if ('console' in window) { | |
console.log(e); | |
console.trace(); | |
} | |
} | |
} | |
function prettyPrintOne(sourceCodeHtml, opt_langExtension) { | |
var job = { | |
sourceCodeHtml : sourceCodeHtml, | |
langExtension : opt_langExtension | |
}; | |
applyDecorator(job); | |
return job.prettyPrintedHtml; | |
} | |
function prettyPrint(opt_whenDone) { | |
var isIE678 = window['_pr_isIE6'](); | |
var ieNewline = isIE678 === 6 ? '\r\n' : '\r'; | |
// See bug 71 and http://stackoverflow.com/questions/136443/why-doesnt-ie7- | |
// fetch a list of nodes to rewrite | |
var codeSegments = [ document.getElementsByTagName('pre'), | |
document.getElementsByTagName('code'), | |
document.getElementsByTagName('xmp') ]; | |
var elements = []; | |
for ( var i = 0; i < codeSegments.length; ++i) { | |
for ( var j = 0, n = codeSegments[i].length; j < n; ++j) { | |
elements.push(codeSegments[i][j]); | |
} | |
} | |
codeSegments = null; | |
var clock = Date; | |
if (!clock['now']) { | |
clock = { | |
'now' : function() { | |
return (new Date).getTime(); | |
} | |
}; | |
} | |
// The loop is broken into a series of continuations to make sure that we | |
// don't make the browser unresponsive when rewriting a large page. | |
var k = 0; | |
var prettyPrintingJob; | |
function doWork() { | |
var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ? clock.now() + 250 /* ms */ | |
: Infinity); | |
for (; k < elements.length && clock.now() < endTime; k++) { | |
var cs = elements[k]; | |
if (cs.className && cs.className.indexOf('prettyprint') >= 0) { | |
// If the classes includes a language extensions, use it. | |
// Language extensions can be specified like | |
// <pre class="prettyprint lang-cpp"> | |
// the language extension "cpp" is used to find a language handler as | |
// passed to PR_registerLangHandler. | |
var langExtension = cs.className.match(/\blang-(\w+)\b/); | |
if (langExtension) { | |
langExtension = langExtension[1]; | |
} | |
// make sure this is not nested in an already prettified element | |
var nested = false; | |
for ( var p = cs.parentNode; p; p = p.parentNode) { | |
if ((p.tagName === 'pre' || p.tagName === 'code' || p.tagName === 'xmp') | |
&& p.className && p.className.indexOf('prettyprint') >= 0) { | |
nested = true; | |
break; | |
} | |
} | |
if (!nested) { | |
// fetch the content as a snippet of properly escaped HTML. | |
// Firefox adds newlines at the end. | |
var content = getInnerHtml(cs); | |
content = content.replace(/(?:\r\n?|\n)$/, ''); | |
// do the pretty printing | |
prettyPrintingJob = { | |
sourceCodeHtml : content, | |
langExtension : langExtension, | |
sourceNode : cs | |
}; | |
applyDecorator(prettyPrintingJob); | |
replaceWithPrettyPrintedHtml(); | |
} | |
} | |
} | |
if (k < elements.length) { | |
// finish up in a continuation | |
setTimeout(doWork, 250); | |
} else if (opt_whenDone) { | |
opt_whenDone(); | |
} | |
} | |
function replaceWithPrettyPrintedHtml() { | |
var newContent = prettyPrintingJob.prettyPrintedHtml; | |
if (!newContent) { | |
return; | |
} | |
var cs = prettyPrintingJob.sourceNode; | |
// push the prettified html back into the tag. | |
if (!isRawContent(cs)) { | |
// just replace the old html with the new | |
cs.innerHTML = newContent; | |
} else { | |
// we need to change the tag to a <pre> since <xmp>s do not allow | |
// embedded tags such as the span tags used to attach styles to | |
// sections of source code. | |
var pre = document.createElement('PRE'); | |
for ( var i = 0; i < cs.attributes.length; ++i) { | |
var a = cs.attributes[i]; | |
if (a.specified) { | |
var aname = a.name.toLowerCase(); | |
if (aname === 'class') { | |
pre.className = a.value; // For IE 6 | |
} else { | |
pre.setAttribute(a.name, a.value); | |
} | |
} | |
} | |
pre.innerHTML = newContent; | |
// remove the old | |
cs.parentNode.replaceChild(pre, cs); | |
cs = pre; | |
} | |
// Replace <br>s with line-feeds so that copying and pasting works | |
// on IE 6. | |
// Doing this on other browsers breaks lots of stuff since \r\n is | |
// treated as two newlines on Firefox, and doing this also slows | |
// down rendering. | |
if (isIE678 && cs.tagName === 'PRE') { | |
var lineBreaks = cs.getElementsByTagName('br'); | |
for ( var j = lineBreaks.length; --j >= 0;) { | |
var lineBreak = lineBreaks[j]; | |
lineBreak.parentNode.replaceChild(document.createTextNode(ieNewline), | |
lineBreak); | |
} | |
} | |
} | |
doWork(); | |
} | |
window['PR_normalizedHtml'] = normalizedHtml; | |
window['prettyPrintOne'] = prettyPrintOne; | |
window['prettyPrint'] = prettyPrint; | |
window['PR'] = { | |
'combinePrefixPatterns' : combinePrefixPatterns, | |
'createSimpleLexer' : createSimpleLexer, | |
'registerLangHandler' : registerLangHandler, | |
'sourceDecorator' : sourceDecorator, | |
'PR_ATTRIB_NAME' : PR_ATTRIB_NAME, | |
'PR_ATTRIB_VALUE' : PR_ATTRIB_VALUE, | |
'PR_COMMENT' : PR_COMMENT, | |
'PR_DECLARATION' : PR_DECLARATION, | |
'PR_KEYWORD' : PR_KEYWORD, | |
'PR_LITERAL' : PR_LITERAL, | |
'PR_NOCODE' : PR_NOCODE, | |
'PR_PLAIN' : PR_PLAIN, | |
'PR_PUNCTUATION' : PR_PUNCTUATION, | |
'PR_SOURCE' : PR_SOURCE, | |
'PR_STRING' : PR_STRING, | |
'PR_TAG' : PR_TAG, | |
'PR_TYPE' : PR_TYPE | |
}; | |
})(); |