| // Copyright (C) 2006 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| |
| /** |
| * @fileoverview |
| * some functions for browser-side pretty printing of code contained in html. |
| * |
| * <p> |
| * For a fairly comprehensive set of languages see the |
| * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> |
| * file that came with this source. At a minimum, the lexer should work on a |
| * number of languages including C and friends, Java, Python, Bash, SQL, HTML, |
| * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk |
| * and a subset of Perl, but, because of commenting conventions, doesn't work on |
| * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. |
| * <p> |
| * Usage: <ol> |
| * <li> include this source file in an html page via |
| * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} |
| * <li> define style rules. See the example page for examples. |
| * <li> mark the {@code <pre>} and {@code <code>} tags in your source with |
| * {@code class=prettyprint.} |
| * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty |
| * printer needs to do more substantial DOM manipulations to support that, so |
| * some css styles may not be preserved. |
| * </ol> |
| * That's it. I wanted to keep the API as simple as possible, so there's no |
| * need to specify which language the code is in, but if you wish, you can add |
| * another class to the {@code <pre>} or {@code <code>} element to specify the |
| * language, as in {@code <pre class="prettyprint lang-java">}. Any class that |
| * starts with "lang-" followed by a file extension, specifies the file type. |
| * See the "lang-*.js" files in this directory for code that implements |
| * per-language file handlers. |
| * <p> |
| * Change log:<br> |
| * cbeust, 2006/08/22 |
| * <blockquote> |
| * Java annotations (start with "@") are now captured as literals ("lit") |
| * </blockquote> |
| * @requires console |
| */ |
| |
| // JSLint declarations |
| /*global console, document, navigator, setTimeout, window, define */ |
| |
| /** |
| * Split {@code prettyPrint} into multiple timeouts so as not to interfere with |
| * UI events. |
| * If set to {@code false}, {@code prettyPrint()} is synchronous. |
| */ |
| window['PR_SHOULD_USE_CONTINUATION'] = true; |
| |
| /** |
| * Find all the {@code <pre>} and {@code <code>} tags in the DOM with |
| * {@code class=prettyprint} and prettify them. |
| * |
| * @param {Function?} opt_whenDone if specified, called when the last entry |
| * has been finished. |
| */ |
| var prettyPrintOne; |
| /** |
| * Pretty print a chunk of code. |
| * |
| * @param {string} sourceCodeHtml code as html |
| * @return {string} code as html, but prettier |
| */ |
| var prettyPrint; |
| |
| |
| (function () { |
| var win = window; |
| // Keyword lists for various languages. |
| // We use things that coerce to strings to make them compact when minified |
| // and to defeat aggressive optimizers that fold large string constants. |
| var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"]; |
| var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS,"auto,case,char,const,default," + |
| "double,enum,extern,float,goto,int,long,register,short,signed,sizeof,module," + |
| "static,struct,switch,typedef,union,unsigned,void,volatile"]; |
| var COMMON_KEYWORDS = [C_KEYWORDS,"catch,class,delete,false,import," + |
| "new,operator,private,protected,public,this,throw,true,try,typeof"]; |
| var CPP_KEYWORDS = [COMMON_KEYWORDS,"alignof,align_union,asm,axiom,bool," + |
| "concept,concept_map,const_cast,constexpr,decltype," + |
| "dynamic_cast,explicit,export,friend,inline,late_check," + |
| "mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast," + |
| "template,typeid,typename,using,virtual,where,request_req"]; |
| var JAVA_KEYWORDS = [COMMON_KEYWORDS, |
| "abstract,boolean,byte,extends,final,finally,implements,import," + |
| "instanceof,null,native,package,strictfp,super,synchronized,throws," + |
| "transient"]; |
| var CSHARP_KEYWORDS = [JAVA_KEYWORDS, |
| "as,base,by,checked,decimal,delegate,descending,dynamic,event," + |
| "fixed,foreach,from,group,implicit,in,interface,internal,into,is,let," + |
| "lock,object,out,override,orderby,params,partial,readonly,ref,sbyte," + |
| "sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort," + |
| "var,virtual,where"]; |
| var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," + |
| "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," + |
| "throw,true,try,unless,until,when,while,yes"; |
| var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS, |
| "debugger,eval,export,function,get,null,set,undefined,var,with," + |
| "Infinity,NaN"]; |
| var PERL_KEYWORDS = "caller,delete,die,do,dump,else,elsif,eval,exit,foreach,for," + |
| "goto,if,import,last,local,my,next,no,our,print,printf,package,redo,require," + |
| "sub,undef,unless,until,use,wantarray,while,BEGIN,END"; |
| var PHP_KEYWORDS = "abstract,and,array,as,break,case,catch,cfunction,class," + |
| "clone,const,continue,declare,default,do,else,elseif,enddeclare,endfor," + |
| "endforeach,endif,endswitch,endwhile,extends,final,for,foreach,function," + |
| "global,goto,if,implements,interface,instanceof,namespace,new,old_function," + |
| "or,private,protected,public,static,switch,throw,try,use,var,while,xor," + |
| "die,echo,empty,exit,eval,include,include_once,isset,list,require," + |
| "require_once,return,print,unset"; |
| var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," + |
| "elif,except,exec,finally,from,global,import,in,is,lambda," + |
| "nonlocal,not,or,pass,print,raise,try,with,yield," + |
| "False,True,None"]; |
| var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," + |
| "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," + |
| "rescue,retry,self,super,then,true,undef,unless,until,when,yield," + |
| "BEGIN,END"]; |
| var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," + |
| "function,in,local,set,then,until,echo"]; |
| var CONFIG_ENVS = ["User-Agent,HTTP_USER_AGENT,HTTP_REFERER,HTTP_COOKIE,HTTP_FORWARDED,HTTP_HOST,HTTP_PROXY_CONNECTION,HTTP_ACCEPT,REMOTE_ADDR,REMOTE_HOST,REMOTE_PORT,REMOTE_USER,REMOTE_IDENT,REQUEST_METHOD,SCRIPT_FILENAME,PATH_INFO,QUERY_STRING,AUTH_TYPE,DOCUMENT_ROOT,SERVER_ADMIN,SERVER_NAME,SERVER_ADDR,SERVER_PORT,SERVER_PROTOCOL,SERVER_SOFTWARE,TIME_YEAR,TIME_MON,TIME_DAY,TIME_HOUR,TIME_MIN,TIME_SEC,TIME_WDAY,TIME,API_VERSION,THE_REQUEST,REQUEST_URI,REQUEST_FILENAME,IS_SUBREQ,HTTPS,REQUEST_SCHEME"]; |
| var CONFIG_KEYWORDS = ["<AuthnProviderAlias>,<AuthzProviderAlias>,<Directory>,<DirectoryMatch>,<Else>,<ElseIf>,<Files>,<FilesMatch>,<If>,<IfDefine>,<IfModule>,<IfVersion>,<Limit>,<LimitExcept>,<Location>,<LocationMatch>,<MDomainSet>,<Macro>,<Proxy>,<ProxyMatch>,<RequireAll>,<RequireAny>,<RequireNone>,<VirtualHost>,AcceptFilter,AcceptPathInfo,AccessFileName,Action,AddAlt,AddAltByEncoding,AddAltByType,AddCharset,AddDefaultCharset,AddDescription,AddEncoding,AddHandler,AddIcon,AddIconByEncoding,AddIconByType,AddInputFilter,AddLanguage,AddModuleInfo,AddOutputFilter,AddOutputFilterByType,AddType,Alias,AliasMatch,Allow,AllowCONNECT,AllowEncodedSlashes,AllowMethods,AllowOverride,AllowOverrideList,Anonymous,Anonymous_LogEmail,Anonymous_MustGiveEmail,Anonymous_NoUserID,Anonymous_VerifyEmail,AsyncRequestWorkerFactor,AuthBasicAuthoritative,AuthBasicFake,AuthBasicProvider,AuthBasicUseDigestAlgorithm,AuthDBDUserPWQuery,AuthDBDUserRealmQuery,AuthDBMGroupFile,AuthDBMType,AuthDBMUserFile,AuthDigestAlgorithm,AuthDigestDomain,AuthDigestNonceLifetime,AuthDigestProvider,AuthDigestQop,AuthDigestShmemSize,AuthFormAuthoritative,AuthFormBody,AuthFormDisableNoStore,AuthFormFakeBasicAuth,AuthFormLocation,AuthFormLoginRequiredLocation,AuthFormLoginSuccessLocation,AuthFormLogoutLocation,AuthFormMethod,AuthFormMimetype,AuthFormPassword,AuthFormProvider,AuthFormSitePassphrase,AuthFormSize,AuthFormUsername,AuthGroupFile,AuthLDAPAuthorizePrefix,AuthLDAPBindAuthoritative,AuthLDAPBindDN,AuthLDAPBindPassword,AuthLDAPCharsetConfig,AuthLDAPCompareAsUser,AuthLDAPCompareDNOnServer,AuthLDAPDereferenceAliases,AuthLDAPGroupAttribute,AuthLDAPGroupAttributeIsDN,AuthLDAPInitialBindAsUser,AuthLDAPInitialBindPattern,AuthLDAPMaxSubGroupDepth,AuthLDAPRemoteUserAttribute,AuthLDAPRemoteUserIsDN,AuthLDAPSearchAsUser,AuthLDAPSubGroupAttribute,AuthLDAPSubGroupClass,AuthLDAPUrl,AuthMerging,AuthName,AuthType,AuthUserFile,AuthnCacheContext,AuthnCacheEnable,AuthnCacheProvideFor,AuthnCacheSOCache,AuthnCacheTimeout,AuthnzFcgiCheckAuthnProvider,AuthnzFcgiDefineProvider,AuthzDBDLoginToReferer,AuthzDBDQuery,AuthzDBDRedirectQuery,AuthzDBMType,AuthzSendForbiddenOnFailure,BalancerGrowth,BalancerInherit,BalancerMember,BalancerPersist,BrotliAlterETag,BrotliCompressionMaxInputBlock,BrotliCompressionQuality,BrotliCompressionWindow,BrotliFilterNote,BrowserMatch,BrowserMatchNoCase,BufferSize,BufferedLogs,CGIDScriptTimeout,CGIMapExtension,CGIPassAuth,CGIVar,CacheDefaultExpire,CacheDetailHeader,CacheDirLength,CacheDirLevels,CacheDisable,CacheEnable,CacheFile,CacheHeader,CacheIgnoreCacheControl,CacheIgnoreHeaders,CacheIgnoreNoLastMod,CacheIgnoreQueryString,CacheIgnoreURLSessionIdentifiers,CacheKeyBaseURL,CacheLastModifiedFactor,CacheLock,CacheLockMaxAge,CacheLockPath,CacheMaxExpire,CacheMaxFileSize,CacheMinExpire,CacheMinFileSize,CacheNegotiatedDocs,CacheQuickHandler,CacheReadSize,CacheReadTime,CacheRoot,CacheSocache,CacheSocacheMaxSize,CacheSocacheMaxTime,CacheSocacheMinTime,CacheSocacheReadSize,CacheSocacheReadTime,CacheStaleOnError,CacheStoreExpired,CacheStoreNoStore,CacheStorePrivate,CharsetDefault,CharsetOptions,CharsetSourceEnc,CheckCaseOnly,CheckSpelling,ChrootDir,ContentDigest,CookieDomain,CookieExpires,CookieName,CookieStyle,CookieTracking,CoreDumpDirectory,CustomLog,DBDExptime,DBDInitSQL,DBDKeep,DBDMax,DBDMin,DBDParams,DBDPersist,DBDPrepareSQL,DBDriver,DTracePrivileges,Dav,DavDepthInfinity,DavGenericLockDB,DavLockDB,DavMinTimeout,DefaultIcon,DefaultLanguage,DefaultRuntimeDir,DefaultType,Define,DeflateBufferSize,DeflateCompressionLevel,DeflateFilterNote,DeflateInflateLimitRequestBody,DeflateInflateRatioBurst,DeflateInflateRatioLimit,DeflateMemLevel,DeflateWindowSize,Deny,DirectoryCheckHandler,DirectoryIndex,DirectoryIndexRedirect,DirectorySlash,DocumentRoot,DumpIOInput,DumpIOOutput,EnableExceptionHook,EnableMMAP,EnableSendfile,Error,ErrorDocument,ErrorLog,ErrorLogFormat,Example,ExpiresActive,ExpiresByType,ExpiresDefault,ExtFilterDefine,ExtFilterOptions,ExtendedStatus,FallbackResource,FileETag,FilterChain,FilterDeclare,FilterProtocol,FilterProvider,FilterTrace,ForceLanguagePriority,ForceType,ForensicLog,GlobalLog,GprofDir,GracefulShutdownTimeout,Group,H2CopyFiles,H2Direct,H2EarlyHints,H2MaxSessionStreams,H2MaxWorkerIdleSeconds,H2MaxWorkers,H2MinWorkers,H2ModernTLSOnly,H2Push,H2PushDiarySize,H2PushPriority,H2PushResource,H2SerializeHeaders,H2StreamMaxMemSize,H2TLSCoolDownSecs,H2TLSWarmUpSize,H2Upgrade,H2WindowSize,Header,HeaderName,HeartbeatAddress,HeartbeatListen,HeartbeatMaxServers,HeartbeatStorage,HeartbeatStorage,HostnameLookups,HttpProtocolOptions,ISAPIAppendLogToErrors,ISAPIAppendLogToQuery,ISAPICacheFile,ISAPIFakeAsync,ISAPILogNotSupported,ISAPIReadAheadBuffer,IdentityCheck,IdentityCheckTimeout,ImapBase,ImapDefault,ImapMenu,Include,IncludeOptional,IndexHeadInsert,IndexIgnore,IndexIgnoreReset,IndexOptions,IndexOrderDefault,IndexStyleSheet,InputSed,KeepAlive,KeepAliveTimeout,KeptBodySize,LDAPCacheEntries,LDAPCacheTTL,LDAPConnectionPoolTTL,LDAPConnectionTimeout,LDAPLibraryDebug,LDAPOpCacheEntries,LDAPOpCacheTTL,LDAPReferralHopLimit,LDAPReferrals,LDAPRetries,LDAPRetryDelay,LDAPSharedCacheFile,LDAPSharedCacheSize,LDAPTimeout,LDAPTrustedClientCert,LDAPTrustedGlobalCert,LDAPTrustedMode,LDAPVerifyServerCert,LanguagePriority,LimitInternalRecursion,LimitRequestBody,LimitRequestFieldSize,LimitRequestFields,LimitRequestLine,LimitXMLRequestBody,Listen,ListenBackLog,ListenCoresBucketsRatio,LoadFile,LoadModule,LogFormat,LogIOTrackTTFB,LogLevel,LogMessage,LuaAuthzProvider,LuaCodeCache,LuaHookAccessChecker,LuaHookAuthChecker,LuaHookCheckUserID,LuaHookFixups,LuaHookInsertFilter,LuaHookLog,LuaHookMapToStorage,LuaHookTranslateName,LuaHookTypeChecker,LuaInherit,LuaInputFilter,LuaMapHandler,LuaOutputFilter,LuaPackageCPath,LuaPackagePath,LuaQuickHandler,LuaRoot,LuaScope,MDBaseServer,MDCAChallenges,MDCertificateAgreement,MDCertificateAuthority,MDCertificateProtocol,MDDriveMode,MDHttpProxy,MDMember,MDMembers,MDMustStaple,MDNotifyCmd,MDPortMap,MDPrivateKeys,MDRenewWindow,MDRequireHttps,MDStoreDir,MDomain,MMapFile,MaxConnectionsPerChild,MaxKeepAliveRequests,MaxMemFree,MaxRangeOverlaps,MaxRangeReversals,MaxRanges,MaxRequestWorkers,MaxSpareServers,MaxSpareThreads,MaxThreads,MemcacheConnTTL,MergeTrailers,MetaDir,MetaFiles,MetaSuffix,MimeMagicFile,MinSpareServers,MinSpareThreads,ModMimeUsePathInfo,ModemStandard,MultiviewsMatch,Mutex,NWSSLTrustedCerts,NWSSLUpgradeable,NameVirtualHost,NoProxy,Options,Order,OutputSed,PassEnv,PidFile,PrivilegesMode,Protocol,ProtocolEcho,Protocols,ProtocolsHonorOrder,ProxyAddHeaders,ProxyBadHeader,ProxyBlock,ProxyDomain,ProxyErrorOverride,ProxyExpressDBMFile,ProxyExpressDBMType,ProxyExpressEnable,ProxyFCGIBackendType,ProxyFCGISetEnvIf,ProxyFtpDirCharset,ProxyFtpEscapeWildcards,ProxyFtpListOnWildcard,ProxyHCExpr,ProxyHCTPsize,ProxyHCTemplate,ProxyHTMLBufSize,ProxyHTMLCharsetOut,ProxyHTMLDocType,ProxyHTMLEnable,ProxyHTMLEvents,ProxyHTMLExtended,ProxyHTMLFixups,ProxyHTMLInterp,ProxyHTMLLinks,ProxyHTMLMeta,ProxyHTMLStripComments,ProxyHTMLURLMap,ProxyIOBufferSize,ProxyMaxForwards,ProxyPass,ProxyPassInherit,ProxyPassInterpolateEnv,ProxyPassMatch,ProxyPassReverse,ProxyPassReverseCookieDomain,ProxyPassReverseCookiePath,ProxyPreserveHost,ProxyReceiveBufferSize,ProxyRemote,ProxyRemoteMatch,ProxyRequests,ProxySCGIInternalRedirect,ProxySCGISendfile,ProxySet,ProxySourceAddress,ProxyStatus,ProxyTimeout,ProxyVia,QualifyRedirectURL,RLimitCPU,RLimitMEM,RLimitNPROC,ReadmeName,ReceiveBufferSize,Redirect,RedirectMatch,RedirectPermanent,RedirectTemp,ReflectorHeader,RegisterHttpMethod,RemoteIPHeader,RemoteIPInternalProxy,RemoteIPInternalProxyList,RemoteIPProxiesHeader,RemoteIPTrustedProxy,RemoteIPTrustedProxyList,RemoveCharset,RemoveEncoding,RemoveHandler,RemoveInputFilter,RemoveLanguage,RemoveOutputFilter,RemoveType,RequestHeader,RequestReadTimeout,Require,RewriteBase,RewriteCond,RewriteEngine,RewriteMap,RewriteOptions,RewriteRule,SSIETag,SSIEndTag,SSIErrorMsg,SSILastModified,SSILegacyExprParser,SSIStartTag,SSITimeFormat,SSIUndefinedEcho,SSLCACertificateFile,SSLCACertificatePath,SSLCADNRequestFile,SSLCADNRequestPath,SSLCARevocationCheck,SSLCARevocationFile,SSLCARevocationPath,SSLCertificateChainFile,SSLCertificateFile,SSLCertificateKeyFile,SSLCipherSuite,SSLCompression,SSLCryptoDevice,SSLEngine,SSLFIPS,SSLHonorCipherOrder,SSLInsecureRenegotiation,SSLOCSPDefaultResponder,SSLOCSPEnable,SSLOCSPNoverify,SSLOCSPOverrideResponder,SSLOCSPProxyURL,SSLOCSPResponderCertificateFile,SSLOCSPResponderTimeout,SSLOCSPResponseMaxAge,SSLOCSPResponseTimeSkew,SSLOCSPUseRequestNonce,SSLOpenSSLConfCmd,SSLOptions,SSLPassPhraseDialog,SSLProtocol,SSLProxyCACertificateFile,SSLProxyCACertificatePath,SSLProxyCARevocationCheck,SSLProxyCARevocationFile,SSLProxyCARevocationPath,SSLProxyCheckPeerCN,SSLProxyCheckPeerExpire,SSLProxyCheckPeerName,SSLProxyCipherSuite,SSLProxyEngine,SSLProxyMachineCertificateChainFile,SSLProxyMachineCertificateFile,SSLProxyMachineCertificatePath,SSLProxyProtocol,SSLProxyVerify,SSLProxyVerifyDepth,SSLRandomSeed,SSLRenegBufferSize,SSLRequire,SSLRequireSSL,SSLSRPUnknownUserSeed,SSLSRPVerifierFile,SSLSessionCache,SSLSessionCacheTimeout,SSLSessionTicketKeyFile,SSLSessionTickets,SSLStaplingCache,SSLStaplingErrorCacheTimeout,SSLStaplingFakeTryLater,SSLStaplingForceURL,SSLStaplingResponderTimeout,SSLStaplingResponseMaxAge,SSLStaplingResponseTimeSkew,SSLStaplingReturnResponderErrors,SSLStaplingStandardCacheTimeout,SSLStrictSNIVHostCheck,SSLUseStapling,SSLUserName,SSLVerifyClient,SSLVerifyDepth,Satisfy,ScoreBoardFile,Script,ScriptAlias,ScriptAliasMatch,ScriptInterpreterSource,ScriptLog,ScriptLogBuffer,ScriptLogLength,ScriptSock,SecureListen,SeeRequestTail,SendBufferSize,ServerAdmin,ServerAlias,ServerLimit,ServerName,ServerPath,ServerRoot,ServerSignature,ServerTokens,Session,SessionCookieName,SessionCookieName2,SessionCookieRemove,SessionCryptoCipher,SessionCryptoDriver,SessionCryptoPassphrase,SessionCryptoPassphraseFile,SessionDBDCookieName,SessionDBDCookieName2,SessionDBDCookieRemove,SessionDBDDeleteLabel,SessionDBDInsertLabel,SessionDBDPerUser,SessionDBDSelectLabel,SessionDBDUpdateLabel,SessionEnv,SessionExclude,SessionHeader,SessionInclude,SessionMaxAge,SetEnv,SetEnvIf,SetEnvIfExpr,SetEnvIfNoCase,SetHandler,SetInputFilter,SetOutputFilter,StartServers,StartThreads,Substitute,SubstituteInheritBefore,SubstituteMaxLineLength,Suexec,SuexecUserGroup,ThreadLimit,ThreadStackSize,ThreadsPerChild,TimeOut,TraceEnable,TransferLog,TypesConfig,UnDefine,UndefMacro,UnsetEnv,Use,UseCanonicalName,UseCanonicalPhysicalPort,User,UserDir,VHostCGIMode,VHostCGIPrivs,VHostGroup,VHostPrivs,VHostSecure,VHostUser,VirtualDocumentRoot,VirtualDocumentRootIP,VirtualScriptAlias,VirtualScriptAliasIP,WatchdogInterval,XBitHack,xml2EncAlias,xml2EncDefault,xml2StartParse"] |
| var CONFIG_OPTIONS = /^[\\+\\-]?(AuthConfig|IncludesNOEXEC|ExecCGI|FollowSymLinks|MultiViews|Includes|Indexes|SymLinksIfOwnerMatch)\b/i; |
| var ALL_KEYWORDS = [ |
| CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS + |
| PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS, CONFIG_KEYWORDS, PHP_KEYWORDS]; |
| var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float|char|void|const|static|struct)\d*(_t)?\b)|[a-z_]+_rec|cmd_parms\b/; |
| |
| // token style names. correspond to css classes |
| /** |
| * token style for a string literal |
| * @const |
| */ |
| var PR_STRING = 'str'; |
| /** |
| * token style for a keyword |
| * @const |
| */ |
| var PR_KEYWORD = 'kwd'; |
| /** |
| * token style for a comment |
| * @const |
| */ |
| var PR_COMMENT = 'com'; |
| /** |
| * token style for a type |
| * @const |
| */ |
| var PR_TYPE = 'typ'; |
| /** |
| * token style for a literal value. e.g. 1, null, true. |
| * @const |
| */ |
| var PR_LITERAL = 'lit'; |
| /** |
| * token style for a punctuation string. |
| * @const |
| */ |
| var PR_PUNCTUATION = 'pun'; |
| /** |
| * token style for plain text. |
| * @const |
| */ |
| var PR_PLAIN = 'pln'; |
| |
| /** |
| * token style for an sgml tag. |
| * @const |
| */ |
| var PR_TAG = 'tag'; |
| /** |
| * token style for a markup declaration such as a DOCTYPE. |
| * @const |
| */ |
| var PR_DECLARATION = 'dec'; |
| /** |
| * token style for embedded source. |
| * @const |
| */ |
| var PR_SOURCE = 'src'; |
| /** |
| * token style for an sgml attribute name. |
| * @const |
| */ |
| var PR_ATTRIB_NAME = 'atn'; |
| /** |
| * token style for an sgml attribute value. |
| * @const |
| */ |
| var PR_ATTRIB_VALUE = 'atv'; |
| |
| /** |
| * A class that indicates a section of markup that is not code, e.g. to allow |
| * embedding of line numbers within code listings. |
| * @const |
| */ |
| var PR_NOCODE = 'nocode'; |
| |
| |
| |
| /** |
| * A set of tokens that can precede a regular expression literal in |
| * javascript |
| * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html |
| * has the full list, but I've removed ones that might be problematic when |
| * seen in languages that don't support regular expression literals. |
| * |
| * <p>Specifically, I've removed any keywords that can't precede a regexp |
| * literal in a syntactically legal javascript program, and I've removed the |
| * "in" keyword since it's not a keyword in many languages, and might be used |
| * as a count of inches. |
| * |
| * <p>The link above does not accurately describe EcmaScript rules since |
| * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works |
| * very well in practice. |
| * |
| * @private |
| * @const |
| */ |
| var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[+\\-]=|->|\\/=?|::?|<<?=?|>>?>?=?|,|;|\\?|@|\\[|~|{|\\^\\^?=?|\\|\\|?=?|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*'; |
| |
| // CAVEAT: this does not properly handle the case where a regular |
| // expression immediately follows another since a regular expression may |
| // have flags for case-sensitivity and the like. Having regexp tokens |
| // adjacent is not valid in any language I'm aware of, so I'm punting. |
| // TODO: maybe style special characters inside a regexp as punctuation. |
| |
| |
| /** |
| * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally |
| * matches the union of the sets of strings matched by the input RegExp. |
| * Since it matches globally, if the input strings have a start-of-input |
| * anchor (/^.../), it is ignored for the purposes of unioning. |
| * @param {Array.<RegExp>} regexs non multiline, non-global regexs. |
| * @return {RegExp} a global regex. |
| */ |
| function combinePrefixPatterns(regexs) { |
| var capturedGroupIndex = 0; |
| |
| var needToFoldCase = false; |
| var ignoreCase = false; |
| for (var i = 0, n = regexs.length; i < n; ++i) { |
| var regex = regexs[i]; |
| if (regex.ignoreCase) { |
| ignoreCase = true; |
| } else if (/[a-z]/i.test(regex.source.replace( |
| /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) { |
| needToFoldCase = true; |
| ignoreCase = false; |
| break; |
| } |
| } |
| |
| var escapeCharToCodeUnit = { |
| 'b': 8, |
| 't': 9, |
| 'n': 0xa, |
| 'v': 0xb, |
| 'f': 0xc, |
| 'r': 0xd |
| }; |
| |
| function decodeEscape(charsetPart) { |
| var cc0 = charsetPart.charCodeAt(0); |
| if (cc0 !== 92 /* \\ */) { |
| return cc0; |
| } |
| var c1 = charsetPart.charAt(1); |
| cc0 = escapeCharToCodeUnit[c1]; |
| if (cc0) { |
| return cc0; |
| } else if ('0' <= c1 && c1 <= '7') { |
| return parseInt(charsetPart.substring(1), 8); |
| } else if (c1 === 'u' || c1 === 'x') { |
| return parseInt(charsetPart.substring(2), 16); |
| } else { |
| return charsetPart.charCodeAt(1); |
| } |
| } |
| |
| function encodeEscape(charCode) { |
| if (charCode < 0x20) { |
| return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16); |
| } |
| var ch = String.fromCharCode(charCode); |
| return (ch === '\\' || ch === '-' || ch === ']' || ch === '^') |
| ? "\\" + ch : ch; |
| } |
| |
| function caseFoldCharset(charSet) { |
| var charsetParts = charSet.substring(1, charSet.length - 1).match( |
| new RegExp( |
| '\\\\u[0-9A-Fa-f]{4}' |
| + '|\\\\x[0-9A-Fa-f]{2}' |
| + '|\\\\[0-3][0-7]{0,2}' |
| + '|\\\\[0-7]{1,2}' |
| + '|\\\\[\\s\\S]' |
| + '|-' |
| + '|[^-\\\\]', |
| 'g')); |
| var ranges = []; |
| var inverse = charsetParts[0] === '^'; |
| |
| var out = ['[']; |
| if (inverse) { out.push('^'); } |
| |
| for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) { |
| var p = charsetParts[i]; |
| if (/\\[bdsw]/i.test(p)) { // Don't muck with named groups. |
| out.push(p); |
| } else { |
| var start = decodeEscape(p); |
| var end; |
| if (i + 2 < n && '-' === charsetParts[i + 1]) { |
| end = decodeEscape(charsetParts[i + 2]); |
| i += 2; |
| } else { |
| end = start; |
| } |
| ranges.push([start, end]); |
| // If the range might intersect letters, then expand it. |
| // This case handling is too simplistic. |
| // It does not deal with non-latin case folding. |
| // It works for latin source code identifiers though. |
| if (!(end < 65 || start > 122)) { |
| if (!(end < 65 || start > 90)) { |
| ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]); |
| } |
| if (!(end < 97 || start > 122)) { |
| ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]); |
| } |
| } |
| } |
| } |
| |
| // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]] |
| // -> [[1, 12], [14, 14], [16, 17]] |
| ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); }); |
| var consolidatedRanges = []; |
| var lastRange = []; |
| for (var i = 0; i < ranges.length; ++i) { |
| var range = ranges[i]; |
| if (range[0] <= lastRange[1] + 1) { |
| lastRange[1] = Math.max(lastRange[1], range[1]); |
| } else { |
| consolidatedRanges.push(lastRange = range); |
| } |
| } |
| |
| for (var i = 0; i < consolidatedRanges.length; ++i) { |
| var range = consolidatedRanges[i]; |
| out.push(encodeEscape(range[0])); |
| if (range[1] > range[0]) { |
| if (range[1] + 1 > range[0]) { out.push('-'); } |
| out.push(encodeEscape(range[1])); |
| } |
| } |
| out.push(']'); |
| return out.join(''); |
| } |
| |
| function allowAnywhereFoldCaseAndRenumberGroups(regex) { |
| // Split into character sets, escape sequences, punctuation strings |
| // like ('(', '(?:', ')', '^'), and runs of characters that do not |
| // include any of the above. |
| var parts = regex.source.match( |
| new RegExp( |
| '(?:' |
| + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set |
| + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape |
| + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape |
| + '|\\\\[0-9]+' // a back-reference or octal escape |
| + '|\\\\[^ux0-9]' // other escape sequence |
| + '|\\(\\?[:!=]' // start of a non-capturing group |
| + '|[\\(\\)\\^]' // start/end of a group, or line start |
| + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters |
| + ')', |
| 'g')); |
| var n = parts.length; |
| |
| // Maps captured group numbers to the number they will occupy in |
| // the output or to -1 if that has not been determined, or to |
| // undefined if they need not be capturing in the output. |
| var capturedGroups = []; |
| |
| // Walk over and identify back references to build the capturedGroups |
| // mapping. |
| for (var i = 0, groupIndex = 0; i < n; ++i) { |
| var p = parts[i]; |
| if (p === '(') { |
| // groups are 1-indexed, so max group index is count of '(' |
| ++groupIndex; |
| } else if ('\\' === p.charAt(0)) { |
| var decimalValue = +p.substring(1); |
| if (decimalValue) { |
| if (decimalValue <= groupIndex) { |
| capturedGroups[decimalValue] = -1; |
| } else { |
| // Replace with an unambiguous escape sequence so that |
| // an octal escape sequence does not turn into a backreference |
| // to a capturing group from an earlier regex. |
| parts[i] = encodeEscape(decimalValue); |
| } |
| } |
| } |
| } |
| |
| // Renumber groups and reduce capturing groups to non-capturing groups |
| // where possible. |
| for (var i = 1; i < capturedGroups.length; ++i) { |
| if (-1 === capturedGroups[i]) { |
| capturedGroups[i] = ++capturedGroupIndex; |
| } |
| } |
| for (var i = 0, groupIndex = 0; i < n; ++i) { |
| var p = parts[i]; |
| if (p === '(') { |
| ++groupIndex; |
| if (!capturedGroups[groupIndex]) { |
| parts[i] = '(?:'; |
| } |
| } else if ('\\' === p.charAt(0)) { |
| var decimalValue = +p.substring(1); |
| if (decimalValue && decimalValue <= groupIndex) { |
| parts[i] = '\\' + capturedGroups[decimalValue]; |
| } |
| } |
| } |
| |
| // Remove any prefix anchors so that the output will match anywhere. |
| // ^^ really does mean an anchored match though. |
| for (var i = 0; i < n; ++i) { |
| if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; } |
| } |
| |
| // Expand letters to groups to handle mixing of case-sensitive and |
| // case-insensitive patterns if necessary. |
| if (regex.ignoreCase && needToFoldCase) { |
| for (var i = 0; i < n; ++i) { |
| var p = parts[i]; |
| var ch0 = p.charAt(0); |
| if (p.length >= 2 && ch0 === '[') { |
| parts[i] = caseFoldCharset(p); |
| } else if (ch0 !== '\\') { |
| // TODO: handle letters in numeric escapes. |
| parts[i] = p.replace( |
| /[a-zA-Z]/g, |
| function (ch) { |
| var cc = ch.charCodeAt(0); |
| return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']'; |
| }); |
| } |
| } |
| } |
| |
| return parts.join(''); |
| } |
| |
| var rewritten = []; |
| for (var i = 0, n = regexs.length; i < n; ++i) { |
| var regex = regexs[i]; |
| if (regex.global || regex.multiline) { throw new Error('' + regex); } |
| rewritten.push( |
| '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')'); |
| } |
| |
| return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g'); |
| } |
| |
| |
| /** |
| * Split markup into a string of source code and an array mapping ranges in |
| * that string to the text nodes in which they appear. |
| * |
| * <p> |
| * The HTML DOM structure:</p> |
| * <pre> |
| * (Element "p" |
| * (Element "b" |
| * (Text "print ")) ; #1 |
| * (Text "'Hello '") ; #2 |
| * (Element "br") ; #3 |
| * (Text " + 'World';")) ; #4 |
| * </pre> |
| * <p> |
| * corresponds to the HTML |
| * {@code <p><b>print </b>'Hello '<br> + 'World';</p>}.</p> |
| * |
| * <p> |
| * It will produce the output:</p> |
| * <pre> |
| * { |
| * sourceCode: "print 'Hello '\n + 'World';", |
| * // 1 2 |
| * // 012345678901234 5678901234567 |
| * spans: [0, #1, 6, #2, 14, #3, 15, #4] |
| * } |
| * </pre> |
| * <p> |
| * where #1 is a reference to the {@code "print "} text node above, and so |
| * on for the other text nodes. |
| * </p> |
| * |
| * <p> |
| * The {@code} spans array is an array of pairs. Even elements are the start |
| * indices of substrings, and odd elements are the text nodes (or BR elements) |
| * that contain the text for those substrings. |
| * Substrings continue until the next index or the end of the source. |
| * </p> |
| * |
| * @param {Node} node an HTML DOM subtree containing source-code. |
| * @param {boolean} isPreformatted true if white-space in text nodes should |
| * be considered significant. |
| * @return {Object} source code and the text nodes in which they occur. |
| */ |
| function extractSourceSpans(node, isPreformatted) { |
| var nocode = /(?:^|\s)nocode(?:\s|$)/; |
| |
| var chunks = []; |
| var length = 0; |
| var spans = []; |
| var k = 0; |
| |
| function walk(node) { |
| switch (node.nodeType) { |
| case 1: // Element |
| if (nocode.test(node.className)) { return; } |
| for (var child = node.firstChild; child; child = child.nextSibling) { |
| walk(child); |
| } |
| var nodeName = node.nodeName.toLowerCase(); |
| if ('br' === nodeName || 'li' === nodeName) { |
| chunks[k] = '\n'; |
| spans[k << 1] = length++; |
| spans[(k++ << 1) | 1] = node; |
| } |
| break; |
| case 3: case 4: // Text |
| var text = node.nodeValue; |
| if (text.length) { |
| if (!isPreformatted) { |
| text = text.replace(/[ \t\r\n]+/g, ' '); |
| } else { |
| text = text.replace(/\r\n?/g, '\n'); // Normalize newlines. |
| text = text.replace(/^(\r?\n\s*)+/g, ''); // Remove leading newlines |
| text = text.replace(/^\s*/g, ''); // Remove leading spaces due to indented formatting |
| text = text.replace(/(\r?\n\s*)+$/g, ''); // Remove ending newlines |
| |
| } |
| // TODO: handle tabs here? |
| chunks[k] = text; |
| spans[k << 1] = length; |
| length += text.length; |
| spans[(k++ << 1) | 1] = node; |
| } |
| break; |
| } |
| } |
| |
| walk(node); |
| |
| return { |
| sourceCode: chunks.join('').replace(/\n$/, ''), |
| spans: spans |
| }; |
| } |
| |
| |
| /** |
| * Apply the given language handler to sourceCode and add the resulting |
| * decorations to out. |
| * @param {number} basePos the index of sourceCode within the chunk of source |
| * whose decorations are already present on out. |
| */ |
| function appendDecorations(basePos, sourceCode, langHandler, out) { |
| if (!sourceCode) { return; } |
| var job = { |
| sourceCode: sourceCode, |
| basePos: basePos |
| }; |
| langHandler(job); |
| out.push.apply(out, job.decorations); |
| } |
| |
| var notWs = /\S/; |
| |
| /** |
| * Given an element, if it contains only one child element and any text nodes |
| * it contains contain only space characters, return the sole child element. |
| * Otherwise returns undefined. |
| * <p> |
| * This is meant to return the CODE element in {@code <pre><code ...>} when |
| * there is a single child element that contains all the non-space textual |
| * content, but not to return anything where there are multiple child elements |
| * as in {@code <pre><code>...</code><code>...</code></pre>} or when there |
| * is textual content. |
| */ |
| function childContentWrapper(element) { |
| var wrapper = undefined; |
| for (var c = element.firstChild; c; c = c.nextSibling) { |
| var type = c.nodeType; |
| wrapper = (type === 1) // Element Node |
| ? (wrapper ? element : c) |
| : (type === 3) // Text Node |
| ? (notWs.test(c.nodeValue) ? element : wrapper) |
| : wrapper; |
| } |
| return wrapper === element ? undefined : wrapper; |
| } |
| |
| /** Given triples of [style, pattern, context] returns a lexing function, |
| * The lexing function interprets the patterns to find token boundaries and |
| * returns a decoration list of the form |
| * [index_0, style_0, index_1, style_1, ..., index_n, style_n] |
| * where index_n is an index into the sourceCode, and style_n is a style |
| * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to |
| * all characters in sourceCode[index_n-1:index_n]. |
| * |
| * The stylePatterns is a list whose elements have the form |
| * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. |
| * |
| * Style is a style constant like PR_PLAIN, or can be a string of the |
| * form 'lang-FOO', where FOO is a language extension describing the |
| * language of the portion of the token in $1 after pattern executes. |
| * E.g., if style is 'lang-lisp', and group 1 contains the text |
| * '(hello (world))', then that portion of the token will be passed to the |
| * registered lisp handler for formatting. |
| * The text before and after group 1 will be restyled using this decorator |
| * so decorators should take care that this doesn't result in infinite |
| * recursion. For example, the HTML lexer rule for SCRIPT elements looks |
| * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match |
| * '<script>foo()<\/script>', which would cause the current decorator to |
| * be called with '<script>' which would not match the same rule since |
| * group 1 must not be empty, so it would be instead styled as PR_TAG by |
| * the generic tag rule. The handler registered for the 'js' extension would |
| * then be called with 'foo()', and finally, the current decorator would |
| * be called with '<\/script>' which would not match the original rule and |
| * so the generic tag rule would identify it as a tag. |
| * |
| * Pattern must only match prefixes, and if it matches a prefix, then that |
| * match is considered a token with the same style. |
| * |
| * Context is applied to the last non-whitespace, non-comment token |
| * recognized. |
| * |
| * Shortcut is an optional string of characters, any of which, if the first |
| * character, gurantee that this pattern and only this pattern matches. |
| * |
| * @param {Array} shortcutStylePatterns patterns that always start with |
| * a known character. Must have a shortcut string. |
| * @param {Array} fallthroughStylePatterns patterns that will be tried in |
| * order if the shortcut ones fail. May have shortcuts. |
| * |
| * @return {function (Object)} a |
| * function that takes source code and returns a list of decorations. |
| */ |
| function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) { |
| var shortcuts = {}; |
| var tokenizer; |
| (function () { |
| var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns); |
| var allRegexs = []; |
| var regexKeys = {}; |
| for (var i = 0, n = allPatterns.length; i < n; ++i) { |
| var patternParts = allPatterns[i]; |
| var shortcutChars = patternParts[3]; |
| if (shortcutChars) { |
| for (var c = shortcutChars.length; --c >= 0;) { |
| shortcuts[shortcutChars.charAt(c)] = patternParts; |
| } |
| } |
| var regex = patternParts[1]; |
| var k = '' + regex; |
| if (!regexKeys.hasOwnProperty(k)) { |
| allRegexs.push(regex); |
| regexKeys[k] = null; |
| } |
| } |
| allRegexs.push(/[\0-\uffff]/); |
| tokenizer = combinePrefixPatterns(allRegexs); |
| })(); |
| |
| var nPatterns = fallthroughStylePatterns.length; |
| |
| /** |
| * Lexes job.sourceCode and produces an output array job.decorations of |
| * style classes preceded by the position at which they start in |
| * job.sourceCode in order. |
| * |
| * @param {Object} job an object like <pre>{ |
| * sourceCode: {string} sourceText plain text, |
| * basePos: {int} position of job.sourceCode in the larger chunk of |
| * sourceCode. |
| * }</pre> |
| */ |
| var decorate = function (job) { |
| var sourceCode = job.sourceCode, basePos = job.basePos; |
| /** Even entries are positions in source in ascending order. Odd enties |
| * are style markers (e.g., PR_COMMENT) that run from that position until |
| * the end. |
| * @type {Array.<number|string>} |
| */ |
| var decorations = [basePos, PR_PLAIN]; |
| var pos = 0; // index into sourceCode |
| var tokens = sourceCode.match(tokenizer) || []; |
| var styleCache = {}; |
| |
| for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) { |
| var token = tokens[ti]; |
| var style = styleCache[token]; |
| var match = void 0; |
| |
| var isEmbedded; |
| if (typeof style === 'string') { |
| isEmbedded = false; |
| } else { |
| var patternParts = shortcuts[token.charAt(0)]; |
| if (patternParts) { |
| match = token.match(patternParts[1]); |
| style = patternParts[0]; |
| } else { |
| for (var i = 0; i < nPatterns; ++i) { |
| patternParts = fallthroughStylePatterns[i]; |
| match = token.match(patternParts[1]); |
| if (match) { |
| style = patternParts[0]; |
| break; |
| } |
| } |
| |
| if (!match) { // make sure that we make progress |
| style = PR_PLAIN; |
| } |
| } |
| |
| isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5); |
| if (isEmbedded && !(match && typeof match[1] === 'string')) { |
| isEmbedded = false; |
| style = PR_SOURCE; |
| } |
| |
| if (!isEmbedded) { styleCache[token] = style; } |
| } |
| |
| var tokenStart = pos; |
| pos += token.length; |
| |
| if (!isEmbedded) { |
| decorations.push(basePos + tokenStart, style); |
| } else { // Treat group 1 as an embedded block of source code. |
| var embeddedSource = match[1]; |
| var embeddedSourceStart = token.indexOf(embeddedSource); |
| var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length; |
| if (match[2]) { |
| // If embeddedSource can be blank, then it would match at the |
| // beginning which would cause us to infinitely recurse on the |
| // entire token, so we catch the right context in match[2]. |
| embeddedSourceEnd = token.length - match[2].length; |
| embeddedSourceStart = embeddedSourceEnd - embeddedSource.length; |
| } |
| var lang = style.substring(5); |
| // Decorate the left of the embedded source |
| appendDecorations( |
| basePos + tokenStart, |
| token.substring(0, embeddedSourceStart), |
| decorate, decorations); |
| // Decorate the embedded source |
| appendDecorations( |
| basePos + tokenStart + embeddedSourceStart, |
| embeddedSource, |
| langHandlerForExtension(lang, embeddedSource), |
| decorations); |
| // Decorate the right of the embedded section |
| appendDecorations( |
| basePos + tokenStart + embeddedSourceEnd, |
| token.substring(embeddedSourceEnd), |
| decorate, decorations); |
| } |
| } |
| job.decorations = decorations; |
| }; |
| return decorate; |
| } |
| |
| /** returns a function that produces a list of decorations from source text. |
| * |
| * This code treats ", ', and ` as string delimiters, and \ as a string |
| * escape. It does not recognize perl's qq() style strings. |
| * It has no special handling for double delimiter escapes as in basic, or |
| * the tripled delimiters used in python, but should work on those regardless |
| * although in those cases a single string literal may be broken up into |
| * multiple adjacent string literals. |
| * |
| * It recognizes C, C++, and shell style comments. |
| * |
| * @param {Object} options a set of optional parameters. |
| * @return {function (Object)} a function that examines the source code |
| * in the input job and builds the decoration list. |
| */ |
| function sourceDecorator(options) { |
| var shortcutStylePatterns = [], fallthroughStylePatterns = []; |
| if (options['tripleQuotedStrings']) { |
| // '''multi-line-string''', 'single-line-string', and double-quoted |
| shortcutStylePatterns.push( |
| [PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/, |
| null, '\'"']); |
| } else if (options['multiLineStrings']) { |
| // 'multi-line-string', "multi-line-string" |
| shortcutStylePatterns.push( |
| [PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/, |
| null, '\'"`']); |
| } else { |
| // 'single-line-string', "single-line-string" |
| shortcutStylePatterns.push( |
| [PR_STRING, |
| /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/, |
| null, '"\'']); |
| } |
| if (options['verbatimStrings']) { |
| // verbatim-string-literal production from the C# grammar. See issue 93. |
| fallthroughStylePatterns.push( |
| [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]); |
| } |
| var hc = options['hashComments']; |
| if (hc) { |
| if (options['cStyleComments']) { |
| if (hc > 1) { // multiline hash comments |
| shortcutStylePatterns.push( |
| [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']); |
| } else { |
| // Stop C preprocessor declarations at an unclosed open comment |
| shortcutStylePatterns.push( |
| [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/, |
| null, '#']); |
| } |
| // #include <stdio.h> |
| fallthroughStylePatterns.push( |
| [PR_STRING, |
| /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/, |
| null]); |
| } else { |
| shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']); |
| } |
| } |
| if (options['cStyleComments']) { |
| fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]); |
| fallthroughStylePatterns.push( |
| [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]); |
| } |
| if (options['regexLiterals']) { |
| /** |
| * @const |
| */ |
| var REGEX_LITERAL = ( |
| // A regular expression literal starts with a slash that is |
| // not followed by * or / so that it is not confused with |
| // comments. |
| '/(?=[^/*])' |
| // and then contains any number of raw characters, |
| + '(?:[^/\\x5B\\x5C]' |
| // escape sequences (\x5C), |
| + '|\\x5C[\\s\\S]' |
| // or non-nesting character sets (\x5B\x5D); |
| + '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+' |
| // finally closed by a /. |
| + '/'); |
| fallthroughStylePatterns.push( |
| ['lang-regex', |
| new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')') |
| ]); |
| } |
| |
| var types = options['types']; |
| if (types) { |
| fallthroughStylePatterns.push([PR_TYPE, types]); |
| } |
| |
| if (options['strings']) { |
| var strings = ("" + options['strings']).replace(/^ | $/g, '').replace(/-/g, '\\-'); |
| fallthroughStylePatterns.push( |
| [PR_STRING, |
| new RegExp('(?:' + strings.replace(/[\s,]+/g, '|') + ')'), |
| , null] |
| ); |
| } |
| |
| var keywords = ("" + options['keywords']).replace(/^ | $/g, ''); |
| if (keywords.length) { |
| fallthroughStylePatterns.push( |
| [PR_KEYWORD, |
| new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'), |
| null]); |
| } |
| |
| shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']); |
| if (options['httpdComments']) { |
| fallthroughStylePatterns.push( |
| [PR_PLAIN, /^.*\S.*#/i, null] |
| ); |
| } |
| |
| fallthroughStylePatterns.push( |
| // TODO(mikesamuel): recognize non-latin letters and numerals in idents |
| [PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*|\bNULL\b/i, null], |
| [PR_LITERAL, CONFIG_OPTIONS, null], |
| //[PR_STRING, CONFIG_ENVS, null], |
| [PR_TAG, /^\b(AuthzProviderAlias|AuthnProviderAlias|RequireAny|RequireAll|RequireNone|Directory|DirectoryMatch|Location|LocationMatch|VirtualHost|If|Else|ElseIf|Proxy\b|LoadBalancer|Files|FilesMatch|Limit|LimitExcept|IfDefine|IfModule|IfVersion)\b/, null], |
| [PR_TYPE, /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_(t|req|module)\b)/, null], |
| [PR_TAG, /^apr_[a-z_0-9]+|ap_[a-z_0-9]+/i, null], |
| [PR_PLAIN, /^[a-z_$][a-z_$@0-9\-]*/i, null], |
| [PR_LITERAL, |
| new RegExp( |
| '^(?:' |
| // A hex number |
| + '0x[a-f0-9]+' |
| // An IPv6 Address |
| + '|[a-f0-9:]+:[a-f0-9:]+:[a-f0-9:]+:[a-f0-9:]+:[a-f0-9:]+:[a-f0-9:]+' |
| // or an octal or decimal number, |
| + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)' |
| // possibly in scientific notation |
| + '(?:e[+\\-]?\\d+)?' |
| + ')' |
| // with an optional modifier like UL for unsigned long |
| + '[a-z]*', 'i'), |
| null, '0123456789'], |
| // Don't treat escaped quotes in bash as starting strings. See issue 144. |
| [PR_PLAIN, /^\\[\s\S]?/, null], |
| [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]); |
| |
| return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns); |
| } |
| |
| var decorateSource = sourceDecorator({ |
| 'keywords': ALL_KEYWORDS, |
| 'hashComments': true, |
| 'cStyleComments': true, |
| 'multiLineStrings': true, |
| 'regexLiterals': true |
| }); |
| |
| /** |
| * Given a DOM subtree, wraps it in a list, and puts each line into its own |
| * list item. |
| * |
| * @param {Node} node modified in place. Its content is pulled into an |
| * HTMLOListElement, and each line is moved into a separate list item. |
| * This requires cloning elements, so the input might not have unique |
| * IDs after numbering. |
| * @param {boolean} isPreformatted true iff white-space in text nodes should |
| * be treated as significant. |
| */ |
| function numberLines(node, opt_startLineNum, isPreformatted) { |
| var nocode = /(?:^|\s)nocode(?:\s|$)/; |
| var lineBreak = /\r\n?|\n/; |
| |
| var document = node.ownerDocument; |
| |
| var li = document.createElement('li'); |
| while (node.firstChild) { |
| li.appendChild(node.firstChild); |
| } |
| // An array of lines. We split below, so this is initialized to one |
| // un-split line. |
| var listItems = [li]; |
| |
| function walk(node) { |
| switch (node.nodeType) { |
| case 1: // Element |
| if (nocode.test(node.className)) { break; } |
| if ('br' === node.nodeName) { |
| breakAfter(node); |
| // Discard the <BR> since it is now flush against a </LI>. |
| if (node.parentNode) { |
| node.parentNode.removeChild(node); |
| } |
| } else { |
| for (var child = node.firstChild; child; child = child.nextSibling) { |
| walk(child); |
| } |
| } |
| break; |
| case 3: case 4: // Text |
| if (isPreformatted) { |
| var text = node.nodeValue; |
| var match = text.match(lineBreak); |
| if (match) { |
| var firstLine = text.substring(0, match.index); |
| node.nodeValue = firstLine; |
| var tail = text.substring(match.index + match[0].length); |
| if (tail) { |
| var parent = node.parentNode; |
| parent.insertBefore( |
| document.createTextNode(tail), node.nextSibling); |
| } |
| breakAfter(node); |
| if (!firstLine) { |
| // Don't leave blank text nodes in the DOM. |
| node.parentNode.removeChild(node); |
| } |
| } |
| } |
| break; |
| } |
| } |
| |
| // Split a line after the given node. |
| function breakAfter(lineEndNode) { |
| // If there's nothing to the right, then we can skip ending the line |
| // here, and move root-wards since splitting just before an end-tag |
| // would require us to create a bunch of empty copies. |
| while (!lineEndNode.nextSibling) { |
| lineEndNode = lineEndNode.parentNode; |
| if (!lineEndNode) { return; } |
| } |
| |
| function breakLeftOf(limit, copy) { |
| // Clone shallowly if this node needs to be on both sides of the break. |
| var rightSide = copy ? limit.cloneNode(false) : limit; |
| var parent = limit.parentNode; |
| if (parent) { |
| // We clone the parent chain. |
| // This helps us resurrect important styling elements that cross lines. |
| // E.g. in <i>Foo<br>Bar</i> |
| // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>. |
| var parentClone = breakLeftOf(parent, 1); |
| // Move the clone and everything to the right of the original |
| // onto the cloned parent. |
| var next = limit.nextSibling; |
| parentClone.appendChild(rightSide); |
| for (var sibling = next; sibling; sibling = next) { |
| next = sibling.nextSibling; |
| parentClone.appendChild(sibling); |
| } |
| } |
| return rightSide; |
| } |
| |
| var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0); |
| |
| // Walk the parent chain until we reach an unattached LI. |
| for (var parent; |
| // Check nodeType since IE invents document fragments. |
| (parent = copiedListItem.parentNode) && parent.nodeType === 1;) { |
| copiedListItem = parent; |
| } |
| // Put it on the list of lines for later processing. |
| listItems.push(copiedListItem); |
| } |
| |
| // Split lines while there are lines left to split. |
| for (var i = 0; // Number of lines that have been split so far. |
| i < listItems.length; // length updated by breakAfter calls. |
| ++i) { |
| walk(listItems[i]); |
| } |
| |
| // Make sure numeric indices show correctly. |
| if (opt_startLineNum === (opt_startLineNum|0)) { |
| listItems[0].setAttribute('value', opt_startLineNum); |
| } |
| |
| var ol = document.createElement('ol'); |
| ol.className = 'linenums'; |
| var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0; |
| for (var i = 0, n = listItems.length; i < n; ++i) { |
| li = listItems[i]; |
| // Stick a class on the LIs so that stylesheets can |
| // color odd/even rows, or any other row pattern that |
| // is co-prime with 10. |
| li.className = 'L' + ((i + offset) % 1); |
| if (!li.firstChild) { |
| li.appendChild(document.createTextNode('\xA0')); |
| } |
| ol.appendChild(li); |
| } |
| |
| node.appendChild(ol); |
| } |
| |
| /** |
| * Breaks {@code job.sourceCode} around style boundaries in |
| * {@code job.decorations} and modifies {@code job.sourceNode} in place. |
| * @param {Object} job like <pre>{ |
| * sourceCode: {string} source as plain text, |
| * spans: {Array.<number|Node>} alternating span start indices into source |
| * and the text node or element (e.g. {@code <BR>}) corresponding to that |
| * span. |
| * decorations: {Array.<number|string} an array of style classes preceded |
| * by the position at which they start in job.sourceCode in order |
| * }</pre> |
| * @private |
| */ |
| function recombineTagsAndDecorations(job) { |
| var isIE8OrEarlier = /\bMSIE\s(\d+)/.exec(navigator.userAgent); |
| isIE8OrEarlier = isIE8OrEarlier && +isIE8OrEarlier[1] <= 8; |
| var newlineRe = /\n/g; |
| |
| var source = job.sourceCode; |
| var sourceLength = source.length; |
| // Index into source after the last code-unit recombined. |
| var sourceIndex = 0; |
| |
| var spans = job.spans; |
| var nSpans = spans.length; |
| // Index into spans after the last span which ends at or before sourceIndex. |
| var spanIndex = 0; |
| |
| var decorations = job.decorations; |
| var nDecorations = decorations.length; |
| // Index into decorations after the last decoration which ends at or before |
| // sourceIndex. |
| var decorationIndex = 0; |
| |
| // Remove all zero-length decorations. |
| decorations[nDecorations] = sourceLength; |
| var decPos, i; |
| for (i = decPos = 0; i < nDecorations;) { |
| if (decorations[i] !== decorations[i + 2]) { |
| decorations[decPos++] = decorations[i++]; |
| decorations[decPos++] = decorations[i++]; |
| } else { |
| i += 2; |
| } |
| } |
| nDecorations = decPos; |
| |
| // Simplify decorations. |
| for (i = decPos = 0; i < nDecorations;) { |
| var startPos = decorations[i]; |
| // Conflate all adjacent decorations that use the same style. |
| var startDec = decorations[i + 1]; |
| var end = i + 2; |
| while (end + 2 <= nDecorations && decorations[end + 1] === startDec) { |
| end += 2; |
| } |
| decorations[decPos++] = startPos; |
| decorations[decPos++] = startDec; |
| i = end; |
| } |
| |
| nDecorations = decorations.length = decPos; |
| |
| var sourceNode = job.sourceNode; |
| var oldDisplay; |
| if (sourceNode) { |
| oldDisplay = sourceNode.style.display; |
| sourceNode.style.display = 'none'; |
| } |
| try { |
| var decoration = null; |
| var X = 0; |
| while (spanIndex < nSpans) { |
| X = X + 1; |
| if (X > 5000) { break; } |
| var spanStart = spans[spanIndex]; |
| var spanEnd = spans[spanIndex + 2] || sourceLength; |
| |
| var decEnd = decorations[decorationIndex + 2] || sourceLength; |
| |
| var end = Math.min(spanEnd, decEnd); |
| |
| var textNode = spans[spanIndex + 1]; |
| var styledText; |
| if (textNode.nodeType !== 1 // Don't muck with <BR>s or <LI>s |
| // Don't introduce spans around empty text nodes. |
| && (styledText = source.substring(sourceIndex, end))) { |
| // This may seem bizarre, and it is. Emitting LF on IE causes the |
| // code to display with spaces instead of line breaks. |
| // Emitting Windows standard issue linebreaks (CRLF) causes a blank |
| // space to appear at the beginning of every line but the first. |
| // Emitting an old Mac OS 9 line separator makes everything spiffy. |
| if (isIE8OrEarlier) { |
| styledText = styledText.replace(newlineRe, '\r'); |
| } |
| textNode.nodeValue = styledText; |
| var document = textNode.ownerDocument; |
| var span = document.createElement('span'); |
| span.className = decorations[decorationIndex + 1]; |
| var parentNode = textNode.parentNode; |
| parentNode.replaceChild(span, textNode); |
| span.appendChild(textNode); |
| if (sourceIndex < spanEnd) { // Split off a text node. |
| spans[spanIndex + 1] = textNode |
| // TODO: Possibly optimize by using '' if there's no flicker. |
| = document.createTextNode(source.substring(end, spanEnd)); |
| parentNode.insertBefore(textNode, span.nextSibling); |
| } |
| } |
| |
| sourceIndex = end; |
| |
| if (sourceIndex >= spanEnd) { |
| spanIndex += 2; |
| } |
| if (sourceIndex >= decEnd) { |
| decorationIndex += 2; |
| } |
| } |
| } finally { |
| if (sourceNode) { |
| sourceNode.style.display = oldDisplay; |
| } |
| } |
| } |
| |
| |
| /** Maps language-specific file extensions to handlers. */ |
| var langHandlerRegistry = {}; |
| /** Register a language handler for the given file extensions. |
| * @param {function (Object)} handler a function from source code to a list |
| * of decorations. Takes a single argument job which describes the |
| * state of the computation. The single parameter has the form |
| * {@code { |
| * sourceCode: {string} as plain text. |
| * decorations: {Array.<number|string>} an array of style classes |
| * preceded by the position at which they start in |
| * job.sourceCode in order. |
| * The language handler should assigned this field. |
| * basePos: {int} the position of source in the larger source chunk. |
| * All positions in the output decorations array are relative |
| * to the larger source chunk. |
| * } } |
| * @param {Array.<string>} fileExtensions |
| */ |
| function registerLangHandler(handler, fileExtensions) { |
| for (var i = fileExtensions.length; --i >= 0;) { |
| var ext = fileExtensions[i]; |
| if (!langHandlerRegistry.hasOwnProperty(ext)) { |
| langHandlerRegistry[ext] = handler; |
| } else if (win['console']) { |
| console['warn']('cannot override language handler %s', ext); |
| } |
| } |
| } |
| function langHandlerForExtension(extension, source) { |
| if (!(extension && langHandlerRegistry.hasOwnProperty(extension))) { |
| // Treat it as markup if the first non whitespace character is a < and |
| // the last non-whitespace character is a >. |
| extension = /^\s*</.test(source) |
| ? 'default-markup' |
| : 'default-code'; |
| } |
| return langHandlerRegistry[extension]; |
| } |
| registerLangHandler(decorateSource, ['default-code']); |
| registerLangHandler( |
| createSimpleLexer( |
| [], |
| [ |
| [PR_PLAIN, /^[^<?]+/], |
| [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/], |
| [PR_COMMENT, /^<\!--[\s\S]*?(?:-\->|$)/], |
| // Unescaped content in an unknown language |
| ['lang-', /^<\?([\s\S]+?)(?:\?>|$)/], |
| ['lang-', /^<%([\s\S]+?)(?:%>|$)/], |
| [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/], |
| ['lang-', /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i], |
| // Unescaped content in javascript. (Or possibly vbscript). |
| ['lang-js', /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i], |
| // Contains unescaped stylesheet content |
| ['lang-css', /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i], |
| ['lang-in.tag', /^(<\/?[a-z][^<>]*>)/i] |
| ]), |
| ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']); |
| registerLangHandler( |
| createSimpleLexer( |
| [ |
| [PR_PLAIN, /^[\s]+/, null, ' \t\r\n'], |
| [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\''] |
| ], |
| [ |
| [PR_TAG, /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i], |
| [PR_ATTRIB_NAME, /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i], |
| ['lang-uq.val', /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/], |
| [PR_PUNCTUATION, /^[=<>\/]+/], |
| ['lang-js', /^on\w+\s*=\s*\"([^\"]+)\"/i], |
| ['lang-js', /^on\w+\s*=\s*\'([^\']+)\'/i], |
| ['lang-js', /^on\w+\s*=\s*([^\"\'>\s]+)/i], |
| ['lang-css', /^style\s*=\s*\"([^\"]+)\"/i], |
| ['lang-css', /^style\s*=\s*\'([^\']+)\'/i], |
| ['lang-css', /^style\s*=\s*([^\"\'>\s]+)/i] |
| ]), |
| ['in.tag']); |
| registerLangHandler( |
| createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': CPP_KEYWORDS, |
| 'hashComments': true, |
| 'cStyleComments': true, |
| 'types': C_TYPES |
| }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': PHP_KEYWORDS, |
| 'hashComments': false, |
| 'cStyleComments': true, |
| 'multiLineStrings': true, |
| 'regexLiterals': true |
| // 'types': C_TYPES, |
| }), ['php', 'phtml', 'inc']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': 'null,true,false' |
| }), ['json']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': CSHARP_KEYWORDS, |
| 'hashComments': true, |
| 'cStyleComments': true, |
| 'verbatimStrings': true, |
| 'types': C_TYPES |
| }), ['cs']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': JAVA_KEYWORDS, |
| 'cStyleComments': true |
| }), ['java']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': SH_KEYWORDS, |
| 'hashComments': true, |
| 'multiLineStrings': true |
| }), ['bsh', 'csh', 'sh']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': PYTHON_KEYWORDS, |
| 'hashComments': true, |
| 'multiLineStrings': true, |
| 'tripleQuotedStrings': true |
| }), ['cv', 'py']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': PERL_KEYWORDS, |
| 'hashComments': true, |
| 'multiLineStrings': true, |
| 'regexLiterals': true |
| }), ['perl', 'pl', 'pm']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': RUBY_KEYWORDS, |
| 'hashComments': true, |
| 'multiLineStrings': true, |
| 'regexLiterals': true |
| }), ['rb']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': JSCRIPT_KEYWORDS, |
| 'cStyleComments': true, |
| 'regexLiterals': true |
| }), ['js']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': COFFEE_KEYWORDS, |
| 'hashComments': 3, // ### style block comments |
| 'cStyleComments': true, |
| 'multilineStrings': true, |
| 'tripleQuotedStrings': true, |
| 'regexLiterals': true |
| }), ['coffee']); |
| registerLangHandler( |
| createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']); |
| registerLangHandler(sourceDecorator({ |
| 'keywords': CONFIG_KEYWORDS, |
| 'literals': CONFIG_OPTIONS, |
| 'strings': CONFIG_ENVS, |
| 'hashComments': true, |
| 'cStyleComments': false, |
| 'multiLineStrings': false, |
| 'regexLiterals': false, |
| 'httpdComments': true |
| }), ['config']); |
| |
| function applyDecorator(job) { |
| var opt_langExtension = job.langExtension; |
| |
| try { |
| // Extract tags, and convert the source code to plain text. |
| var sourceAndSpans = extractSourceSpans(job.sourceNode, job.pre); |
| /** Plain text. @type {string} */ |
| var source = sourceAndSpans.sourceCode; |
| job.sourceCode = source; |
| job.spans = sourceAndSpans.spans; |
| job.basePos = 0; |
| |
| // Apply the appropriate language handler |
| langHandlerForExtension(opt_langExtension, source)(job); |
| |
| // Integrate the decorations and tags back into the source code, |
| // modifying the sourceNode in place. |
| recombineTagsAndDecorations(job); |
| } catch (e) { |
| if (win['console']) { |
| console['log'](e && e['stack'] ? e['stack'] : e); |
| } |
| } |
| } |
| |
| /** |
| * @param sourceCodeHtml {string} The HTML to pretty print. |
| * @param opt_langExtension {string} The language name to use. |
| * Typically, a filename extension like 'cpp' or 'java'. |
| * @param opt_numberLines {number|boolean} True to number lines, |
| * or the 1-indexed number of the first line in sourceCodeHtml. |
| */ |
| function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) { |
| var container = document.createElement('pre'); |
| // This could cause images to load and onload listeners to fire. |
| // E.g. <img onerror="alert(1337)" src="nosuchimage.png">. |
| // We assume that the inner HTML is from a trusted source. |
| container.innerHTML = sourceCodeHtml; |
| if (opt_numberLines) { |
| numberLines(container, opt_numberLines, true); |
| } |
| |
| var job = { |
| langExtension: opt_langExtension, |
| numberLines: opt_numberLines, |
| sourceNode: container, |
| pre: 1 |
| }; |
| applyDecorator(job); |
| return container.innerHTML; |
| } |
| |
| function prettyPrint(opt_whenDone) { |
| function byTagName(tn) { return document.getElementsByTagName(tn); } |
| // fetch a list of nodes to rewrite |
| var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')]; |
| var elements = []; |
| for (var i = 0; i < codeSegments.length; ++i) { |
| for (var j = 0, n = codeSegments[i].length; j < n; ++j) { |
| elements.push(codeSegments[i][j]); |
| } |
| } |
| codeSegments = null; |
| |
| var clock = Date; |
| if (!clock['now']) { |
| clock = { 'now': function () { return +(new Date); } }; |
| } |
| |
| // The loop is broken into a series of continuations to make sure that we |
| // don't make the browser unresponsive when rewriting a large page. |
| var k = 0; |
| var prettyPrintingJob; |
| |
| var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/; |
| var prettyPrintRe = /\bprettyprint\b/; |
| var prettyPrintedRe = /\bprettyprinted\b/; |
| var preformattedTagNameRe = /pre|xmp/i; |
| var codeRe = /^code$/i; |
| var preCodeXmpRe = /^(?:pre|code|xmp)$/i; |
| |
| function doWork() { |
| var endTime = (win['PR_SHOULD_USE_CONTINUATION'] ? |
| clock['now']() + 250 /* ms */ : |
| Infinity); |
| for (; k < elements.length && clock['now']() < endTime; k++) { |
| var cs = elements[k]; |
| var className = cs.className; |
| if (prettyPrintRe.test(className) |
| // Don't redo this if we've already done it. |
| // This allows recalling pretty print to just prettyprint elements |
| // that have been added to the page since last call. |
| && !prettyPrintedRe.test(className)) { |
| |
| // make sure this is not nested in an already prettified element |
| var nested = false; |
| for (var p = cs.parentNode; p; p = p.parentNode) { |
| var tn = p.tagName; |
| if (preCodeXmpRe.test(tn) |
| && p.className && prettyPrintRe.test(p.className)) { |
| nested = true; |
| break; |
| } |
| } |
| if (!nested) { |
| // Mark done. If we fail to prettyprint for whatever reason, |
| // we shouldn't try again. |
| cs.className += ' prettyprinted'; |
| |
| // If the classes includes a language extensions, use it. |
| // Language extensions can be specified like |
| // <pre class="prettyprint lang-cpp"> |
| // the language extension "cpp" is used to find a language handler |
| // as passed to PR.registerLangHandler. |
| // HTML5 recommends that a language be specified using "language-" |
| // as the prefix instead. Google Code Prettify supports both. |
| // http://dev.w3.org/html5/spec-author-view/the-code-element.html |
| var langExtension = className.match(langExtensionRe); |
| // Support <pre class="prettyprint"><code class="language-c"> |
| var wrapper; |
| if (!langExtension && (wrapper = childContentWrapper(cs)) |
| && codeRe.test(wrapper.tagName)) { |
| langExtension = wrapper.className.match(langExtensionRe); |
| } |
| |
| if (langExtension) { langExtension = langExtension[1]; } |
| |
| var preformatted; |
| if (preformattedTagNameRe.test(cs.tagName)) { |
| preformatted = 1; |
| } else { |
| var currentStyle = cs['currentStyle']; |
| var whitespace = ( |
| currentStyle |
| ? currentStyle['whiteSpace'] |
| : (document.defaultView |
| && document.defaultView.getComputedStyle) |
| ? document.defaultView.getComputedStyle(cs, null) |
| .getPropertyValue('white-space') |
| : 0); |
| preformatted = whitespace |
| && 'pre' === whitespace.substring(0, 3); |
| } |
| |
| // Look for a class like linenums or linenums:<n> where <n> is the |
| // 1-indexed number of the first line. |
| var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/); |
| lineNums = lineNums |
| ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true |
| : false; |
| if (lineNums) { numberLines(cs, lineNums, preformatted); } |
| |
| // do the pretty printing |
| prettyPrintingJob = { |
| langExtension: langExtension, |
| sourceNode: cs, |
| numberLines: lineNums, |
| pre: preformatted |
| }; |
| applyDecorator(prettyPrintingJob); |
| } |
| } |
| } |
| if (k < elements.length) { |
| // finish up in a continuation |
| setTimeout(doWork, 250); |
| } else if (opt_whenDone) { |
| opt_whenDone(); |
| } |
| } |
| |
| doWork(); |
| } |
| |
| /** |
| * Contains functions for creating and registering new language handlers. |
| * @type {Object} |
| */ |
| var PR = win['PR'] = { |
| 'createSimpleLexer': createSimpleLexer, |
| 'registerLangHandler': registerLangHandler, |
| 'sourceDecorator': sourceDecorator, |
| 'PR_ATTRIB_NAME': PR_ATTRIB_NAME, |
| 'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE, |
| 'PR_COMMENT': PR_COMMENT, |
| 'PR_DECLARATION': PR_DECLARATION, |
| 'PR_KEYWORD': PR_KEYWORD, |
| 'PR_LITERAL': PR_LITERAL, |
| 'PR_NOCODE': PR_NOCODE, |
| 'PR_PLAIN': PR_PLAIN, |
| 'PR_PUNCTUATION': PR_PUNCTUATION, |
| 'PR_SOURCE': PR_SOURCE, |
| 'PR_STRING': PR_STRING, |
| 'PR_TAG': PR_TAG, |
| 'PR_TYPE': PR_TYPE, |
| 'prettyPrintOne': win['prettyPrintOne'] = prettyPrintOne, |
| 'prettyPrint': win['prettyPrint'] = prettyPrint |
| }; |
| |
| |
| /* Register Lua syntaxes */ |
| PR['registerLangHandler']( |
| PR['createSimpleLexer']( |
| [ |
| // Whitespace |
| [PR['PR_PLAIN'], /^[\t\n\r \xA0]+/, null, '\t\n\r \xA0'], |
| // A double or single quoted, possibly multi-line, string. |
| [PR['PR_STRING'], /^(?:\"(?:[^\"\\]|\\[\s\S])*(?:\"|$)|\'(?:[^\'\\]|\\[\s\S])*(?:\'|$))/, null, '"\''] |
| ], |
| [ |
| // A comment is either a line comment that starts with two dashes, or |
| // two dashes preceding a long bracketed block. |
| [PR['PR_COMMENT'], /^--(?:\[(=*)\[[\s\S]*?(?:\]\1\]|$)|[^\r\n]*)/], |
| [PR['PR_TYPE'], /^nil|false|true/], |
| // A long bracketed block not preceded by -- is a string. |
| [PR['PR_STRING'], /^\[(=*)\[[\s\S]*?(?:\]\1\]|$)/], |
| [PR['PR_KEYWORD'], /^(?:and|break|do|else|elseif|end|for|function|if|in|local|not|or|repeat|require|return|then|until|while)\b/, null], |
| // A number is a hex integer literal, a decimal real literal, or in |
| // scientific notation. |
| [PR['PR_LITERAL'], |
| /^[+-]?(?:0x[\da-f]+|(?:(?:\.\d+|\d+(?:\.\d*)?)(?:e[+\-]?\d+)?))/i], |
| // An identifier |
| [PR['PR_PLAIN'], /^[a-z_]\w*/i], |
| // A run of punctuation |
| [PR['PR_PUNCTUATION'], /^[^\w\t\n\r \xA0][^\w\t\n\r \xA0\"\'\-\+=]*/] |
| ]), |
| ['lua']); |
| |
| |
| // Make PR available via the Asynchronous Module Definition (AMD) API. |
| // Per https://github.com/amdjs/amdjs-api/wiki/AMD: |
| // The Asynchronous Module Definition (AMD) API specifies a |
| // mechanism for defining modules such that the module and its |
| // dependencies can be asynchronously loaded. |
| // ... |
| // To allow a clear indicator that a global define function (as |
| // needed for script src browser loading) conforms to the AMD API, |
| // any global define function SHOULD have a property called "amd" |
| // whose value is an object. This helps avoid conflict with any |
| // other existing JavaScript code that could have defined a define() |
| // function that does not conform to the AMD API. |
| if (typeof define === "function" && define['amd']) { |
| define("google-code-prettify", [], function () { |
| return PR; |
| }); |
| } |
| })(); |