# blob: f80da2fefa10183bc95a2281527bcf8ca2f867e0 [file] [log] [blame]
# -*- coding: utf-8 -*- #
module Rouge
  module Lexers
    # Regex-driven lexer for HTML documents.
    #
    # Markup is tokenized by the states defined below. The contents of
    # <script> and <style> elements are not lexed here; they are delegated
    # to the Javascript and CSS lexers respectively.
    class HTML < RegexLexer
      title "HTML"
      desc "HTML, the markup language of the web"
      tag 'html'
      filenames '*.htm', '*.html', '*.xhtml'
      mimetypes 'text/html', 'application/xhtml+xml'

      # Content-sniffing hook.
      #
      # Returns 1 when +text+ reports an HTML doctype (via +text.doctype?+)
      # or contains an opening "<html" tag; returns nil otherwise.
      def self.analyze_text(text)
        return 1 if text.doctype?(/\bhtml\b/i)
        return 1 if text =~ /<\s*html\b/
      end

      # Top-level document content: text, entities, declarations, comments
      # and the start of tags.
      state :root do
        rule %r/[^<&]+/m, Text
        rule %r/&\S*?;/, Name::Entity
        rule %r/<!DOCTYPE .*?>/im, Comment::Preproc
        rule %r/<!\[CDATA\[.*?\]\]>/m, Comment::Preproc
        rule %r/<!--/, Comment, :comment
        rule %r/<\?.*?\?>/m, Comment::Preproc # php? really?

        # <script ...>: the content state is pushed first and :tag second,
        # so the attributes are lexed by :tag and, once :tag pops on ">",
        # :script_content becomes the active state.
        rule %r/<\s*script\s*/m do
          token Name::Tag
          push :script_content
          push :tag
        end

        # <style ...>: same stacking as for <script>.
        rule %r/<\s*style\s*/m do
          token Name::Tag
          push :style_content
          push :tag
        end

        rule %r(<\s*[a-zA-Z0-9:-]+), Name::Tag, :tag # opening tags
        rule %r(<\s*/\s*[a-zA-Z0-9:-]+\s*>), Name::Tag # closing tags
      end

      # Inside <!-- ... -->; a lone "-" that does not start "-->" is still
      # comment text.
      state :comment do
        rule %r/[^-]+/, Comment
        rule %r/-->/, Comment, :pop!
        rule %r/-/, Comment
      end

      # Inside an opening tag, after the tag name: attributes up to the
      # closing ">" or "/>".
      state :tag do
        rule %r/\s+/m, Text
        rule %r/[a-zA-Z0-9_:-]+\s*=/m, Name::Attribute, :attr
        rule %r/[a-zA-Z0-9_:-]+/, Name::Attribute
        rule %r(/?\s*>)m, Name::Tag, :pop!
      end

      # Immediately after "name=": a quoted or unquoted attribute value.
      state :attr do
        # TODO: are backslash escapes valid here?
        # The quoted cases use goto rather than push so that the closing
        # quote's :pop! returns directly to :tag.
        rule %r/"/ do
          token Str
          goto :dq
        end

        rule %r/'/ do
          token Str
          goto :sq
        end

        rule %r/[^\s>]+/, Str, :pop!
      end

      # Double-quoted attribute value.
      state :dq do
        rule %r/"/, Str, :pop!
        rule %r/[^"]+/, Str
      end

      # Single-quoted attribute value.
      state :sq do
        rule %r/'/, Str, :pop!
        rule %r/[^']+/, Str
      end

      # Body of a <script> element: everything up to the closing tag is
      # handed to the Javascript lexer.
      state :script_content do
        rule %r(<\s*/\s*script\s*>)m, Name::Tag, :pop!
        rule %r(.*?(?=<\s*/\s*script\s*>))m do
          delegate Javascript
        end
      end

      # Body of a <style> element: everything up to the closing tag is
      # handed to the CSS lexer.
      state :style_content do
        rule %r(<\s*/\s*style\s*>)m, Name::Tag, :pop!
        # The quantifier must be lazy: a greedy ".*" would run to the LAST
        # </style> in the document and lex any HTML between two <style>
        # elements as CSS.
        rule %r(.*?(?=<\s*/\s*style\s*>))m do
          delegate CSS
        end
      end
    end
  end
end