# -*- coding: utf-8 -*- #

module Rouge
  module Lexers
    # Lexer for YAML documents.
    #
    # YAML's block structure is indentation-sensitive, so in addition to the
    # regex states below the lexer keeps three pieces of state (all
    # initialised by #reset_indent):
    #
    # * @indent_stack        - indentation levels entered so far; the last
    #                          element is the current level
    # * @next_indent         - candidate indentation measured on the line
    #                          currently being lexed
    # * @block_scalar_indent - indentation of the literal/folded scalar being
    #                          lexed, or nil when it has not been determined
    #
    # @debug is read but never assigned in this class - presumably it is
    # provided by RegexLexer.
    class YAML < RegexLexer
      title "YAML"
      desc "Yaml Ain't Markup Language (yaml.org)"
      mimetypes 'text/x-yaml'
      tag 'yaml'
      aliases 'yml'

      # Content-based detection hook.
      #
      # Returns 1 when +text+ begins (after optional whitespace) with a
      # %YAML directive, and nil otherwise.
      def self.analyze_text(text)
        # look for the %YAML directive
        return 1 if text =~ /\A\s*%YAML/m
      end

      filenames '*.yaml', '*.yml'
      # NB: Tabs are forbidden in YAML, which is why you see things
      # like /[ ]+/.

      # Reset the indentation levels to their initial values: a stack
      # holding only level 0, no pending indent and no block scalar indent.
      def reset_indent
        puts "    yaml: reset_indent" if @debug
        @indent_stack = [0]
        @next_indent = 0
        @block_scalar_indent = nil
      end

      # The current indentation level (top of the indent stack).
      #
      # Raises RuntimeError if the stack is empty.
      def indent
        raise 'empty indent stack!' if @indent_stack.empty?
        @indent_stack.last
      end

      # True when +level+ is shallower than the current indentation.
      def dedent?(level)
        level < self.indent
      end

      # True when +level+ is deeper than the current indentation.
      def indent?(level)
        level > self.indent
      end

      # Save a possible indentation level.
      #
      # +match+ is the run of leading spaces on a line. Its size becomes
      # @next_indent, and every stacked level deeper than it is popped.
      #
      # Returns a two-element array [text, err]:
      # * after a dedent, +match+ is split at the (new) current indent, so
      #   +err+ is non-empty exactly when the line dedented to a column that
      #   was never an indentation level;
      # * otherwise [match, ''].
      def save_indent(match)
        @next_indent = match.size
        puts "    yaml: indent: #{self.indent}/#@next_indent" if @debug
        puts "    yaml: popping indent stack - before: #@indent_stack" if @debug
        if dedent?(@next_indent)
          @indent_stack.pop while dedent?(@next_indent)
          puts "    yaml: popping indent stack - after: #@indent_stack" if @debug
          puts "    yaml: indent: #{self.indent}/#@next_indent" if @debug

          # dedenting to a state not previously indented to is an error
          [match[0...self.indent], match[self.indent..-1]]
        else
          [match, '']
        end
      end

      # Extend the pending indentation by the width of +match+.
      def continue_indent(match)
        puts "    yaml: continue_indent" if @debug
        @next_indent += match.size
      end

      # Commit the pending indentation as a new level if it is deeper than
      # the current one.
      #
      # Unless opts[:implicit] is truthy, the pending indentation is then
      # advanced past +match+ (the indicator that was just consumed).
      def set_indent(match, opts={})
        if indent < @next_indent
          @indent_stack << @next_indent
        end

        @next_indent += match.size unless opts[:implicit]
      end

      # Characters that may begin a plain (unquoted) scalar: anything except
      # whitespace and the YAML indicator characters.
      plain_scalar_start = /[^ \t\n\r\f\v?:,\[\]{}#&*!\|>'"%@`]/

      start { reset_indent }

      # Rules shared by several states: comments.
      state :basic do
        rule(/#.*$/, Comment::Single)
      end

      # Start of a line at document level.
      state :root do
        mixin :basic

        rule(/\n+/, Text)

        # trailing or pre-comment whitespace
        rule(/[ ]+(?=#|$)/, Text)

        rule(/^%YAML\b/) do
          token Name::Tag
          reset_indent
          push :yaml_directive
        end

        rule(/^%TAG\b/) do
          token Name::Tag
          reset_indent
          push :tag_directive
        end

        # doc-start and doc-end indicators
        rule(/^(?:---|\.\.\.)(?= |$)/) do
          token Name::Namespace
          reset_indent
          push :block_line
        end

        # indentation spaces
        rule(/[ ]*(?!\s|$)/) do |m|
          # err holds any spaces that dedent to a never-seen level
          text, err = save_indent(m[0])
          token Text, text
          token Error, err
          push :block_line; push :indentation
        end
      end

      # Leading part of a block line: block collection indicators and the
      # whitespace around them, which together determine the indentation.
      state :indentation do
        rule(/\s*?\n/) { token Text; pop! 2 }
        # whitespace preceding block collection indicators
        rule(/[ ]+(?=[-:?](?:[ ]|$))/) do |m|
          token Text
          continue_indent(m[0])
        end

        # block collection indicators
        rule(/[?:-](?=[ ]|$)/) { |m| token Punctuation::Indicator; set_indent m[0] }

        # the beginning of a block line
        rule(/[ ]*/) { |m| token Text; continue_indent(m[0]); pop! }
      end

      # indented line in the block context
      state :block_line do
        # line end
        rule(/[ ]*(?=#|$)/, Text, :pop!)
        rule(/[ ]+/, Text)
        # tags, anchors, and aliases
        mixin :descriptors
        # block collections and scalars
        mixin :block_nodes
        # flow collections and quoted scalars
        mixin :flow_nodes

        # a plain scalar
        rule(/(?=#{plain_scalar_start}|[?:-][^ \t\n\r\f\v])/) do
          token Name::Variable
          push :plain_scalar_in_block_context
        end
      end

      # Node properties: tags, anchors and aliases.
      state :descriptors do
        # a full-form tag
        rule(/!<[0-9A-Za-z;\/?:@&=+$,_.!~*'()\[\]%-]+>/, Keyword::Type)

        # a tag in the form '!', '!suffix' or '!handle!suffix'
        rule(%r(
          (?:![\w-]+)? # handle
          !(?:[\w;/?:@&=+$,.!~*\'()\[\]%-]*) # suffix
        )x, Keyword::Type)

        # an anchor
        rule(/&[\w-]+/, Name::Label)

        # an alias
        rule(/\*[\w-]+/, Name::Variable)
      end

      # Block-style constructs that can appear on a block line.
      state :block_nodes do
        # implicit key
        rule(/:(?=\s|$)/) do |m|
          token Punctuation::Indicator
          set_indent m[0], :implicit => true
        end

        # literal and folded scalars
        rule(/[\|>]/) do
          token Punctuation::Indicator
          # pushed in reverse order: the header is lexed first, then the
          # content
          push :block_scalar_content
          push :block_scalar_header
        end
      end

      # Openers for flow collections and quoted scalars.
      state :flow_nodes do
        rule(/\[/, Punctuation::Indicator, :flow_sequence)
        rule(/\{/, Punctuation::Indicator, :flow_mapping)
        rule(/'/, Str::Single, :single_quoted_scalar)
        rule(/"/, Str::Double, :double_quoted_scalar)
      end

      # Rules common to the inside of [...] and {...}.
      state :flow_collection do
        rule(/\s+/m, Text)
        mixin :basic
        rule(/[?:,]/, Punctuation::Indicator)
        mixin :descriptors
        mixin :flow_nodes

        rule(/(?=#{plain_scalar_start})/) do
          push :plain_scalar_in_flow_context
        end
      end

      state :flow_sequence do
        rule(/\]/, Punctuation::Indicator, :pop!)
        mixin :flow_collection
      end

      state :flow_mapping do
        rule(/\}/, Punctuation::Indicator, :pop!)
        mixin :flow_collection
      end

      # Body of a literal (|) or folded (>) block scalar.
      state :block_scalar_content do
        rule(/\n+/, Text)

        # empty lines never dedent, but they might be part of the scalar.
        rule(/^[ ]+$/) do |m|
          text = m[0]
          indent_size = text.size

          # spaces up to the scalar's indent are plain whitespace; anything
          # beyond it is emitted as scalar content
          indent_mark = @block_scalar_indent || indent_size

          token Text, text[0...indent_mark]
          token Name::Constant, text[indent_mark..-1]
        end

        # TODO: ^ doesn't actually seem to affect the match at all.
        # Find a way to work around this limitation.
        rule(/^[ ]*/) do |m|
          token Text

          indent_size = m[0].size

          # the first content line fixes the scalar's indentation when the
          # header did not specify it
          dedent_level = @block_scalar_indent || self.indent
          @block_scalar_indent ||= indent_size

          # a shallower line ends the scalar
          if indent_size < dedent_level
            pop! 2
          end
        end

        rule(/[^\n\r\f\v]+/, Name::Constant)
      end

      # What follows the | or > indicator on the same line.
      state :block_scalar_header do
        # optional indentation indicator and chomping flag, in either order
        rule(%r(
          (
            ([1-9])[+-]? | [+-]?([1-9])?
          )(?=[ ]|$)
        )x) do |m|
          @block_scalar_indent = nil
          # issued before the early exit below so that it also applies when
          # the header is empty
          goto :ignored_line
          next if m[0].empty?

          # m[1] is the whole header (outer group); the indentation digit,
          # if any, is captured by m[2] (digit first) or m[3] (chomping
          # flag first). Using m[1] here would treat a bare "+"/"-" as an
          # explicit indentation of 0 and "-2" as a negative one.
          increment = m[2] || m[3]
          if increment
            @block_scalar_indent = indent + increment.to_i
          end

          token Punctuation::Indicator
        end
      end

      # Remainder of a line that carries no further content: whitespace,
      # an optional comment, and the newline that ends the state.
      state :ignored_line do
        mixin :basic
        rule(/[ ]+/, Text)
        rule(/\n/, Text, :pop!)
      end

      # Whitespace handling shared by single- and double-quoted scalars.
      state :quoted_scalar_whitespaces do
        # leading and trailing whitespace is ignored
        rule(/^[ ]+/, Text)
        rule(/[ ]+$/, Text)

        rule(/\n+/m, Text)

        rule(/[ ]+/, Name::Variable)
      end

      state :single_quoted_scalar do
        mixin :quoted_scalar_whitespaces
        rule(/\\'/, Str::Escape)
        rule(/'/, Str, :pop!)
        rule(/[^\s']+/, Str)
      end

      state :double_quoted_scalar do
        rule(/"/, Str, :pop!)
        mixin :quoted_scalar_whitespaces
        # escapes
        rule(/\\[0abt\tn\nvfre "\\N_LP]/, Str::Escape)
        rule(/\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
             Str::Escape)
        rule(/[^ \t\n\r\f\v"\\]+/, Str)
      end

      # Entered after a newline inside a block-context plain scalar, to
      # decide whether the scalar continues on the next line.
      state :plain_scalar_in_block_context_new_line do
        rule(/^[ ]+\n/, Text)
        rule(/\n+/m, Text)
        # a document marker ends the scalar; three states are popped
        # (presumably this one, the plain scalar and :block_line)
        rule(/^(?=---|\.\.\.)/) do
          pop! 3
        end

        # dedent detection
        rule(/^[ ]*/) do |m|
          token Text
          pop!

          indent_size = m[0].size

          # dedent = end of scalar
          if indent_size <= self.indent
            pop!
            # only the side effects on the indent state are needed here;
            # the returned [text, err] pair is discarded
            save_indent(m[0])
            push :indentation
          end
        end
      end

      state :plain_scalar_in_block_context do
        # the : indicator ends a scalar
        rule(/[ ]*(?=:[ \n]|:$)/, Text, :pop!)
        rule(/[ ]*:/, Str)
        rule(/[ ]+(?=#)/, Text, :pop!)
        rule(/[ ]+$/, Text)
        # check for new documents or dedents at the new line
        rule(/\n+/) do
          token Text
          push :plain_scalar_in_block_context_new_line
        end

        rule(/[ ]+/, Str)
        # regular non-whitespace characters
        rule(/[^\s:]+/, Str)
      end

      state :plain_scalar_in_flow_context do
        # a flow indicator or a comment ends the scalar
        rule(/[ ]*(?=[,:?\[\]{}])/, Text, :pop!)
        rule(/[ ]+(?=#)/, Text, :pop!)
        rule(/^[ ]+/, Text)
        rule(/[ ]+$/, Text)
        rule(/\n+/, Text)
        rule(/[ ]+/, Name::Variable)
        rule(/[^\s,:?\[\]{}]+/, Name::Variable)
      end

      # Argument of a %YAML directive: the version number.
      state :yaml_directive do
        rule(/([ ]+)(\d+\.\d+)/) do
          groups Text, Num
          goto :ignored_line
        end
      end

      # Arguments of a %TAG directive: a tag handle followed by its prefix.
      state :tag_directive do
        rule(%r(
          ([ ]+)(!|![\w-]*!) # handle
          ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # prefix
        )x) do
          groups Text, Keyword::Type, Text, Keyword::Type
          goto :ignored_line
        end
      end
    end
  end
end