# -*- coding: utf-8 -*- # # frozen_string_literal: true module Rouge module Lexers class YAML < RegexLexer title "YAML" desc "Yaml Ain't Markup Language (yaml.org)" mimetypes 'text/x-yaml' tag 'yaml' aliases 'yml' filenames '*.yaml', '*.yml' def self.detect?(text) # look for the %YAML directive return true if text =~ /\A\s*%YAML/m end SPECIAL_VALUES = Regexp.union(%w(true false null)) # NB: Tabs are forbidden in YAML, which is why you see things # like /[ ]+/. # reset the indentation levels def reset_indent puts " yaml: reset_indent" if @debug @indent_stack = [0] @next_indent = 0 @block_scalar_indent = nil end def indent raise 'empty indent stack!' if @indent_stack.empty? @indent_stack.last end def dedent?(level) level < self.indent end def indent?(level) level > self.indent end # Save a possible indentation level def save_indent(match) @next_indent = match.size puts " yaml: indent: #{self.indent}/#@next_indent" if @debug puts " yaml: popping indent stack - before: #@indent_stack" if @debug if dedent?(@next_indent) @indent_stack.pop while dedent?(@next_indent) puts " yaml: popping indent stack - after: #@indent_stack" if @debug puts " yaml: indent: #{self.indent}/#@next_indent" if @debug # dedenting to a state not previously indented to is an error [match[0...self.indent], match[self.indent..-1]] else [match, ''] end end def continue_indent(match) puts " yaml: continue_indent" if @debug @next_indent += match.size end def set_indent(match, opts={}) if indent < @next_indent puts " yaml: indenting #{indent}/#{@next_indent}" if @debug @indent_stack << @next_indent end @next_indent += match.size unless opts[:implicit] end plain_scalar_start = /[^ \t\n\r\f\v?:,\[\]{}#&*!\|>'"%@`]/ start { reset_indent } state :basic do rule %r/#.*$/, Comment::Single end state :root do mixin :basic rule %r/\n+/, Text # trailing or pre-comment whitespace rule %r/[ ]+(?=#|$)/, Text rule %r/^%YAML\b/ do token Name::Tag reset_indent push :yaml_directive end rule %r/^%TAG\b/ do token Name::Tag reset_indent push :tag_directive end # doc-start and doc-end indicators rule %r/^(?:---|\.\.\.)(?= |$)/ do token Name::Namespace reset_indent push :block_line end # indentation spaces rule %r/[ ]*(?!\s|$)/ do |m| text, err = save_indent(m[0]) token Text, text token Error, err push :block_line; push :indentation end end state :indentation do rule(/\s*?\n/) { token Text; pop! 2 } # whitespace preceding block collection indicators rule %r/[ ]+(?=[-:?](?:[ ]|$))/ do |m| token Text continue_indent(m[0]) end # block collection indicators rule(/[?:-](?=[ ]|$)/) do |m| set_indent m[0] token Punctuation::Indicator end # the beginning of a block line rule(/[ ]*/) { |m| token Text; continue_indent(m[0]); pop! } end # indented line in the block context state :block_line do # line end rule %r/[ ]*(?=#|$)/, Text, :pop! rule %r/[ ]+/, Text # tags, anchors, and aliases mixin :descriptors # block collections and scalars mixin :block_nodes # flow collections and quoed scalars mixin :flow_nodes # a plain scalar rule %r/(?=#{plain_scalar_start}|[?:-][^ \t\n\r\f\v])/ do token Name::Variable push :plain_scalar_in_block_context end end state :descriptors do # a full-form tag rule %r/!<[0-9A-Za-z;\/?:@&=+$,_.!~*'()\[\]%-]+>/, Keyword::Type # a tag in the form '!', '!suffix' or '!handle!suffix' rule %r( (?:![\w-]+)? # handle !(?:[\w;/?:@&=+$,.!~*\'()\[\]%-]*) # suffix )x, Keyword::Type # an anchor rule %r/&[\w-]+/, Name::Label # an alias rule %r/\*[\w-]+/, Name::Variable end state :block_nodes do # implicit key rule %r/((?:\w[\w -]*)?)(:)(?=\s|$)/ do |m| groups Name::Attribute, Punctuation::Indicator set_indent m[0], :implicit => true end # literal and folded scalars rule %r/[\|>][+-]?/ do token Punctuation::Indicator push :block_scalar_content push :block_scalar_header end end state :flow_nodes do rule %r/\[/, Punctuation::Indicator, :flow_sequence rule %r/\{/, Punctuation::Indicator, :flow_mapping rule %r/'/, Str::Single, :single_quoted_scalar rule %r/"/, Str::Double, :double_quoted_scalar end state :flow_collection do rule %r/\s+/m, Text mixin :basic rule %r/[?:,]/, Punctuation::Indicator mixin :descriptors mixin :flow_nodes rule %r/(?=#{plain_scalar_start})/ do push :plain_scalar_in_flow_context end end state :flow_sequence do rule %r/\]/, Punctuation::Indicator, :pop! mixin :flow_collection end state :flow_mapping do rule %r/\}/, Punctuation::Indicator, :pop! mixin :flow_collection end state :block_scalar_content do rule %r/\n+/, Text # empty lines never dedent, but they might be part of the scalar. rule %r/^[ ]+$/ do |m| text = m[0] indent_size = text.size indent_mark = @block_scalar_indent || indent_size token Text, text[0...indent_mark] token Name::Constant, text[indent_mark..-1] end # TODO: ^ doesn't actually seem to affect the match at all. # Find a way to work around this limitation. rule %r/^[ ]*/ do |m| token Text indent_size = m[0].size dedent_level = @block_scalar_indent || self.indent @block_scalar_indent ||= indent_size if indent_size < dedent_level save_indent m[0] pop! push :indentation end end rule %r/[^\n\r\f\v]+/, Str end state :block_scalar_header do # optional indentation indicator and chomping flag, in either order rule %r( ( ([1-9])[+-]? | [+-]?([1-9])? )(?=[ ]|$) )x do |m| @block_scalar_indent = nil goto :ignored_line next if m[0].empty? increment = m[1] || m[2] if increment @block_scalar_indent = indent + increment.to_i end token Punctuation::Indicator end end state :ignored_line do mixin :basic rule %r/[ ]+/, Text rule %r/\n/, Text, :pop! end state :quoted_scalar_whitespaces do # leading and trailing whitespace is ignored rule %r/^[ ]+/, Text rule %r/[ ]+$/, Text rule %r/\n+/m, Text rule %r/[ ]+/, Name::Variable end state :single_quoted_scalar do mixin :quoted_scalar_whitespaces rule %r/\\'/, Str::Escape rule %r/'/, Str, :pop! rule %r/[^\s']+/, Str end state :double_quoted_scalar do rule %r/"/, Str, :pop! mixin :quoted_scalar_whitespaces # escapes rule %r/\\[0abt\tn\nvfre "\\N_LP]/, Str::Escape rule %r/\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/, Str::Escape rule %r/[^ \t\n\r\f\v"\\]+/, Str end state :plain_scalar_in_block_context_new_line do rule %r/^[ ]+\n/, Text rule %r/\n+/m, Text rule %r/^(?=---|\.\.\.)/ do pop! 3 end # dedent detection rule %r/^[ ]*/ do |m| token Text pop! indent_size = m[0].size # dedent = end of scalar if indent_size <= self.indent pop! save_indent(m[0]) push :indentation end end end state :plain_scalar_in_block_context do # the : indicator ends a scalar rule %r/[ ]*(?=:[ \n]|:$)/, Text, :pop! rule %r/[ ]*:\S+/, Str rule %r/[ ]+(?=#)/, Text, :pop! rule %r/[ ]+$/, Text # check for new documents or dedents at the new line rule %r/\n+/ do token Text push :plain_scalar_in_block_context_new_line end rule %r/[ ]+/, Str rule SPECIAL_VALUES, Name::Constant rule %r/\d+(?:\.\d+)?(?=(\r?\n)| +#)/, Literal::Number, :pop! # regular non-whitespace characters rule %r/[^\s:]+/, Str end state :plain_scalar_in_flow_context do rule %r/[ ]*(?=[,:?\[\]{}])/, Text, :pop! rule %r/[ ]+(?=#)/, Text, :pop! rule %r/^[ ]+/, Text rule %r/[ ]+$/, Text rule %r/\n+/, Text rule %r/[ ]+/, Name::Variable rule %r/[^\s,:?\[\]{}]+/, Name::Variable end state :yaml_directive do rule %r/([ ]+)(\d+\.\d+)/ do groups Text, Num goto :ignored_line end end state :tag_directive do rule %r( ([ ]+)(!|![\w-]*!) # prefix ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag handle )x do groups Text, Keyword::Type, Text, Keyword::Type goto :ignored_line end end end end end