377 lines
9.8 KiB
Ruby
377 lines
9.8 KiB
Ruby
|
# -*- coding: utf-8 -*- #
|
||
|
# frozen_string_literal: true
|
||
|
|
||
|
module Rouge
|
||
|
module Lexers
|
||
|
class YAML < RegexLexer
|
||
|
# Lexer metadata declared through the RegexLexer class macros: display
# title, description, MIME type, tag, alias and filename globs.
title 'YAML'
desc "Yaml Ain't Markup Language (yaml.org)"
mimetypes 'text/x-yaml'
tag 'yaml'
aliases 'yml'
filenames '*.yaml', '*.yml'
|
||
|
|
||
|
# Content-based detection hook.
#
# Returns true when the first non-whitespace text of +text+ is a %YAML
# directive, and nil otherwise.
def self.detect?(text)
  # look for the %YAML directive
  directive_at = text =~ /\A\s*%YAML/m
  return true if directive_at
end
|
||
|
|
||
|
# words matched inside plain scalars and tokenized as Name::Constant
SPECIAL_VALUES = Regexp.union('true', 'false', 'null')
|
||
|
|
||
|
# NB: Tabs are forbidden for indentation in YAML, which is why you see
# things like /[ ]+/.
|
||
|
|
||
|
# Put the indentation tracking back into its initial state: a stack that
# holds only level 0, a pending indent of 0, and no block scalar indent.
def reset_indent
  if @debug
    puts " yaml: reset_indent"
  end

  @next_indent = 0
  @indent_stack = [0]
  @block_scalar_indent = nil
end
|
||
|
|
||
|
# The current indentation level, i.e. the top of the indent stack.
#
# Raises RuntimeError ('empty indent stack!') when the stack is empty.
def indent
  @indent_stack.fetch(-1) { raise 'empty indent stack!' }
end
|
||
|
|
||
|
# True when +level+ is shallower than the current indentation level.
def dedent?(level)
  indent > level
end
|
||
|
|
||
|
# True when +level+ is deeper than the current indentation level.
def indent?(level)
  indent < level
end
|
||
|
|
||
|
# Save a possible indentation level.
#
# Stores the length of +match+ as the pending indent and, when that is a
# dedent, pops the indent stack until the current level is no deeper than
# it.
#
# Returns a two-element array [text, error]. Without a dedent this is
# [match, '']. After a dedent, +text+ is the part of +match+ up to the
# level the stack settled on and +error+ is whatever extends beyond it.
def save_indent(match)
  @next_indent = match.size
  if @debug
    puts " yaml: indent: #{self.indent}/#@next_indent"
    puts " yaml: popping indent stack - before: #@indent_stack"
  end

  # not a dedent: the whole run is ordinary whitespace
  return [match, ''] unless dedent?(@next_indent)

  while dedent?(@next_indent)
    @indent_stack.pop
  end
  if @debug
    puts " yaml: popping indent stack - after: #@indent_stack"
    puts " yaml: indent: #{self.indent}/#@next_indent"
  end

  # dedenting to a state not previously indented to is an error
  level = indent
  [match[0...level], match[level..-1]]
end
|
||
|
|
||
|
# Grow the pending indent by the length of +match+ and return the new value.
def continue_indent(match)
  if @debug
    puts " yaml: continue_indent"
  end
  @next_indent = @next_indent + match.size
end
|
||
|
|
||
|
# Commit the pending indent as a new level when it is deeper than the
# current one, then advance the pending indent past +match+.
#
# opts[:implicit] - when truthy the pending indent is left unchanged and
#                   nil is returned; otherwise the updated pending indent
#                   is returned.
def set_indent(match, opts={})
  if @next_indent > indent
    puts " yaml: indenting #{indent}/#{@next_indent}" if @debug
    @indent_stack.push(@next_indent)
  end

  return if opts[:implicit]

  @next_indent += match.size
end
|
||
|
|
||
|
# first character of a plain (unquoted) scalar: anything except whitespace
# and the indicator characters listed in the class
plain_scalar_start = %r/[^ \t\n\r\f\v?:,\[\]{}#&*!\|>'"%@`]/
|
||
|
|
||
|
# reset the indentation tracking from the lexer's start hook
start do
  reset_indent
end
|
||
|
|
||
|
# Mixin state: a comment runs from '#' to the end of the line.
state :basic do
  rule(/#.*$/, Comment::Single)
end
|
||
|
|
||
|
# Top-level state: comments, blank space, directives, document markers and
# the leading indentation of a line.
state :root do
  mixin :basic

  rule(/\n+/, Text)

  # trailing or pre-comment whitespace
  rule(/[ ]+(?=#|$)/, Text)

  # %YAML directive
  rule(/^%YAML\b/) do
    token Name::Tag
    reset_indent
    push :yaml_directive
  end

  # %TAG directive
  rule(/^%TAG\b/) do
    token Name::Tag
    reset_indent
    push :tag_directive
  end

  # doc-start and doc-end indicators
  rule(/^(?:---|\.\.\.)(?= |$)/) do
    token Name::Namespace
    reset_indent
    push :block_line
  end

  # indentation spaces; anything save_indent reports as excess is an Error
  rule(/[ ]*(?!\s|$)/) do |m|
    spaces, excess = save_indent(m[0])
    token Text, spaces
    token Error, excess
    push :block_line
    push :indentation
  end
end
|
||
|
|
||
|
# Start of a line after its leading spaces: block collection indicators and
# the whitespace around them, each of which moves the pending indent.
state :indentation do
  # nothing but whitespace up to the newline: leave two states at once
  rule %r/\s*?\n/ do
    token Text
    pop! 2
  end

  # whitespace preceding block collection indicators
  rule(/[ ]+(?=[-:?](?:[ ]|$))/) do |m|
    token Text
    continue_indent(m[0])
  end

  # block collection indicators
  rule %r/[?:-](?=[ ]|$)/ do |m|
    set_indent m[0]
    token Punctuation::Indicator
  end

  # the beginning of a block line
  rule %r/[ ]*/ do |m|
    token Text
    continue_indent(m[0])
    pop!
  end
end
|
||
|
|
||
|
# indented line in the block context
state :block_line do
  # line end
  rule(/[ ]*(?=#|$)/, Text, :pop!)
  rule(/[ ]+/, Text)

  # tags, anchors, and aliases
  mixin :descriptors
  # block collections and scalars
  mixin :block_nodes
  # flow collections and quoted scalars
  mixin :flow_nodes

  # a plain scalar: the lookahead consumes nothing, it only switches state
  rule(/(?=#{plain_scalar_start}|[?:-][^ \t\n\r\f\v])/) do
    token Name::Variable
    push :plain_scalar_in_block_context
  end
end
|
||
|
|
||
|
# Mixin state for node properties: tags, anchors and aliases.
state :descriptors do
  # a full-form tag
  rule(/!<[0-9A-Za-z;\/?:@&=+$,_.!~*'()\[\]%-]+>/, Keyword::Type)

  # a tag in the form '!', '!suffix' or '!handle!suffix'
  rule(
    %r(
      (?:![\w-]+)? # handle
      !(?:[\w;/?:@&=+$,.!~*\'()\[\]%-]*) # suffix
    )x,
    Keyword::Type
  )

  # an anchor
  rule(/&[\w-]+/, Name::Label)

  # an alias
  rule(/\*[\w-]+/, Name::Variable)
end
|
||
|
|
||
|
# Mixin state for block-style nodes: implicit mapping keys and the
# indicators that open literal/folded scalars.
state :block_nodes do
  # implicit key
  rule(/((?:\w[\w -]*)?)(:)(?=\s|$)/) do |m|
    groups Name::Attribute, Punctuation::Indicator
    set_indent m[0], implicit: true
  end

  # literal and folded scalars; the header state is pushed last so it is
  # entered first, with the content state waiting beneath it
  rule(/[\|>][+-]?/) do
    token Punctuation::Indicator
    push :block_scalar_content
    push :block_scalar_header
  end
end
|
||
|
|
||
|
# Mixin state for the openers of flow sequences, flow mappings and quoted
# scalars; each one enters the matching state.
state :flow_nodes do
  rule(/\[/, Punctuation::Indicator, :flow_sequence)
  rule(/\{/, Punctuation::Indicator, :flow_mapping)
  rule(/'/, Str::Single, :single_quoted_scalar)
  rule(/"/, Str::Double, :double_quoted_scalar)
end
|
||
|
|
||
|
# Rules shared by flow sequences and flow mappings.
state :flow_collection do
  rule(/\s+/m, Text)
  mixin :basic
  rule(/[?:,]/, Punctuation::Indicator)
  mixin :descriptors
  mixin :flow_nodes

  # zero-width lookahead: a plain scalar begins here
  rule(/(?=#{plain_scalar_start})/) { push :plain_scalar_in_flow_context }
end
|
||
|
|
||
|
# Inside [ ... ]: the closing bracket leaves the state.
state :flow_sequence do
  rule(/\]/, Punctuation::Indicator, :pop!)
  mixin :flow_collection
end
|
||
|
|
||
|
# Inside { ... }: the closing brace leaves the state.
state :flow_mapping do
  rule(/\}/, Punctuation::Indicator, :pop!)
  mixin :flow_collection
end
|
||
|
|
||
|
# Body of a literal or folded block scalar.
state :block_scalar_content do
  rule(/\n+/, Text)

  # empty lines never dedent, but they might be part of the scalar.
  rule(/^[ ]+$/) do |m|
    blank = m[0]
    # without a known scalar indent the whole line counts as indentation
    split_at = @block_scalar_indent || blank.size

    token Text, blank[0...split_at]
    token Name::Constant, blank[split_at..-1]
  end

  # TODO: ^ doesn't actually seem to affect the match at all.
  # Find a way to work around this limitation.
  rule(/^[ ]*/) do |m|
    token Text

    width = m[0].size

    # compare against the scalar's own indent once it is known, otherwise
    # against the current indentation level
    limit = @block_scalar_indent || self.indent
    @block_scalar_indent ||= width

    # still inside the scalar
    next unless width < limit

    save_indent m[0]
    pop!
    push :indentation
  end

  rule(/[^\n\r\f\v]+/, Str)
end
|
||
|
|
||
|
# Remainder of a block scalar header, entered after the :block_nodes rule
# has consumed the '|' or '>' indicator and an optional chomping sign.
state :block_scalar_header do
  # optional indentation indicator and chomping flag, in either order
  rule %r(
    (?:
      ([1-9])[+-]? | [+-]?([1-9])?
    )(?=[ ]|$)
  )x do |m|
    @block_scalar_indent = nil
    goto :ignored_line
    # an empty header produces no token
    next if m[0].empty?

    # Only the digit is the indentation indicator. The alternation sits in a
    # non-capturing group so that groups 1 and 2 are the two digit captures;
    # with a capturing outer group, group 1 was the whole header (possibly
    # including a chomping sign) and the digit captures were never read.
    increment = m[1] || m[2]
    if increment
      @block_scalar_indent = indent + increment.to_i
    end

    token Punctuation::Indicator
  end
end
|
||
|
|
||
|
# Rest of a line that may hold only spaces and a comment; the newline
# leaves the state.
state :ignored_line do
  mixin :basic
  rule(/[ ]+/, Text)
  rule(/\n/, Text, :pop!)
end
|
||
|
|
||
|
# Mixin state for whitespace inside quoted scalars.
state :quoted_scalar_whitespaces do
  # leading and trailing whitespace is ignored
  rule(/^[ ]+/, Text)
  rule(/[ ]+$/, Text)

  rule(/\n+/m, Text)

  # spaces in the middle of a line
  rule(/[ ]+/, Name::Variable)
end
|
||
|
|
||
|
# Inside a '...' scalar.
state :single_quoted_scalar do
  mixin :quoted_scalar_whitespaces
  # The only escape in a single-quoted scalar is a doubled quote. A
  # backslash has no special meaning here, so a scalar such as 'C:\' ends
  # at the quote that follows the backslash.
  rule %r/''/, Str::Escape
  rule %r/'/, Str, :pop!
  rule %r/[^\s']+/, Str
end
|
||
|
|
||
|
# Inside a "..." scalar: closing quote, whitespace, escape sequences and
# ordinary text.
state :double_quoted_scalar do
  rule(/"/, Str, :pop!)
  mixin :quoted_scalar_whitespaces
  # escapes
  rule(/\\[0abt\tn\nvfre "\\N_LP]/, Str::Escape)
  rule(
    /\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
    Str::Escape
  )
  rule(/[^ \t\n\r\f\v"\\]+/, Str)
end
|
||
|
|
||
|
# First position of a new line while a block-context plain scalar is open:
# decides whether the scalar continues or has ended.
state :plain_scalar_in_block_context_new_line do
  rule(/^[ ]+\n/, Text)
  rule(/\n+/m, Text)

  # a document marker: leave three states at once
  rule(/^(?=---|\.\.\.)/) { pop! 3 }

  # dedent detection
  rule(/^[ ]*/) do |m|
    token Text
    pop!

    width = m[0].size

    # deeper than the current level: the scalar carries on
    next if width > self.indent

    # dedent = end of scalar
    pop!
    save_indent(m[0])
    push :indentation
  end
end
|
||
|
|
||
|
# A plain (unquoted) scalar in block context.
state :plain_scalar_in_block_context do
  # the : indicator ends a scalar
  rule(/[ ]*(?=:[ \n]|:$)/, Text, :pop!)
  rule(/[ ]*:\S+/, Str)
  rule(/[ ]+(?=#)/, Text, :pop!)
  rule(/[ ]+$/, Text)

  # check for new documents or dedents at the new line
  rule(/\n+/) do
    token Text
    push :plain_scalar_in_block_context_new_line
  end

  rule(/[ ]+/, Str)
  rule(SPECIAL_VALUES, Name::Constant)
  rule(/\d+(?:\.\d+)?(?=(\r?\n)| +#)/, Literal::Number, :pop!)

  # regular non-whitespace characters
  rule(/[^\s:]+/, Str)
end
|
||
|
|
||
|
# A plain (unquoted) scalar inside a flow collection; it ends before a flow
# indicator or a comment.
state :plain_scalar_in_flow_context do
  rule(/[ ]*(?=[,:?\[\]{}])/, Text, :pop!)
  rule(/[ ]+(?=#)/, Text, :pop!)
  rule(/^[ ]+/, Text)
  rule(/[ ]+$/, Text)
  rule(/\n+/, Text)
  rule(/[ ]+/, Name::Variable)
  rule(/[^\s,:?\[\]{}]+/, Name::Variable)
end
|
||
|
|
||
|
# Argument of a %YAML directive: spaces followed by a major.minor version.
state :yaml_directive do
  rule(/([ ]+)(\d+\.\d+)/) do
    groups Text, Num
    goto :ignored_line
  end
end
|
||
|
|
||
|
# Arguments of a %TAG directive: two space-separated fields, both
# tokenized as Keyword::Type.
state :tag_directive do
  rule(
    %r(
      ([ ]+)(!|![\w-]*!) # prefix
      ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag handle
    )x
  ) do
    groups Text, Keyword::Type, Text, Keyword::Type
    goto :ignored_line
  end
end
|
||
|
end
|
||
|
end
|
||
|
end
|