# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    # Regex-driven lexer for HTML documents.
    #
    # Markup is tokenized by the states defined below. The bodies of
    # <script> and <style> elements are not lexed here; they are delegated
    # to the Javascript and CSS lexers created in the `start` hook.
    class HTML < RegexLexer
      title "HTML"
      desc "HTML, the markup language of the web"
      tag 'html'
      filenames '*.htm', '*.html', '*.xhtml'
      mimetypes 'text/html', 'application/xhtml+xml'

      # Content-based detection hook.
      #
      # @param text the source text being analysed; it must respond to
      #   `doctype?` and `=~`
      # @return [true] when the doctype mentions "html", or when an
      #   `<html` opening tag appears anywhere in the text
      # @return [false] when the text begins with an `<?xml` declaration
      #   (checked before the `<html` search, so such documents are
      #   rejected unless their doctype mentions html)
      # @return [nil] when none of the checks applies
      def self.detect?(text)
        return true if text.doctype?(/\bhtml\b/i)
        return false if text =~ /\A<\?xml\b/
        return true if text =~ /<\s*html\b/
      end

      # Build the embedded-language lexers, passing this lexer's options on.
      start do
        @javascript = Javascript.new(options)
        @css = CSS.new(options)
      end

      # Top level: character data, entities, and everything opened by `<`.
      state :root do
        rule %r/[^<&]+/m, Text
        rule %r/&\S*?;/, Name::Entity
        rule %r/<!DOCTYPE .*?>/im, Comment::Preproc
        rule %r/<!\[CDATA\[.*?\]\]>/m, Comment::Preproc
        rule %r/<!--/, Comment, :comment
        rule %r/<\?.*?\?>/m, Comment::Preproc # php? really?

        # <script ...>: :tag is pushed last so the attributes are lexed
        # first; once :tag pops on `>`, :script_content takes over.
        rule %r/<\s*script\s*/m do
          token Name::Tag
          @javascript.reset!
          push :script_content
          push :tag
        end

        # <style ...>: same arrangement as <script>, with the CSS lexer
        # recorded in @lang for :style_content to delegate to.
        rule %r/<\s*style\s*/m do
          token Name::Tag
          @css.reset!
          @lang = @css
          push :style_content
          push :tag
        end

        # These two rules together match every remaining `<`, so any rule
        # beginning with `<` placed after them could never fire. `</` must
        # stay ahead of the bare `<` rule for closing tags to be recognised.
        rule %r(</), Name::Tag, :tag_end
        rule %r/</, Name::Tag, :tag_start
      end

      # After `</`: optional whitespace, the tag name, then the closing `>`.
      # The mixin also lets a nameless `</>` terminate.
      state :tag_end do
        mixin :tag_end_end
        rule %r/[a-zA-Z0-9:-]+/ do
          token Name::Tag
          goto :tag_end_end
        end
      end

      # Remainder of a closing tag once its name has been consumed.
      state :tag_end_end do
        rule %r/\s+/, Text
        rule %r/>/, Name::Tag, :pop!
      end

      # After `<`: optional whitespace and the tag name, then on to :tag.
      state :tag_start do
        rule %r/\s+/, Text

        rule %r/[a-zA-Z0-9:-]+/ do
          token Name::Tag
          goto :tag
        end

        # The empty pattern always matches without consuming input, so a
        # `<` that is not followed by a name still moves on to :tag.
        rule(//) { goto :tag }
      end

      # Inside `<!-- ... -->`. A lone `-` is consumed one character at a
      # time so that the `-->` terminator is always seen.
      state :comment do
        rule %r/[^-]+/, Comment
        rule %r/-->/, Comment, :pop!
        rule %r/-/, Comment
      end

      # Inside an opening tag, after its name: attributes until `>` or `/>`.
      state :tag do
        rule %r/\s+/m, Text
        # Attribute with a value; the `=` and surrounding whitespace are
        # part of the Name::Attribute token.
        rule %r/[a-zA-Z0-9_:\[\]()*.-]+\s*=\s*/m, Name::Attribute, :attr
        # Valueless (boolean) attribute.
        rule %r/[a-zA-Z0-9_:#*-]+/, Name::Attribute
        rule %r(/?\s*>)m, Name::Tag, :pop!
      end

      # Entered right after `name=`; consumes exactly one attribute value.
      # Quoted values switch to :dq / :sq via goto, so the pop at the
      # closing quote leads back to :tag.
      state :attr do
        # TODO: are backslash escapes valid here?
        rule %r/"/ do
          token Str
          goto :dq
        end

        rule %r/'/ do
          token Str
          goto :sq
        end

        # Unquoted value: runs up to whitespace or `>`.
        rule %r/[^\s>]+/, Str, :pop!
      end

      # Body of a double-quoted attribute value.
      state :dq do
        rule %r/"/, Str, :pop!
        rule %r/[^"]+/, Str
      end

      # Body of a single-quoted attribute value.
      state :sq do
        rule %r/'/, Str, :pop!
        rule %r/[^']+/, Str
      end

      # Contents of a <script> element, delegated to the Javascript lexer
      # until the closing tag.
      state :script_content do
        rule %r([^<]+) do
          delegate @javascript
        end

        rule %r(<\s*/\s*script\s*>)m, Name::Tag, :pop!

        # A `<` that does not begin `</script>` is still script text.
        rule %r(<) do
          delegate @javascript
        end
      end

      # Contents of a <style> element, delegated to the lexer stored in
      # @lang until the closing tag.
      state :style_content do
        rule %r/[^<]+/ do
          delegate @lang
        end

        rule %r(<\s*/\s*style\s*>)m, Name::Tag, :pop!

        # A `<` that does not begin `</style>` is still stylesheet text.
        rule %r/</ do
          delegate @lang
        end
      end
    end
  end
end