# -*- coding: utf-8 -*- # # frozen_string_literal: true module Rouge module Lexers class HTML < RegexLexer title "HTML" desc "HTML, the markup language of the web" tag 'html' filenames '*.htm', '*.html', '*.xhtml' mimetypes 'text/html', 'application/xhtml+xml' def self.detect?(text) return true if text.doctype?(/\bhtml\b/i) return false if text =~ /\A<\?xml\b/ return true if text =~ /<\s*html\b/ end start do @javascript = Javascript.new(options) @css = CSS.new(options) end state :root do rule %r/[^<&]+/m, Text rule %r/&\S*?;/, Name::Entity rule %r//im, Comment::Preproc rule %r//m, Comment::Preproc rule %r//, Comment, :pop! rule %r/-/, Comment end state :tag do rule %r/\s+/m, Text rule %r/[a-zA-Z0-9_:\[\]()*.-]+\s*=\s*/m, Name::Attribute, :attr rule %r/[a-zA-Z0-9_:#*-]+/, Name::Attribute rule %r(/?\s*>)m, Name::Tag, :pop! end state :attr do # TODO: are backslash escapes valid here? rule %r/"/ do token Str goto :dq end rule %r/'/ do token Str goto :sq end rule %r/[^\s>]+/, Str, :pop! end state :dq do rule %r/"/, Str, :pop! rule %r/[^"]+/, Str end state :sq do rule %r/'/, Str, :pop! rule %r/[^']+/, Str end state :script_content do rule %r([^<]+) do delegate @javascript end rule %r(<\s*/\s*script\s*>)m, Name::Tag, :pop! rule %r(<) do delegate @javascript end end state :style_content do rule %r/[^<]+/ do delegate @lang end rule %r(<\s*/\s*style\s*>)m, Name::Tag, :pop! rule %r/