# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    # Regex-based lexer for C source and header files.
    #
    # Identifiers are classified through the class-level word lists
    # (+keywords+, +keywords_type+, +reserved+, +builtins+) rather than
    # through one regex per word; see the identifier rule in :statements.
    class C < RegexLexer
      tag 'c'
      filenames '*.c', '*.h', '*.idc'
      mimetypes 'text/x-chdr', 'text/x-csrc'

      title "C"
      desc "The C programming language"

      # optional comment or whitespace
      ws = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
      # a C identifier
      id = /[a-zA-Z_][a-zA-Z0-9_]*/

      # Words emitted as Keyword by the identifier rule.
      #
      # @return [Set<String>] memoized on the class
      def self.keywords
        @keywords ||= Set.new %w(
          auto break case const continue default do else enum extern
          for goto if register restricted return sizeof static struct
          switch typedef union volatile virtual while

          _Alignas _Alignof _Atomic _Generic _Imaginary
          _Noreturn _Static_assert _Thread_local
        )
      end

      # Type names emitted as Keyword::Type by the identifier rule.
      #
      # @return [Set<String>] memoized on the class
      def self.keywords_type
        @keywords_type ||= Set.new %w(
          int long float short double char unsigned signed void

          jmp_buf FILE DIR div_t ldiv_t mbstate_t sig_atomic_t fpos_t
          clock_t time_t va_list size_t ssize_t off_t wchar_t ptrdiff_t
          wctrans_t wint_t wctype_t

          _Bool _Complex int8_t int16_t int32_t int64_t
          uint8_t uint16_t uint32_t uint64_t int_least8_t
          int_least16_t int_least32_t int_least64_t
          uint_least8_t uint_least16_t uint_least32_t
          uint_least64_t int_fast8_t int_fast16_t int_fast32_t
          int_fast64_t uint_fast8_t uint_fast16_t uint_fast32_t
          uint_fast64_t intptr_t uintptr_t intmax_t
          uintmax_t

          char16_t char32_t
        )
      end

      # Words emitted as Keyword::Reserved by the identifier rule.
      #
      # NOTE: the list previously contained `__int61`, a typo for the
      # sized-integer keyword `__int64` (it sits next to __int8/16/32).
      #
      # @return [Set<String>] memoized on the class
      def self.reserved
        @reserved ||= Set.new %w(
          __asm __int8 __based __except __int16 __stdcall __cdecl
          __fastcall __int32 __declspec __finally __int64 __try __leave
          inline _inline __inline naked _naked __naked restrict _restrict
          __restrict thread _thread __thread typename _typename __typename
        )
      end

      # Names emitted as Name::Builtin by the identifier rule. Empty here.
      #
      # @return [Array<String>] memoized on the class
      def self.builtins
        @builtins ||= []
      end

      # Begin lexing in :bol so the first line gets beginning-of-line handling.
      start { push :bol }

      # Beginning of a line inside an expression: handles preprocessor
      # directives, then pops as soon as nothing line-initial applies.
      state :expr_bol do
        mixin :inline_whitespace

        rule %r/#if\s0/, Comment, :if_0
        rule %r/#/, Comment::Preproc, :macro

        # the empty pattern always matches, so this leaves the state
        rule(//) { pop! }
      end

      # :expr_bol is the same as :bol but without labels, since
      # labels can only appear at the beginning of a statement.
      state :bol do
        # `name:` is a label; the lookahead rejects `name::`
        rule %r/#{id}:(?!:)/, Name::Label
        mixin :expr_bol
      end

      # Whitespace and block comments that do not end the current line.
      state :inline_whitespace do
        rule %r/[ \t\r]+/, Text
        rule %r/\\\n/, Text # line continuation
        # block comment; a line continuation may split either delimiter
        rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline
      end

      # Whitespace in statement context: a newline or a `//` comment
      # pushes :bol for the following line.
      state :whitespace do
        rule %r/\n+/m, Text, :bol
        rule %r(//(\\.|.)*?$), Comment::Single, :bol
        mixin :inline_whitespace
      end

      # Like :whitespace, but a newline pushes :expr_bol instead of :bol;
      # this rule is listed first, so it wins over the mixed-in newline rule.
      state :expr_whitespace do
        rule %r/\n+/m, Text, :expr_bol
        mixin :whitespace
      end

      # Literals, operators, punctuation and identifiers.
      state :statements do
        mixin :whitespace
        rule %r/(u8|u|U|L)?"/, Str, :string
        rule %r((u8|u|U|L)?'(\\.|\\[0-7]{1,3}|\\x[a-f0-9]{1,2}|[^\\'\n])')i, Str::Char
        rule %r((\d+[.]\d*|[.]?\d+)e[+-]?\d+[lu]*)i, Num::Float
        rule %r(\d+e[+-]?\d+[lu]*)i, Num::Float
        rule %r/0x[0-9a-f]+[lu]*/i, Num::Hex
        rule %r/0[0-7]+[lu]*/i, Num::Oct
        rule %r/\d+[lu]*/i, Num::Integer
        # a comment terminator with no open comment
        rule %r(\*/), Error
        rule %r([~!%^&*+=\|?:<>/-]), Operator
        rule %r/[()\[\],.;]/, Punctuation
        rule %r/\bcase\b/, Keyword, :case
        rule %r/(?:true|false|NULL)\b/, Name::Builtin
        # classify any other identifier through the class-level word lists
        rule id do |m|
          name = m[0]

          if self.class.keywords.include? name
            token Keyword
          elsif self.class.keywords_type.include? name
            token Keyword::Type
          elsif self.class.reserved.include? name
            token Keyword::Reserved
          elsif self.class.builtins.include? name
            token Name::Builtin
          else
            token Name
          end
        end
      end

      # After `case`: the first `:` is punctuation ending the case
      # expression, not the `:` operator matched in :statements.
      state :case do
        rule %r/:/, Punctuation, :pop!
        mixin :statements
      end

      # Top level of a file.
      state :root do
        mixin :expr_whitespace
        # function declaration or definition
        rule %r(
          ([\w*\s]+?[\s*]) # return arguments
          (#{id})          # function name
          (\s*\([^;]*?\))  # signature
          (#{ws}?)({|;)    # open brace or semicolon
        )mx do |m|
          # TODO: do this better.
          recurse m[1]
          token Name::Function, m[2]
          recurse m[3]
          recurse m[4]
          token Punctuation, m[5]
          # only a definition (opening brace) enters a function body
          push :function if m[5] == '{'
        end
        rule %r/\{/, Punctuation, :function
        mixin :statements
      end

      # Inside a brace-delimited body; nested braces push this state again,
      # so each closing brace pops exactly one level.
      state :function do
        mixin :whitespace
        mixin :statements
        rule %r/;/, Punctuation
        rule %r/{/, Punctuation, :function
        rule %r/}/, Punctuation, :pop!
      end

      # Inside a double-quoted string literal.
      state :string do
        rule %r/"/, Str, :pop!
        rule %r/\\([\\abfnrtv"']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})/, Str::Escape
        rule %r/[^\\"\n]+/, Str
        rule %r/\\\n/, Str
        rule %r/\\/, Str # stray backslash
      end

      # Remainder of a preprocessor directive line.
      state :macro do
        # NB: pop! goes back to :bol
        rule %r/\n/, Comment::Preproc, :pop!
        rule %r([^/\n\\]+), Comment::Preproc
        # backslash plus any character, including a newline (continuation)
        rule %r/\\./m, Comment::Preproc
        mixin :inline_whitespace
        rule %r(/), Comment::Preproc
      end

      # Body of an `#if 0` region, emitted as Comment line by line.
      # A nested #if pushes another level so its #endif pops only that level.
      state :if_0 do
        # NB: no \b here, to cover #ifdef and #ifndef
        rule %r/^\s*#if/, Comment, :if_0
        rule %r/^\s*#\s*el(?:se|if)/, Comment, :pop!
        rule %r/^\s*#\s*endif\b.*?(?<!\\)\n/m, Comment, :pop!
        rule %r/.*?\n/, Comment
      end
    end
  end
end
|