248 lines
7.4 KiB
Ruby
248 lines
7.4 KiB
Ruby
|
# -*- coding: utf-8 -*- #
|
||
|
# frozen_string_literal: true
|
||
|
|
||
|
module Rouge
|
||
|
module Lexers
|
||
|
class Python < RegexLexer
|
||
|
# Lexer metadata, declared through class-level macros inherited from the
# superclass (their implementation lives outside this file).
title("Python")
desc("The Python programming language (python.org)")

# Names under which the lexer can be requested.
tag('python')
aliases('py')

# File-name globs and MIME types associated with this lexer.
filenames(
  '*.py', '*.pyw', '*.sc',
  'SConstruct', 'SConscript',
  '*.tac'
)
mimetypes(
  'text/x-python',
  'application/x-python'
)
|
||
|
|
||
|
# Content-based detection hook.
#
# Asks +text+ whether its shebang line matches a Python interpreter name:
# `python` or `pythonw`, optionally followed by a major version 2 or 3 and
# an optional `.minor` part.
#
# @param text an object responding to `shebang?(regex)` (presumably Rouge's
#   text analyzer - it is defined outside this file)
# @return [true, nil] true on a match, nil otherwise
def self.detect?(text)
  python_shebang = /pythonw?(?:[23](?:\.\d+)?)?/
  true if text.shebang?(python_shebang)
end
|
||
|
|
||
|
# Reserved words highlighted as Keyword by the identifier rule.
#
# The list mixes Python 2 statements (`exec`, `print`) with Python 3
# additions (`async`, `await`, `nonlocal`). Each word appears exactly once;
# the array is built on first call and memoized on the class.
#
# @return [Array<String>]
def self.keywords
  @keywords ||= %w(
    assert break continue del elif else except exec
    finally for global if lambda pass print raise
    return try while yield as with from import
    async await nonlocal
  )
end
|
||
|
|
||
|
# Names of built-in functions and types (Python 2 and Python 3 spellings
# side by side) that the identifier rule reports as Name::Builtin.
#
# The array is created once and cached in a class-level instance variable.
#
# @return [Array<String>]
def self.builtins
  return @builtins if @builtins

  @builtins = %w[
    __import__
    abs all any apply ascii
    basestring bin bool buffer bytearray bytes
    callable chr classmethod cmp coerce compile complex
    delattr dict dir divmod
    enumerate eval execfile exit
    file filter float format frozenset
    getattr globals
    hasattr hash hex
    id input int intern isinstance issubclass iter
    len list locals long
    map max memoryview min
    next
    object oct open ord
    pow property
    range raw_input reduce reload repr reversed round
    set setattr slice sorted staticmethod str sum super
    tuple type
    unichr unicode
    vars
    xrange
    zip
  ]
end
|
||
|
|
||
|
# Constant-like names (`self`, `None`, `True`, ...) that the identifier rule
# reports as Name::Builtin::Pseudo. Built lazily and cached on the class.
#
# @return [Array<String>]
def self.builtins_pseudo
  return @builtins_pseudo if @builtins_pseudo

  @builtins_pseudo = %w[
    self
    None Ellipsis NotImplemented
    False True
  ]
end
|
||
|
|
||
|
# Built-in exception and warning class names (Python 2 and Python 3) that
# the identifier rule highlights as built-ins.
#
# Compared with the previous list:
# * `PermissionError` is included - it was the only OSError subclass missing.
# * `NotImplemented` is no longer listed: it is a constant, not an exception,
#   and is already covered by `builtins_pseudo`. Because exceptions are
#   checked before pseudo-builtins, listing it here hid that classification.
#
# The array is built on first call and memoized on the class.
#
# @return [Array<String>]
def self.exceptions
  @exceptions ||= %w(
    ArithmeticError AssertionError AttributeError
    BaseException BlockingIOError BrokenPipeError BufferError
    BytesWarning ChildProcessError ConnectionAbortedError
    ConnectionError ConnectionRefusedError ConnectionResetError
    DeprecationWarning EOFError EnvironmentError
    Exception FileExistsError FileNotFoundError
    FloatingPointError FutureWarning GeneratorExit IOError
    ImportError ImportWarning IndentationError IndexError
    InterruptedError IsADirectoryError KeyError KeyboardInterrupt
    LookupError MemoryError ModuleNotFoundError NameError
    NotADirectoryError NotImplementedError OSError
    OverflowError OverflowWarning PendingDeprecationWarning
    PermissionError ProcessLookupError RecursionError ReferenceError
    ResourceWarning RuntimeError RuntimeWarning StandardError
    StopAsyncIteration StopIteration SyntaxError SyntaxWarning
    SystemError SystemExit TabError TimeoutError TypeError
    UnboundLocalError UnicodeDecodeError UnicodeEncodeError
    UnicodeError UnicodeTranslateError UnicodeWarning UserWarning
    ValueError VMSError Warning WindowsError ZeroDivisionError
  )
end
|
||
|
|
||
|
# Shared regex fragments, captured by the state blocks defined further down.
# Both are case-insensitive.

# A plain name: a letter or underscore, then letters, digits or underscores.
identifier = /[a-z_][a-z0-9_]*/i

# Same as above, but dots are also accepted at any position (used for
# module paths and decorator names).
dotted_identifier = /[a-z_.][a-z0-9_.]*/i
|
||
|
# Top-level lexing state: whitespace, comments, punctuation, operators,
# import statements, def/class headers, string openers, decorators,
# identifiers and numeric literals.
#
# Rule order matters: rules are listed in priority order, so earlier rules
# win when several could match at the same position.
state :root do
  rule %r/\n+/m, Text

  # A colon at the start of a line followed by a triple-double-quoted string
  # is reported as a docstring.
  rule %r/^(:)(\s*)([ru]{,2}""".*?""")/mi do
    groups Punctuation, Text, Str::Doc
  end

  rule %r/[^\S\n]+/, Text
  rule %r(#(.*)?\n?), Comment::Single
  rule %r/[\[\]{}:(),;]/, Punctuation
  rule %r/\\\n/, Text
  rule %r/\\/, Text

  # Decorators must be tried before the operator rules: `@` is also part of
  # the single-character operator class below, which would otherwise claim
  # the `@` first and leave this rule unreachable. A bare `@` (e.g. the
  # matrix-multiplication operator followed by a space) still falls through
  # to the operator rule.
  rule %r/@#{dotted_identifier}/i, Name::Decorator

  rule %r/(in|is|and|or|not)\b/, Operator::Word
  rule %r/(<<|>>|\/\/|\*\*)=?/, Operator
  rule %r/[-~+\/*%=<>&^|@]=?|!=/, Operator
  rule %r/\.(?![0-9])/, Operator # so it doesn't match float literals

  # `from <module> import`; whitespace may include backslash continuations.
  rule %r/(from)((?:\\\s|\s)+)(#{dotted_identifier})((?:\\\s|\s)+)(import)/ do
    groups Keyword::Namespace,
           Text,
           Name::Namespace,
           Text,
           Keyword::Namespace
  end

  rule %r/(import)(\s+)(#{dotted_identifier})/ do
    groups Keyword::Namespace, Text, Name::Namespace
  end

  # After `def` / `class` the next identifier is a function / class name.
  rule %r/(def)((?:\s|\\\s)+)/ do
    groups Keyword, Text
    push :funcname
  end

  rule %r/(class)((?:\s|\\\s)+)/ do
    groups Keyword, Text
    push :classname
  end

  # TODO: not in python 3
  rule %r/`.*?`/, Str::Backtick

  # String openers. Raw prefixes are tried first, and triple quotes before
  # single quotes, so the longest opener wins.
  rule %r/(?:r|ur|ru)"""/i, Str, :raw_tdqs
  rule %r/(?:r|ur|ru)'''/i, Str, :raw_tsqs
  rule %r/(?:r|ur|ru)"/i, Str, :raw_dqs
  rule %r/(?:r|ur|ru)'/i, Str, :raw_sqs
  rule %r/u?"""/i, Str, :tdqs
  rule %r/u?'''/i, Str, :tsqs
  rule %r/u?"/i, Str, :dqs
  rule %r/u?'/i, Str, :sqs

  # using negative lookbehind so we don't match property names
  rule %r/(?<!\.)#{identifier}/ do |m|
    word = m[0]

    if self.class.keywords.include? word
      token Keyword
    elsif self.class.exceptions.include? word
      token Name::Builtin
    elsif self.class.builtins.include? word
      token Name::Builtin
    elsif self.class.builtins_pseudo.include? word
      token Name::Builtin::Pseudo
    else
      token Name
    end
  end

  # Identifiers preceded by a dot (attribute access) are always plain names.
  rule identifier, Name

  # Numeric literals; underscores are allowed between digits.
  digits = /[0-9](_?[0-9])*/
  decimal = /((#{digits})?\.#{digits}|#{digits}\.)/
  exponent = /e[+-]?#{digits}/i
  rule %r/#{decimal}(#{exponent})?j?/i, Num::Float
  rule %r/#{digits}#{exponent}j?/i, Num::Float
  rule %r/#{digits}j/i, Num::Float

  rule %r/0b(_?[0-1])+/i, Num::Bin
  rule %r/0o(_?[0-7])+/i, Num::Oct
  rule %r/0x(_?[a-f0-9])+/i, Num::Hex
  rule %r/\d+L/, Num::Integer::Long
  rule %r/([1-9](_?[0-9])*|0(_?0)*)/, Num::Integer
end
|
||
|
|
||
|
# Pushed by the root state right after a `def` keyword: the next identifier
# is the function name, after which the state is popped.
state :funcname do
  rule(identifier, Name::Function, :pop!)
end
|
||
|
|
||
|
# Pushed by the root state right after a `class` keyword: the next
# identifier is the class name, after which the state is popped.
state :classname do
  rule(identifier, Name::Class, :pop!)
end
|
||
|
|
||
|
# Statement-scoped state: `from`, `raise` and `yield` are keywords, a
# newline or semicolon ends the statement and pops the state, and everything
# else is lexed with the root rules.
# NOTE(review): no rule in this file pushes :raise - confirm whether a
# subclass relies on it.
state :raise do
  %w(from raise yield).each do |word|
    rule(/#{word}\b/, Keyword)
  end

  rule(/\n/, Text, :pop!)
  rule(/;/, Punctuation, :pop!)

  mixin(:root)
end
|
||
|
|
||
|
# Same rule set as the :raise state, under a separate state name.
state :yield do
  mixin(:raise)
end
|
||
|
|
||
|
# Printf-style placeholder inside a string: `%`, then an optional
# `(mapping_key)`, flag characters, a width and a `.precision`. Every part
# after the `%` is optional, so a lone `%` matches as well.
state :strings do
  rule(
    %r/%(\([a-z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?/i,
    Str::Interpol
  )
end
|
||
|
|
||
|
# Body of a double-quoted string: runs of ordinary characters (anything but
# a backslash, a double quote, `%` or a newline), plus `%` placeholders.
state :strings_double do
  rule(/[^\\"%\n]+/, Str)
  mixin(:strings)
end
|
||
|
|
||
|
# Body of a single-quoted string: runs of ordinary characters (anything but
# a backslash, a single quote, `%` or a newline), plus `%` placeholders.
state :strings_single do
  rule(/[^\\'%\n]+/, Str)
  mixin(:strings)
end
|
||
|
|
||
|
# A newline that is part of a string; mixed into the triple-quoted states.
state :nl do
  rule(/\n/, Str)
end
|
||
|
|
||
|
# Backslash escape sequences recognised inside non-raw strings. The pattern
# is written in extended (x) mode, one alternative per line:
# single-character escapes, an escaped newline, `N{...}` names, `u` / `U` /
# `x` hexadecimal forms and one to three octal digits.
state :escape do
  rule %r(\\
    ( [\\abfnrtv"']
    | \n
    | N{[a-zA-Z][a-zA-Z ]+[a-zA-Z]}
    | u[a-fA-F0-9]{4}
    | U[a-fA-F0-9]{8}
    | x[a-fA-F0-9]{2}
    | [0-7]{1,3}
    )
  )x, Str::Escape
end
|
||
|
|
||
|
# In raw strings a backslash plus the following character (anything except
# a newline) is reported as ordinary string text, not as an escape.
state :raw_escape do
  rule(/\\./, Str)
end
|
||
|
|
||
|
# Inside a "..." string: the closing quote pops the state; otherwise escape
# sequences and double-quoted string content are matched.
state :dqs do
  rule(/"/, Str, :pop!)
  mixin(:escape)
  mixin(:strings_double)
end
|
||
|
|
||
|
# Inside a '...' string: the closing quote pops the state; otherwise escape
# sequences and single-quoted string content are matched.
state :sqs do
  rule(/'/, Str, :pop!)
  mixin(:escape)
  mixin(:strings_single)
end
|
||
|
|
||
|
# Inside a """...""" string: three double quotes close it, a lone double
# quote is plain content, and newlines are part of the string.
state :tdqs do
  rule(/"""/, Str, :pop!)
  rule(/"/, Str)
  mixin(:escape)
  mixin(:strings_double)
  mixin(:nl)
end
|
||
|
|
||
|
# Inside a '''...''' string: three single quotes close it, a lone single
# quote is plain content, and newlines are part of the string.
state :tsqs do
  rule(/'''/, Str, :pop!)
  rule(/'/, Str)
  mixin(:escape)
  mixin(:strings_single)
  mixin(:nl)
end
|
||
|
|
||
|
# Define a raw_<kind> state for each quoting style. Each one puts the
# raw-string backslash handling first and then reuses the rules of the
# corresponding non-raw state.
%w(tdqs tsqs dqs sqs).each do |qtype|
  raw_name = "raw_#{qtype}".to_sym
  base_name = qtype.to_sym

  state raw_name do
    mixin :raw_escape
    mixin base_name
  end
end
|
||
|
|
||
|
end
|
||
|
end
|
||
|
end
|