# -*- coding: utf-8 -*- #

module Rouge
  module Lexers
    load_lexer 'sql.rb'

    # Lexer for HQL (Hive Query Language).
    #
    # Defined as a subclass of the SQL lexer: it supplies its own keyword and
    # type-name sets and adds rules for double-quoted string literals and
    # `${...}` variable interpolation on top of the inherited states.
    class HQL < SQL
      title "HQL"
      desc "Hive Query Language SQL dialect"
      tag 'hql'
      filenames '*.hql'

      # Set of words this lexer offers as HQL keywords (reserved words,
      # non-reserved words and built-in function names).
      #
      # The set is built once and memoized in a class-level instance variable.
      # Several words appear more than once in the literal below; that is
      # harmless because Set collapses duplicates.
      #
      # NOTE(review): `then else end` are listed in lowercase while every other
      # entry is uppercase. How the set is consulted is decided by the parent
      # SQL lexer (not visible here); if lookups are upcased these three
      # entries can never match -- confirm before removing them.
      #
      # NOTE(review): `UTC_TMESTAMP` looks like a typo but appears to mirror
      # the spelling used in Hive's own reserved-keyword list -- confirm
      # against the Hive documentation before "correcting" it.
      #
      # @return [Set<String>] the keyword set
      def self.keywords
        # sources:
        # https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
        # https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
        @keywords ||= Set.new(%w(
          ADD ADMIN AFTER ANALYZE ARCHIVE ASC BEFORE BUCKET BUCKETS CASCADE
          CHANGE CLUSTER CLUSTERED CLUSTERSTATUS COLLECTION COLUMNS COMMENT
          COMPACT COMPACTIONS COMPUTE CONCATENATE CONTINUE DATA DATABASES
          DATETIME DAY DBPROPERTIES DEFERRED DEFINED DELIMITED DEPENDENCY DESC
          DIRECTORIES DIRECTORY DISABLE DISTRIBUTE ELEM_TYPE ENABLE ESCAPED
          EXCLUSIVE EXPLAIN EXPORT FIELDS FILE FILEFORMAT FIRST FORMAT FORMATTED
          FUNCTIONS HOLD_DDLTIME HOUR IDXPROPERTIES IGNORE INDEX INDEXES INPATH
          INPUTDRIVER INPUTFORMAT ITEMS JAR KEYS KEY_TYPE LIMIT LINES LOAD
          LOCATION LOCK LOCKS LOGICAL LONG MAPJOIN MATERIALIZED METADATA MINUS
          MINUTE MONTH MSCK NOSCAN NO_DROP OFFLINE OPTION OUTPUTDRIVER
          OUTPUTFORMAT OVERWRITE OWNER PARTITIONED PARTITIONS PLUS PRETTY
          PRINCIPALS PROTECTION PURGE READ READONLY REBUILD RECORDREADER
          RECORDWRITER REGEXP RELOAD RENAME REPAIR REPLACE REPLICATION RESTRICT
          REWRITE RLIKE ROLE ROLES SCHEMA SCHEMAS SECOND SEMI SERDE
          SERDEPROPERTIES SERVER SETS SHARED SHOW SHOW_DATABASE SKEWED SORT
          SORTED SSL STATISTICS STORED STREAMTABLE STRING STRUCT TABLES
          TBLPROPERTIES TEMPORARY TERMINATED TINYINT TOUCH TRANSACTIONS UNARCHIVE
          UNDO UNIONTYPE UNLOCK UNSET UNSIGNED URI USE UTC UTCTIMESTAMP
          VALUE_TYPE VIEW WHILE YEAR IF

          ALL ALTER AND ARRAY AS AUTHORIZATION BETWEEN BIGINT BINARY BOOLEAN
          BOTH BY CASE CAST CHAR COLUMN CONF CREATE CROSS CUBE CURRENT
          CURRENT_DATE CURRENT_TIMESTAMP CURSOR DATABASE DATE DECIMAL DELETE
          DESCRIBE DISTINCT DOUBLE DROP ELSE END EXCHANGE EXISTS EXTENDED
          EXTERNAL FALSE FETCH FLOAT FOLLOWING FOR FROM FULL FUNCTION GRANT
          GROUP GROUPING HAVING IF IMPORT IN INNER INSERT INT INTERSECT
          INTERVAL INTO IS JOIN LATERAL LEFT LESS LIKE LOCAL MACRO MAP MORE
          NONE NOT NULL OF ON OR ORDER OUT OUTER OVER PARTIALSCAN PARTITION
          PERCENT PRECEDING PRESERVE PROCEDURE RANGE READS REDUCE REVOKE RIGHT
          ROLLUP ROW ROWS SELECT SET SMALLINT TABLE TABLESAMPLE THEN TIMESTAMP
          TO TRANSFORM TRIGGER TRUE TRUNCATE UNBOUNDED UNION UNIQUEJOIN UPDATE
          USER USING UTC_TMESTAMP VALUES VARCHAR WHEN WHERE WINDOW WITH

          AUTOCOMMIT ISOLATION LEVEL OFFSET SNAPSHOT TRANSACTION WORK WRITE

          COMMIT ONLY REGEXP RLIKE ROLLBACK START

          ABORT KEY LAST NORELY NOVALIDATE NULLS RELY VALIDATE

          CACHE CONSTRAINT FOREIGN PRIMARY REFERENCES

          DETAIL DOW EXPRESSION OPERATOR QUARTER SUMMARY VECTORIZATION WEEK YEARS MONTHS WEEKS DAYS HOURS MINUTES SECONDS

          DAYOFWEEK EXTRACT FLOOR INTEGER PRECISION VIEWS

          TIMESTAMPTZ ZONE

          TIME NUMERIC

          NAMED_STRUCT CREATE_UNION

          ROUND BROUND FLOOR CEIL CEILING RAND EXP LN LOG10 LOG2 LOG POW POWER SQRT BIN
          HEX UNHEX CONV ABS PMOD SIN ASIN COS ACOS TAN ATAN DEGREES RADIANS POSITIVE
          NEGATIVE SIGN E PI FACTORIAL CBRT SHIFTLEFT SHIFTRIGHT SHIFTRIGHTUNSIGNED
          GREATEST LEAST WIDTH_BUCKET SIZE SIZE MAP_KEYS MAP_VALUES ARRAY_CONTAINS
          SORT_ARRAY BINARY CAST FROM_UNIXTIME UNIX_TIMESTAMP UNIX_TIMESTAMP
          UNIX_TIMESTAMP TO_DATE YEAR QUARTER MONTH DAY DAYOFMONTH HOUR MINUTE SECOND
          WEEKOFYEAR EXTRACT DATEDIFF DATE_ADD DATE_SUB FROM_UTC_TIMESTAMP
          TO_UTC_TIMESTAMP CURRENT_DATE CURRENT_TIMESTAMP ADD_MONTHS LAST_DAY NEXT_DAY
          TRUNC MONTHS_BETWEEN DATE_FORMAT IF ISNULL ISNOTNULL NVL COALESCE CASE WHEN
          then else end NULLIF ASSERT_TRUE ASCII BASE64 CHARACTER_LENGTH CHR CONCAT
          CONTEXT_NGRAMS CONCAT_WS CONCAT_WS DECODE ELT ENCODE FIELD FIND_IN_SET
          FORMAT_NUMBER GET_JSON_OBJECT IN_FILE INSTR LENGTH LOCATE LOWER LCASE LPAD LTRIM
          NGRAMS OCTET_LENGTH PARSE_URL PRINTF REGEXP_EXTRACT REGEXP_REPLACE REPEAT
          REPLACE REVERSE RPAD RTRIM SENTENCES SPACE SPLIT STR_TO_MAP SUBSTR SUBSTRING
          SUBSTRING_INDEX TRANSLATE TRIM UNBASE64 UPPER UCASE INITCAP LEVENSHTEIN SOUNDEX
          MASK MASK_FIRST_N MASK_LAST_N MASK_SHOW_FIRST_N MASK_SHOW_LAST_N MASK_HASH
          JAVA_METHOD REFLECT HASH CURRENT_USER LOGGED_IN_USER CURRENT_DATABASE MD5 SHA1
          SHA CRC32 SHA2 AES_ENCRYPT AES_DECRYPT VERSION COUNT SUM AVG MIN MAX VARIANCE
          VAR_POP VAR_SAMP STDDEV_POP STDDEV_SAMP COVAR_POP COVAR_SAMP CORR PERCENTILE
          PERCENTILE_APPROX PERCENTILE_APPROX REGR_AVGX REGR_AVGY REGR_COUNT
          REGR_INTERCEPT REGR_R2 REGR_SLOPE REGR_SXX REGR_SXY REGR_SYY HISTOGRAM_NUMERIC
          COLLECT_SET COLLECT_LIST NTILE EXPLODE EXPLODE POSEXPLODE INLINE STACK

          JSON_TUPLE PARSE_URL_TUPLE

          XPATH XPATH_SHORT XPATH_INT XPATH_LONG XPATH_FLOAT XPATH_DOUBLE
          XPATH_NUMBER XPATH_STRING GET_JSON_OBJECT JSON_TUPLE

          PARSE_URL_TUPLE
        ))
      end

      # Set of Hive data-type names, built once and memoized in a class-level
      # instance variable.
      #
      # Every name here except INTERVAL also appears in {keywords}; which set
      # takes precedence is decided by the parent SQL lexer (not visible here).
      #
      # @return [Set<String>] the type-name set
      def self.keywords_type
        # source: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types
        @keywords_type ||= Set.new(%w(
          TINYINT SMALLINT INT INTEGER BIGINT FLOAT DOUBLE PRECISION DECIMAL NUMERIC
          TIMESTAMP DATE INTERVAL
          STRING VARCHAR CHAR
          BOOLEAN BINARY
          ARRAY MAP STRUCT UNIONTYPE
        ))
      end

      # Extra :root rules, registered via `prepend` so they are placed ahead of
      # the rules inherited from the SQL lexer.
      prepend :root do
        # a double-quoted string is a string literal in Hive QL.
        rule %r/"/, Str::Double, :double_string

        # interpolation of variables through ${...}
        rule %r/\$\{/, Name::Variable, :hive_variable
      end

      # Inside single-quoted strings: recognise `${` as the start of a
      # variable, and consume plain text in runs that stop at a backslash,
      # a single quote or a `$`, so the variable rule gets a chance to match.
      # Anything else (escapes, the closing quote, a lone `$`) is left to the
      # inherited :single_string rules.
      prepend :single_string do
        rule %r/\$\{/, Name::Variable, :hive_variable
        rule %r/[^\\'\$]+/, Str::Single
      end

      # Inside double-quoted strings: same variable handling as above, with
      # the closing quote and the plain text emitted as Str::Double.
      prepend :double_string do
        rule %r/\$\{/, Name::Variable, :hive_variable
        # double-quoted strings are string literals so need to change token
        rule %r/"/, Str::Double, :pop!
        rule %r/[^\\"\$]+/, Str::Double
      end

      # Body of a `${...}` reference: everything up to the closing brace is
      # emitted as Name::Variable, and the brace itself pops the state.
      state :hive_variable do
        rule %r/\}/, Name::Variable, :pop!
        rule %r/[^\}]+/, Name::Variable
      end
    end
  end
end