gleam.lua (4282B)
- -- Copyright 2021-2024 Mitchell. See LICENSE.
- -- Gleam LPeg lexer
- -- https://gleam.run/
- -- Contributed by Tynan Beatty
- local lexer = require('lexer')
- local token, word_match = lexer.token, lexer.word_match
- local P, S = lpeg.P, lpeg.S
- local KEY, OP = lexer.KEYWORD, lexer.OPERATOR
- -- The Gleam lexer object; every rule, style and fold point below is registered on it.
- local lex = lexer.new('gleam')
- -- Whitespace.
- -- A run of one or more space characters forms a single whitespace token. The token
- -- is kept in a local because mod_tok(), fn_tok() and lab_tok() receive it as `ws`.
- local space_run = lexer.space^1
- local gleam_ws = token(lexer.WHITESPACE, space_run)
- lex:add_rule('whitespace', gleam_ws)
- -- Types.
- -- A type name is one `lexer.upper` character followed by any number of
- -- `lexer.alnum` characters.
- local type_name = lexer.upper * lexer.alnum^0
- local typ_tok = token(lexer.TYPE, type_name)
- lex:add_rule('type', typ_tok)
- -- Modules.
- -- A lowercase name starts with a lowercase letter or '_' and continues with
- -- lowercase letters, digits or underscores.
- local name_head = lexer.lower + '_'
- local name_tail = lexer.lower + lexer.digit + '_'
- local name = name_head * name_tail^0
- -- The same name pattern, tagged either as a module or as a function.
- local mod_name = token('module', name)
- local fn_name = token(lexer.FUNCTION, name)
- -- One entry of an unqualified import list: a type or a function name.
- local typ_or_fn = typ_tok + fn_name
- -- Builds the pattern for an import statement:
- --   import a/b/c            -- module path, segments separated by '/'
- --   import a/b as alias     -- optional alias
- --   import a/b.{Type, func} -- optional unqualified import list
- -- @param ws Whitespace token pattern allowed between the parts. The embedded
- --   bitstring lexer passes its own whitespace token.
- -- @return The combined LPeg pattern.
- local function mod_tok(ws)
-   local opt_ws, req_ws = ws^0, ws^1
-   local path = mod_name * (opt_ws * token(OP, '/') * opt_ws * mod_name)^0
-   local alias = req_ws * token(KEY, 'as') * req_ws * mod_name
-   local next_item = opt_ws * token(OP, ',') * opt_ws * typ_or_fn
-   local unqualified = opt_ws * token(OP, '.') * opt_ws * token(OP, '{') * opt_ws * typ_or_fn *
-     next_item^0 * opt_ws * token(OP, '}')
-   return token(KEY, 'import') * req_ws * path * alias^-1 * unqualified^-1
- end
- lex:add_rule('module', mod_tok(gleam_ws))
- -- Module names use the constant style.
- lex:add_style('module', lexer.styles.constant)
- -- Keywords.
- -- 'echo', 'panic' and 'use' are reserved words in Gleam and were missing from the
- -- list. Every original entry is kept, so existing highlighting is unchanged.
- local key_tok = token(KEY, word_match(
-   'as assert case const echo external fn if import let opaque panic pub todo try tuple type use'))
- lex:add_rule('keyword', key_tok)
- -- Functions.
- -- Builds the pattern that recognizes function names in two positions:
- --   * an optionally module-qualified name directly before a '(' (definition or call);
- --   * the target of a pipe: `|> fn` or `|> module.name`.
- -- @param ws Whitespace token pattern allowed between the parts.
- -- @return The combined LPeg pattern.
- local function fn_tok(ws)
-   local gap = ws^0
-   -- Optional `module.` prefix.
-   local qualifier = (mod_name * gap * token(OP, '.'))^-1
-   -- The '(' is only looked at, not consumed, so the operator rule still styles it.
-   local before_paren = qualifier * gap * fn_name * gap * #P('(')
-   local pipe_target = token(KEY, 'fn') + qualifier * fn_name
-   return before_paren + token(OP, '|>') * gap * pipe_target
- end
- lex:add_rule('function', fn_tok(gleam_ws))
- -- Labels.
- local id = token(lexer.IDENTIFIER, name)
- -- Builds the pattern for a labelled argument: a '(' or ',' followed by a name that
- -- is itself followed by whitespace and another identifier, e.g. `(label value`.
- -- @param ws Whitespace token pattern allowed between the parts.
- -- @return The combined LPeg pattern.
- local function lab_tok(ws)
-   local opener = token(OP, S('(,'))
-   local label = token(lexer.LABEL, name)
-   -- Look ahead only: the whitespace and identifier are left for their own rules.
-   local followed_by_id = #(ws^1 * id)
-   return opener * ws^0 * label * followed_by_id
- end
- lex:add_rule('label', lab_tok(gleam_ws))
- -- Identifiers.
- -- A discard is '_' followed by any number of lowercase letters, digits or
- -- underscores. The tail is optional so that a bare '_' and names such as '_1' are
- -- styled as discards too; previously they fell through to the plain identifier.
- local discard_id = token('discard', '_' * (lexer.lower + lexer.digit + '_')^0)
- -- Discards are tried first because `id` also accepts names starting with '_'.
- local id_tok = discard_id + id
- lex:add_rule('identifier', id_tok)
- -- Discards are shown in the comment style.
- lex:add_style('discard', lexer.styles.comment)
- -- Strings.
- -- Text delimited by double quotes.
- local double_quoted = lexer.range('"')
- local str_tok = token(lexer.STRING, double_quoted)
- lex:add_rule('string', str_tok)
- -- Comments.
- -- Everything from '//' to the end of the line.
- local line_comment = lexer.to_eol('//')
- local com_tok = token(lexer.COMMENT, line_comment)
- lex:add_rule('comment', com_tok)
- -- Numbers.
- -- Allows `patt` to be preceded by a '-' sign.
- -- The sign is only taken when the character before it is whitespace, an operator
- -- or an opening bracket, so the '-' in `a-1` stays a binary operator while the one
- -- in `[-1, 2]` or `f(-1)` becomes part of the number.
- -- @param patt LPeg pattern for an unsigned number.
- -- @return Pattern matching `patt` with an optional leading '-'.
- local function can_neg(patt)
-   return (lpeg.B(lexer.space + S('+-/*%<>=&|:,.([{')) * '-')^-1 * patt
- end
- -- Builds a pattern for one or more groups of `patt`, where each group is one or
- -- more repetitions of `patt` optionally introduced by a single '_' separator.
- -- @param patt LPeg pattern for one digit of the number's base.
- -- @return The repeated pattern.
- local function can_sep(patt)
-   local optional_sep = P('_')^-1
-   local group = optional_sep * patt^1
-   return group^1
- end
- -- Decimal digits, with optional '_' separators after the first digit.
- local dec = lexer.digit * can_sep(lexer.digit)^0
- -- Optional exponent of a float, e.g. the `e7` in `7.0e7` or the `e-4` in `3.0e-4`.
- -- Without it the exponent was lexed as a separate identifier.
- local exponent = 'e' * P('-')^-1 * dec
- -- The fractional digits stay optional; an exponent is only accepted after them.
- local float = dec * '.' * (dec^1 * exponent^-1)^-1
- -- A binary literal must not run into a further hexadecimal digit.
- local bin = '0' * S('bB') * can_sep(S('01')) * -lexer.xdigit
- local oct = '0' * S('oO') * can_sep(lpeg.R('07'))
- local hex = '0' * S('xX') * can_sep(lexer.xdigit)
- -- Floats are tried before plain decimals so the fractional part is not split off.
- local num_tok = token(lexer.NUMBER, can_neg(float) + bin + oct + hex + can_neg(dec))
- lex:add_rule('number', num_tok)
- -- Operators.
- -- '@' is included so that attributes such as `@external` or `@deprecated` are not
- -- picked up by the catch-all error rule.
- local op_tok = token(OP, S('+-*/%#!=<>&|.,:;{}[]()@'))
- lex:add_rule('operator', op_tok)
- -- Errors.
- -- Catch-all: matches any single character. It is the last rule added to `lex`.
- local any_char = lexer.any
- local err_tok = token(lexer.ERROR, any_char)
- lex:add_rule('error', err_tok)
- -- Fold points.
- -- Each bracket pair, styled as an operator, opens and closes a fold.
- for _, pair in ipairs({{'{', '}'}, {'[', ']'}, {'(', ')'}}) do
-   lex:add_fold_point(lexer.OPERATOR, pair[1], pair[2])
- end
- -- Embedded Bit Strings.
- -- Mimic lexer.load() by creating a bitstring-specific whitespace style.
- local bitstring = lexer.new(lex._name .. '_bitstring')
- local bitstring_ws_style = bitstring._name .. '_whitespace'
- local bitstring_ws = token(bitstring_ws_style, lexer.space^1)
- bitstring:add_rule('whitespace', bitstring_ws)
- bitstring:add_style(bitstring_ws_style, lexer.styles.whitespace)
- bitstring:add_rule('type', typ_tok)
- bitstring:add_rule('module', mod_tok(bitstring_ws))
- -- Inside `<< >>` these extra words are keywords, in addition to the regular ones.
- local segment_words = word_match{
-   'binary', 'bytes', 'int', 'float', 'bit_string', 'bits', 'utf8', 'utf16', 'utf32',
-   'utf8_codepoint', 'utf16_codepoint', 'utf32_codepoint', 'signed', 'unsigned', 'big', 'little',
-   'native', 'unit', 'size'
- }
- bitstring:add_rule('keyword', key_tok + token(KEY, segment_words))
- -- Whitespace-dependent rules are rebuilt with the bitstring whitespace token.
- bitstring:add_rule('function', fn_tok(bitstring_ws))
- bitstring:add_rule('label', lab_tok(bitstring_ws))
- -- The remaining rules reuse the parent lexer's tokens, added in the same order.
- local shared_rules = {
-   {'identifier', id_tok}, {'string', str_tok}, {'comment', com_tok}, {'number', num_tok},
-   {'operator', op_tok}, {'error', err_tok}
- }
- for _, rule in ipairs(shared_rules) do bitstring:add_rule(rule[1], rule[2]) end
- -- The bitstring lexer is active between '<<' and '>>', both styled as operators.
- lex:embed(bitstring, token(OP, '<<'), token(OP, '>>'))
- lexer.property['scintillua.comment'] = '//' -- same prefix as the comment rule; presumably read by editors as the line-comment string
- return lex -- the configured Gleam lexer, with the bitstring lexer embedded