logo

oasis-root

Compiled tree of Oasis Linux, based on the maintainer's own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

perl.lua (7483B)


  1. -- Copyright 2006-2024 Mitchell. See LICENSE.
  2. -- Perl LPeg lexer.
  -- Bind the host-provided lexer module and the LPeg constructors used below to locals, then
  -- create this file's lexer object from the arguments the chunk was loaded with.
  local lexer = lexer
  local P = lpeg.P
  local S = lpeg.S
  local lex = lexer.new(...)
  -- Keywords: any word from the lexer.KEYWORD word list.
  local keyword_words = lex:word_match(lexer.KEYWORD)
  lex:add_rule('keyword', lex:tag(lexer.KEYWORD, keyword_words))

  -- Markers: '__DATA__' or '__END__' plus everything that follows it, tagged as a comment.
  local marker_word = lexer.word_match('__DATA__ __END__')
  local remaining_input = lexer.any^0
  lex:add_rule('marker', lex:tag(lexer.COMMENT, marker_word * remaining_input))
  -- Strings.
  -- Maps each opening bracket to the closing bracket that pairs with it.
  local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}

  -- Match-time pattern for quote-like operators that take a single delimited part.
  -- The character at `index` is taken as the opening delimiter.
  -- Fails when that character is alphanumeric, an underscore, or missing (end of input), so
  -- that barewords such as `q_name` or a trailing `m` are not lexed as quote-like operators.
  -- Otherwise returns the position just past the delimited text, or the position just past
  -- the end of input when the range pattern does not match.
  local literal_delimited = P(function(input, index)
    local delimiter = input:sub(index, index)
    -- An empty string means `index` is past the end of input: there is no delimiter at all.
    if delimiter == '' or delimiter:find('[%w_]') then return nil end
    local closer = delimiter_matches[delimiter]
    local patt
    if closer then
      -- Handle nested delimiter/matches in strings.
      patt = lexer.range(delimiter, closer, false, true, true)
    else
      patt = lexer.range(delimiter)
    end
    return lpeg.match(patt, input, index) or #input + 1
  end)
  -- Match-time pattern for operators that take two delimited parts, e.g. `s/a/b/` or
  -- `s{a}{b}`. The character at `index` is taken as the opening delimiter.
  -- Fails when that character is missing (end of input), a word character (so barewords such
  -- as `s_name` or `y_axis` are not lexed as operators), or whitespace (so operators like "-s"
  -- are ignored). Otherwise returns the position just past the second part, or the position
  -- just past the end of input when the parts cannot be matched.
  local literal_delimited2 = P(function(input, index)
    local delimiter = input:sub(index, index)
    if delimiter == '' or delimiter:find('[%w_%s]') then return nil end
    local closer = delimiter_matches[delimiter]
    local patt
    if closer then
      -- Handle nested delimiter/matches in strings.
      patt = lexer.range(delimiter, closer, false, true, true)
    else
      patt = lexer.range(delimiter)
    end
    local first_match_pos = lpeg.match(patt, input, index)
    -- Never do arithmetic on a failed match; treat it like an unterminated literal.
    if not first_match_pos then return #input + 1 end
    -- With a shared delimiter (s/a/b/) the second part begins on the character that closed the
    -- first part, hence the retry one position back.
    local final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
    if not final_match_pos then -- using (), [], {}, or <> notation
      final_match_pos = lpeg.match(lexer.space^0 * patt, input, first_match_pos)
    end
    -- A match-time function must not report a position before the one it was called at.
    if final_match_pos and final_match_pos < index then final_match_pos = index end
    return final_match_pos or #input + 1
  end)
  -- Ranges delimited by single quotes, double quotes, and backticks respectively.
  local sq_str, dq_str, cmd_str = lexer.range("'"), lexer.range('"'), lexer.range('`')
  -- Here-documents: `<<TAG`, `<<"TAG"`, `<<'TAG'`, and the indented `<<~` variants, where TAG
  -- is an identifier followed by line breaks and/or ';'.
  -- The match-time function runs just after '<<'. It fails when no tag starts at `index`.
  -- Otherwise it returns the position just past the terminating tag, or the position just
  -- past the end of input when the here-document is unterminated.
  local heredoc = '<<' * P(function(input, index)
    -- '^' anchors the search at `index`, so a failed attempt does not scan the rest of input.
    -- Captures: optional '~', optional quote (which must be repeated after the tag), the tag.
    local _, e, indented, _, delimiter = input:find('^(~?)(["\']?)([%a_][%w_]*)%2[\n\r\f;]+',
      index)
    if not delimiter then return nil end
    -- The tag consists of word characters only, so it is safe to embed in a Lua pattern.
    -- With '~' the terminating tag may be preceded by spaces or tabs.
    local terminator = '[\n\r\f]+' .. (indented == '~' and '[ \t]*' or '') .. delimiter
    local from = e
    while true do
      local _, stop = input:find(terminator, from)
      if not stop then return #input + 1 end
      -- Accept the tag only when it ends its line (or the input). Otherwise a body line that
      -- merely begins with the tag, such as "ENDING" for tag "END", would end the string early.
      local follower = input:sub(stop + 1, stop + 1)
      if follower == '' or follower:find('[\n\r\f]') then return stop + 1 end
      from = stop + 1
    end
  end)
  -- Quote-like string operators: q and qq, qw, and qx, each followed by one delimited part.
  local quoted_literal = P('q') * P('q')^-1 * literal_delimited
  local word_list_literal = P('qw') * literal_delimited
  local command_literal = P('qx') * literal_delimited
  local string_token = lex:tag(lexer.STRING, sq_str + dq_str + cmd_str + heredoc +
    quoted_literal + word_list_literal + command_literal)

  -- Regex-like tokens. A '/'-delimited range is built through lexer.after_set with the listed
  -- operator characters; qr and m take one delimited part, while s, tr, and y take two.
  -- Each form may be followed by its own set of flag letters.
  local regex_flags = S('imosx')^0
  local slash_regex = lexer.after_set('-<>+*!~\\=%&|^?:;([{', lexer.range('/', true) * regex_flags)
  local qr_regex = P('qr') * literal_delimited * regex_flags
  local match_regex = P('m') * literal_delimited * S('cgimosx')^0
  local substitution = P('s') * literal_delimited2 * S('ecgimosx')^0
  local transliteration = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
  local regex_token = lex:tag(lexer.REGEX,
    slash_regex + qr_regex + match_regex + substitution + transliteration)

  -- Strings and regexes share a single rule; string alternatives are tried first.
  lex:add_rule('string', string_token + regex_token)
  -- Functions.
  -- Built-in functions: a word from the FUNCTION_BUILTIN list, followed by a lookahead for
  -- optional whitespace and an optional '('.
  local builtin_func = lex:tag(lexer.FUNCTION_BUILTIN, lex:word_match(lexer.FUNCTION_BUILTIN))
  local optional_paren_ahead = #(lexer.space^0 * P('(')^-1)
  lex:add_rule('function_builtin', builtin_func * optional_paren_ahead)

  -- Other functions: a word that is followed (after optional whitespace) by '('. A word
  -- preceded by '->' is tagged as a method instead.
  local func = lex:tag(lexer.FUNCTION, lexer.word)
  local method = lpeg.B('->') * lex:tag(lexer.FUNCTION_METHOD, lexer.word)
  local paren_ahead = #(lexer.space^0 * '(')
  lex:add_rule('function', (method + func) * paren_ahead)
  -- Constants: any word from the lexer.CONSTANT_BUILTIN word list.
  local constant_words = lex:word_match(lexer.CONSTANT_BUILTIN)
  lex:add_rule('constant', lex:tag(lexer.CONSTANT_BUILTIN, constant_words))

  -- Identifiers: any remaining word.
  local identifier = lex:tag(lexer.IDENTIFIER, lexer.word)
  lex:add_rule('identifier', identifier)
  -- Comments.
  -- Line comments start with '#'.
  local line_comment = lexer.to_eol('#', true)
  -- Block comments open on a line starting with '=' plus a letter and close on a line starting
  -- with '=cut'.
  local block_open = lexer.starts_line('=' * lexer.alpha)
  local block_close = lexer.starts_line('=cut')
  local block_comment = lexer.range(block_open, block_close)
  lex:add_rule('comment', lex:tag(lexer.COMMENT, block_comment + line_comment))
  -- Numbers: the pattern produced by lexer.number_ when given '_'.
  local number = lexer.number_('_')
  lex:add_rule('number', lex:tag(lexer.NUMBER, number))
  -- Variables.
  -- Built-in scalars: '$' followed by a digit, one punctuation character, '^' with an optional
  -- capital letter, or 'ARGV'.
  local scalar_caret_name = '^' * S('ACDEFHILMNOPRSTVWX')^-1
  local scalar_names = lpeg.R('09') + S('!"$%&\'()+,-./:;<=>?@\\]_`|~') + scalar_caret_name + 'ARGV'
  local builtin_var_s = '$' * scalar_names
  -- Built-in arrays and hashes.
  local array_names = S('+-_F') + 'ARGV' + 'INC' + 'ISA'
  local builtin_var_a = '@' * array_names
  local hash_names = S('+-!') + '^' * S('H')^-1 + 'ENV' + 'INC' + 'SIG'
  local builtin_var_h = '%' * hash_names
  local builtin_var = builtin_var_s + builtin_var_a + builtin_var_h
  lex:add_rule('variable_builtin', lex:tag(lexer.VARIABLE_BUILTIN, builtin_var))

  -- Remaining special variables: '$' followed by one of the alternatives below, tried in order.
  local special_caret = '^' * S('ADEFHILMOPSTWX')^-1
  local special_punct = S('\\"[]\'&`+*.,;=%~?@<>(|/!-')
  local special_colon = ':' * (lexer.any - ':')
  local special_dollar = P('$') * -lexer.word
  local special_digits = lexer.digit^1
  local special_var = '$' *
    (special_caret + special_punct + special_colon + special_dollar + special_digits)
  -- Plain variables: a sigil ('$#', '$', '@', or '%'), any number of extra '$', then a word;
  -- a bare '$#' is also accepted.
  local sigil = '$#' + S('$@%')
  local plain_var = sigil * P('$')^0 * lexer.word + '$#'
  lex:add_rule('variable', lex:tag(lexer.VARIABLE, special_var + plain_var))
  -- Operators: any single character from the set below.
  local operator_chars = S('-<>+*!~\\=/%&|^.,?:;()[]{}')
  lex:add_rule('operator', lex:tag(lexer.OPERATOR, operator_chars))

  -- Fold points: square brackets and curly braces tagged as operators.
  for _, pair in ipairs({{'[', ']'}, {'{', '}'}}) do
    lex:add_fold_point(lexer.OPERATOR, pair[1], pair[2])
  end
  -- Word lists.
  -- Words matched by the 'keyword' rule.
  local keywords = {
    -- Upper-case names.
    'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT',
    -- Module loading.
    'require', 'use',
    -- Control flow, declarations, and compile-time tokens.
    'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if', 'last',
    'local', 'my', 'next', 'our', 'package', 'return', 'sub', 'unless', 'until', 'while',
    '__FILE__', '__LINE__', '__PACKAGE__',
    -- Word operators.
    'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
  }
  lex:set_word_list(lexer.KEYWORD, keywords)
  -- Words matched by the 'function_builtin' rule, laid out one initial letter per group.
  local builtin_functions = {
    'abs', 'accept', 'alarm', 'atan2',
    'bind', 'binmode', 'bless',
    'caller', 'chdir', 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir', 'close',
    'connect', 'cos', 'crypt',
    'dbmclose', 'dbmopen', 'defined', 'delete', 'die', 'dump',
    'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent', 'endpwent', 'endservent',
    'eof', 'eval', 'exec', 'exists', 'exit', 'exp',
    'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline',
    'getc', 'getgrent', 'getgrgid', 'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent',
    'getlogin', 'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
    'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber', 'getprotoent', 'getpwent',
    'getpwnam', 'getpwuid', 'getservbyname', 'getservbyport', 'getservent', 'getsockname',
    'getsockopt', 'glob', 'gmtime', 'goto', 'grep',
    'hex',
    'import', 'index', 'int', 'ioctl',
    'join',
    'keys', 'kill',
    'lcfirst', 'lc', 'length', 'link', 'listen', 'localtime', 'log', 'lstat',
    'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd',
    'new',
    'oct', 'opendir', 'open', 'ord',
    'pack', 'pipe', 'pop', 'pos', 'printf', 'print', 'prototype', 'push',
    'quotemeta',
    'rand', 'readdir', 'read', 'readlink', 'recv', 'redo', 'ref', 'rename', 'reset', 'reverse',
    'rewinddir', 'rindex', 'rmdir',
    'scalar', 'seekdir', 'seek', 'select', 'semctl', 'semget', 'semop', 'send', 'setgrent',
    'sethostent', 'setnetent', 'setpgrp', 'setpriority', 'setprotoent', 'setpwent',
    'setservent', 'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
    'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
    'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread', 'sysseek', 'system',
    'syswrite',
    'telldir', 'tell', 'tied', 'tie', 'time', 'times', 'truncate',
    'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie', 'utime',
    'values', 'vec',
    'wait', 'waitpid', 'wantarray', 'warn', 'write'
  }
  lex:set_word_list(lexer.FUNCTION_BUILTIN, builtin_functions)
  -- Words matched by the 'constant' rule.
  local builtin_constants = {
    'ARGV', 'ARGVOUT', 'DATA', 'ENV', 'INC', 'SIG', 'STDERR', 'STDIN', 'STDOUT'
  }
  lex:set_word_list(lexer.CONSTANT_BUILTIN, builtin_constants)
  -- Record '#' under the 'scintillua.comment' property, then return the finished lexer as the
  -- chunk's result.
  local comment_prefix = '#'
  lexer.property['scintillua.comment'] = comment_prefix
  return lex