logo

oasis-root

Compiled tree of Oasis Linux, built from our own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

rest.lua (9356B)


-- Copyright 2006-2024 Mitchell. See LICENSE.
-- reStructuredText LPeg lexer.

local lexer = require('lexer')
-- Localize the lexer-module helpers used throughout this file.
local token, word_match, starts_line = lexer.token, lexer.word_match, lexer.starts_line
-- NOTE(review): `lpeg` is referenced as a global, not require()d here --
-- presumably the Scintillua host injects it; confirm before running standalone.
local P, S = lpeg.P, lpeg.S

-- The lexer object all rules and styles below are attached to.
local lex = lexer.new('rest')
-- Literal block.
-- Matches '::' at end of line followed by an indented (or "quoted") block.
-- The match-time function scans forward line by line and returns the position
-- just past the block's last line.
local block = '::' * (lexer.newline + -1) * function(input, index)
  local rest = input:sub(index)
  -- `level` is the leading-whitespace width of the block's first line;
  -- `quote` lazily becomes a pattern for quoted literal blocks (see below).
  local level, quote = #rest:match('^([ \t]*)')
  for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
    local no_indent = (indent - pos < level and line ~= ' ' or level == 0)
    -- A "quoted" literal block is unindented but every line begins with the
    -- same non-word punctuation character.
    local quoted = no_indent and line:find(quote or '^%s*%W')
    -- First quoted line fixes the quote character for the rest of the block.
    if quoted and not quote then quote = '^%s*%' .. line:match('^%s*(%W)') end
    -- An unindented, unquoted line after the first terminates the block.
    if no_indent and not quoted and pos > 1 then return index + pos - 1 end
  end
  return #input + 1 -- block runs to end of input
end
lex:add_rule('literal_block', token('literal_block', block))
lex:add_style('literal_block', lexer.styles.embedded .. {eolfilled = true})
-- Lists.
-- Option lists, e.g. '-f FILE', '/V', '--long=value'.
local option_word = lexer.alnum * (lexer.alnum + '-')^0
local option = S('-/') * option_word * (' ' * option_word)^-1 +
  ('--' * option_word * ('=' * option_word)^-1)
local option_list = option * (',' * lexer.space^1 * option)^-1
local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
-- Enumerated list markers: '1.', '(i)', 'a)', '#.', etc.
local enum_list = P('(')^-1 * (lexer.digit^1 + S('ivxlcmIVXLCM')^1 + lexer.alnum + '#') * S('.)')
-- Field list markers: ':name:'.
local field_list = ':' * (lexer.any - ':')^1 * P(':')^-1
-- The leading #(...) look-ahead cheaply rejects lines that cannot start a list.
lex:add_rule('list', #(lexer.space^0 * (S('*+-:/') + enum_list)) *
  starts_line(token(lexer.LIST,
    lexer.space^0 * (option_list + bullet_list + enum_list + field_list) * lexer.space)))

-- Shared building blocks for the rules below.
local any_indent = S(' \t')^0
local word = lexer.alpha * (lexer.alnum + S('-.+'))^0
local prefix = any_indent * '.. ' -- explicit-markup start: '.. '
-- Explicit markup blocks.
-- Footnote labels: '[1]', '[#]', '[#name]', or '[*]'.
local footnote_label = '[' * (lexer.digit^1 + '#' * word^-1 + '*') * ']'
local footnote = token('footnote_block', prefix * footnote_label * lexer.space)
-- Citation labels: '[name]'.
local citation_label = '[' * word * ']'
local citation = token('citation_block', prefix * citation_label * lexer.space)
-- Hyperlink targets: '.. _name:' or '.. _`phrase`:' (backslash escapes allowed).
local link = token('link_block', prefix * '_' *
  (lexer.range('`') + (P('\\') * 1 + lexer.nonnewline - ':')^1) * ':' * lexer.space)
lex:add_rule('markup_block', #prefix * starts_line(footnote + citation + link))
lex:add_style('footnote_block', lexer.styles.label)
lex:add_style('citation_block', lexer.styles.label)
lex:add_style('link_block', lexer.styles.label)
-- Sphinx code block.
-- Match-time function consuming the indented block that follows the current
-- position; returns the position just past it (or past end of input).
local indented_block = function(input, index)
  local rest = input:sub(index)
  local level = #rest:match('^([ \t]*)') -- indent of the block's first line
  for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
    -- Stop at the first line indented less than the block (or, for an
    -- unindented block, at any line after the first).
    if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then return index + pos - 1 end
  end
  return #input + 1
end
-- '.. code-block:: language' followed by its indented body.
local code_block =
  prefix * 'code-block::' * S(' \t')^1 * lexer.nonnewline^0 * (lexer.newline + -1) * indented_block
lex:add_rule('code_block', #prefix * token('code_block', starts_line(code_block)))
lex:add_style('code_block', lexer.styles.embedded .. {eolfilled = true})
-- Directives.
-- Docutils' built-in directives ('.. name:: ...').
local known_directive = token('directive', prefix * word_match{
  -- Admonitions
  'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', 'warning',
  'admonition',
  -- Images
  'image', 'figure',
  -- Body elements
  'topic', 'sidebar', 'line-block', 'parsed-literal', 'code', 'math', 'rubric', 'epigraph',
  'highlights', 'pull-quote', 'compound', 'container',
  -- Table
  'table', 'csv-table', 'list-table',
  -- Document parts
  'contents', 'sectnum', 'section-autonumbering', 'header', 'footer',
  -- References
  'target-notes', 'footnotes', 'citations',
  -- HTML-specific
  'meta',
  -- Directives for substitution definitions
  'replace', 'unicode', 'date',
  -- Miscellaneous
  'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive'
} * '::' * lexer.space)
-- Directives added by Sphinx.
-- NOTE(review): 'note', 'warning', and 'rubric' also appear in known_directive,
-- which is tried first in the rule below, so they always style as 'directive',
-- never 'sphinx_directive'.
local sphinx_directive = token('sphinx_directive', prefix * word_match{
  -- The TOC tree.
  'toctree',
  -- Paragraph-level markup.
  'note', 'warning', 'versionadded', 'versionchanged', 'deprecated', 'seealso', 'rubric',
  'centered', 'hlist', 'glossary', 'productionlist',
  -- Showing code examples.
  'highlight', 'literalinclude',
  -- Miscellaneous
  'sectionauthor', 'index', 'only', 'tabularcolumns'
} * '::' * lexer.space)
-- Any other '.. word::' is highlighted as an unknown directive.
local unknown_directive = token('unknown_directive', prefix * word * '::' * lexer.space)
lex:add_rule('directive',
  #prefix * starts_line(known_directive + sphinx_directive + unknown_directive))
lex:add_style('directive', lexer.styles.keyword)
lex:add_style('sphinx_directive', lexer.styles.keyword .. {bold = true})
lex:add_style('unknown_directive', lexer.styles.keyword .. {italics = true})
-- Substitution definitions: '.. |name| directive:: ...'.
lex:add_rule('substitution', #prefix * token('substitution', starts_line(prefix * lexer.range('|') *
  lexer.space^1 * word * '::' * lexer.space)))
lex:add_style('substitution', lexer.styles.variable)
-- Comments.
-- A line comment is '.. ' to end of line; a block comment is a bare '..'
-- followed by a newline and an indented block.
local line_comment = lexer.to_eol(prefix)
local bprefix = any_indent * '..'
local block_comment = bprefix * lexer.newline * indented_block
lex:add_rule('comment', #bprefix * token(lexer.COMMENT, starts_line(line_comment + block_comment)))
-- Section titles (2 or more characters).
local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
-- Captures the full adornment line and (via adornment_chars) its first char.
local adornment = lpeg.C(adornment_chars^2 * any_indent) * (lexer.newline + -1)
-- Overline form: the adornment line must be followed within 3 lines by an
-- identical underline, with no intervening line wider than the adornment.
local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
  -- The adornment must consist of a single repeated character.
  if not adm:find('^%' .. c .. '+%s*$') then return nil end
  local rest = input:sub(index)
  local lines = 1
  for line, e in rest:gmatch('([^\r\n]+)()') do
    if lines > 1 and line:match('^(%' .. c .. '+)%s*$') == adm then return index + e - 1 end
    if lines > 3 or #line > #adm then return nil end
    lines = lines + 1
  end
  return #input + 1
end)
-- Underline form: a standalone single-character adornment line.
local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
  local pos = adm:match('^%' .. c .. '+%s*()$')
  -- Succeed at the end of the adornment proper (excluding trailing blanks).
  return pos and index - #adm + pos - 1 or nil
end)
-- Token needs to be a predefined one in order for folder to work.
lex:add_rule('title', token(lexer.HEADING, overline + underline))
-- Line block: lines beginning with '|'.
lex:add_rule('line_block_char', token(lexer.OPERATOR, starts_line(any_indent * '|')))
-- Whitespace.
lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1))
-- Inline markup.
local strong = token(lexer.BOLD, lexer.range('**'))
local em = token(lexer.ITALIC, lexer.range('*'))
local inline_literal = token('inline_literal', lexer.range('``'))
-- References: 'name_', 'name__', '`phrase`_', and inline targets '_`phrase`'.
local postfix_link = (word + lexer.range('`')) * '_' * P('_')^-1
local prefix_link = '_' * lexer.range('`')
local link_ref = token(lexer.LINK, postfix_link + prefix_link)
-- Roles, e.g. ':emphasis:' or ':rfc:role:'.
local role = token('role', ':' * word * ':' * (word * ':')^-1)
-- Interpreted text with an optional role on either side: ':role:`text`'.
local interpreted = role^-1 * token('interpreted', lexer.range('`')) * role^-1
local footnote_ref = token(lexer.REFERENCE, footnote_label * '_')
local citation_ref = token(lexer.REFERENCE, citation_label * '_')
-- Substitution references: '|name|', '|name|_', '|name|__'.
local substitution_ref = token('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1)
-- Standalone hyperlinks, e.g. 'https://example.com/path'.
local link = token(lexer.LINK,
  lexer.alpha * (lexer.alnum + S('-.'))^1 * ':' * (lexer.alnum + S('/.+-%@'))^1)
-- '-lexer.alnum' prevents matching in the middle of a word.
lex:add_rule('inline_markup',
  (strong + em + inline_literal + link_ref + interpreted + footnote_ref + citation_ref +
    substitution_ref + link) * -lexer.alnum)
lex:add_style('inline_literal', lexer.styles.embedded)
lex:add_style('role', lexer.styles.class)
lex:add_style('interpreted', lexer.styles.string)
-- Other.
-- Consume whole words so other rules do not fire mid-word.
lex:add_rule('non_space', token(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0))
-- Backslash escapes any single character.
lex:add_rule('escape', token(lexer.DEFAULT, '\\' * lexer.any))
  155. -- Section-based folding.
  156. local sphinx_levels = {
  157. ['#'] = 0, ['*'] = 1, ['='] = 2, ['-'] = 3, ['^'] = 4, ['"'] = 5
  158. }
  159. function lex:fold(text, start_pos, start_line, start_level)
  160. local folds, line_starts = {}, {}
  161. for pos in (text .. '\n'):gmatch('().-\r?\n') do line_starts[#line_starts + 1] = pos end
  162. local style_at, CONSTANT, level = lexer.style_at, lexer.CONSTANT, start_level
  163. local sphinx = lexer.property_int['fold.scintillua.rest.by.sphinx.convention'] > 0
  164. local FOLD_BASE = lexer.FOLD_BASE
  165. local FOLD_HEADER, FOLD_BLANK = lexer.FOLD_HEADER, lexer.FOLD_BLANK
  166. for i = 1, #line_starts do
  167. local pos, next_pos = line_starts[i], line_starts[i + 1]
  168. local c = text:sub(pos, pos)
  169. local line_num = start_line + i - 1
  170. folds[line_num] = level
  171. if style_at[start_pos + pos - 1] == CONSTANT and c:find('^[^%w%s]') then
  172. local sphinx_level = FOLD_BASE + (sphinx_levels[c] or #sphinx_levels)
  173. level = not sphinx and level - 1 or sphinx_level
  174. if level < FOLD_BASE then level = FOLD_BASE end
  175. folds[line_num - 1], folds[line_num] = level, level + FOLD_HEADER
  176. level = (not sphinx and level or sphinx_level) + 1
  177. elseif c == '\r' or c == '\n' then
  178. folds[line_num] = level + FOLD_BLANK
  179. end
  180. end
  181. return folds
  182. end
-- Default: do not fold by the Sphinx convention (property name matches the
-- one read in lex:fold above).
-- lexer.property['fold.scintillua.rest.by.sphinx.convention'] = '0'

--[[ Embedded languages.
local bash = lexer.load('bash')
local bash_indent_level
local start_rule =
#(prefix * 'code-block' * '::' * lexer.space^1 * 'bash' * (lexer.newline + -1)) *
sphinx_directive * token('bash_begin', P(function(input, index)
bash_indent_level = #input:match('^([ \t]*)', index)
return index
end))]]

-- Prefix the host uses when commenting out reST lines.
lexer.property['scintillua.comment'] = '.. '

return lex