logo

pleroma-fe

My custom branch(es) on git.pleroma.social/pleroma/pleroma-fe git clone https://hacktivis.me/git/pleroma-fe.git

html_line_converter.service.js (4448B)


  1. import { getTagName } from './utility.service.js'
  2. /**
  3. * This is a tiny purpose-built HTML parser/processor. This basically detects
  4. * any type of visual newline and converts entire HTML into a array structure.
  5. *
  6. * Text nodes are represented as object with single property - text - containing
  7. * the visual line. Intended usage is to process the array with .map() in which
  8. * map function returns a string and resulting array can be converted back to html
  9. * with a .join('').
  10. *
  11. * Generally this isn't very useful except for when you really need to either
  12. * modify visual lines (greentext i.e. simple quoting) or do something with
  13. * first/last line.
  14. *
  15. * known issue: doesn't handle CDATA so nested CDATA might not work well
  16. *
  17. * @param {Object} input - input data
  18. * @return {(string|{ text: string })[]} processed html in form of a list.
  19. */
  20. export const convertHtmlToLines = (html = '') => {
  21. // Elements that are implicitly self-closing
  22. // https://developer.mozilla.org/en-US/docs/Glossary/empty_element
  23. const emptyElements = new Set([
  24. 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
  25. 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
  26. ])
  27. // Block-level element (they make a visual line)
  28. // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
  29. const blockElements = new Set([
  30. 'address', 'article', 'aside', 'blockquote', 'details', 'dialog', 'dd',
  31. 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
  32. 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main',
  33. 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'
  34. ])
  35. // br is very weird in a way that it's technically not block-level, it's
  36. // essentially converted to a \n (or \r\n). There's also wbr but it doesn't
  37. // guarantee linebreak, only suggest it.
  38. const linebreakElements = new Set(['br'])
  39. const visualLineElements = new Set([
  40. ...blockElements.values(),
  41. ...linebreakElements.values()
  42. ])
  43. // All block-level elements that aren't empty elements, i.e. not <hr>
  44. const nonEmptyElements = new Set(visualLineElements)
  45. // Difference
  46. for (const elem of emptyElements) {
  47. nonEmptyElements.delete(elem)
  48. }
  49. // All elements that we are recognizing
  50. const allElements = new Set([
  51. ...nonEmptyElements.values(),
  52. ...emptyElements.values()
  53. ])
  54. const buffer = [] // Current output buffer
  55. const level = [] // How deep we are in tags and which tags were there
  56. let textBuffer = '' // Current line content
  57. let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
  58. const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
  59. if (textBuffer.trim().length > 0) {
  60. buffer.push({ level: [...level], text: textBuffer })
  61. } else {
  62. buffer.push(textBuffer)
  63. }
  64. textBuffer = ''
  65. }
  66. const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing
  67. flush()
  68. buffer.push(tag)
  69. }
  70. const handleOpen = (tag) => { // handles opening tags
  71. flush()
  72. buffer.push(tag)
  73. level.unshift(getTagName(tag))
  74. }
  75. const handleClose = (tag) => { // handles closing tags
  76. if (level[0] === getTagName(tag)) {
  77. flush()
  78. buffer.push(tag)
  79. level.shift()
  80. } else { // Broken case
  81. textBuffer += tag
  82. }
  83. }
  84. for (let i = 0; i < html.length; i++) {
  85. const char = html[i]
  86. if (char === '<' && tagBuffer === null) {
  87. tagBuffer = char
  88. } else if (char !== '>' && tagBuffer !== null) {
  89. tagBuffer += char
  90. } else if (char === '>' && tagBuffer !== null) {
  91. tagBuffer += char
  92. const tagFull = tagBuffer
  93. tagBuffer = null
  94. const tagName = getTagName(tagFull)
  95. if (allElements.has(tagName)) {
  96. if (linebreakElements.has(tagName)) {
  97. handleBr(tagFull)
  98. } else if (nonEmptyElements.has(tagName)) {
  99. if (tagFull[1] === '/') {
  100. handleClose(tagFull)
  101. } else if (tagFull[tagFull.length - 2] === '/') {
  102. // self-closing
  103. handleBr(tagFull)
  104. } else {
  105. handleOpen(tagFull)
  106. }
  107. } else {
  108. textBuffer += tagFull
  109. }
  110. } else {
  111. textBuffer += tagFull
  112. }
  113. } else if (char === '\n') {
  114. handleBr(char)
  115. } else {
  116. textBuffer += char
  117. }
  118. }
  119. if (tagBuffer) {
  120. textBuffer += tagBuffer
  121. }
  122. flush()
  123. return buffer
  124. }