logo

pleroma-fe

My custom branche(s) on git.pleroma.social/pleroma/pleroma-fe git clone https://hacktivis.me/git/pleroma-fe.git

html_tree_converter.service.js (3116B)


  1. import { getTagName } from './utility.service.js'
  2. import { unescape } from 'lodash'
  3. /**
  4. * This is a not-so-tiny purpose-built HTML parser/processor. This parses html
  5. * and converts it into a tree structure representing tag openers/closers and
  6. * children.
  7. *
  8. * Structure follows this pattern: [opener, [...children], closer] except root
  9. * node which is just [...children]. Text nodes can only be within children and
  10. * are represented as strings.
  11. *
  12. * Intended use is to convert HTML structure and then recursively iterate over it
  13. * most likely using a map. Very useful for dynamically rendering html replacing
  14. * tags with JSX elements in a render function.
  15. *
  16. * known issue: doesn't handle CDATA so CDATA might not work well
  17. * known issue: doesn't handle HTML comments
  18. *
  19. * @param {Object} input - input data
  20. * @return {string} processed html
  21. */
  22. export const convertHtmlToTree = (html = '') => {
  23. // Elements that are implicitly self-closing
  24. // https://developer.mozilla.org/en-US/docs/Glossary/empty_element
  25. const emptyElements = new Set([
  26. 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
  27. 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
  28. ])
  29. // TODO For future - also parse HTML5 multi-source components?
  30. const buffer = [] // Current output buffer
  31. const levels = [['', buffer]] // How deep we are in tags and which tags were there
  32. let textBuffer = '' // Current line content
  33. let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
  34. const getCurrentBuffer = () => {
  35. return levels[levels.length - 1][1]
  36. }
  37. const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
  38. if (textBuffer === '') return
  39. getCurrentBuffer().push(textBuffer)
  40. textBuffer = ''
  41. }
  42. const handleSelfClosing = (tag) => {
  43. getCurrentBuffer().push([tag])
  44. }
  45. const handleOpen = (tag) => {
  46. const curBuf = getCurrentBuffer()
  47. const newLevel = [unescape(tag), []]
  48. levels.push(newLevel)
  49. curBuf.push(newLevel)
  50. }
  51. const handleClose = (tag) => {
  52. const currentTag = levels[levels.length - 1]
  53. if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) {
  54. currentTag.push(tag)
  55. levels.pop()
  56. } else {
  57. getCurrentBuffer().push(tag)
  58. }
  59. }
  60. for (let i = 0; i < html.length; i++) {
  61. const char = html[i]
  62. if (char === '<' && tagBuffer === null) {
  63. flushText()
  64. tagBuffer = char
  65. } else if (char !== '>' && tagBuffer !== null) {
  66. tagBuffer += char
  67. } else if (char === '>' && tagBuffer !== null) {
  68. tagBuffer += char
  69. const tagFull = tagBuffer
  70. tagBuffer = null
  71. const tagName = getTagName(tagFull)
  72. if (tagFull[1] === '/') {
  73. handleClose(tagFull)
  74. } else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') {
  75. // self-closing
  76. handleSelfClosing(tagFull)
  77. } else {
  78. handleOpen(tagFull)
  79. }
  80. } else {
  81. textBuffer += char
  82. }
  83. }
  84. if (tagBuffer) {
  85. textBuffer += tagBuffer
  86. }
  87. flushText()
  88. return buffer
  89. }