logo

pleroma-fe

My custom branch(es) on git.pleroma.social/pleroma/pleroma-fe git clone https://hacktivis.me/git/pleroma-fe.git
commit: cc00af7a3102034b05ebcd4aa1fd01c6f467184a
parent 0f73e96194fb13e70be0222a7ab718d7894b62c2
Author: Henry Jameson <me@hjkos.com>
Date:   Thu, 10 Jun 2021 18:52:01 +0300

Hellthread(tm) Certified

Diffstat:

M src/components/mention_link/mention_link.js | 3 ---
M src/components/mention_link/mention_link.vue | 1 -
M src/components/rich_content/rich_content.jsx | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
M src/components/status/status.js | 3 +++
M src/components/status/status.vue | 1 +
M src/components/status_body/status_body.js | 8 ++++++--
M src/components/status_body/status_body.vue | 12 ++++++++++--
M src/components/status_content/status_content.js | 1 +
M src/components/status_content/status_content.vue | 3 ++-
A src/services/html_converter/html_line_converter.service.js | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/services/html_converter/html_tree_converter.service.js | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/services/mini_html_converter/mini_html_converter.service.js | 138 -------------------------------------------------------------------------------
D src/services/tiny_post_html_processor/tiny_post_html_processor.service.js | 94 -------------------------------------------------------------------------------
A test/unit/specs/services/html_converter/html_line_converter.spec.js | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/unit/specs/services/html_converter/html_tree_converter.spec.js | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D test/unit/specs/services/tiny_post_html_processor/mini_post_html_processor.spec.js | 166 -------------------------------------------------------------------------------
D test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js | 96 -------------------------------------------------------------------------------
17 files changed, 707 insertions(+), 539 deletions(-)

diff --git a/src/components/mention_link/mention_link.js b/src/components/mention_link/mention_link.js @@ -70,9 +70,6 @@ const MentionLink = { highlightClass () { if (this.highlight) return highlightClass(this.user) }, - oldPlace () { - return !this.mergedConfig.mentionsOwnLine - }, oldStyle () { return !this.mergedConfig.mentionsNewStyle }, diff --git a/src/components/mention_link/mention_link.vue b/src/components/mention_link/mention_link.vue @@ -1,7 +1,6 @@ <template> <span class="MentionLink" - :class="{ '-oldPlace': oldPlace }" > <!-- eslint-disable vue/no-v-html --> <a diff --git a/src/components/rich_content/rich_content.jsx b/src/components/rich_content/rich_content.jsx @@ -1,7 +1,7 @@ import Vue from 'vue' import { unescape, flattenDeep } from 'lodash' -import { convertHtml, getTagName, processTextForEmoji, getAttrs } from 'src/services/mini_html_converter/mini_html_converter.service.js' -import { processHtml } from 'src/services/tiny_post_html_processor/tiny_post_html_processor.service.js' +import { convertHtmlToTree, getTagName, processTextForEmoji, getAttrs } from 'src/services/html_converter/html_tree_converter.service.js' +import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js' import StillImage from 'src/components/still-image/still-image.vue' import MentionLink from 'src/components/mention_link/mention_link.vue' @@ -31,11 +31,24 @@ export default Vue.component('RichContent', { required: false, type: Boolean, default: false + }, + // Whether to hide last mentions (hellthreads) + hideLastMentions: { + required: false, + type: Boolean, + default: false + }, + // Whether to hide first mentions + hideFirstMentions: { + required: false, + type: Boolean, + default: false } }, render (h) { // Pre-process HTML - const html = this.greentext ? 
addGreentext(this.html) : this.html + const html = preProcessPerLine(this.html, this.greentext, this.hideLastMentions) + console.log(this.hideFirstMentions, this.hideLastMentions) const renderImage = (tag) => { return <StillImage @@ -45,18 +58,20 @@ export default Vue.component('RichContent', { } const renderMention = (attrs, children, encounteredText) => { - return <MentionLink - url={attrs.href} - content={flattenDeep(children).join('')} - firstMention={!encounteredText} - /> + return (this.hideFirstMentions && !encounteredText) + ? '' + : <MentionLink + url={attrs.href} + content={flattenDeep(children).join('')} + firstMention={!encounteredText} + /> } // We stop treating mentions as "first" ones when we encounter // non-whitespace text let encounteredText = false // Processor to use with mini_html_converter - const processItem = (item) => { + const processItem = (item, index, array, what) => { // Handle text nodes - just add emoji if (typeof item === 'string') { const emptyText = item.trim() === '' @@ -69,7 +84,7 @@ export default Vue.component('RichContent', { encounteredText = true } if (item.includes(':')) { - return processTextForEmoji( + unescapedItem = processTextForEmoji( unescapedItem, this.emoji, ({ shortcode, url }) => { @@ -81,9 +96,8 @@ export default Vue.component('RichContent', { /> } ) - } else { - return unescapedItem } + return unescapedItem } // Handle tag nodes @@ -98,6 +112,8 @@ export default Vue.component('RichContent', { const attrs = getAttrs(opener) if (attrs['class'] && attrs['class'].includes('mention')) { return renderMention(attrs, children, encounteredText) + } else if (attrs['class'] && attrs['class'].includes('hashtag')) { + return item // We'll handle it later } else { attrs.target = '_blank' return <a {...{ attrs }}> @@ -116,43 +132,129 @@ export default Vue.component('RichContent', { } } } + // Processor for back direction (for finding "last" stuff, just easier this way) + let encounteredTextReverse = false + const 
renderHashtag = (attrs, children, encounteredTextReverse) => { + attrs.target = '_blank' + if (!encounteredTextReverse) { + attrs['data-parser-last'] = true + } + return <a {...{ attrs }}> + { children.map(processItem) } + </a> + } + const processItemReverse = (item, index, array, what) => { + // Handle text nodes - just add emoji + if (typeof item === 'string') { + const emptyText = item.trim() === '' + if (emptyText) return encounteredTextReverse ? item : item.trim() + if (!encounteredTextReverse) encounteredTextReverse = true + return item + } else if (Array.isArray(item)) { + // Handle tag nodes + const [opener, children] = item + const Tag = getTagName(opener) + switch (Tag) { + case 'a': // replace mentions with MentionLink + if (!this.handleLinks) break + const attrs = getAttrs(opener) + // should only be this + if (attrs['class'] && attrs['class'].includes('hashtag')) { + return renderHashtag(attrs, children, encounteredTextReverse) + } + } + } + return item + } return <span class="RichContent"> { this.$slots.prefix } - { convertHtml(html).map(processItem) } + { convertHtmlToTree(html).map(processItem).reverse().map(processItemReverse).reverse() } { this.$slots.suffix } </span> } }) -export const addGreentext = (html) => { - try { - if (html.includes('&gt;')) { - // This checks if post has '>' at the beginning, excluding mentions so that @mention >impying works - return processHtml(html, (string) => { - if ( - string.includes('&gt;') && string - .replace(/<[^>]+?>/gi, '') // remove all tags - .replace(/@\w+/gi, '') // remove mentions (even failed ones) - .trim() - .startsWith('&gt;') - ) { - return `<span class='greentext'>${string}</span>` +/** Pre-processing HTML + * + * Currently this does two things: + * - add green/cyantexting + * - wrap and mark last line containing only mentions as ".lastMentionsLine" for + * more compact hellthreads. 
+ * + * @param {String} html - raw HTML to process + * @param {Boolean} greentext - whether to enable greentexting or not + * @param {Boolean} removeLastMentions - whether to remove last mentions + */ +export const preProcessPerLine = (html, greentext, removeLastMentions) => { + // Only mark first (last) encounter + let lastMentionsMarked = false + + return convertHtmlToLines(html).reverse().map((item, index, array) => { + if (!item.text) return item + const string = item.text + + // Greentext stuff + if (greentext && (string.includes('&gt;') || string.includes('&lt;'))) { + const cleanedString = string.replace(/<[^>]+?>/gi, '') // remove all tags + .replace(/@\w+/gi, '') // remove mentions (even failed ones) + .trim() + if (cleanedString.startsWith('&gt;')) { + return `<span class='greentext'>${string}</span>` + } else if (cleanedString.startsWith('&lt;')) { + return `<span class='cyantext'>${string}</span>` + } + } + + const tree = convertHtmlToTree(string) + + // If line has loose text, i.e. text outside a mention or a tag + // we won't touch mentions. 
+ let hasLooseText = false + let hasMentions = false + const process = (item) => { + if (Array.isArray(item)) { + const [opener, children, closer] = item + const tag = getTagName(opener) + if (tag === 'a') { + const attrs = getAttrs(opener) + if (attrs['class'] && attrs['class'].includes('mention')) { + hasMentions = true + return [opener, children, closer] + } else { + hasLooseText = true + return [opener, children, closer] + } + } else if (tag === 'span' || tag === 'p') { + return [opener, [...children].reverse().map(process).reverse(), closer] } else { - return string + hasLooseText = true + return [opener, children, closer] + } + } + + if (typeof item === 'string') { + if (item.trim() !== '') { + hasLooseText = true } - }) + return item + } + } + + const result = [...tree].reverse().map(process).reverse() + + if (removeLastMentions && hasMentions && !hasLooseText && !lastMentionsMarked) { + lastMentionsMarked = true + return '' } else { - return html + return flattenDeep(result).join('') } - } catch (e) { - console.error('Failed to process status html', e) - return html - } + }).reverse().join('') } export const getHeadTailLinks = (html) => { // Exported object properties const firstMentions = [] // Mentions that appear in the beginning of post body + const lastMentions = [] // Mentions that appear at the end of post body const lastTags = [] // Tags that appear at the end of post body const writtenMentions = [] // All mentions that appear in post body const writtenTags = [] // All tags that appear in post body @@ -170,7 +272,7 @@ export const getHeadTailLinks = (html) => { } } - // Processor to use with mini_html_converter + // Processor to use with html_tree_converter const processItem = (item) => { // Handle text nodes - stop treating mentions as "first" when text encountered if (typeof item === 'string') { @@ -182,6 +284,7 @@ export const getHeadTailLinks = (html) => { } // Encountered text? That means tags we've been collectings aren't "last"! 
lastTags.splice(0) + lastMentions.splice(0) return } // Handle tag nodes @@ -197,6 +300,7 @@ export const getHeadTailLinks = (html) => { firstMentions.push(linkData) } writtenMentions.push(linkData) + lastMentions.push(linkData) } else if (attrs['class'].includes('hashtag')) { lastTags.push(linkData) writtenTags.push(linkData) @@ -206,6 +310,6 @@ export const getHeadTailLinks = (html) => { children && children.forEach(processItem) } } - convertHtml(html).forEach(processItem) - return { firstMentions, writtenMentions, writtenTags, lastTags } + convertHtmlToTree(html).forEach(processItem) + return { firstMentions, writtenMentions, writtenTags, lastTags, lastMentions } } diff --git a/src/components/status/status.js b/src/components/status/status.js @@ -196,6 +196,9 @@ const Status = { hasMentionsLine () { return this.mentionsLine.length > 0 }, + hideLastMentions () { + return this.headTailLinks.firstMentions.length === 0 + }, muted () { if (this.statusoid.user.id === this.currentUser.id) return false const { status } = this diff --git a/src/components/status/status.vue b/src/components/status/status.vue @@ -306,6 +306,7 @@ :highlight="highlight" :focused="isFocused" :hide-first-mentions="mentionsOwnLine && isReply" + :hide-last-mentions="hideLastMentions" :head-tail-links="headTailLinks" @mediaplay="addMediaPlaying($event)" @mediapause="removeMediaPlaying($event)" diff --git a/src/components/status_body/status_body.js b/src/components/status_body/status_body.js @@ -30,7 +30,8 @@ const StatusContent = { // if this was computed at upper level it can be passed here, otherwise // it will be in this component 'headTailLinks', - 'hideFirstMentions' + 'hideFirstMentions', + 'hideLastMentions' ], data () { return { @@ -80,9 +81,12 @@ const StatusContent = { attachmentTypes () { return this.status.attachments.map(file => fileType.fileType(file.mimetype)) }, - mentions () { + mentionsFirst () { return this.headTailLinksComputed.firstMentions }, + mentionsLast () { + return 
this.headTailLinksComputed.lastMentions + }, ...mapGetters(['mergedConfig']) }, components: { diff --git a/src/components/status_body/status_body.vue b/src/components/status_body/status_body.vue @@ -49,11 +49,19 @@ :emoji="status.emojis" :handle-links="true" :greentext="mergedConfig.greentext" + :hide-first-mentions="hideFirstMentions" + :hide-last-mentions="hideLastMentions" > <template v-slot:prefix> <MentionsLine - v-if="!hideFirstMentions" - :mentions="mentions" + v-if="!hideFirstMentions && mentionsFirst" + :mentions="mentionsFirst" + /> + </template> + <template v-slot:suffix> + <MentionsLine + v-if="!hideFirstMentions && mentionsLast" + :mentions="mentionsLast" /> </template> </RichContent> diff --git a/src/components/status_content/status_content.js b/src/components/status_content/status_content.js @@ -33,6 +33,7 @@ const StatusContent = { 'fullContent', 'singleLine', 'hideFirstMentions', + 'hideLastMentions', 'headTailLinks' ], computed: { diff --git a/src/components/status_content/status_content.vue b/src/components/status_content/status_content.vue @@ -5,7 +5,8 @@ :status="status" :single-line="singleLine" :hide-first-mentions="hideFirstMentions" - :headTailLinks="headTailLinks" + :hide-last-mentions="hideLastMentions" + :head-tail-links="headTailLinks" > <div v-if="status.poll && status.poll.options"> <poll :base-poll="status.poll" /> diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js @@ -0,0 +1,102 @@ +/** + * This is a tiny purpose-built HTML parser/processor. This basically detects + * any type of visual newline and converts entire HTML into a array structure. + * + * Text nodes are represented as object with single property - text - containing + * the visual line. Intended usage is to process the array with .map() in which + * map function returns a string and resulting array can be converted back to html + * with a .join(''). 
+ * + * Generally this isn't very useful except for when you really need to either + * modify visual lines (greentext i.e. simple quoting) or do something with + * first/last line. + * + * known issue: doesn't handle CDATA so nested CDATA might not work well + * + * @param {Object} input - input data + * @return {(string|{ text: string })[]} processed html in form of a list. + */ +export const convertHtmlToLines = (html) => { + const handledTags = new Set(['p', 'br', 'div']) + const openCloseTags = new Set(['p', 'div']) + + let buffer = [] // Current output buffer + const level = [] // How deep we are in tags and which tags were there + let textBuffer = '' // Current line content + let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag + + // Extracts tag name from tag, i.e. <span a="b"> => span + const getTagName = (tag) => { + const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag) + return result && (result[1] || result[2]) + } + + const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer + if (textBuffer.trim().length > 0) { + buffer.push({ text: textBuffer }) + } else { + buffer.push(textBuffer) + } + textBuffer = '' + } + + const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing + flush() + buffer.push(tag) + } + + const handleOpen = (tag) => { // handles opening tags + flush() + buffer.push(tag) + level.push(tag) + } + + const handleClose = (tag) => { // handles closing tags + flush() + buffer.push(tag) + if (level[level.length - 1] === tag) { + level.pop() + } + } + + for (let i = 0; i < html.length; i++) { + const char = html[i] + if (char === '<' && tagBuffer === null) { + tagBuffer = char + } else if (char !== '>' && tagBuffer !== null) { + tagBuffer += char + } else if (char === '>' && tagBuffer !== null) { + tagBuffer += char + const tagFull = tagBuffer + tagBuffer = null + const tagName = getTagName(tagFull) + if (handledTags.has(tagName)) { + 
if (tagName === 'br') { + handleBr(tagFull) + } else if (openCloseTags.has(tagName)) { + if (tagFull[1] === '/') { + handleClose(tagFull) + } else if (tagFull[tagFull.length - 2] === '/') { + // self-closing + handleBr(tagFull) + } else { + handleOpen(tagFull) + } + } + } else { + textBuffer += tagFull + } + } else if (char === '\n') { + handleBr(char) + } else { + textBuffer += char + } + } + if (tagBuffer) { + textBuffer += tagBuffer + } + + flush() + + return buffer +} diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js @@ -0,0 +1,146 @@ +/** + * This is a not-so-tiny purpose-built HTML parser/processor. This parses html + * and converts it into a tree structure representing tag openers/closers and + * children. + * + * Structure follows this pattern: [opener, [...children], closer] except root + * node which is just [...children]. Text nodes can only be within children and + * are represented as strings. + * + * Intended use is to convert HTML structure and then recursively iterate over it + * most likely using a map. Very useful for dynamically rendering html replacing + * tags with JSX elements in a render function. + * + * known issue: doesn't handle CDATA so CDATA might not work well + * known issue: doesn't handle HTML comments + * + * @param {Object} input - input data + * @return {string} processed html + */ +export const convertHtmlToTree = (html) => { + // Elements that are implicitly self-closing + // https://developer.mozilla.org/en-US/docs/Glossary/empty_element + const emptyElements = new Set([ + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', + 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' + ]) + // TODO For future - also parse HTML5 multi-source components? 
+ + const buffer = [] // Current output buffer + const levels = [['', buffer]] // How deep we are in tags and which tags were there + let textBuffer = '' // Current line content + let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag + + const getCurrentBuffer = () => { + return levels[levels.length - 1][1] + } + + const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer + if (textBuffer === '') return + getCurrentBuffer().push(textBuffer) + textBuffer = '' + } + + const handleSelfClosing = (tag) => { + getCurrentBuffer().push([tag]) + } + + const handleOpen = (tag) => { + const curBuf = getCurrentBuffer() + const newLevel = [tag, []] + levels.push(newLevel) + curBuf.push(newLevel) + } + + const handleClose = (tag) => { + const currentTag = levels[levels.length - 1] + if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) { + currentTag.push(tag) + levels.pop() + } else { + getCurrentBuffer().push(tag) + } + } + + for (let i = 0; i < html.length; i++) { + const char = html[i] + if (char === '<' && tagBuffer === null) { + flushText() + tagBuffer = char + } else if (char !== '>' && tagBuffer !== null) { + tagBuffer += char + } else if (char === '>' && tagBuffer !== null) { + tagBuffer += char + const tagFull = tagBuffer + tagBuffer = null + const tagName = getTagName(tagFull) + if (tagFull[1] === '/') { + handleClose(tagFull) + } else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') { + // self-closing + handleSelfClosing(tagFull) + } else { + handleOpen(tagFull) + } + } else { + textBuffer += char + } + } + if (tagBuffer) { + textBuffer += tagBuffer + } + + flushText() + return buffer +} + +// Extracts tag name from tag, i.e. 
<span a="b"> => span +export const getTagName = (tag) => { + const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag) + return result && (result[1] || result[2]) +} + +export const processTextForEmoji = (text, emojis, processor) => { + const buffer = [] + let textBuffer = '' + for (let i = 0; i < text.length; i++) { + const char = text[i] + if (char === ':') { + const next = text.slice(i + 1) + let found = false + for (let emoji of emojis) { + if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) { + found = emoji + break + } + } + if (found) { + buffer.push(textBuffer) + textBuffer = '' + buffer.push(processor(found)) + i += found.shortcode.length + 1 + } else { + textBuffer += char + } + } else { + textBuffer += char + } + } + if (textBuffer) buffer.push(textBuffer) + return buffer +} + +export const getAttrs = tag => { + const innertag = tag + .substring(1, tag.length - 1) + .replace(new RegExp('^' + getTagName(tag)), '') + .replace(/\/?$/, '') + .trim() + const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi)) + .map(([trash, key, value]) => [key, value]) + .map(([k, v]) => { + if (!v) return [k, true] + return [k, v.substring(1, v.length - 1)] + }) + return Object.fromEntries(attrs) +} diff --git a/src/services/mini_html_converter/mini_html_converter.service.js b/src/services/mini_html_converter/mini_html_converter.service.js @@ -1,138 +0,0 @@ -/** - * This is a not-so-tiny purpose-built HTML parser/processor. 
It was made for use - * with StatusBody component for purpose of replacing tags with vue components - * - * known issue: doesn't handle CDATA so nested CDATA might not work well - * - * @param {Object} input - input data - * @param {(string) => string} lineProcessor - function that will be called on every line - * @param {{ key[string]: (string) => string}} tagProcessor - map of processors for tags - * @return {string} processed html - */ -export const convertHtml = (html) => { - // Elements that are implicitly self-closing - // https://developer.mozilla.org/en-US/docs/Glossary/empty_element - const emptyElements = new Set([ - 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', - 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' - ]) - // TODO For future - also parse HTML5 multi-source components? - - const buffer = [] // Current output buffer - const levels = [['', buffer]] // How deep we are in tags and which tags were there - let textBuffer = '' // Current line content - let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag - - const getCurrentBuffer = () => { - return levels[levels.length - 1][1] - } - - const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer - if (textBuffer === '') return - getCurrentBuffer().push(textBuffer) - textBuffer = '' - } - - const handleSelfClosing = (tag) => { - getCurrentBuffer().push([tag]) - } - - const handleOpen = (tag) => { - const curBuf = getCurrentBuffer() - const newLevel = [tag, []] - levels.push(newLevel) - curBuf.push(newLevel) - } - - const handleClose = (tag) => { - const currentTag = levels[levels.length - 1] - if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) { - currentTag.push(tag) - levels.pop() - } else { - getCurrentBuffer().push(tag) - } - } - - for (let i = 0; i < html.length; i++) { - const char = html[i] - if (char === '<' && tagBuffer === null) { - flushText() - tagBuffer = char - } 
else if (char !== '>' && tagBuffer !== null) { - tagBuffer += char - } else if (char === '>' && tagBuffer !== null) { - tagBuffer += char - const tagFull = tagBuffer - tagBuffer = null - const tagName = getTagName(tagFull) - if (tagFull[1] === '/') { - handleClose(tagFull) - } else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') { - // self-closing - handleSelfClosing(tagFull) - } else { - handleOpen(tagFull) - } - } else { - textBuffer += char - } - } - if (tagBuffer) { - textBuffer += tagBuffer - } - - flushText() - return buffer -} - -// Extracts tag name from tag, i.e. <span a="b"> => span -export const getTagName = (tag) => { - const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag) - return result && (result[1] || result[2]) -} - -export const processTextForEmoji = (text, emojis, processor) => { - const buffer = [] - let textBuffer = '' - for (let i = 0; i < text.length; i++) { - const char = text[i] - if (char === ':') { - const next = text.slice(i + 1) - let found = false - for (let emoji of emojis) { - if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) { - found = emoji - break - } - } - if (found) { - buffer.push(textBuffer) - textBuffer = '' - buffer.push(processor(found)) - i += found.shortcode.length + 1 - } else { - textBuffer += char - } - } else { - textBuffer += char - } - } - if (textBuffer) buffer.push(textBuffer) - return buffer -} - -export const getAttrs = tag => { - const innertag = tag - .substring(1, tag.length - 1) - .replace(new RegExp('^' + getTagName(tag)), '') - .replace(/\/?$/, '') - .trim() - const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi)) - .map(([trash, key, value]) => [key, value]) - .map(([k, v]) => { - if (!v) return [k, true] - return [k, v.substring(1, v.length - 1)] - }) - return Object.fromEntries(attrs) -} diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js 
b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js @@ -1,94 +0,0 @@ -/** - * This is a tiny purpose-built HTML parser/processor. This basically detects any type of visual newline and - * allows it to be processed, useful for greentexting, mostly - * - * known issue: doesn't handle CDATA so nested CDATA might not work well - * - * @param {Object} input - input data - * @param {(string) => string} processor - function that will be called on every line - * @return {string} processed html - */ -export const processHtml = (html, processor) => { - const handledTags = new Set(['p', 'br', 'div']) - const openCloseTags = new Set(['p', 'div']) - - let buffer = '' // Current output buffer - const level = [] // How deep we are in tags and which tags were there - let textBuffer = '' // Current line content - let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag - - // Extracts tag name from tag, i.e. <span a="b"> => span - const getTagName = (tag) => { - const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag) - return result && (result[1] || result[2]) - } - - const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer - if (textBuffer.trim().length > 0) { - buffer += processor(textBuffer) - } else { - buffer += textBuffer - } - textBuffer = '' - } - - const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing - flush() - buffer += tag - } - - const handleOpen = (tag) => { // handles opening tags - flush() - buffer += tag - level.push(tag) - } - - const handleClose = (tag) => { // handles closing tags - flush() - buffer += tag - if (level[level.length - 1] === tag) { - level.pop() - } - } - - for (let i = 0; i < html.length; i++) { - const char = html[i] - if (char === '<' && tagBuffer === null) { - tagBuffer = char - } else if (char !== '>' && tagBuffer !== null) { - tagBuffer += char - } else if (char === '>' && tagBuffer !== null) { - tagBuffer += 
char - const tagFull = tagBuffer - tagBuffer = null - const tagName = getTagName(tagFull) - if (handledTags.has(tagName)) { - if (tagName === 'br') { - handleBr(tagFull) - } else if (openCloseTags.has(tagName)) { - if (tagFull[1] === '/') { - handleClose(tagFull) - } else if (tagFull[tagFull.length - 2] === '/') { - // self-closing - handleBr(tagFull) - } else { - handleOpen(tagFull) - } - } - } else { - textBuffer += tagFull - } - } else if (char === '\n') { - handleBr(char) - } else { - textBuffer += char - } - } - if (tagBuffer) { - textBuffer += tagBuffer - } - - flush() - - return buffer -} diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js @@ -0,0 +1,130 @@ +import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js' + +const mapOnlyText = (processor) => (input) => input.text ? processor(input.text) : input + +describe('TinyPostHTMLProcessor', () => { + describe('with processor that keeps original line should not make any changes to HTML when', () => { + const processorKeep = (line) => line + it('fed with regular HTML with newlines', () => { + const inputOutput = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with possibly broken HTML with invalid tags/composition', () => { + const inputOutput = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with very broken HTML with broken composition', () => { + const inputOutput = '</p> lmao what </div> whats going on <div> wha <p>' + const result = convertHtmlToLines(inputOutput) + const 
comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with sorta valid HTML but tags aren\'t closed', () => { + const inputOutput = 'just leaving a <div> hanging' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with not really HTML at this point... tags that aren\'t finished', () => { + const inputOutput = 'do you expect me to finish this <div class=' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with dubiously valid HTML (p within p and also div inside p)', () => { + const inputOutput = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with maybe valid HTML? self-closing divs and ps', () => { + const inputOutput = 'a <div class="what"/> what now <p aria-label="wtf"/> ?' 
+ const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with valid XHTML containing a CDATA', () => { + const inputOutput = 'Yes, it is me, <![CDATA[DIO]]>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + }) + describe('with processor that replaces lines with word "_" should match expected line when', () => { + const processorReplace = (line) => '_' + it('fed with regular HTML with newlines', () => { + const input = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' + const output = '_<br/>_<p class="lol">_</p>_\n_<p >_<br>_</p> <br>\n<br/>' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with possibly broken HTML with invalid tags/composition', () => { + const input = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>' + const output = '_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with very broken HTML with broken composition', () => { + const input = '</p> lmao what </div> whats going on <div> wha <p>' + const output = '</p>_</div>_<div>_<p>' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with sorta valid HTML but tags aren\'t closed', () => { + const input = 'just leaving a <div> hanging' + const output = '_<div>_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + 
it('fed with not really HTML at this point... tags that aren\'t finished', () => { + const input = 'do you expect me to finish this <div class=' + const output = '_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with dubiously valid HTML (p within p and also div inside p)', () => { + const input = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>' + const output = '_<p>_\n_<p>_</p>_<br/><div>_</div></p>' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with maybe valid HTML? self-closing divs and ps', () => { + const input = 'a <div class="what"/> what now <p aria-label="wtf"/> ?' + const output = '_<div class="what"/>_<p aria-label="wtf"/>_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with valid XHTML containing a CDATA', () => { + const input = 'Yes, it is me, <![CDATA[DIO]]>' + const output = '_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + }) +}) diff --git a/test/unit/specs/services/html_converter/html_tree_converter.spec.js b/test/unit/specs/services/html_converter/html_tree_converter.spec.js @@ -0,0 +1,166 @@ +import { convertHtmlToTree, processTextForEmoji, getAttrs } from 'src/services/html_converter/html_tree_converter.service.js' + +describe('MiniHtmlConverter', () => { + describe('convertHtmlToTree', () => { + it('converts html into a tree structure', () => { + const input = '1 <p>2</p> <b>3<img src="a">4</b>5' + expect(convertHtmlToTree(input)).to.eql([ + '1 ', + [ + '<p>', + ['2'], + '</p>' + ], + ' ', + [ + '<b>', + [ 
+ '3', + ['<img src="a">'], + '4' + ], + '</b>' + ], + '5' + ]) + }) + it('converts html to tree while preserving tag formatting', () => { + const input = '1 <p >2</p><b >3<img src="a">4</b>5' + expect(convertHtmlToTree(input)).to.eql([ + '1 ', + [ + '<p >', + ['2'], + '</p>' + ], + [ + '<b >', + [ + '3', + ['<img src="a">'], + '4' + ], + '</b>' + ], + '5' + ]) + }) + it('converts semi-broken html', () => { + const input = '1 <br> 2 <p> 42' + expect(convertHtmlToTree(input)).to.eql([ + '1 ', + ['<br>'], + ' 2 ', + [ + '<p>', + [' 42'] + ] + ]) + }) + it('realistic case 1', () => { + const input = '<p><span class="h-card"><a class="u-url mention" data-user="9wRC6T2ZZiKWJ0vUi8" href="https://cawfee.club/users/benis" rel="ugc">@<span>benis</span></a></span> <span class="h-card"><a class="u-url mention" data-user="194" href="https://shigusegubu.club/users/hj" rel="ugc">@<span>hj</span></a></span> nice</p>' + expect(convertHtmlToTree(input)).to.eql([ + [ + '<p>', + [ + [ + '<span class="h-card">', + [ + [ + '<a class="u-url mention" data-user="9wRC6T2ZZiKWJ0vUi8" href="https://cawfee.club/users/benis" rel="ugc">', + [ + '@', + [ + '<span>', + [ + 'benis' + ], + '</span>' + ] + ], + '</a>' + ] + ], + '</span>' + ], + ' ', + [ + '<span class="h-card">', + [ + [ + '<a class="u-url mention" data-user="194" href="https://shigusegubu.club/users/hj" rel="ugc">', + [ + '@', + [ + '<span>', + [ + 'hj' + ], + '</span>' + ] + ], + '</a>' + ] + ], + '</span>' + ], + ' nice' + ], + '</p>' + ] + ]) + }) + it('realistic case 2', () => { + const inputOutput = 'Country improv: give me a city<br/>Audience: Memphis<br/>Improv troupe: come on, a better one<br/>Audience: el paso' + expect(convertHtmlToTree(inputOutput)).to.eql([ + 'Country improv: give me a city', + [ + '<br/>' + ], + 'Audience: Memphis', + [ + '<br/>' + ], + 'Improv troupe: come on, a better one', + [ + '<br/>' + ], + 'Audience: el paso' + ]) + }) + }) + + describe('processTextForEmoji', () => { + it('processes all emoji 
in text', () => { + const input = 'Hello from finland! :lol: We have best water! :lmao:' + const emojis = [ + { shortcode: 'lol', src: 'LOL' }, + { shortcode: 'lmao', src: 'LMAO' } + ] + const processor = ({ shortcode, src }) => ({ shortcode, src }) + expect(processTextForEmoji(input, emojis, processor)).to.eql([ + 'Hello from finland! ', + { shortcode: 'lol', src: 'LOL' }, + ' We have best water! ', + { shortcode: 'lmao', src: 'LMAO' } + ]) + }) + it('leaves text as is', () => { + const input = 'Number one: that\'s terror' + const emojis = [] + const processor = ({ shortcode, src }) => ({ shortcode, src }) + expect(processTextForEmoji(input, emojis, processor)).to.eql([ + 'Number one: that\'s terror' + ]) + }) + }) + + describe('getAttrs', () => { + it('extracts arguments from tag', () => { + const input = '<img src="boop" cool ebin=\'true\'>' + const output = { src: 'boop', cool: true, ebin: 'true' } + + expect(getAttrs(input)).to.eql(output) + }) + }) +}) diff --git a/test/unit/specs/services/tiny_post_html_processor/mini_post_html_processor.spec.js b/test/unit/specs/services/tiny_post_html_processor/mini_post_html_processor.spec.js @@ -1,166 +0,0 @@ -import { convertHtml, processTextForEmoji, getAttrs } from 'src/services/mini_html_converter/mini_html_converter.service.js' - -describe('MiniHtmlConverter', () => { - describe('convertHtml', () => { - it('converts html into a tree structure', () => { - const input = '1 <p>2</p> <b>3<img src="a">4</b>5' - expect(convertHtml(input)).to.eql([ - '1 ', - [ - '<p>', - ['2'], - '</p>' - ], - ' ', - [ - '<b>', - [ - '3', - ['<img src="a">'], - '4' - ], - '</b>' - ], - '5' - ]) - }) - it('converts html to tree while preserving tag formatting', () => { - const input = '1 <p >2</p><b >3<img src="a">4</b>5' - expect(convertHtml(input)).to.eql([ - '1 ', - [ - '<p >', - ['2'], - '</p>' - ], - [ - '<b >', - [ - '3', - ['<img src="a">'], - '4' - ], - '</b>' - ], - '5' - ]) - }) - it('converts semi-broken html', () => { - const 
input = '1 <br> 2 <p> 42' - expect(convertHtml(input)).to.eql([ - '1 ', - ['<br>'], - ' 2 ', - [ - '<p>', - [' 42'] - ] - ]) - }) - it('realistic case 1', () => { - const input = '<p><span class="h-card"><a class="u-url mention" data-user="9wRC6T2ZZiKWJ0vUi8" href="https://cawfee.club/users/benis" rel="ugc">@<span>benis</span></a></span> <span class="h-card"><a class="u-url mention" data-user="194" href="https://shigusegubu.club/users/hj" rel="ugc">@<span>hj</span></a></span> nice</p>' - expect(convertHtml(input)).to.eql([ - [ - '<p>', - [ - [ - '<span class="h-card">', - [ - [ - '<a class="u-url mention" data-user="9wRC6T2ZZiKWJ0vUi8" href="https://cawfee.club/users/benis" rel="ugc">', - [ - '@', - [ - '<span>', - [ - 'benis' - ], - '</span>' - ] - ], - '</a>' - ] - ], - '</span>' - ], - ' ', - [ - '<span class="h-card">', - [ - [ - '<a class="u-url mention" data-user="194" href="https://shigusegubu.club/users/hj" rel="ugc">', - [ - '@', - [ - '<span>', - [ - 'hj' - ], - '</span>' - ] - ], - '</a>' - ] - ], - '</span>' - ], - ' nice' - ], - '</p>' - ] - ]) - }) - it('realistic case 2', () => { - const inputOutput = 'Country improv: give me a city<br/>Audience: Memphis<br/>Improv troupe: come on, a better one<br/>Audience: el paso' - expect(convertHtml(inputOutput)).to.eql([ - 'Country improv: give me a city', - [ - '<br/>' - ], - 'Audience: Memphis', - [ - '<br/>' - ], - 'Improv troupe: come on, a better one', - [ - '<br/>' - ], - 'Audience: el paso' - ]) - }) - }) - - describe('processTextForEmoji', () => { - it('processes all emoji in text', () => { - const input = 'Hello from finland! :lol: We have best water! :lmao:' - const emojis = [ - { shortcode: 'lol', src: 'LOL' }, - { shortcode: 'lmao', src: 'LMAO' } - ] - const processor = ({ shortcode, src }) => ({ shortcode, src }) - expect(processTextForEmoji(input, emojis, processor)).to.eql([ - 'Hello from finland! ', - { shortcode: 'lol', src: 'LOL' }, - ' We have best water! 
', - { shortcode: 'lmao', src: 'LMAO' } - ]) - }) - it('leaves text as is', () => { - const input = 'Number one: that\'s terror' - const emojis = [] - const processor = ({ shortcode, src }) => ({ shortcode, src }) - expect(processTextForEmoji(input, emojis, processor)).to.eql([ - 'Number one: that\'s terror' - ]) - }) - }) - - describe('getAttrs', () => { - it('extracts arguments from tag', () => { - const input = '<img src="boop" cool ebin=\'true\'>' - const output = { src: 'boop', cool: true, ebin: 'true' } - - expect(getAttrs(input)).to.eql(output) - }) - }) -}) diff --git a/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js b/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js @@ -1,96 +0,0 @@ -import { processHtml } from 'src/services/tiny_post_html_processor/tiny_post_html_processor.service.js' - -describe('TinyPostHTMLProcessor', () => { - describe('with processor that keeps original line should not make any changes to HTML when', () => { - const processorKeep = (line) => line - it('fed with regular HTML with newlines', () => { - const inputOutput = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' - expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) - }) - - it('fed with possibly broken HTML with invalid tags/composition', () => { - const inputOutput = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>' - expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) - }) - - it('fed with very broken HTML with broken composition', () => { - const inputOutput = '</p> lmao what </div> whats going on <div> wha <p>' - expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) - }) - - it('fed with sorta valid HTML but tags aren\'t closed', () => { - const inputOutput = 'just leaving a <div> hanging' - expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) - }) - - it('fed with not really HTML at this point... 
tags that aren\'t finished', () => { - const inputOutput = 'do you expect me to finish this <div class=' - expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) - }) - - it('fed with dubiously valid HTML (p within p and also div inside p)', () => { - const inputOutput = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>' - expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) - }) - - it('fed with maybe valid HTML? self-closing divs and ps', () => { - const inputOutput = 'a <div class="what"/> what now <p aria-label="wtf"/> ?' - expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) - }) - - it('fed with valid XHTML containing a CDATA', () => { - const inputOutput = 'Yes, it is me, <![CDATA[DIO]]>' - expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) - }) - }) - describe('with processor that replaces lines with word "_" should match expected line when', () => { - const processorReplace = (line) => '_' - it('fed with regular HTML with newlines', () => { - const input = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' - const output = '_<br/>_<p class="lol">_</p>_\n_<p >_<br>_</p> <br>\n<br/>' - expect(processHtml(input, processorReplace)).to.eql(output) - }) - - it('fed with possibly broken HTML with invalid tags/composition', () => { - const input = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>' - const output = '_' - expect(processHtml(input, processorReplace)).to.eql(output) - }) - - it('fed with very broken HTML with broken composition', () => { - const input = '</p> lmao what </div> whats going on <div> wha <p>' - const output = '</p>_</div>_<div>_<p>' - expect(processHtml(input, processorReplace)).to.eql(output) - }) - - it('fed with sorta valid HTML but tags aren\'t closed', () => { - const input = 'just leaving a <div> hanging' - const output = '_<div>_' - expect(processHtml(input, processorReplace)).to.eql(output) - }) - - it('fed with not really HTML at 
this point... tags that aren\'t finished', () => { - const input = 'do you expect me to finish this <div class=' - const output = '_' - expect(processHtml(input, processorReplace)).to.eql(output) - }) - - it('fed with dubiously valid HTML (p within p and also div inside p)', () => { - const input = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>' - const output = '_<p>_\n_<p>_</p>_<br/><div>_</div></p>' - expect(processHtml(input, processorReplace)).to.eql(output) - }) - - it('fed with maybe valid HTML? self-closing divs and ps', () => { - const input = 'a <div class="what"/> what now <p aria-label="wtf"/> ?' - const output = '_<div class="what"/>_<p aria-label="wtf"/>_' - expect(processHtml(input, processorReplace)).to.eql(output) - }) - - it('fed with valid XHTML containing a CDATA', () => { - const input = 'Yes, it is me, <![CDATA[DIO]]>' - const output = '_' - expect(processHtml(input, processorReplace)).to.eql(output) - }) - }) -})