Fix parsing non-ascii tags - pleroma-fe - My custom branch(es) on git.pleroma.social/pleroma/pleroma-fe

commit: 0b0b1dabdfa2051d0b1d1ad846892da4801ca588
parent 510392e4ca67fee4bf6fed9346f1ff708966e734
Author: tusooa <tusooa@kazv.moe>
Date:   Fri, 21 Jul 2023 13:54:10 -0400

Fix parsing non-ascii tags

Diffstat:
A changelog.d/nonascii-tags.fix 1 +
M src/services/matcher/matcher.service.js 7 +++++--
M test/unit/specs/services/matcher/matcher.spec.js 6 ++++++

3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/changelog.d/nonascii-tags.fix b/changelog.d/nonascii-tags.fix
@@ -0,0 +1 @@
+Fix parsing non-ascii tags
diff --git a/src/services/matcher/matcher.service.js b/src/services/matcher/matcher.service.js
@@ -14,8 +14,11 @@ export const mentionMatchesUrl = (attention, url) => {
  * @param {string} url
  */
 export const extractTagFromUrl = (url) => {
-  const regex = /tag[s]*\/(\w+)$/g
-  const result = regex.exec(url)
+  const decoded = decodeURI(url)
+  // https://git.pleroma.social/pleroma/elixir-libraries/linkify/-/blob/master/lib/linkify/parser.ex
+  // https://www.pcre.org/original/doc/html/pcrepattern.html
+  const regex = /tag[s]*\/([\p{L}\p{N}_]*[\p{Alphabetic}_·\u{200c}][\p{L}\p{N}_·\p{M}\u{200c}]*)$/ug
+  const result = regex.exec(decoded)
   if (!result) {
     return false
   }
diff --git a/test/unit/specs/services/matcher/matcher.spec.js b/test/unit/specs/services/matcher/matcher.spec.js
@@ -78,5 +78,11 @@ describe('MatcherService', () => {
 
       expect(MatcherService.extractTagFromUrl(url)).to.eql(false)
     })
+
+    it('should return tag name from non-ascii tags', () => {
+      const url = encodeURI('https://website.com/tag/喵喵喵')
+
+      expect(MatcherService.extractTagFromUrl(url)).to.eql('喵喵喵')
+    })
   })
 })

A	changelog.d/nonascii-tags.fix	1	+
M	src/services/matcher/matcher.service.js	7	+++++--
M	test/unit/specs/services/matcher/matcher.spec.js	6	++++++