commit: 8015fd76003419bd3ccca585ffe4416312800fe0
parent: 4919b89ab83f8eee43b89ff4786eae46bf2fa30e
Author: Eugen Rochko <eugen@zeonfederated.com>
Date: Sat, 10 Jun 2017 15:06:50 +0200
Improve RTL detection (#3682)
- Use plaintext
- Strip out URLs
- Strip out mentions
- Strip out hashtags
- Strip out whitespace from "overall" count
- Consistent between JS and Ruby
Diffstat:
5 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/app/helpers/stream_entries_helper.rb b/app/helpers/stream_entries_helper.rb
@@ -47,11 +47,16 @@ module StreamEntriesHelper
end
end
+ # Tells whether +status+ should be laid out right-to-left. The decision is
+ # delegated to rtl?; a local status passes its text through untouched, any
+ # other status has its text run through strip_tags first (presumably because
+ # non-local text may carry HTML markup -- confirm against the Status model).
+ def rtl_status?(status)
+   plain_text = status.local? ? status.text : strip_tags(status.text)
+   rtl?(plain_text)
+ end
+
def rtl?(text)
+ text = simplified_text(text)
rtl_characters = /[\p{Hebrew}|\p{Arabic}|\p{Syriac}|\p{Thaana}|\p{Nko}]+/m.match(text)
if rtl_characters.present?
- total_size = text.strip.size.to_f
+ total_size = text.size.to_f
rtl_size(rtl_characters.to_a) / total_size > 0.3
else
false
@@ -60,6 +65,18 @@ module StreamEntriesHelper
private
+ # Returns a copy of +text+ with everything removed that should not count
+ # towards the RTL ratio: URLs reported by URI.extract, matches of
+ # Account::MENTION_RE and Tag::HASHTAG_RE, and all whitespace. The argument
+ # itself is left unmodified because the work happens on a duplicate.
+ def simplified_text(text)
+   stripped = text.dup
+
+   # The URL list is computed once, before any deletion takes place.
+   URI.extract(stripped).each { |url| stripped.gsub!(url, '') }
+
+   # Same removal order as before: mentions, then hashtags, then whitespace.
+   [Account::MENTION_RE, Tag::HASHTAG_RE, /\s+/].each do |pattern|
+     stripped.gsub!(pattern, '')
+   end
+
+   stripped
+ end
+
def rtl_size(characters)
characters.reduce(0) { |acc, elem| acc + elem.size }.to_f
end
diff --git a/app/javascript/mastodon/components/status_content.js b/app/javascript/mastodon/components/status_content.js
@@ -100,7 +100,7 @@ class StatusContent extends React.PureComponent {
const spoilerContent = { __html: emojify(escapeTextContentForBrowser(status.get('spoiler_text', ''))) };
const directionStyle = { direction: 'ltr' };
- if (isRtl(status.get('content'))) {
+ if (isRtl(status.get('search_index'))) {
directionStyle.direction = 'rtl';
}
diff --git a/app/javascript/mastodon/rtl.js b/app/javascript/mastodon/rtl.js
@@ -17,11 +17,15 @@ export function isRtl(text) {
return false;
}
+ text = text.replace(/(?:^|[^\/\w])@([a-z0-9_]+(@[a-z0-9\.\-]+)?)/ig, '');
+ text = text.replace(/(?:^|[^\/\w])#([\S]+)/ig, '');
+ text = text.replace(/\s+/g, '');
+
const matches = text.match(rtlChars);
if (!matches) {
return false;
}
- return matches.length / text.trim().length > 0.3;
+ return matches.length / text.length > 0.3;
};
diff --git a/app/views/stream_entries/_detailed_status.html.haml b/app/views/stream_entries/_detailed_status.html.haml
@@ -12,7 +12,7 @@
%p{ style: 'margin-bottom: 0' }<
%span.p-summary> #{status.spoiler_text}
%a.status__content__spoiler-link{ href: '#' }= t('statuses.show_more')
- .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl?(status.content) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status)
+ .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl_status?(status) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status)
- unless status.media_attachments.empty?
- if status.media_attachments.first.video?
diff --git a/app/views/stream_entries/_simple_status.html.haml b/app/views/stream_entries/_simple_status.html.haml
@@ -18,7 +18,7 @@
%p{ style: 'margin-bottom: 0' }<
%span.p-summary> #{status.spoiler_text}
%a.status__content__spoiler-link{ href: '#' }= t('statuses.show_more')
- .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl?(status.content) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status)
+ .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl_status?(status) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status)
- unless status.media_attachments.empty?
.status__attachments