commit: 70891a99a97bc1ca14a8ded13a5cd45b648b92b3
parent: 3572f4423f1b0a50c06129a106a0dc599cba0335
Author: abcang <>
Date: Wed, 19 Apr 2017 21:52:18 +0900
Fix html escape characters in the URL (#2138)
* fix character escaping in URL
* add tests
* put a comma after the last item
* add HTML escape test
2 files changed, 84 insertions(+), 19 deletions(-)
diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
@@ -13,10 +13,9 @@ class Formatter
return reformat(status.content) unless status.local?
html = status.text
- html = encode(html)
+ html = encode_and_link_urls(html)
html = simple_format(html, {}, sanitize: false)
html = html.delete("\n")
- html = link_urls(html)
html = link_mentions(html, status.mentions)
html = link_hashtags(html)
@@ -35,8 +34,7 @@ class Formatter
def simplified_format(account)
return reformat(account.note) unless account.local?
- html = encode(account.note)
- html = link_urls(html)
+ html = encode_and_link_urls(account.note)
html = link_accounts(html)
html = link_hashtags(html)
@@ -49,6 +47,26 @@ class Formatter
+ def encode_and_link_urls(html)
+ entities = Twitter::Extractor.extract_urls_with_indices(html, extract_url_without_protocol: false)
+ entities = entities.sort_by { |entity| entity[:indices].first }
+ chars = html.to_s.to_char_a
+ html_attrs = {
+ target: '_blank',
+ rel: 'nofollow noopener',
+ }
+ result = ''
+ last_index = entities.reduce(0) do |index, entity|
+ indices = entity[:indices]
+ result += encode(chars[index...indices.first].join)
+ result += Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), entity[:url], html_attrs)
+ indices.last
+ end
+ result += encode(chars[last_index..-1].join)
+ end
def link_urls(html)
Twitter::Autolink.auto_link_urls(html, url_target: '_blank',
link_attribute_block: lambda { |_, a| a[:rel] << ' noopener' },
diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb
@@ -2,7 +2,8 @@ require 'rails_helper'
RSpec.describe Formatter do
let(:account) { Fabricate(:account, username: 'alice') }
- let(:local_status) { Fabricate(:status, text: 'Hello world', account: account) }
+ let(:local_text) { 'Hello world' }
+ let(:local_status) { Fabricate(:status, text: local_text, account: account) }
let(:remote_status) { Fabricate(:status, text: '<script>alert("Hello")</script> Beep boop', uri: 'beepboop', account: account) }
describe '#format' do
@@ -20,35 +21,81 @@ RSpec.describe Formatter do
expect(subject).to match('<a href="" rel="nofollow noopener" target="_blank"><span class="invisible">http://</span><span class=""></span><span class="invisible"></span></a>')
- it 'matches a stand-alone medium URL' do
- expect(subject.match('')[0]).to eq ''
+ context 'matches a stand-alone medium URL' do
+ let(:local_text) { '' }
+ it 'has valid url' do
+ expect(subject).to include('href=""')
+ end
- it 'matches a stand-alone google URL' do
- expect(subject.match('')[0]).to eq ''
+ context 'matches a stand-alone google URL' do
+ let(:local_text) { '' }
+ it 'has valid url' do
+ expect(subject).to include('href=""')
+ end
- it 'matches a URL without trailing period' do
- expect(subject.match(' ')[0]).to eq ''
+ context 'matches a URL without trailing period' do
+ let(:local_text) { ' ' }
+ it 'has valid url' do
+ expect(subject).to include('href=""')
+ end
it 'matches a URL without closing paranthesis' do
expect(subject.match('(')[0]).to eq ''
+ context 'matches a URL without exclamation point' do
+ let(:local_text) { '!' }
+ it 'has valid url' do
+ expect(subject).to include('href=""')
+ end
+ end
- it 'matches a URL without exclamation point' do
- expect(subject.match('! ')[0]).to eq ''
+ context 'matches a URL without single quote' do
+ let(:local_text) { "'" }
+ it 'has valid url' do
+ expect(subject).to include('href=""')
+ end
- it 'matches a URL with a query string' do
- expect(subject.match('')[0]).to eq ''
+ context 'matches a URL without angle brackets' do
+ let(:local_text) { '>' }
+ it 'has valid url' do
+ expect(subject).to include('href=""')
+ end
- it 'matches a URL with parenthesis in it' do
- expect(subject.match('')[0]).to eq ''
+ context 'matches a URL with a query string' do
+ let(:local_text) { '' }
+ it 'has valid url' do
+ expect(subject).to include('href=""')
+ end
+ end
+ context 'matches a URL with parenthesis in it' do
+ let(:local_text) { '' }
+ it 'has valid url' do
+ expect(subject).to include('href=""')
+ end
+ end
+ context 'contains html (script tag)' do
+ let(:local_text) { '<script>alert("Hello")</script>' }
+ it 'has valid url' do
+ expect(subject).to match '<p>&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;</p>'
+ end
+ end
+ context 'contains html (xss attack)' do
+ let(:local_text) { %q{<img src="javascript:alert('XSS');">} }
+ it 'has valid url' do
+ expect(subject).to match '<p>&lt;img src=&quot;javascript:alert(&apos;XSS&apos;);&quot;&gt;</p>'
+ end
describe '#reformat' do