commit: e4af4898de8ab962bf39ced5d31d88e3fd510538
parent: 6d70a8026309c2d41e2402ec84875ced8e181105
Author: Eugen <eugen@zeonfederated.com>
Date: Sun, 16 Apr 2017 20:32:17 +0200
Add language detection (#1772)
* Add language detection via WhatLanguage and (de)serialization of it through Atom
* Fix default language in ProcessFeedService
* Re-add newline before 'react-rails' Gem to fix groupings
Fixes Code Climate issue
Diffstat:
7 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/Gemfile b/Gemfile
@@ -56,6 +56,7 @@ gem 'sprockets-rails', :require => 'sprockets/railtie'
gem 'statsd-instrument'
gem 'twitter-text'
gem 'tzinfo-data'
+gem 'whatlanguage'
gem 'react-rails'
gem 'browserify-rails'
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -455,6 +455,7 @@ GEM
websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.2)
+ whatlanguage (1.0.6)
xpath (2.0.0)
nokogiri (~> 1.3)
@@ -541,6 +542,7 @@ DEPENDENCIES
tzinfo-data
uglifier (>= 1.3.0)
webmock
+ whatlanguage
RUBY VERSION
ruby 2.4.1p111
diff --git a/app/lib/atom_serializer.rb b/app/lib/atom_serializer.rb
@@ -327,8 +327,8 @@ class AtomSerializer
end
def serialize_status_attributes(entry, status)
- append_element(entry, 'summary', status.spoiler_text) if status.spoiler_text?
- append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html')
+ append_element(entry, 'summary', status.spoiler_text, 'xml:lang': status.language) if status.spoiler_text?
+ append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html', 'xml:lang': status.language)
status.mentions.each do |mentioned|
append_element(entry, 'link', nil, rel: :mentioned, 'ostatus:object-type': TagManager::TYPES[:person], href: TagManager.instance.uri_for(mentioned.account))
diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb
@@ -19,6 +19,7 @@ class PostStatusService < BaseService
sensitive: options[:sensitive],
spoiler_text: options[:spoiler_text] || '',
visibility: options[:visibility],
+ language: detect_language(text),
application: options[:application])
attach_media(status, media)
@@ -51,6 +52,10 @@ class PostStatusService < BaseService
media.update(status_id: status.id)
end
+ def detect_language(text)
+ WhatLanguage.new(:all).language_iso(text)
+ end
+
# Returns the ProcessMentionsService instance for this object, building it on
# first use and reusing it afterwards.
def process_mentions_service
  return @process_mentions_service if @process_mentions_service

  @process_mentions_service = ProcessMentionsService.new
end
diff --git a/app/services/process_feed_service.rb b/app/services/process_feed_service.rb
@@ -119,6 +119,7 @@ class ProcessFeedService < BaseService
spoiler_text: content_warning(entry),
created_at: published(entry),
reply: thread?(entry),
+ language: content_language(entry),
visibility: visibility_scope(entry)
)
@@ -238,6 +239,10 @@ class ProcessFeedService < BaseService
xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS).content
end
+ def content_language(xml = @xml)
+ xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS)['xml:lang']&.presence || 'en'
+ end
+
# Returns the text of the entry's <summary> element, or an empty string when
# the element is absent or has no content.
def content_warning(xml = @xml)
  summary_node = xml.at_xpath('./xmlns:summary', xmlns: TagManager::XMLNS)
  return '' if summary_node.nil?

  summary_node.content || ''
end
diff --git a/db/migrate/20170414132105_add_language_to_statuses.rb b/db/migrate/20170414132105_add_language_to_statuses.rb
@@ -0,0 +1,5 @@
+class AddLanguageToStatuses < ActiveRecord::Migration[5.0]
+ def change
+ add_column :statuses, :language, :string, null: false, default: 'en'
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20170414080609) do
+ActiveRecord::Schema.define(version: 20170414132105) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@@ -40,7 +40,6 @@ ActiveRecord::Schema.define(version: 20170414080609) do
t.datetime "header_updated_at"
t.string "avatar_remote_url"
t.datetime "subscription_expires_at"
- t.datetime "last_webfingered_at"
t.boolean "silenced", default: false, null: false
t.boolean "suspended", default: false, null: false
t.boolean "locked", default: false, null: false
@@ -48,6 +47,7 @@ ActiveRecord::Schema.define(version: 20170414080609) do
t.integer "statuses_count", default: 0, null: false
t.integer "followers_count", default: 0, null: false
t.integer "following_count", default: 0, null: false
+ t.datetime "last_webfingered_at"
t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin
t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", using: :btree
t.index ["url"], name: "index_accounts_on_url", using: :btree
@@ -244,6 +244,7 @@ ActiveRecord::Schema.define(version: 20170414080609) do
t.boolean "reply", default: false
t.integer "favourites_count", default: 0, null: false
t.integer "reblogs_count", default: 0, null: false
+ t.string "language", default: "en", null: false
t.index ["account_id"], name: "index_statuses_on_account_id", using: :btree
t.index ["in_reply_to_id"], name: "index_statuses_on_in_reply_to_id", using: :btree
t.index ["reblog_of_id"], name: "index_statuses_on_reblog_of_id", using: :btree