commit: 3ebc0ad4d3c2fe0b0951a334642b769bd521a799
parent: 235c14c79d620d47012a08425324df222a136457
Author: Eugen Rochko <eugen@zeonfederated.com>
Date: Fri, 9 Feb 2018 23:04:47 +0100
Full-text search for authorized statuses (#6423)
* Add full-text search for authorized statuses
- Search API will return statuses that match the query
- Only for logged in users
- Only if you are author of the status,
- Or you were mentioned in it
- Or you favourited or reblogged it
- Configuration over `ES_ENABLED`, `ES_HOST`, `ES_PORT`, `ES_PREFIX`
- Run `rails chewy:deploy` to create & populate index
Fix #5880
Fix #4293
Fix #1152
* Add commented out docker-compose configuration for ES container
* Optimize index import, filter search results
* Add basic normalization to the index
* Add better stemming and normalization to the index
* Skip webfinger request if search query includes both @ and a space
* Fix code style
* Visually separate search result sections
* Fix code style issues
Diffstat:
13 files changed, 230 insertions(+), 5 deletions(-)
diff --git a/.env.production.sample b/.env.production.sample
@@ -9,6 +9,10 @@ DB_USER=postgres
DB_NAME=postgres
DB_PASS=
DB_PORT=5432
+# Optional Elasticsearch configuration
+# ES_ENABLED=true
+# ES_HOST=localhost
+# ES_PORT=9200
# Federation
# Note: Changing LOCAL_DOMAIN at a later time will cause unwanted side effects, including breaking all existing federation.
diff --git a/Gemfile b/Gemfile
@@ -27,6 +27,7 @@ gem 'bootsnap'
gem 'browser'
gem 'charlock_holmes', '~> 0.7.5'
gem 'iso-639'
+gem 'chewy', '~> 0.10', git: 'https://github.com/toptal/chewy.git'
gem 'cld3', '~> 3.2.0'
gem 'devise', '~> 4.4'
gem 'devise-two-factor', '~> 3.0'
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -1,3 +1,12 @@
+GIT
+ remote: https://github.com/toptal/chewy.git
+ revision: a7d21eb4b0bd7415533ef134bb6d31b2df309701
+ specs:
+ chewy (0.10.1)
+ activesupport (>= 4.0)
+ elasticsearch (>= 2.0.0)
+ elasticsearch-dsl
+
GEM
remote: https://rubygems.org/
specs:
@@ -154,6 +163,15 @@ GEM
json
thread
thread_safe
+ elasticsearch (6.0.1)
+ elasticsearch-api (= 6.0.1)
+ elasticsearch-transport (= 6.0.1)
+ elasticsearch-api (6.0.1)
+ multi_json
+ elasticsearch-dsl (0.1.5)
+ elasticsearch-transport (6.0.1)
+ faraday
+ multi_json
encryptor (3.0.0)
erubi (1.7.0)
et-orbi (1.0.8)
@@ -163,6 +181,8 @@ GEM
fabrication (2.18.0)
faker (1.8.4)
i18n (~> 0.5)
+ faraday (0.14.0)
+ multipart-post (>= 1.2, < 3)
fast_blank (1.0.0)
ffi (1.9.18)
fog-core (1.45.0)
@@ -291,6 +311,7 @@ GEM
minitest (5.11.3)
msgpack (1.1.0)
multi_json (1.12.2)
+ multipart-post (2.0.0)
net-scp (1.2.1)
net-ssh (>= 2.6.5)
net-ssh (4.2.0)
@@ -583,6 +604,7 @@ DEPENDENCIES
capistrano-yarn (~> 2.0)
capybara (~> 2.15)
charlock_holmes (~> 0.7.5)
+ chewy (~> 0.10)!
cld3 (~> 3.2.0)
climate_control (~> 0.2)
devise (~> 4.4)
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+class StatusesIndex < Chewy::Index
+ settings index: { refresh_interval: '15m' }, analysis: {
+ filter: {
+ english_stop: {
+ type: 'stop',
+ stopwords: '_english_',
+ },
+ english_stemmer: {
+ type: 'stemmer',
+ language: 'english',
+ },
+ english_possessive_stemmer: {
+ type: 'stemmer',
+ language: 'possessive_english',
+ },
+ },
+ analyzer: {
+ content: {
+ tokenizer: 'uax_url_email',
+ filter: %w(
+ english_possessive_stemmer
+ lowercase
+ asciifolding
+ cjk_width
+ english_stop
+ english_stemmer
+ ),
+ },
+ },
+ }
+
+ define_type ::Status.without_reblogs do
+ crutch :mentions do |collection|
+ data = ::Mention.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id)
+ data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+ end
+
+ crutch :favourites do |collection|
+ data = ::Favourite.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id)
+ data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+ end
+
+ crutch :reblogs do |collection|
+ data = ::Status.where(reblog_of_id: collection.map(&:id)).pluck(:reblog_of_id, :account_id)
+ data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+ end
+
+ root date_detection: false do
+ field :account_id, type: 'long'
+
+ field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].join("\n\n") } do
+ field :stemmed, type: 'text', analyzer: 'content'
+ end
+
+ field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
+ field :created_at, type: 'date'
+ end
+ end
+end
diff --git a/app/javascript/mastodon/features/compose/components/search_results.js b/app/javascript/mastodon/features/compose/components/search_results.js
@@ -22,6 +22,8 @@ export default class SearchResults extends ImmutablePureComponent {
count += results.get('accounts').size;
accounts = (
<div className='search-results__section'>
+ <h5><FormattedMessage id='search_results.accounts' defaultMessage='People' /></h5>
+
{results.get('accounts').map(accountId => <AccountContainer key={accountId} id={accountId} />)}
</div>
);
@@ -31,6 +33,8 @@ export default class SearchResults extends ImmutablePureComponent {
count += results.get('statuses').size;
statuses = (
<div className='search-results__section'>
+ <h5><FormattedMessage id='search_results.statuses' defaultMessage='Toots' /></h5>
+
{results.get('statuses').map(statusId => <StatusContainer key={statusId} id={statusId} />)}
</div>
);
@@ -40,6 +44,8 @@ export default class SearchResults extends ImmutablePureComponent {
count += results.get('hashtags').size;
hashtags = (
<div className='search-results__section'>
+ <h5><FormattedMessage id='search_results.hashtags' defaultMessage='Hashtags' /></h5>
+
{results.get('hashtags').map(hashtag => (
<Link key={hashtag} className='search-results__hashtag' to={`/timelines/tag/${hashtag}`}>
#{hashtag}
diff --git a/app/javascript/styles/mastodon/components.scss b/app/javascript/styles/mastodon/components.scss
@@ -1786,7 +1786,7 @@
flex: 1;
min-height: 47px;
- > img {
+ > img {
display: block;
object-fit: contain;
object-position: bottom left;
@@ -3229,6 +3229,43 @@
font-weight: 500;
}
+// One titled group (accounts, statuses or hashtags) in the search results.
+.search-results__section {
+  margin-bottom: 20px;
+
+  // Section heading: the label is drawn on top of a horizontal rule.
+  h5 {
+    position: relative;
+
+    // The rule: a zero-height, full-width box at mid-height whose top border
+    // is the visible line.
+    &::before {
+      content: "";
+      display: block;
+      position: absolute;
+      top: 50%;
+      left: 0;
+      right: 0;
+      width: 100%;
+      height: 0;
+      border-top: 1px solid lighten($ui-base-color, 8%);
+    }
+
+    // Label text; the background and z-index keep the rule from showing
+    // through it.
+    span {
+      position: relative;
+      z-index: 1;
+      display: inline-block;
+      padding: 10px;
+      background: $ui-base-color;
+      color: $ui-primary-color;
+      font-size: 14px;
+      font-weight: 500;
+      cursor: default;
+    }
+  }
+
+  // No bottom border on the last entry of a section.
+  .account:last-child,
+  & > div:last-child .status {
+    border-bottom: 0;
+  }
+}
+
.search-results__hashtag {
display: block;
padding: 10px;
diff --git a/app/lib/status_filter.rb b/app/lib/status_filter.rb
@@ -9,6 +9,7 @@ class StatusFilter
end
def filtered?
+ return false if !account.nil? && account.id == status.account_id
blocked_by_policy? || (account_present? && filtered_status?) || silenced_account?
end
diff --git a/app/models/favourite.rb b/app/models/favourite.rb
@@ -13,6 +13,8 @@
class Favourite < ApplicationRecord
include Paginable
+ update_index('statuses#status', :status) if Chewy.enabled?
+
belongs_to :account, inverse_of: :favourites
belongs_to :status, inverse_of: :favourites, counter_cache: true
diff --git a/app/models/status.rb b/app/models/status.rb
@@ -31,6 +31,8 @@ class Status < ApplicationRecord
include Cacheable
include StatusThreadingConcern
+ update_index('statuses#status', :proper) if Chewy.enabled?
+
enum visibility: [:public, :unlisted, :private, :direct], _suffix: :visibility
belongs_to :application, class_name: 'Doorkeeper::Application', optional: true
@@ -78,6 +80,22 @@ class Status < ApplicationRecord
delegate :domain, to: :account, prefix: true
+ def searchable_by(preloaded = nil)
+ ids = [account_id]
+
+ if preloaded.nil?
+ ids += mentions.pluck(:account_id)
+ ids += favourites.pluck(:account_id)
+ ids += reblogs.pluck(:account_id)
+ else
+ ids += preloaded.mentions[id] || []
+ ids += preloaded.favourites[id] || []
+ ids += preloaded.reblogs[id] || []
+ end
+
+ ids.uniq
+ end
+
def reply?
!in_reply_to_id.nil? || attributes['reply']
end
diff --git a/app/services/search_service.rb b/app/services/search_service.rb
@@ -1,21 +1,43 @@
# frozen_string_literal: true
class SearchService < BaseService
- attr_accessor :query
+ attr_accessor :query, :account, :limit, :resolve
def call(query, limit, resolve = false, account = nil)
- @query = query
+ @query = query
+ @account = account
+ @limit = limit
+ @resolve = resolve
default_results.tap do |results|
if url_query?
results.merge!(url_resource_results) unless url_resource.nil?
elsif query.present?
- results[:accounts] = AccountSearchService.new.call(query, limit, account, resolve: resolve)
- results[:hashtags] = Tag.search_for(query.gsub(/\A#/, ''), limit) unless query.start_with?('@')
+ results[:accounts] = perform_accounts_search! if account_searchable?
+ results[:statuses] = perform_statuses_search! if full_text_searchable?
+ results[:hashtags] = perform_hashtags_search! if hashtag_searchable?
end
end
end
+ private
+
+ def perform_accounts_search!
+ AccountSearchService.new.call(query, limit, account, resolve: resolve)
+ end
+
+ def perform_statuses_search!
+ statuses = StatusesIndex.filter(term: { searchable_by: account.id })
+ .query(multi_match: { type: 'most_fields', query: query, operator: 'and', fields: %w(text text.stemmed) })
+ .limit(limit).objects
+
+ statuses.reject { |status| StatusFilter.new(status, account).filtered? }
+ end
+
+ def perform_hashtags_search!
+ Tag.search_for(query.gsub(/\A#/, ''), limit)
+ end
+
def default_results
{ accounts: [], hashtags: [], statuses: [] }
end
@@ -35,4 +57,17 @@ class SearchService < BaseService
def url_resource_symbol
url_resource.class.name.downcase.pluralize.to_sym
end
+
+ def full_text_searchable?
+ return false unless Chewy.enabled?
+ !account.nil? && !((query.start_with?('#') || query.include?('@')) && !query.include?(' '))
+ end
+
+ def account_searchable?
+ !(query.include?('@') && query.include?(' '))
+ end
+
+ def hashtag_searchable?
+ !query.include?('@')
+ end
end
diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb
@@ -0,0 +1,22 @@
+enabled = ENV['ES_ENABLED'] == 'true'
+host = ENV.fetch('ES_HOST') { 'localhost' }
+port = ENV.fetch('ES_PORT') { 9200 }
+fallback_prefix = ENV.fetch('REDIS_NAMESPACE') { nil }
+prefix = ENV.fetch('ES_PREFIX') { fallback_prefix }
+
+Chewy.settings = {
+ host: "#{host}:#{port}",
+ prefix: prefix,
+ enabled: enabled,
+ journal: false,
+}
+
+Chewy.root_strategy = enabled ? :sidekiq : :bypass
+
+module Chewy
+ class << self
+ def enabled?
+ settings[:enabled]
+ end
+ end
+end
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -19,6 +19,17 @@ services:
# volumes:
# - ./redis:/data
+# es:
+# restart: always
+# image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.1.3
+# environment:
+# - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
+# networks:
+# - internal_network
+#### Uncomment to enable ES persistence
+## volumes:
+## - ./elasticsearch:/usr/share/elasticsearch/data
+
web:
build: .
image: gargron/mastodon
@@ -33,6 +44,7 @@ services:
depends_on:
- db
- redis
+# - es
volumes:
- ./public/assets:/mastodon/public/assets
- ./public/packs:/mastodon/public/packs
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
@@ -25,6 +25,10 @@ RSpec.configure do |config|
end
end
+ config.before :suite do
+ Chewy.strategy(:bypass)
+ end
+
config.after :suite do
gc_counter = 0
FileUtils.rm_rf(Dir["#{Rails.root}/spec/test_files/"])