commit: e17c2e5da5010beaefb46c8fffe35e87cb28f407
parent: 4a618908e836ecb94f70e99f2198ee7b3ba3b2ec
Author: Eugen Rochko <eugen@zeonfederated.com>
Date: Wed, 14 Jun 2017 18:01:35 +0200
Batched remove status service (#3735)
* Make Pubsubhubbub::DistributionWorker handle both single stream entry
arguments, as well as arrays of stream entries
* Add BatchedRemoveStatusService, make SuspendAccountService use it
* Improve method names
* Add test
* Add more tests
* Use PuSH payloads of 100 to have a clear mapping of
1000 input statuses -> 10 PuSH payloads
It was nice while it lasted
Diffstat:
5 files changed, 215 insertions(+), 22 deletions(-)
diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb
@@ -0,0 +1,114 @@
+# frozen_string_literal: true
+
+class BatchedRemoveStatusService < BaseService
+ include StreamEntryRenderer
+
+ # Delete given statuses and reblogs of them
+ # Dispatch PuSH updates of the deleted statuses, but only local ones
+ # Dispatch Salmon deletes, unique per domain, of the deleted statuses, but only local ones
+ # Remove statuses from home feeds
+ # Push delete events to streaming API for home feeds and public feeds
+ # @param [Status] statuses A preferably batched array of statuses
+ def call(statuses)
+ statuses = Status.where(id: statuses.map(&:id)).includes(:account, :stream_entry).flat_map { |status| [status] + status.reblogs.includes(:account, :stream_entry).to_a }
+
+ @mentions = statuses.map { |s| [s.id, s.mentions.includes(:account).to_a] }.to_h
+ @tags = statuses.map { |s| [s.id, s.tags.pluck(:name)] }.to_h
+
+ @stream_entry_batches = []
+ @salmon_batches = []
+ @json_payloads = statuses.map { |s| [s.id, Oj.dump(event: :delete, payload: s.id)] }.to_h
+
+ # Ensure that rendered XML reflects destroyed state
+ Status.where(id: statuses.map(&:id)).in_batches.destroy_all
+
+ # Batch by source account
+ statuses.group_by(&:account_id).each do |_, account_statuses|
+ account = account_statuses.first.account
+
+ unpush_from_home_timelines(account_statuses)
+ batch_stream_entries(account_statuses) if account.local?
+ end
+
+ # Cannot be batched
+ statuses.each do |status|
+ unpush_from_public_timelines(status)
+ batch_salmon_slaps(status) if status.local?
+ end
+
+ Pubsubhubbub::DistributionWorker.push_bulk(@stream_entry_batches) { |batch| batch }
+ NotificationWorker.push_bulk(@salmon_batches) { |batch| batch }
+ end
+
+ private
+
+ def batch_stream_entries(statuses)
+ stream_entry_ids = statuses.map { |s| s.stream_entry.id }
+
+ stream_entry_ids.each_slice(100) do |batch_of_stream_entry_ids|
+ @stream_entry_batches << [batch_of_stream_entry_ids]
+ end
+ end
+
+ def unpush_from_home_timelines(statuses)
+ account = statuses.first.account
+ recipients = account.followers.local.pluck(:id)
+
+ recipients << account.id if account.local?
+
+ recipients.each do |follower_id|
+ unpush(follower_id, statuses)
+ end
+ end
+
+ def unpush_from_public_timelines(status)
+ payload = @json_payloads[status.id]
+
+ redis.pipelined do
+ redis.publish('timeline:public', payload)
+ redis.publish('timeline:public:local', payload) if status.local?
+
+ @tags[status.id].each do |hashtag|
+ redis.publish("timeline:hashtag:#{hashtag}", payload)
+ redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
+ end
+ end
+ end
+
+ def batch_salmon_slaps(status)
+ return if @mentions[status.id].empty?
+
+ payload = stream_entry_to_xml(status.stream_entry.reload)
+ recipients = @mentions[status.id].map(&:account).reject(&:local?).uniq(&:domain).map(&:id)
+
+ recipients.each do |recipient_id|
+ @salmon_batches << [payload, status.account_id, recipient_id]
+ end
+ end
+
+ def unpush(follower_id, statuses)
+ key = FeedManager.instance.key(:home, follower_id)
+
+ originals = statuses.reject(&:reblog?)
+ reblogs = statuses.reject { |s| !s.reblog? }
+
+ # Quickly remove all originals
+ redis.pipelined do
+ originals.each do |status|
+ redis.zremrangebyscore(key, status.id, status.id)
+ redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
+ end
+ end
+
+ # For reblogs, re-add original status to feed, unless the reblog
+ # was not in the feed in the first place
+ reblogs.each do |status|
+ redis.zadd(key, status.reblog_of_id, status.reblog_of_id) unless redis.zscore(key, status.reblog_of_id).nil?
+ redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
+ end
+ end
+
+ def redis
+ Redis.current
+ end
+end
diff --git a/app/services/suspend_account_service.rb b/app/services/suspend_account_service.rb
@@ -17,9 +17,8 @@ class SuspendAccountService < BaseService
end
def purge_content
- @account.statuses.reorder(nil).find_each do |status|
- # This federates out deletes to previous followers
- RemoveStatusService.new.call(status)
+ # Deleting in batches lets BatchedRemoveStatusService coalesce the
+ # PuSH/Salmon fan-out instead of federating one delete at a time
+ @account.statuses.reorder(nil).find_in_batches do |statuses|
+ BatchedRemoveStatusService.new.call(statuses)
end
[
diff --git a/app/workers/pubsubhubbub/distribution_worker.rb b/app/workers/pubsubhubbub/distribution_worker.rb
@@ -5,24 +5,44 @@ class Pubsubhubbub::DistributionWorker
sidekiq_options queue: 'push'
- def perform(stream_entry_id)
- stream_entry = StreamEntry.find(stream_entry_id)
+ def perform(stream_entry_ids)
+ stream_entries = StreamEntry.where(id: stream_entry_ids).includes(:status).reject { |e| e.status&.direct_visibility? }
- return if stream_entry.status&.direct_visibility?
+ return if stream_entries.empty?
- @account = stream_entry.account
- @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, [stream_entry]))
+ @account = stream_entries.first.account
+ @subscriptions = active_subscriptions.to_a
+
+ distribute_public!(stream_entries.reject(&:hidden?))
+ distribute_hidden!(stream_entries.reject { |s| !s.hidden? })
+ end
+
+ private
+
+ def distribute_public!(stream_entries)
+ return if stream_entries.empty?
+
+ @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
+
+ Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions) do |subscription|
+ [subscription.id, @payload]
+ end
+ end
+
+ def distribute_hidden!(stream_entries)
+ return if stream_entries.empty?
+
+ @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
@domains = @account.followers_domains
- Subscription.where(account: @account).active.select('id, callback_url').find_each do |subscription|
- next if stream_entry.hidden? && !allowed_to_receive?(subscription.callback_url)
- Pubsubhubbub::DeliveryWorker.perform_async(subscription.id, @payload)
+ Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions.reject { |s| !allowed_to_receive?(s.callback_url) }) do |subscription|
+ [subscription.id, @payload]
end
- rescue ActiveRecord::RecordNotFound
- true
end
- private
+ def active_subscriptions
+ Subscription.where(account: @account).active.select('id, callback_url')
+ end
def allowed_to_receive?(callback_url)
@domains.include?(Addressable::URI.parse(callback_url).host)
diff --git a/spec/services/batched_remove_status_service_spec.rb b/spec/services/batched_remove_status_service_spec.rb
@@ -0,0 +1,61 @@
+require 'rails_helper'
+
+RSpec.describe BatchedRemoveStatusService do
+ subject { BatchedRemoveStatusService.new }
+
+ let!(:alice) { Fabricate(:account) }
+ let!(:bob) { Fabricate(:account, username: 'bob', domain: 'example.com', salmon_url: 'http://example.com/salmon') }
+ let!(:jeff) { Fabricate(:account) }
+
+ let(:status1) { PostStatusService.new.call(alice, 'Hello @bob@example.com') }
+ let(:status2) { PostStatusService.new.call(alice, 'Another status') }
+
+ before do
+ # Spy on Redis publishes; stub remote PuSH hub and Salmon endpoints
+ allow(Redis.current).to receive_messages(publish: nil)
+
+ stub_request(:post, 'http://example.com/push').to_return(status: 200, body: '', headers: {})
+ stub_request(:post, 'http://example.com/salmon').to_return(status: 200, body: '', headers: {})
+
+ Fabricate(:subscription, account: alice, callback_url: 'http://example.com/push', confirmed: true, expires_at: 30.days.from_now)
+ jeff.follow!(alice)
+
+ # Force lazy lets so both statuses exist before the service runs
+ status1
+ status2
+
+ # Exercise the service once; each example below asserts one side effect
+ subject.call([status1, status2])
+ end
+
+ it 'removes statuses from author\'s home feed' do
+ expect(Feed.new(:home, alice).get(10)).to_not include([status1.id, status2.id])
+ end
+
+ it 'removes statuses from local follower\'s home feed' do
+ expect(Feed.new(:home, jeff).get(10)).to_not include([status1.id, status2.id])
+ end
+
+ it 'notifies streaming API of followers' do
+ expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once)
+ end
+
+ it 'notifies streaming API of author' do
+ expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once)
+ end
+
+ it 'notifies streaming API of public timeline' do
+ expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once)
+ end
+
+ it 'sends PuSH update to PuSH subscribers with two payloads united' do
+ # Both deletes should arrive in a single batched hub request
+ expect(a_request(:post, 'http://example.com/push').with { |req|
+ matches = req.body.scan(TagManager::VERBS[:delete])
+ matches.size == 2
+ }).to have_been_made
+ end
+
+ it 'sends Salmon slap to previously mentioned users' do
+ # Exactly one slap: recipients are deduplicated per remote domain
+ expect(a_request(:post, "http://example.com/salmon").with { |req|
+ xml = OStatus2::Salmon.new.unpack(req.body)
+ xml.match(TagManager::VERBS[:delete])
+ }).to have_been_made.once
+ end
+end
diff --git a/spec/workers/pubsubhubbub/distribution_worker_spec.rb b/spec/workers/pubsubhubbub/distribution_worker_spec.rb
@@ -16,10 +16,9 @@ describe Pubsubhubbub::DistributionWorker do
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :public) }
it 'delivers payload to all subscriptions' do
- allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+ allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
subject.perform(status.stream_entry.id)
- expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/)
- expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(anonymous_subscription.id, /.*/)
+ expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([anonymous_subscription, subscription_with_follower])
end
end
@@ -27,10 +26,10 @@ describe Pubsubhubbub::DistributionWorker do
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :private) }
it 'delivers payload only to subscriptions with followers' do
- allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+ allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
subject.perform(status.stream_entry.id)
- expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/)
- expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async).with(anonymous_subscription.id, /.*/)
+ expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([subscription_with_follower])
+ expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk).with([anonymous_subscription])
end
end
@@ -38,9 +37,9 @@ describe Pubsubhubbub::DistributionWorker do
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :direct) }
it 'does not deliver payload' do
- allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+ allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
subject.perform(status.stream_entry.id)
- expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async)
+ expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk)
end
end
end