commit: 61ed133fea80041b354c78b043cec72dd8644101
parent: c1e77b56a92fc075f000af9c263c72ba6bdbe5f7
Author: Eugen Rochko <eugen@zeonfederated.com>
Date: Wed, 21 Feb 2018 23:21:32 +0100
Account archive download (#6460)
* Fix #201: Account archive download
* Export actor and private key in the archive
* Optimize BackupService
- Add conversation to cached associations of status, because
somehow it was forgotten and is a source of N+1 queries
- Explicitly call GC between batches of records being fetched
(Model class allocations are the worst offender)
- Stream media files into the tar in 1MB chunks
(Do not load an entire media file (up to 8MB) into memory as a string)
- Use #bytesize instead of #size to calculate file size for JSON
(Fix FileOverflow error)
- Segment media into subfolders by status ID because apparently
GIF-to-MP4 media are all named "media.mp4" for some reason
* Keep uniquely generated filename in Paperclip::GifTranscoder
* Ensure dumped files do not overwrite each other by maintaining directory partitions
* Give tar archives a good name
* Add scheduler to remove week-old backups
* Fix code style issue
Diffstat:
27 files changed, 375 insertions(+), 7 deletions(-)
diff --git a/Gemfile b/Gemfile
@@ -116,6 +116,7 @@ group :development do
gem 'bullet', '~> 5.5'
gem 'letter_opener', '~> 1.4'
gem 'letter_opener_web', '~> 1.3'
+ gem 'memory_profiler'
gem 'rubocop', require: false
gem 'brakeman', '~> 4.0', require: false
gem 'bundler-audit', '~> 0.6', require: false
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -301,6 +301,7 @@ GEM
mini_mime (>= 0.1.1)
mario-redis-lock (1.2.0)
redis (~> 3, >= 3.0.5)
+ memory_profiler (0.9.10)
method_source (0.9.0)
microformats (4.0.7)
json
@@ -664,6 +665,7 @@ DEPENDENCIES
link_header (~> 0.0)
lograge (~> 0.7)
mario-redis-lock (~> 1.2)
+ memory_profiler
microformats (~> 4.0)
mime-types (~> 3.1)
nokogiri (~> 1.8)
diff --git a/app/controllers/settings/exports_controller.rb b/app/controllers/settings/exports_controller.rb
@@ -1,11 +1,23 @@
# frozen_string_literal: true
class Settings::ExportsController < ApplicationController
+ include Authorization
+
layout 'admin'
before_action :authenticate_user!
def show
- @export = Export.new(current_account)
+ @export = Export.new(current_account)
+ @backups = current_user.backups
+ end
+
+ def create
+ authorize :backup, :create?
+
+ backup = current_user.backups.create!
+ BackupWorker.perform_async(backup.id)
+
+ redirect_to settings_export_path
end
end
diff --git a/app/javascript/images/icon_file_download.svg b/app/javascript/images/icon_file_download.svg
@@ -0,0 +1,4 @@
+<svg fill="#FFFFFF" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
+ <path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"/>
+ <path d="M0 0h24v24H0z" fill="none"/>
+</svg>+
\ No newline at end of file
diff --git a/app/javascript/images/mailer/icon_file_download.png b/app/javascript/images/mailer/icon_file_download.png
Binary files differ.
diff --git a/app/mailers/user_mailer.rb b/app/mailers/user_mailer.rb
@@ -66,4 +66,16 @@ class UserMailer < Devise::Mailer
mail to: @resource.email, subject: I18n.t('user_mailer.welcome.subject')
end
end
+
+ def backup_ready(user, backup)
+ @resource = user
+ @instance = Rails.configuration.x.local_domain
+ @backup = backup
+
+ return if @resource.disabled?
+
+ I18n.with_locale(@resource.locale || I18n.default_locale) do
+ mail to: @resource.email, subject: I18n.t('user_mailer.backup_ready.subject')
+ end
+ end
end
diff --git a/app/models/backup.rb b/app/models/backup.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+# == Schema Information
+#
+# Table name: backups
+#
+# id :integer not null, primary key
+# user_id :integer
+# dump_file_name :string
+# dump_content_type :string
+# dump_file_size :integer
+# dump_updated_at :datetime
+# processed :boolean default(FALSE), not null
+# created_at :datetime not null
+# updated_at :datetime not null
+#
+
+class Backup < ApplicationRecord
+ belongs_to :user, inverse_of: :backups
+
+ has_attached_file :dump
+ do_not_validate_attachment_file_type :dump
+end
diff --git a/app/models/status.rb b/app/models/status.rb
@@ -76,7 +76,7 @@ class Status < ApplicationRecord
scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) }
scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) }
- cache_associated :account, :application, :media_attachments, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, mentions: :account], thread: :account
+ cache_associated :account, :application, :media_attachments, :conversation, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, :conversation, mentions: :account], thread: :account
delegate :domain, to: :account, prefix: true
diff --git a/app/models/user.rb b/app/models/user.rb
@@ -60,6 +60,7 @@ class User < ApplicationRecord
accepts_nested_attributes_for :account
has_many :applications, class_name: 'Doorkeeper::Application', as: :owner
+ has_many :backups, inverse_of: :user
validates :locale, inclusion: I18n.available_locales.map(&:to_s), if: :locale?
validates_with BlacklistedEmailValidator, if: :email_changed?
diff --git a/app/policies/application_policy.rb b/app/policies/application_policy.rb
@@ -15,4 +15,8 @@ class ApplicationPolicy
def current_user
current_account&.user
end
+
+ def user_signed_in?
+ !current_user.nil?
+ end
end
diff --git a/app/policies/backup_policy.rb b/app/policies/backup_policy.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+class BackupPolicy < ApplicationPolicy
+ MIN_AGE = 1.week
+
+ def create?
+ user_signed_in? && current_user.backups.where('created_at >= ?', MIN_AGE.ago).count.zero?
+ end
+end
diff --git a/app/serializers/activitypub/collection_serializer.rb b/app/serializers/activitypub/collection_serializer.rb
@@ -13,8 +13,8 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer
attribute :part_of, if: -> { object.part_of.present? }
has_one :first, if: -> { object.first.present? }
- has_many :items, key: :items, if: -> { (object.items.present? || page?) && !ordered? }
- has_many :items, key: :ordered_items, if: -> { (object.items.present? || page?) && ordered? }
+ has_many :items, key: :items, if: -> { (!object.items.nil? || page?) && !ordered? }
+ has_many :items, key: :ordered_items, if: -> { (!object.items.nil? || page?) && ordered? }
def type
if page?
diff --git a/app/services/backup_service.rb b/app/services/backup_service.rb
@@ -0,0 +1,128 @@
+# frozen_string_literal: true
+
+require 'rubygems/package'
+
+class BackupService < BaseService
+ attr_reader :account, :backup, :collection
+
+ def call(backup)
+ @backup = backup
+ @account = backup.user.account
+
+ build_json!
+ build_archive!
+ end
+
+ private
+
+ def build_json!
+ @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer)
+
+ account.statuses.with_includes.find_in_batches do |statuses|
+ statuses.each do |status|
+ item = serialize(status, ActivityPub::ActivitySerializer)
+ item.delete(:'@context')
+
+ unless item[:type] == 'Announce' || item[:object][:attachment].blank?
+ item[:object][:attachment].each do |attachment|
+ attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '')
+ end
+ end
+
+ @collection[:orderedItems] << item
+ end
+
+ GC.start
+ end
+ end
+
+ def build_archive!
+ tmp_file = Tempfile.new(%w(archive .tar.gz))
+
+ File.open(tmp_file, 'wb') do |file|
+ Zlib::GzipWriter.wrap(file) do |gz|
+ Gem::Package::TarWriter.new(gz) do |tar|
+ dump_media_attachments!(tar)
+ dump_outbox!(tar)
+ dump_actor!(tar)
+ end
+ end
+ end
+
+ archive_filename = ['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(2)].join('-') + '.tar.gz'
+
+ @backup.dump = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename)
+ @backup.processed = true
+ @backup.save!
+ ensure
+ tmp_file.close
+ tmp_file.unlink
+ end
+
+ def dump_media_attachments!(tar)
+ MediaAttachment.attached.where(account: account).find_in_batches do |media_attachments|
+ media_attachments.each do |m|
+ download_to_tar(tar, m.file, m.file.path)
+ end
+
+ GC.start
+ end
+ end
+
+ def dump_outbox!(tar)
+ json = Oj.dump(collection)
+
+ tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io|
+ io.write(json)
+ end
+ end
+
+ def dump_actor!(tar)
+ actor = serialize(account, ActivityPub::ActorSerializer)
+
+ actor[:icon][:url] = 'avatar' + File.extname(actor[:icon][:url]) if actor[:icon]
+ actor[:image][:url] = 'header' + File.extname(actor[:image][:url]) if actor[:image]
+
+ download_to_tar(tar, account.avatar, 'avatar' + File.extname(account.avatar.path)) if account.avatar.exists?
+ download_to_tar(tar, account.header, 'header' + File.extname(account.header.path)) if account.header.exists?
+
+ json = Oj.dump(actor)
+
+ tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io|
+ io.write(json)
+ end
+
+ tar.add_file_simple('key.pem', 0o444, account.private_key.bytesize) do |io|
+ io.write(account.private_key)
+ end
+ end
+
+ def collection_presenter
+ ActivityPub::CollectionPresenter.new(
+ id: account_outbox_url(account),
+ type: :ordered,
+ size: account.statuses_count,
+ items: []
+ )
+ end
+
+ def serialize(object, serializer)
+ ActiveModelSerializers::SerializableResource.new(
+ object,
+ serializer: serializer,
+ adapter: ActivityPub::Adapter
+ ).as_json
+ end
+
+ CHUNK_SIZE = 1.megabyte
+
+ def download_to_tar(tar, attachment, filename)
+ adapter = Paperclip.io_adapters.for(attachment)
+
+ tar.add_file_simple(filename, 0o444, adapter.size) do |io|
+ while (buffer = adapter.read(CHUNK_SIZE))
+ io.write(buffer)
+ end
+ end
+ end
+end
diff --git a/app/views/settings/exports/show.html.haml b/app/views/settings/exports/show.html.haml
@@ -20,3 +20,26 @@
%th= t('exports.mutes')
%td= @export.total_mutes
%td= table_link_to 'download', t('exports.csv'), settings_exports_mutes_path(format: :csv)
+
+%p.muted-hint= t('exports.archive_takeout.hint_html')
+
+- if policy(:backup).create?
+ %p= link_to t('exports.archive_takeout.request'), settings_export_path, class: 'button', method: :post
+
+- unless @backups.empty?
+ .table-wrapper
+ %table.table
+ %thead
+ %tr
+ %th= t('exports.archive_takeout.date')
+ %th= t('exports.archive_takeout.size')
+ %th
+ %tbody
+ - @backups.each do |backup|
+ %tr
+ %td= l backup.created_at
+ - if backup.processed?
+ %td= number_to_human_size backup.dump_file_size
+ %td= table_link_to 'download', t('exports.archive_takeout.download'), backup.dump.url
+ - else
+ %td{ colspan: 2 }= t('exports.archive_takeout.in_progress')
diff --git a/app/views/user_mailer/backup_ready.html.haml b/app/views/user_mailer/backup_ready.html.haml
@@ -0,0 +1,59 @@
+%table.email-table{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.email-body
+ .email-container
+ %table.content-section{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.content-cell.hero
+ .email-row
+ .col-6
+ %table.column{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.column-cell.text-center.padded
+ %table.hero-icon{ align: 'center', cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td
+ = image_tag full_pack_url('icon_file_download.png'), alt: ''
+
+ %h1= t 'user_mailer.backup_ready.title'
+
+%table.email-table{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.email-body
+ .email-container
+ %table.content-section{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.content-cell.content-start
+ .email-row
+ .col-6
+ %table.column{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.column-cell.text-center
+ %p= t 'user_mailer.backup_ready.explanation'
+
+%table.email-table{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.email-body
+ .email-container
+ %table.content-section{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.content-cell
+ %table.column{ cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.column-cell.button-cell
+ %table.button{ align: 'center', cellspacing: 0, cellpadding: 0 }
+ %tbody
+ %tr
+ %td.button-primary
+ = link_to full_asset_url(@backup.dump.url) do
+ %span= t 'exports.archive_takeout.download'
diff --git a/app/views/user_mailer/backup_ready.text.erb b/app/views/user_mailer/backup_ready.text.erb
@@ -0,0 +1,7 @@
+<%= t 'user_mailer.backup_ready.title' %>
+
+===
+
+<%= t 'user_mailer.backup_ready.explanation' %>
+
+=> <%= full_asset_url(@backup.dump.url) %>
diff --git a/app/workers/backup_worker.rb b/app/workers/backup_worker.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+class BackupWorker
+ include Sidekiq::Worker
+
+ sidekiq_options queue: 'pull'
+
+ def perform(backup_id)
+ backup = Backup.find(backup_id)
+ user = backup.user
+
+ BackupService.new.call(backup)
+
+ user.backups.where.not(id: backup.id).destroy_all
+ UserMailer.backup_ready(user, backup).deliver_later
+ end
+end
diff --git a/app/workers/scheduler/backup_cleanup_scheduler.rb b/app/workers/scheduler/backup_cleanup_scheduler.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+require 'sidekiq-scheduler'
+
+class Scheduler::BackupCleanupScheduler
+ include Sidekiq::Worker
+
+ def perform
+ old_backups.find_each(&:destroy!)
+ end
+
+ private
+
+ def old_backups
+ Backup.where('created_at < ?', 7.days.ago)
+ end
+end
diff --git a/config/locales/en.yml b/config/locales/en.yml
@@ -421,6 +421,13 @@ en:
title: This page is not correct
noscript_html: To use the Mastodon web application, please enable JavaScript. Alternatively, try one of the <a href="https://github.com/tootsuite/documentation/blob/master/Using-Mastodon/Apps.md">native apps</a> for Mastodon for your platform.
exports:
+ archive_takeout:
+ date: Date
+ download: Download your archive
+ hint_html: You can request an archive of your <strong>toots and uploaded media</strong>. The exported data will be in ActivityPub format, readable by any compliant software.
+ in_progress: Compiling your archive...
+ request: Request your archive
+ size: Size
blocks: You block
csv: CSV
follows: You follow
@@ -733,6 +740,10 @@ en:
setup: Set up
wrong_code: The entered code was invalid! Are server time and device time correct?
user_mailer:
+ backup_ready:
+ explanation: You requested a full backup of your Mastodon account. It's now ready for download!
+ subject: Your archive is ready for download
+ title: Archive takeout
welcome:
edit_profile_action: Setup profile
edit_profile_step: You can customize your profile by uploading an avatar, header, changing your display name and more. If you’d like to review new followers before they’re allowed to follow you, you can lock your account.
diff --git a/config/routes.rb b/config/routes.rb
@@ -76,7 +76,7 @@ Rails.application.routes.draw do
resource :notifications, only: [:show, :update]
resource :import, only: [:show, :create]
- resource :export, only: [:show]
+ resource :export, only: [:show, :create]
namespace :exports, constraints: { format: :csv } do
resources :follows, only: :index, controller: :following_accounts
resources :blocks, only: :index, controller: :blocked_accounts
diff --git a/config/sidekiq.yml b/config/sidekiq.yml
@@ -30,3 +30,6 @@
email_scheduler:
cron: '0 10 * * 2'
class: Scheduler::EmailScheduler
+ backup_cleanup_scheduler:
+ cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
+ class: Scheduler::BackupCleanupScheduler
diff --git a/db/migrate/20180211015820_create_backups.rb b/db/migrate/20180211015820_create_backups.rb
@@ -0,0 +1,11 @@
+class CreateBackups < ActiveRecord::Migration[5.1]
+ def change
+ create_table :backups do |t|
+ t.references :user, foreign_key: { on_delete: :nullify }
+ t.attachment :dump
+ t.boolean :processed, null: false, default: false
+
+ t.timestamps
+ end
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20180206000000) do
+ActiveRecord::Schema.define(version: 20180211015820) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@@ -92,6 +92,18 @@ ActiveRecord::Schema.define(version: 20180206000000) do
t.index ["target_type", "target_id"], name: "index_admin_action_logs_on_target_type_and_target_id"
end
+ create_table "backups", force: :cascade do |t|
+ t.bigint "user_id"
+ t.string "dump_file_name"
+ t.string "dump_content_type"
+ t.integer "dump_file_size"
+ t.datetime "dump_updated_at"
+ t.boolean "processed", default: false, null: false
+ t.datetime "created_at", null: false
+ t.datetime "updated_at", null: false
+ t.index ["user_id"], name: "index_backups_on_user_id"
+ end
+
create_table "blocks", force: :cascade do |t|
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
diff --git a/lib/paperclip/gif_transcoder.rb b/lib/paperclip/gif_transcoder.rb
@@ -16,7 +16,7 @@ module Paperclip
final_file = Paperclip::Transcoder.make(file, options, attachment)
- attachment.instance.file_file_name = 'media.mp4'
+ attachment.instance.file_file_name = File.basename(attachment.instance.file_file_name, '.*') + '.mp4'
attachment.instance.file_content_type = 'video/mp4'
attachment.instance.type = MediaAttachment.types[:gifv]
diff --git a/spec/fabricators/backup_fabricator.rb b/spec/fabricators/backup_fabricator.rb
@@ -0,0 +1,3 @@
+Fabricator(:backup) do
+ user
+end
diff --git a/spec/mailers/previews/user_mailer_preview.rb b/spec/mailers/previews/user_mailer_preview.rb
@@ -34,4 +34,9 @@ class UserMailerPreview < ActionMailer::Preview
def welcome
UserMailer.welcome(User.first)
end
+
+ # Preview this email at http://localhost:3000/rails/mailers/user_mailer/backup_ready
+ def backup_ready
+ UserMailer.backup_ready(User.first, Backup.first)
+ end
end
diff --git a/spec/models/backup_spec.rb b/spec/models/backup_spec.rb
@@ -0,0 +1,5 @@
+require 'rails_helper'
+
+RSpec.describe Backup, type: :model do
+
+end