logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma
commit: 33729bbb2834bfa1f223b11d47dc8e3230d47657
parent: c1c64d6d06fb7307245880d6605d9d9153a4784f
Author: kaniini <ariadne@dereferenced.org>
Date:   Fri, 19 Jul 2019 21:36:36 +0000

Merge branch 'feature/richmedia-ttl' into 'develop'

add the rich media ttl based on image exp time

See merge request pleroma/pleroma!1438

Diffstat:

MCHANGELOG.md1+
Mconfig/config.exs3++-
Adocs/config/howto_set_richmedia_cache_ttl_based_on_image.md33+++++++++++++++++++++++++++++++++
Mlib/pleroma/web/rich_media/parser.ex45+++++++++++++++++++++++++++++++++++++++++++++
Alib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex52++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/pleroma/web/rich_media/parsers/ttl/ttl.ex3+++
Atest/fixtures/rich_media/amz.html5+++++
Atest/web/rich_media/aws_signed_url_test.exs81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 222 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md @@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Configuration: Filter.AnonymizeFilename added ability to retain file extension with custom text - Admin API: changed json structure for saving config settings. - RichMedia: parsers and their order are configured in `rich_media` config. +- RichMedia: add the rich media ttl based on image expiration time. ## [1.0.1] - 2019-07-14 ### Security diff --git a/config/config.exs b/config/config.exs @@ -345,7 +345,8 @@ config :pleroma, :rich_media, Pleroma.Web.RichMedia.Parsers.TwitterCard, Pleroma.Web.RichMedia.Parsers.OGP, Pleroma.Web.RichMedia.Parsers.OEmbed - ] + ], + ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl] config :pleroma, :media_proxy, enabled: false, diff --git a/docs/config/howto_set_richmedia_cache_ttl_based_on_image.md b/docs/config/howto_set_richmedia_cache_ttl_based_on_image.md @@ -0,0 +1,33 @@ +# How to set rich media cache ttl based on image ttl +## Explanation + +Richmedia are cached without the ttl but the rich media may have image which can expire, like aws signed url. +In such cases the old image url (expired) is returned from the media cache. + +So to avoid such situation we can define a module that will set ttl based on image. +The module must adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL` + +### Example + +```exs +defmodule MyModule do + @behaviour Pleroma.Web.RichMedia.Parser.TTL + + @impl Pleroma.Web.RichMedia.Parser.TTL + def ttl(data, url) do + image_url = Map.get(data, :image) + # do some parsing in the url and get the ttl of the image + # return ttl is unix time + parse_ttl_from_url(image_url) + end +end +``` + +And update the config + +```exs +config :pleroma, :rich_media, + ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl, MyModule] +``` + +> For reference there is a parser for AWS signed URL `Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl`, it's enabled by default. diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex @@ -24,6 +24,7 @@ defmodule Pleroma.Web.RichMedia.Parser do Cachex.fetch!(:rich_media_cache, url, fn _ -> {:commit, parse_url(url)} end) + |> set_ttl_based_on_image(url) rescue e -> {:error, "Cachex error: #{inspect(e)}"} @@ -31,6 +32,50 @@ defmodule Pleroma.Web.RichMedia.Parser do end end + @doc """ + Set the rich media cache based on the expiration time of image. + + Adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL` + + ## Example + + defmodule MyModule do + @behaviour Pleroma.Web.RichMedia.Parser.TTL + def ttl(data, url) do + image_url = Map.get(data, :image) + # do some parsing in the url and get the ttl of the image + # and return ttl is unix time + parse_ttl_from_url(image_url) + end + end + + Define the module in the config + + config :pleroma, :rich_media, + ttl_setters: [MyModule] + """ + def set_ttl_based_on_image({:ok, data}, url) do + with {:ok, nil} <- Cachex.ttl(:rich_media_cache, url) do + ttl = get_ttl_from_image(data, url) + Cachex.expire_at(:rich_media_cache, url, ttl * 1000) + {:ok, data} + else + _ -> + {:ok, data} + end + end + + defp get_ttl_from_image(data, url) do + Pleroma.Config.get([:rich_media, :ttl_setters]) + |> Enum.reduce({:ok, nil}, fn + module, {:ok, _ttl} -> + module.ttl(data, url) + + _, error -> + error + end) + end + defp parse_url(url) do try do {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options) diff --git a/lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex b/lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex @@ -0,0 +1,52 @@ +defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do + @behaviour Pleroma.Web.RichMedia.Parser.TTL + + @impl Pleroma.Web.RichMedia.Parser.TTL + def ttl(data, _url) do + image = Map.get(data, :image) + + if is_aws_signed_url(image) do + image + |> parse_query_params() + |> format_query_params() + |> get_expiration_timestamp() + end + end + + defp is_aws_signed_url(""), do: nil + defp is_aws_signed_url(nil), do: nil + + defp is_aws_signed_url(image) when is_binary(image) do + %URI{host: host, query: query} = URI.parse(image) + + if String.contains?(host, "amazonaws.com") and + String.contains?(query, "X-Amz-Expires") do + image + else + nil + end + end + + defp is_aws_signed_url(_), do: nil + + defp parse_query_params(image) do + %URI{query: query} = URI.parse(image) + query + end + + defp format_query_params(query) do + query + |> String.split(~r/&|=/) + |> Enum.chunk_every(2) + |> Map.new(fn [k, v] -> {k, v} end) + end + + defp get_expiration_timestamp(params) when is_map(params) do + {:ok, date} = + params + |> Map.get("X-Amz-Date") + |> Timex.parse("{ISO:Basic:Z}") + + Timex.to_unix(date) + String.to_integer(Map.get(params, "X-Amz-Expires")) + end +end diff --git a/lib/pleroma/web/rich_media/parsers/ttl/ttl.ex b/lib/pleroma/web/rich_media/parsers/ttl/ttl.ex @@ -0,0 +1,3 @@ +defmodule Pleroma.Web.RichMedia.Parser.TTL do + @callback ttl(Map.t(), String.t()) :: {:ok, Integer.t()} | {:error, String.t()} +end diff --git a/test/fixtures/rich_media/amz.html b/test/fixtures/rich_media/amz.html @@ -0,0 +1,5 @@ +<meta name="twitter:card" content="summary" /> +<meta name="twitter:site" content="@flickr" /> +<meta name="twitter:title" content="Small Island Developing States Photo Submission" /> +<meta name="twitter:description" content="View the album on Flickr." /> +<meta name="twitter:image" content="https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=20190716T175105Z&X-Amz-Expires=300000&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host" /> diff --git a/test/web/rich_media/aws_signed_url_test.exs b/test/web/rich_media/aws_signed_url_test.exs @@ -0,0 +1,81 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.RichMedia.TTL.AwsSignedUrlTest do + use ExUnit.Case, async: true + + test "s3 signed url is parsed correct for expiration time" do + url = "https://pleroma.social/amz" + + {:ok, timestamp} = + Timex.now() + |> DateTime.truncate(:second) + |> Timex.format("{ISO:Basic:Z}") + + # in seconds + valid_till = 30 + + metadata = construct_metadata(timestamp, valid_till, url) + + expire_time = + Timex.parse!(timestamp, "{ISO:Basic:Z}") |> Timex.to_unix() |> Kernel.+(valid_till) + + assert expire_time == Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl.ttl(metadata, url) + end + + test "s3 signed url is parsed and correct ttl is set for rich media" do + url = "https://pleroma.social/amz" + + {:ok, timestamp} = + Timex.now() + |> DateTime.truncate(:second) + |> Timex.format("{ISO:Basic:Z}") + + # in seconds + valid_till = 30 + + metadata = construct_metadata(timestamp, valid_till, url) + + body = """ + <meta name="twitter:card" content="Pleroma" /> + <meta name="twitter:site" content="Pleroma" /> + <meta name="twitter:title" content="Pleroma" /> + <meta name="twitter:description" content="Pleroma" /> + <meta name="twitter:image" content="#{Map.get(metadata, :image)}" /> + """ + + Tesla.Mock.mock(fn + %{ + method: :get, + url: "https://pleroma.social/amz" + } -> + %Tesla.Env{status: 200, body: body} + end) + + Cachex.put(:rich_media_cache, url, metadata) + + Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image({:ok, metadata}, url) + + {:ok, cache_ttl} = Cachex.ttl(:rich_media_cache, url) + + # as there is delay in setting and pulling the data from cache we ignore 1 second + assert_in_delta(valid_till * 1000, cache_ttl, 1000) + end + + defp construct_s3_url(timestamp, valid_till) do + "https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=#{ + timestamp + }&X-Amz-Expires=#{valid_till}&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host" + end + + defp construct_metadata(timestamp, valid_till, url) do + %{ + image: construct_s3_url(timestamp, valid_till), + site: "Pleroma", + title: "Pleroma", + description: "Pleroma", + url: url + } + end +end