logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma
commit: 6f5a0cc14e99cb26ef009cf8cf16f38902d5e11b
parent: a2318e9dd6c8daf5554b17e9312f1fb6e14326de
Author: lain <lain@soykaf.club>
Date:   Thu, 13 Jun 2019 13:26:39 +0000

Merge branch 'fix/ogp-title' into 'develop'

Rich Media Parser: Attempt to use <title> from HTML as a fallback

Closes #979

See merge request pleroma/pleroma!1277

Diffstat:

Mlib/pleroma/web/rich_media/parsers/meta_tags_parser.ex33+++++++++++++++++++++++++--------
Atest/fixtures/rich_media/ogp-missing-title.html12++++++++++++
Mtest/web/rich_media/parser_test.exs22++++++++++++++++++++++
3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -1,15 +1,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do - with elements = [_ | _] <- get_elements(html, key_name, prefix), - meta_data = - Enum.reduce(elements, data, fn el, acc -> - attributes = normalize_attributes(el, prefix, key_name, value_name) + meta_data = + html + |> get_elements(key_name, prefix) + |> Enum.reduce(data, fn el, acc -> + attributes = normalize_attributes(el, prefix, key_name, value_name) - Map.merge(acc, attributes) - end) do - {:ok, meta_data} + Map.merge(acc, attributes) + end) + |> maybe_put_title(html) + + if Enum.empty?(meta_data) do + {:error, error_message} else - _e -> {:error, error_message} + {:ok, meta_data} end end @@ -27,4 +31,17 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do %{String.to_atom(data[key_name]) => data[value_name]} end + + defp maybe_put_title(%{title: _} = meta, _), do: meta + + defp maybe_put_title(meta, html) do + case get_page_title(html) do + "" -> meta + title -> Map.put_new(meta, :title, title) + end + end + + defp get_page_title(html) do + Floki.find(html, "title") |> Floki.text() + end end diff --git a/test/fixtures/rich_media/ogp-missing-title.html b/test/fixtures/rich_media/ogp-missing-title.html @@ -0,0 +1,12 @@ +<html prefix="og: http://ogp.me/ns#"> + +<head> + <title>The Rock (1996)</title> + <meta property="og:type" content="video.movie" /> + <meta property="og:url" content="http://www.imdb.com/title/tt0117500/" /> + <meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" /> + <meta property="og:description" + content="Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer."> +</head> + +</html> diff --git a/test/web/rich_media/parser_test.exs b/test/web/rich_media/parser_test.exs @@ -11,6 +11,15 @@ defmodule Pleroma.Web.RichMedia.ParserTest do %{ method: :get, + url: "http://example.com/ogp-missing-title" + } -> + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") + } + + %{ + method: :get, url: "http://example.com/twitter-card" } -> %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} @@ -51,6 +60,19 @@ defmodule Pleroma.Web.RichMedia.ParserTest do }} end + test "falls back to <title> when ogp:title is missing" do + assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") == + {:ok, + %{ + image: "http://ia.media-imdb.com/images/rock.jpg", + title: "The Rock (1996)", + description: + "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", + type: "video.movie", + url: "http://www.imdb.com/title/tt0117500/" + }} + end + test "parses twitter card" do assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") == {:ok,