logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma
commit: 1e49bfa9acb4176a161181edcaf04e886c638c42
parent: 448e93ce2c1eab9eb91118b90f661c31056e4781
Author: lain <lain@soykaf.club>
Date:   Mon, 15 Jun 2020 12:41:48 +0000

Merge branch 'merge-ogp-twitter-parsers' into 'develop'

Merge OGP parser with TwitterCard

Closes #1835

See merge request pleroma/pleroma!2642

Diffstat:

MCHANGELOG.md1+
Mconfig/config.exs1-
Mconfig/description.exs2--
Mlib/pleroma/web/rich_media/parser.ex4++--
Mlib/pleroma/web/rich_media/parsers/meta_tags_parser.ex25+++++++++----------------
Mlib/pleroma/web/rich_media/parsers/oembed_parser.ex4++--
Mlib/pleroma/web/rich_media/parsers/ogp.ex11+++--------
Mlib/pleroma/web/rich_media/parsers/twitter_card.ex15++++-----------
Mtest/web/rich_media/parsers/twitter_card_test.exs130++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
9 files changed, 93 insertions(+), 100 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - MFR policy to set global expiration for all local Create activities +- OGP rich media parser merged with TwitterCard <details> <summary>API Changes</summary> - **Breaking:** Emoji API: changed methods and renamed routes. diff --git a/config/config.exs b/config/config.exs @@ -387,7 +387,6 @@ config :pleroma, :rich_media, ignore_tld: ["local", "localdomain", "lan"], parsers: [ Pleroma.Web.RichMedia.Parsers.TwitterCard, - Pleroma.Web.RichMedia.Parsers.OGP, Pleroma.Web.RichMedia.Parsers.OEmbed ], ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl] diff --git a/config/description.exs b/config/description.exs @@ -2104,9 +2104,7 @@ config :pleroma, :config_description, [ description: "List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.", suggestions: [ - Pleroma.Web.RichMedia.Parsers.MetaTagsParser, Pleroma.Web.RichMedia.Parsers.OEmbed, - Pleroma.Web.RichMedia.Parsers.OGP, Pleroma.Web.RichMedia.Parsers.TwitterCard ] }, diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex @@ -105,8 +105,8 @@ defmodule Pleroma.Web.RichMedia.Parser do defp maybe_parse(html) do Enum.reduce_while(parsers(), %{}, fn parser, acc -> case parser.parse(html, acc) do - {:ok, data} -> {:halt, data} - {:error, _msg} -> {:cont, acc} + data when data != %{} -> {:halt, data} + _ -> {:cont, acc} end end) end diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -3,22 +3,15 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do - def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do - meta_data = - html - |> get_elements(key_name, prefix) - |> Enum.reduce(data, fn el, acc -> - attributes = normalize_attributes(el, prefix, key_name, value_name) - - Map.merge(acc, attributes) - end) - |> maybe_put_title(html) - - if Enum.empty?(meta_data) do - {:error, error_message} - else - {:ok, meta_data} - end + def parse(data, html, prefix, key_name, value_name \\ "content") do + html + |> get_elements(key_name, prefix) + |> Enum.reduce(data, fn el, acc -> + attributes = normalize_attributes(el, prefix, key_name, value_name) + + Map.merge(acc, attributes) + end) + |> maybe_put_title(html) end defp get_elements(html, key_name, prefix) do diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex @@ -7,9 +7,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do with elements = [_ | _] <- get_discovery_data(html), oembed_url when is_binary(oembed_url) <- get_oembed_url(elements), {:ok, oembed_data} <- get_oembed_data(oembed_url) do - {:ok, oembed_data} + oembed_data else - _e -> {:error, "No OEmbed data found"} + _e -> %{} end end diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -3,13 +3,8 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parsers.OGP do - def parse(html, data) do - Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse( - html, - data, - "og", - "No OGP metadata found", - "property" - ) + @deprecated "OGP parser is deprecated. Use TwitterCard instead." + def parse(_html, _data) do + %{} end end diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex @@ -5,18 +5,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser - @spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()} + @spec parse(list(), map()) :: map() def parse(html, data) do data - |> parse_name_attrs(html) - |> parse_property_attrs(html) - end - - defp parse_name_attrs(data, html) do - MetaTagsParser.parse(html, data, "twitter", %{}, "name") - end - - defp parse_property_attrs({_, data}, html) do - MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property") + |> MetaTagsParser.parse(html, "og", "property") + |> MetaTagsParser.parse(html, "twitter", "name") + |> MetaTagsParser.parse(html, "twitter", "property") end end diff --git a/test/web/rich_media/parsers/twitter_card_test.exs b/test/web/rich_media/parsers/twitter_card_test.exs @@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do alias Pleroma.Web.RichMedia.Parsers.TwitterCard test "returns error when html not contains twitter card" do - assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == - {:error, "No twitter card metadata found"} + assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{} end test "parses twitter card with only name attributes" do @@ -17,15 +16,21 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do |> Floki.parse_document!() assert TwitterCard.parse(html, %{}) == - {:ok, - %{ - "app:id:googleplay" => "com.nytimes.android", - "app:name:googleplay" => "NYTimes", - "app:url:googleplay" => "nytimes://reader/id/100000006583622", - "site" => nil, - "title" => - "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times" - }} + %{ + "app:id:googleplay" => "com.nytimes.android", + "app:name:googleplay" => "NYTimes", + "app:url:googleplay" => "nytimes://reader/id/100000006583622", + "site" => nil, + "description" => + "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", + "image" => + "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg", + "type" => "article", + "url" => + "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html", + "title" => + "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database." + } end test "parses twitter card with only property attributes" do @@ -34,19 +39,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do |> Floki.parse_document!() assert TwitterCard.parse(html, %{}) == - {:ok, - %{ - "card" => "summary_large_image", - "description" => - "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", - "image" => - "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", - "image:alt" => "", - "title" => - "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", - "url" => - "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" - }} + %{ + "card" => "summary_large_image", + "description" => + "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", + "image" => + "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", + "image:alt" => "", + "title" => + "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", + "url" => + "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html", + "type" => "article" + } end test "parses twitter card with name & property attributes" do @@ -55,23 +60,23 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do |> Floki.parse_document!() assert TwitterCard.parse(html, %{}) == - {:ok, - %{ - "app:id:googleplay" => "com.nytimes.android", - "app:name:googleplay" => "NYTimes", - "app:url:googleplay" => "nytimes://reader/id/100000006583622", - "card" => "summary_large_image", - "description" => - "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", - "image" => - "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", - "image:alt" => "", - "site" => nil, - "title" => - "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", - "url" => - "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" - }} + %{ + "app:id:googleplay" => "com.nytimes.android", + "app:name:googleplay" => "NYTimes", + "app:url:googleplay" => "nytimes://reader/id/100000006583622", + "card" => "summary_large_image", + "description" => + "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", + "image" => + "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", + "image:alt" => "", + "site" => nil, + "title" => + "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", + "url" => + "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html", + "type" => "article" + } end test "respect only first title tag on the page" do @@ -84,14 +89,17 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!() assert TwitterCard.parse(html, %{}) == - {:ok, - %{ - "site" => "@atlasobscura", - "title" => - "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura", - "card" => "summary_large_image", - "image" => image_path - }} + %{ + "site" => "@atlasobscura", + "title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran", + "card" => "summary_large_image", + "image" => image_path, + "description" => + "She's the only woman veteran honored with a monument at West Point. But where was she buried?", + "site_name" => "Atlas Obscura", + "type" => "article", + "url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point" + } end test "takes first founded title in html head if there is html markup error" do @@ -100,14 +108,20 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do |> Floki.parse_document!() assert TwitterCard.parse(html, %{}) == - {:ok, - %{ - "site" => nil, - "title" => - "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times", - "app:id:googleplay" => "com.nytimes.android", - "app:name:googleplay" => "NYTimes", - "app:url:googleplay" => "nytimes://reader/id/100000006583622" - }} + %{ + "site" => nil, + "title" => + "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", + "app:id:googleplay" => "com.nytimes.android", + "app:name:googleplay" => "NYTimes", + "app:url:googleplay" => "nytimes://reader/id/100000006583622", + "description" => + "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", + "image" => + "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg", + "type" => "article", + "url" => + "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" + } end end