commit: 1e49bfa9acb4176a161181edcaf04e886c638c42
parent: 448e93ce2c1eab9eb91118b90f661c31056e4781
Author: lain <lain@soykaf.club>
Date: Mon, 15 Jun 2020 12:41:48 +0000
Merge branch 'merge-ogp-twitter-parsers' into 'develop'
Merge OGP parser with TwitterCard
Closes #1835
See merge request pleroma/pleroma!2642
Diffstat:
9 files changed, 93 insertions(+), 100 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Changed
- MFR policy to set global expiration for all local Create activities
+- OGP rich media parser merged with TwitterCard
<details>
<summary>API Changes</summary>
- **Breaking:** Emoji API: changed methods and renamed routes.
diff --git a/config/config.exs b/config/config.exs
@@ -387,7 +387,6 @@ config :pleroma, :rich_media,
ignore_tld: ["local", "localdomain", "lan"],
parsers: [
Pleroma.Web.RichMedia.Parsers.TwitterCard,
- Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.OEmbed
],
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
diff --git a/config/description.exs b/config/description.exs
@@ -2104,9 +2104,7 @@ config :pleroma, :config_description, [
description:
"List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
suggestions: [
- Pleroma.Web.RichMedia.Parsers.MetaTagsParser,
Pleroma.Web.RichMedia.Parsers.OEmbed,
- Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.TwitterCard
]
},
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
@@ -105,8 +105,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp maybe_parse(html) do
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
case parser.parse(html, acc) do
- {:ok, data} -> {:halt, data}
- {:error, _msg} -> {:cont, acc}
+ data when data != %{} -> {:halt, data}
+ _ -> {:cont, acc}
end
end)
end
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@@ -3,22 +3,15 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
- def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
- meta_data =
- html
- |> get_elements(key_name, prefix)
- |> Enum.reduce(data, fn el, acc ->
- attributes = normalize_attributes(el, prefix, key_name, value_name)
-
- Map.merge(acc, attributes)
- end)
- |> maybe_put_title(html)
-
- if Enum.empty?(meta_data) do
- {:error, error_message}
- else
- {:ok, meta_data}
- end
+ def parse(data, html, prefix, key_name, value_name \\ "content") do
+ html
+ |> get_elements(key_name, prefix)
+ |> Enum.reduce(data, fn el, acc ->
+ attributes = normalize_attributes(el, prefix, key_name, value_name)
+
+ Map.merge(acc, attributes)
+ end)
+ |> maybe_put_title(html)
end
defp get_elements(html, key_name, prefix) do
diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
@@ -7,9 +7,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
with elements = [_ | _] <- get_discovery_data(html),
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
- {:ok, oembed_data}
+ oembed_data
else
- _e -> {:error, "No OEmbed data found"}
+ _e -> %{}
end
end
diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex
@@ -3,13 +3,8 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
- def parse(html, data) do
- Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
- html,
- data,
- "og",
- "No OGP metadata found",
- "property"
- )
+ @deprecated "OGP parser is deprecated. Use TwitterCard instead."
+ def parse(_html, _data) do
+ %{}
end
end
diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
@@ -5,18 +5,11 @@
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
- @spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
+ @spec parse(list(), map()) :: map()
def parse(html, data) do
data
- |> parse_name_attrs(html)
- |> parse_property_attrs(html)
- end
-
- defp parse_name_attrs(data, html) do
- MetaTagsParser.parse(html, data, "twitter", %{}, "name")
- end
-
- defp parse_property_attrs({_, data}, html) do
- MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
+ |> MetaTagsParser.parse(html, "og", "property")
+ |> MetaTagsParser.parse(html, "twitter", "name")
+ |> MetaTagsParser.parse(html, "twitter", "property")
end
end
diff --git a/test/web/rich_media/parsers/twitter_card_test.exs b/test/web/rich_media/parsers/twitter_card_test.exs
@@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
alias Pleroma.Web.RichMedia.Parsers.TwitterCard
test "returns error when html not contains twitter card" do
- assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) ==
- {:error, "No twitter card metadata found"}
+ assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
end
test "parses twitter card with only name attributes" do
@@ -17,15 +16,21 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "site" => nil,
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
- }}
+ %{
+ "app:id:googleplay" => "com.nytimes.android",
+ "app:name:googleplay" => "NYTimes",
+ "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "site" => nil,
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "type" => "article",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
+ }
end
test "parses twitter card with only property attributes" do
@@ -34,19 +39,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "card" => "summary_large_image",
- "description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
- "image:alt" => "",
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
- }}
+ %{
+ "card" => "summary_large_image",
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "image:alt" => "",
+ "title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "type" => "article"
+ }
end
test "parses twitter card with name & property attributes" do
@@ -55,23 +60,23 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "card" => "summary_large_image",
- "description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
- "image:alt" => "",
- "site" => nil,
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
- }}
+ %{
+ "app:id:googleplay" => "com.nytimes.android",
+ "app:name:googleplay" => "NYTimes",
+ "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "card" => "summary_large_image",
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "image:alt" => "",
+ "site" => nil,
+ "title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "type" => "article"
+ }
end
test "respect only first title tag on the page" do
@@ -84,14 +89,17 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "site" => "@atlasobscura",
- "title" =>
- "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
- "card" => "summary_large_image",
- "image" => image_path
- }}
+ %{
+ "site" => "@atlasobscura",
+ "title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
+ "card" => "summary_large_image",
+ "image" => image_path,
+ "description" =>
+ "She's the only woman veteran honored with a monument at West Point. But where was she buried?",
+ "site_name" => "Atlas Obscura",
+ "type" => "article",
+ "url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
+ }
end
test "takes first founded title in html head if there is html markup error" do
@@ -100,14 +108,20 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
- {:ok,
- %{
- "site" => nil,
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622"
- }}
+ %{
+ "site" => nil,
+ "title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "app:id:googleplay" => "com.nytimes.android",
+ "app:name:googleplay" => "NYTimes",
+ "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "type" => "article",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+ }
end
end