Add ArteTVCategoryIE to support category playlists - youtube-dl - [mirror] Download/Watch videos from video hosters

commit: ddc080a562cce984ac4a86969f511b1ae59421bf
parent 16a3fe2ba6b4c86e60bca930253c81c8efdd676b
Author: df <fieldhouse@gmx.net>
Date:   Mon, 18 Oct 2021 15:54:26 +0100

Add ArteTVCategoryIE to support category playlists

Diffstat:
M youtube_dl/extractor/arte.py 47 +++++++++++++++++++++++++++++++++++++++++++++++
M youtube_dl/extractor/extractors.py 1 +

2 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
@@ -12,6 +12,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
+    strip_or_none,
     try_get,
     unified_strdate,
     url_or_none,
@@ -252,3 +253,49 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
         title = collection.get('title')
         description = collection.get('shortDescription') or collection.get('teaserText')
         return self.playlist_result(entries, playlist_id, title, description)
+
+
+class ArteTVCategoryIE(ArteTVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
+    _TESTS = [{
+        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
+        'info_dict': {
+            'id': 'politics-and-society',
+            'title': 'Politics and society',
+            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
+        },
+        'playlist_mincount': 13,
+    },
+    ]
+
+    @classmethod
+    def suitable(cls, url):
+        return (
+            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
+            and super(ArteTVCategoryIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        lang, playlist_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, playlist_id)
+
+        items = []
+        for video in re.finditer(
+                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
+                webpage):
+            video = video.group('url')
+            if video == url:
+                continue
+            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
+                items.append(video)
+
+        if items:
+            title = (self._og_search_title(webpage, default=None)
+                     or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
+            title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
+
+            result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title)
+            if result:
+                description = self._og_search_description(webpage, default=None)
+                if description:
+                    result['description'] = description
+                return result
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -71,6 +71,7 @@ from .arte import (
     ArteTVIE,
     ArteTVEmbedIE,
     ArteTVPlaylistIE,
+    ArteTVCategoryIE,
 )
 from .arnes import ArnesIE
 from .asiancrush import (

M	youtube_dl/extractor/arte.py	47	+++++++++++++++++++++++++++++++++++++++++++++++
M	youtube_dl/extractor/extractors.py	1	+