[YoutubeDL] Fix --ignore-errors for playlists with generator-based entries of url_transparent (closes #27064) - youtube-dl - [mirror] Download/Watch videos from video hosters

commit: ab0eda99e1d1c6cd6aa697f4931c439bec350bd0
parent ec99f4710877731da4619617a89cf1dd45a2fc2a
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 21 Nov 2020 04:00:08 +0700

[YoutubeDL] Fix --ignore-errors for playlists with generator-based entries of url_transparent (closes #27064)

Diffstat:
M test/test_YoutubeDL.py 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M youtube_dl/YoutubeDL.py 52 +++++++++++++++++++++++++++++++---------------------

2 files changed, 101 insertions(+), 21 deletions(-)
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
@@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
         self.assertEqual(downloaded['extractor'], 'testex')
         self.assertEqual(downloaded['extractor_key'], 'TestEx')
 
+    # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
+    def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
+
+        class _YDL(YDL):
+            def __init__(self, *args, **kwargs):
+                super(_YDL, self).__init__(*args, **kwargs)
+
+            def trouble(self, s, tb=None):
+                pass
+
+        ydl = _YDL({
+            'format': 'extra',
+            'ignoreerrors': True,
+        })
+
+        class VideoIE(InfoExtractor):
+            _VALID_URL = r'video:(?P<id>\d+)'
+
+            def _real_extract(self, url):
+                video_id = self._match_id(url)
+                formats = [{
+                    'format_id': 'default',
+                    'url': 'url:',
+                }]
+                if video_id == '0':
+                    raise ExtractorError('foo')
+                if video_id == '2':
+                    formats.append({
+                        'format_id': 'extra',
+                        'url': TEST_URL,
+                    })
+                return {
+                    'id': video_id,
+                    'title': 'Video %s' % video_id,
+                    'formats': formats,
+                }
+
+        class PlaylistIE(InfoExtractor):
+            _VALID_URL = r'playlist:'
+
+            def _entries(self):
+                for n in range(3):
+                    video_id = compat_str(n)
+                    yield {
+                        '_type': 'url_transparent',
+                        'ie_key': VideoIE.ie_key(),
+                        'id': video_id,
+                        'url': 'video:%s' % video_id,
+                        'title': 'Video Transparent %s' % video_id,
+                    }
+
+            def _real_extract(self, url):
+                return self.playlist_result(self._entries())
+
+        ydl.add_info_extractor(VideoIE(ydl))
+        ydl.add_info_extractor(PlaylistIE(ydl))
+        info = ydl.extract_info('playlist:')
+        entries = info['entries']
+        self.assertEqual(len(entries), 3)
+        self.assertTrue(entries[0] is None)
+        self.assertTrue(entries[1] is None)
+        self.assertEqual(len(ydl.downloaded_info_dicts), 1)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(entries[2], downloaded)
+        self.assertEqual(downloaded['url'], TEST_URL)
+        self.assertEqual(downloaded['title'], 'Video Transparent 2')
+        self.assertEqual(downloaded['id'], '2')
+        self.assertEqual(downloaded['extractor'], 'Video')
+        self.assertEqual(downloaded['extractor_key'], 'Video')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
@@ -793,21 +793,14 @@ class YoutubeDL(object):
                 self.report_warning('The program functionality for this site has been marked as broken, '
                                     'and will probably not work.')
 
+            return self.__extract_info(url, ie, download, extra_info, process)
+        else:
+            self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+    def __handle_extraction_exceptions(func):
+        def wrapper(self, *args, **kwargs):
             try:
-                ie_result = ie.extract(url)
-                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
-                    break
-                if isinstance(ie_result, list):
-                    # Backwards compatibility: old IE result format
-                    ie_result = {
-                        '_type': 'compat_list',
-                        'entries': ie_result,
-                    }
-                self.add_default_extra_info(ie_result, ie, url)
-                if process:
-                    return self.process_ie_result(ie_result, download, extra_info)
-                else:
-                    return ie_result
+                return func(self, *args, **kwargs)
             except GeoRestrictedError as e:
                 msg = e.msg
                 if e.countries:
@@ -815,20 +808,33 @@ class YoutubeDL(object):
                         map(ISO3166Utils.short2full, e.countries))
                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                 self.report_error(msg)
-                break
             except ExtractorError as e:  # An error we somewhat expected
                 self.report_error(compat_str(e), e.format_traceback())
-                break
             except MaxDownloadsReached:
                 raise
             except Exception as e:
                 if self.params.get('ignoreerrors', False):
                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
-                    break
                 else:
                     raise
+        return wrapper
+
+    @__handle_extraction_exceptions
+    def __extract_info(self, url, ie, download, extra_info, process):
+        ie_result = ie.extract(url)
+        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
+            return
+        if isinstance(ie_result, list):
+            # Backwards compatibility: old IE result format
+            ie_result = {
+                '_type': 'compat_list',
+                'entries': ie_result,
+            }
+        self.add_default_extra_info(ie_result, ie, url)
+        if process:
+            return self.process_ie_result(ie_result, download, extra_info)
         else:
-            self.report_error('no suitable InfoExtractor for URL %s' % url)
+            return ie_result
 
     def add_default_extra_info(self, ie_result, ie, url):
         self.add_extra_info(ie_result, {
@@ -1003,9 +1009,8 @@ class YoutubeDL(object):
                     self.to_screen('[download] ' + reason)
                     continue
 
-                entry_result = self.process_ie_result(entry,
-                                                      download=download,
-                                                      extra_info=extra)
+                entry_result = self.__process_iterable_entry(entry, download, extra)
+                # TODO: skip failed (empty) entries?
                 playlist_results.append(entry_result)
             ie_result['entries'] = playlist_results
             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@@ -1034,6 +1039,11 @@ class YoutubeDL(object):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
+    @__handle_extraction_exceptions
+    def __process_iterable_entry(self, entry, download, extra_info):
+        return self.process_ie_result(
+            entry, download=download, extra_info=extra_info)
+
     def _build_format_filter(self, filter_spec):
         " Returns a function to filter the formats according to the filter_spec "

M	test/test_YoutubeDL.py	70	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	youtube_dl/YoutubeDL.py	52	+++++++++++++++++++++++++++++++---------------------