commit: 5add3f4373287e6346ca3551239edab549284db3
parent 78ce962f4fe020994c216dd2671546fbe58a5c67
Author: dirkf <fieldhouse@gmx.net>
Date: Fri, 4 Feb 2022 03:50:32 +0000
Merge branch 'pukkandan-yt-searchurl' into yt-dl-master
Closes #27749
Diffstat:
3 files changed, 74 insertions(+), 79 deletions(-)
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
@@ -66,9 +66,9 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
- # def test_youtube_search_matching(self):
- # self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
- # self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+ def test_youtube_search_matching(self):
+ self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+ self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -1606,7 +1606,7 @@ from .youtube import (
YoutubeRecommendedIE,
YoutubeSearchDateIE,
YoutubeSearchIE,
- #YoutubeSearchURLIE,
+ YoutubeSearchURLIE,
YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -342,6 +342,60 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'uploader': uploader,
}
+ def _search_results(self, query, params):
+ data = {
+ 'context': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20201021.03.00',
+ }
+ },
+ 'query': query,
+ }
+ if params:
+ data['params'] = params
+ for page_num in itertools.count(1):
+ search = self._download_json(
+ 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ video_id='query "%s"' % query,
+ note='Downloading page %s' % page_num,
+ errnote='Unable to download API page', fatal=False,
+ data=json.dumps(data).encode('utf8'),
+ headers={'content-type': 'application/json'})
+ if not search:
+ break
+ slr_contents = try_get(
+ search,
+ (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
+ lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
+ list)
+ if not slr_contents:
+ break
+ for slr_content in slr_contents:
+ isr_contents = try_get(
+ slr_content,
+ lambda x: x['itemSectionRenderer']['contents'],
+ list)
+ if not isr_contents:
+ continue
+ for content in isr_contents:
+ if not isinstance(content, dict):
+ continue
+ video = content.get('videoRenderer')
+ if not isinstance(video, dict):
+ continue
+ video_id = video.get('videoId')
+ if not video_id:
+ continue
+ yield self._extract_video(video)
+ token = try_get(
+ slr_contents,
+ lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
+ compat_str)
+ if not token:
+ break
+ data['continuation'] = token
+
class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com'
@@ -2135,7 +2189,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
(?:
(?:channel|c|user|feed|hashtag)/|
(?:playlist|watch)\?.*?\blist=|
- (?!(?:watch|embed|v|e)\b)
+ (?!(?:watch|embed|v|e|results)\b)
)
(?P<id>[^/?\#&]+)
'''
@@ -3148,93 +3202,35 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com searches'
- # there doesn't appear to be a real limit, for example if you search for
- # 'python' you get more than 8.000.000 results
- _MAX_RESULTS = float('inf')
IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch'
- _SEARCH_PARAMS = None
+ _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
+ _MAX_RESULTS = float('inf')
_TESTS = []
- def _entries(self, query, n):
- data = {
- 'context': {
- 'client': {
- 'clientName': 'WEB',
- 'clientVersion': '2.20201021.03.00',
- }
- },
- 'query': query,
- }
- if self._SEARCH_PARAMS:
- data['params'] = self._SEARCH_PARAMS
- total = 0
- for page_num in itertools.count(1):
- search = self._download_json(
- 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- video_id='query "%s"' % query,
- note='Downloading page %s' % page_num,
- errnote='Unable to download API page', fatal=False,
- data=json.dumps(data).encode('utf8'),
- headers={'content-type': 'application/json'})
- if not search:
- break
- slr_contents = try_get(
- search,
- (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
- lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
- list)
- if not slr_contents:
- break
- for slr_content in slr_contents:
- isr_contents = try_get(
- slr_content,
- lambda x: x['itemSectionRenderer']['contents'],
- list)
- if not isr_contents:
- continue
- for content in isr_contents:
- if not isinstance(content, dict):
- continue
- video = content.get('videoRenderer')
- if not isinstance(video, dict):
- continue
- video_id = video.get('videoId')
- if not video_id:
- continue
- yield self._extract_video(video)
- total += 1
- if total == n:
- return
- token = try_get(
- slr_contents,
- lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
- compat_str)
- if not token:
- break
- data['continuation'] = token
-
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
- return self.playlist_result(self._entries(query, n), query)
+ entries = itertools.islice(self._search_results(query, self._SEARCH_PARAMS), 0, None if n == float('inf') else n)
+ return self.playlist_result(entries, query, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube.com searches, newest videos first'
- _SEARCH_PARAMS = 'CAI%3D'
+ _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
+ _TESTS = []
-r"""
-class YoutubeSearchURLIE(YoutubeSearchIE):
- IE_DESC = 'YouTube.com search URLs'
- IE_NAME = 'youtube:search_url'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube search URLs with sorting and filter support'
+ IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
'info_dict': {
+ 'id': 'youtube-dl test video',
'title': 'youtube-dl test video',
}
}, {
@@ -3243,11 +3239,10 @@ class YoutubeSearchURLIE(YoutubeSearchIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- query = compat_urllib_parse_unquote_plus(mobj.group('query'))
- webpage = self._download_webpage(url, query)
- return self.playlist_result(self._process_page(webpage), playlist_title=query)
-"""
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ query = (qs.get('search_query') or qs.get('q'))[0]
+ params = qs.get('sp', ('',))[0]
+ return self.playlist_result(self._search_results(query, params), query, query)
class YoutubeFeedsInfoExtractor(YoutubeTabIE):