commit: da2069fb22fd3b34046fd1be03690fccdd9ab1a2
parent 95c98100155589e224c76fddb3d01dae0bd233ac
Author: Sergey M․ <dstftw@gmail.com>
Date: Sun, 13 Sep 2020 20:43:50 +0700
[googledrive] Use redirect URLs for source format (closes #18877, closes #23919, closes #24689, closes #26565)
Diffstat:
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
@@ -220,19 +220,27 @@ class GoogleDriveIE(InfoExtractor):
'id': video_id,
'export': 'download',
})
- urlh = self._request_webpage(
- source_url, video_id, note='Requesting source file',
- errnote='Unable to request source file', fatal=False)
+
+ def request_source_file(source_url, kind):
+ return self._request_webpage(
+ source_url, video_id, note='Requesting %s file' % kind,
+ errnote='Unable to request %s file' % kind, fatal=False)
+ urlh = request_source_file(source_url, 'source')
if urlh:
- def add_source_format(src_url):
+ def add_source_format(urlh):
formats.append({
- 'url': src_url,
+ # Use redirect URLs as download URLs in order to calculate
+ # correct cookies in _calc_cookies.
+ # Using original URLs may result in a redirect loop due to
+ # google.com's cookies being mistakenly used for googleusercontent.com
+ # redirect URLs (see #23919).
+ 'url': urlh.geturl(),
'ext': determine_ext(title, 'mp4').lower(),
'format_id': 'source',
'quality': 1,
})
if urlh.headers.get('Content-Disposition'):
- add_source_format(source_url)
+ add_source_format(urlh)
else:
confirmation_webpage = self._webpage_read_content(
urlh, url, video_id, note='Downloading confirmation page',
@@ -242,9 +250,12 @@ class GoogleDriveIE(InfoExtractor):
r'confirm=([^&"\']+)', confirmation_webpage,
'confirmation code', fatal=False)
if confirm:
- add_source_format(update_url_query(source_url, {
+ confirmed_source_url = update_url_query(source_url, {
'confirm': confirm,
- }))
+ })
+ urlh = request_source_file(confirmed_source_url, 'confirmed source')
+ if urlh and urlh.headers.get('Content-Disposition'):
+ add_source_format(urlh)
if not formats:
reason = self._search_regex(