commit: e8de54bce50f6f77a4d7e8e80675f7003d5bf630
parent baa6c5e95cb307e7d716645780ff8aef22de6aca
Author: dirkf <fieldhouse@gmx.net>
Date: Mon, 13 Mar 2023 19:45:54 +0000
[core] Handle `/../` sequences in HTTP URLs
* use Python's RFC implementation for embedded sequences
* hack: strip unbalanced leading `../` from path, like eg Firefox
See https://github.com/yt-dlp/yt-dlp/issues/3355
Diffstat:
1 file changed, 24 insertions(+), 0 deletions(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
@@ -39,6 +39,7 @@ from .compat import (
compat_str,
compat_tokenize_tokenize,
compat_urllib_error,
+ compat_urllib_parse,
compat_urllib_request,
compat_urllib_request_DataHandler,
)
@@ -60,6 +61,7 @@ from .utils import (
format_bytes,
formatSeconds,
GeoRestrictedError,
+ HEADRequest,
int_or_none,
ISO3166Utils,
locked_file,
@@ -74,6 +76,7 @@ from .utils import (
preferredencoding,
prepend_extension,
process_communicate_or_kill,
+ PUTRequest,
register_socks_protocols,
render_table,
replace_extension,
@@ -2297,6 +2300,27 @@ class YoutubeDL(object):
""" Start an HTTP download """
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
+ # an embedded /../ sequence is not automatically handled by urllib2
+ # see https://github.com/yt-dlp/yt-dlp/issues/3355
+ url = req.get_full_url()
+ parts = url.partition('/../')
+ if parts[1]:
+ url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
+ if url:
+ # worse, URL path may have initial /../ against RFCs: work-around
+ # by stripping such prefixes, like eg Firefox
+ parts = compat_urllib_parse.urlsplit(url)
+ path = parts.path
+ while path.startswith('/../'):
+ path = path[3:]
+ url = parts._replace(path=path).geturl()
+ # get a new Request with the munged URL
+ if url != req.get_full_url():
+ req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+ req.get_method(), compat_urllib_request.Request)
+ req = req_type(
+ url, data=req.data, headers=dict(req.header_items()),
+ origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):