logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git
commit: e8de54bce50f6f77a4d7e8e80675f7003d5bf630
parent baa6c5e95cb307e7d716645780ff8aef22de6aca
Author: dirkf <fieldhouse@gmx.net>
Date:   Mon, 13 Mar 2023 19:45:54 +0000

[core] Handle `/../` sequences in HTTP URLs

* use Python's RFC implementation for embedded sequences
* hack: strip unbalanced leading `../` from path, like eg Firefox

See https://github.com/yt-dlp/yt-dlp/issues/3355

Diffstat:

M youtube_dl/YoutubeDL.py | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+), 0 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
@@ -39,6 +39,7 @@ from .compat import (
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
+    compat_urllib_parse,
     compat_urllib_request,
     compat_urllib_request_DataHandler,
 )
@@ -60,6 +61,7 @@ from .utils import (
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
     ISO3166Utils,
     locked_file,
@@ -74,6 +76,7 @@ from .utils import (
     preferredencoding,
     prepend_extension,
     process_communicate_or_kill,
+    PUTRequest,
     register_socks_protocols,
     render_table,
     replace_extension,
@@ -2297,6 +2300,27 @@ class YoutubeDL(object):
         """ Start an HTTP download """
         if isinstance(req, compat_basestring):
             req = sanitized_Request(req)
+        # an embedded /../ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        url = req.get_full_url()
+        parts = url.partition('/../')
+        if parts[1]:
+            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
+        if url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, like eg Firefox
+            parts = compat_urllib_parse.urlsplit(url)
+            path = parts.path
+            while path.startswith('/../'):
+                path = path[3:]
+            url = parts._replace(path=path).geturl()
+        # get a new Request with the munged URL
+        if url != req.get_full_url():
+            req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+                req.get_method(), compat_urllib_request.Request)
+            req = req_type(
+                url, data=req.data, headers=dict(req.header_items()),
+                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):