commit: c380cc28c4e94b4b61db7f86d35e48197b407266
parent f7f304910d1c1fc19313231d424daba304e1de71
Author: Sergey M․ <dstftw@gmail.com>
Date: Tue, 5 May 2020 04:19:33 +0700
[utils] Improve cookie files support
+ Add support for UTF-8 in cookie files
* Skip malformed cookie file entries instead of crashing (entries with an invalid number of fields or an invalid expires-at value)
Diffstat:
3 files changed, 93 insertions(+), 5 deletions(-)
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
@@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
assert_cookie_has_value('HTTPONLY_COOKIE')
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
+ def test_malformed_cookies(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ # Cookies should be empty since all malformed cookie file entries
+ # will be ignored
+ self.assertFalse(cookiejar._cookies)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/testdata/cookies/malformed_cookies.txt b/test/testdata/cookies/malformed_cookies.txt
@@ -0,0 +1,9 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+# Cookie file entry with invalid number of fields - 6 instead of 7
+www.foobar.foobar FALSE / FALSE 0 COOKIE
+
+# Cookie file entry with an invalid expires-at field (not a plain integer)
+www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -7,6 +7,7 @@ import base64
import binascii
import calendar
import codecs
+import collections
import contextlib
import ctypes
import datetime
@@ -30,6 +31,7 @@ import ssl
import subprocess
import sys
import tempfile
+import time
import traceback
import xml.etree.ElementTree
import zlib
@@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
1. https://curl.haxx.se/docs/http-cookies.html
"""
_HTTPONLY_PREFIX = '#HttpOnly_'
+ _ENTRY_LEN = 7
+ _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by youtube-dl. Do not edit.
+
+'''
+ _CookieFileEntry = collections.namedtuple(
+ 'CookieFileEntry',
+ ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """
+ Save cookies to a file.
+
+ Most of the code is taken from CPython 3.8 and slightly adapted
+ to support cookie files with UTF-8 in both python 2 and 3.
+ """
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
# Store session cookies with `expires` set to 0 instead of an empty
# string
for cookie in self:
if cookie.expires is None:
cookie.expires = 0
- compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
+
+ with io.open(filename, 'w', encoding='utf-8') as f:
+ f.write(self._HEADER)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ if cookie.secure:
+ secure = 'TRUE'
+ else:
+ secure = 'FALSE'
+ if cookie.domain.startswith('.'):
+ initial_dot = 'TRUE'
+ else:
+ initial_dot = 'FALSE'
+ if cookie.expires is not None:
+ expires = compat_str(cookie.expires)
+ else:
+ expires = ''
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name = ''
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ '\t'.join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value]) + '\n')
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
"""Load cookies from a file."""
@@ -2752,12 +2806,30 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
else:
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ def prepare_line(line):
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ # comments and empty lines are fine
+ if line.startswith('#') or not line.strip():
+ return line
+ cookie_list = line.split('\t')
+ if len(cookie_list) != self._ENTRY_LEN:
+ raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ cookie = self._CookieFileEntry(*cookie_list)
+ if cookie.expires_at and not cookie.expires_at.isdigit():
+ raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ return line
+
cf = io.StringIO()
- with open(filename) as f:
+ with io.open(filename, encoding='utf-8') as f:
for line in f:
- if line.startswith(self._HTTPONLY_PREFIX):
- line = line[len(self._HTTPONLY_PREFIX):]
- cf.write(compat_str(line))
+ try:
+ cf.write(prepare_line(line))
+ except compat_cookiejar.LoadError as e:
+ write_string(
+ 'WARNING: skipping cookie file entry due to %s: %r\n'
+ % (e, line), sys.stderr)
+ continue
cf.seek(0)
self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to