logo

oasis-root

Compiled tree of Oasis Linux, built from the maintainer's own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

common.py (19491B)


  1. import contextlib
  2. import errno
  3. import functools
  4. import os
  5. import random
  6. import re
  7. import threading
  8. import time
  9. from ..minicurses import (
  10. BreaklineStatusPrinter,
  11. MultilineLogger,
  12. MultilinePrinter,
  13. QuietMultilinePrinter,
  14. )
  15. from ..utils import (
  16. IDENTITY,
  17. NO_DEFAULT,
  18. LockingUnsupportedError,
  19. Namespace,
  20. RetryManager,
  21. classproperty,
  22. deprecation_warning,
  23. format_bytes,
  24. join_nonempty,
  25. parse_bytes,
  26. remove_start,
  27. sanitize_open,
  28. shell_quote,
  29. timeconvert,
  30. timetuple_from_msec,
  31. try_call,
  32. )
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for expected network errors.
                        Default is 0 for API, but 10 for CLI
    file_access_retries:   Number of times to retry on file access error (default: 3)
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    progress_delta:     The minimum time between progress output, in seconds
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py
    retry_sleep_functions: See YoutubeDL.py

    Subclasses of this one must re-define the real_download method.
    """

    # NOTE(review): presumably the number of bytes fetched when the 'test'
    # option is set; it is not referenced in this part of the file - confirm.
    _TEST_FILE_SIZE = 10241
    # Class-level placeholder; __init__ replaces it with the options dict.
    params = None
  74. def __init__(self, ydl, params):
  75. """Create a FileDownloader object with the given options."""
  76. self._set_ydl(ydl)
  77. self._progress_hooks = []
  78. self.params = params
  79. self._prepare_multiline_status()
  80. self.add_progress_hook(self.report_progress)
  81. if self.params.get('progress_delta'):
  82. self._progress_delta_lock = threading.Lock()
  83. self._progress_delta_time = time.monotonic()
  84. def _set_ydl(self, ydl):
  85. self.ydl = ydl
  86. for func in (
  87. 'deprecation_warning',
  88. 'deprecated_feature',
  89. 'report_error',
  90. 'report_file_already_downloaded',
  91. 'report_warning',
  92. 'to_console_title',
  93. 'to_stderr',
  94. 'trouble',
  95. 'write_debug',
  96. ):
  97. if not hasattr(self, func):
  98. setattr(self, func, getattr(ydl, func))
  99. def to_screen(self, *args, **kargs):
  100. self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
  101. __to_screen = to_screen
  102. @classproperty
  103. def FD_NAME(cls):
  104. return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()
  105. @staticmethod
  106. def format_seconds(seconds):
  107. if seconds is None:
  108. return ' Unknown'
  109. time = timetuple_from_msec(seconds * 1000)
  110. if time.hours > 99:
  111. return '--:--:--'
  112. return '%02d:%02d:%02d' % time[:-1]
  113. @classmethod
  114. def format_eta(cls, seconds):
  115. return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'
  116. @staticmethod
  117. def calc_percent(byte_counter, data_len):
  118. if data_len is None:
  119. return None
  120. return float(byte_counter) / float(data_len) * 100.0
  121. @staticmethod
  122. def format_percent(percent):
  123. return ' N/A%' if percent is None else f'{percent:>5.1f}%'
  124. @classmethod
  125. def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
  126. if total is NO_DEFAULT:
  127. rate, remaining = start_or_rate, now_or_remaining
  128. if None in (rate, remaining):
  129. return None
  130. return int(float(remaining) / rate)
  131. start, now = start_or_rate, now_or_remaining
  132. if total is None:
  133. return None
  134. if now is None:
  135. now = time.time()
  136. rate = cls.calc_speed(start, now, current)
  137. return rate and int((float(total) - float(current)) / rate)
  138. @staticmethod
  139. def calc_speed(start, now, bytes):
  140. dif = now - start
  141. if bytes == 0 or dif < 0.001: # One millisecond
  142. return None
  143. return float(bytes) / dif
  144. @staticmethod
  145. def format_speed(speed):
  146. return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
  147. @staticmethod
  148. def format_retries(retries):
  149. return 'inf' if retries == float('inf') else int(retries)
  150. @staticmethod
  151. def filesize_or_none(unencoded_filename):
  152. if os.path.isfile(unencoded_filename):
  153. return os.path.getsize(unencoded_filename)
  154. return 0
  155. @staticmethod
  156. def best_block_size(elapsed_time, bytes):
  157. new_min = max(bytes / 2.0, 1.0)
  158. new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
  159. if elapsed_time < 0.001:
  160. return int(new_max)
  161. rate = bytes / elapsed_time
  162. if rate > new_max:
  163. return int(new_max)
  164. if rate < new_min:
  165. return int(new_min)
  166. return int(rate)
  167. @staticmethod
  168. def parse_bytes(bytestr):
  169. """Parse a string indicating a byte quantity into an integer."""
  170. deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and '
  171. 'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
  172. return parse_bytes(bytestr)
  173. def slow_down(self, start_time, now, byte_counter):
  174. """Sleep if the download speed is over the rate limit."""
  175. rate_limit = self.params.get('ratelimit')
  176. if rate_limit is None or byte_counter == 0:
  177. return
  178. if now is None:
  179. now = time.time()
  180. elapsed = now - start_time
  181. if elapsed <= 0.0:
  182. return
  183. speed = float(byte_counter) / elapsed
  184. if speed > rate_limit:
  185. sleep_time = float(byte_counter) / rate_limit - elapsed
  186. if sleep_time > 0:
  187. time.sleep(sleep_time)
  188. def temp_name(self, filename):
  189. """Returns a temporary filename for the given filename."""
  190. if self.params.get('nopart', False) or filename == '-' or \
  191. (os.path.exists(filename) and not os.path.isfile(filename)):
  192. return filename
  193. return filename + '.part'
  194. def undo_temp_name(self, filename):
  195. if filename.endswith('.part'):
  196. return filename[:-len('.part')]
  197. return filename
  198. def ytdl_filename(self, filename):
  199. return filename + '.ytdl'
    def wrap_file_access(action, *, fatal=False):
        """Build a decorator that retries a file operation on access errors.

        This is called as a plain function while the class body executes
        (it takes no ``self``) and is used to decorate the methods below.

        action: verb interpolated into the messages ("Unable to <action> file").
        fatal:  when true, no ``error`` reporter is handed to
                RetryManager.report_retry.
                NOTE(review): presumably the error is then raised instead of
                only being reported - confirm against RetryManager.

        Returns a callable that turns the decorated function into a
        functools.partialmethod of ``wrapper``, i.e. calling the method runs
        ``wrapper(self, func, *args, **kwargs)``.
        """
        def error_callback(err, count, retries, *, fd):
            # fd.__to_screen is name-mangled to the class-private to_screen
            # alias because this function is defined inside the class body.
            return RetryManager.report_retry(
                err, count, retries, info=fd.__to_screen,
                warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
                error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
                sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

        def wrapper(self, func, *args, **kwargs):
            # The attempt count comes from the 'file_access_retries' option (default 3)
            for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
                try:
                    return func(self, *args, **kwargs)
                except OSError as err:
                    # Only EACCES/EINVAL are recorded for another attempt
                    if err.errno in (errno.EACCES, errno.EINVAL):
                        retry.error = err
                        continue
                    # Any other OSError is reported right away with count=1, retries=0
                    retry.error_callback(err, 1, 0)

        return functools.partial(functools.partialmethod, wrapper)
  217. @wrap_file_access('open', fatal=True)
  218. def sanitize_open(self, filename, open_mode):
  219. f, filename = sanitize_open(filename, open_mode)
  220. if not getattr(f, 'locked', None):
  221. self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
  222. return f, filename
  223. @wrap_file_access('remove')
  224. def try_remove(self, filename):
  225. if os.path.isfile(filename):
  226. os.remove(filename)
  227. @wrap_file_access('rename')
  228. def try_rename(self, old_filename, new_filename):
  229. if old_filename == new_filename:
  230. return
  231. os.replace(old_filename, new_filename)
  232. def try_utime(self, filename, last_modified_hdr):
  233. """Try to set the last-modified time of the given file."""
  234. if last_modified_hdr is None:
  235. return
  236. if not os.path.isfile(filename):
  237. return
  238. timestr = last_modified_hdr
  239. if timestr is None:
  240. return
  241. filetime = timeconvert(timestr)
  242. if filetime is None:
  243. return filetime
  244. # Ignore obviously invalid dates
  245. if filetime == 0:
  246. return
  247. with contextlib.suppress(Exception):
  248. os.utime(filename, (time.time(), filetime))
  249. return filetime
  250. def report_destination(self, filename):
  251. """Report destination filename."""
  252. self.to_screen('[download] Destination: ' + filename)
  253. def _prepare_multiline_status(self, lines=1):
  254. if self.params.get('noprogress'):
  255. self._multiline = QuietMultilinePrinter()
  256. elif self.ydl.params.get('logger'):
  257. self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
  258. elif self.params.get('progress_with_newline'):
  259. self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
  260. else:
  261. self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
  262. self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
  263. self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out
  264. def _finish_multiline_status(self):
  265. self._multiline.end()
    # Style applied to each pre-formatted progress field: the key "<name>"
    # refers to the "_<name>_str" entry of the status dict (see
    # _report_progress_status). An empty string is passed through as the
    # style for the total-size fields.
    ProgressStyles = Namespace(
        downloaded_bytes='light blue',
        percent='light blue',
        eta='yellow',
        speed='green',
        elapsed='bold white',
        total_bytes='',
        total_bytes_estimate='',
    )
  275. def _report_progress_status(self, s, default_template):
  276. for name, style in self.ProgressStyles.items_:
  277. name = f'_{name}_str'
  278. if name not in s:
  279. continue
  280. s[name] = self._format_progress(s[name], style)
  281. s['_default_template'] = default_template % s
  282. progress_dict = s.copy()
  283. progress_dict.pop('info_dict')
  284. progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
  285. progress_template = self.params.get('progress_template', {})
  286. self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
  287. progress_template.get('download') or '[download] %(progress._default_template)s',
  288. progress_dict), s.get('progress_idx') or 0)
  289. self.to_console_title(self.ydl.evaluate_outtmpl(
  290. progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
  291. progress_dict))
  292. def _format_progress(self, *args, **kwargs):
  293. return self.ydl._format_text(
  294. self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
    def report_progress(self, s):
        """Render the progress status dict s as a progress line.

        Registered as a progress hook in __init__. Pre-formatted "_*_str"
        fields are added to s in place before it is handed to
        _report_progress_status. Only the 'finished' and 'downloading'
        statuses produce output; 'downloading' output is throttled when the
        'progress_delta' option is set.
        """
        def with_fields(*tups, default=''):
            # Each tuple is (*required_keys, template): return the first
            # template whose keys are all present and not None in s.
            for *fields, tmpl in tups:
                if all(s.get(f) is not None for f in fields):
                    return tmpl
            return default

        _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'

        if s['status'] == 'finished':
            if self.params.get('noprogress'):
                self.to_screen('[download] Download completed')
            # Average speed over the whole download
            speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
            s.update({
                'speed': speed,
                '_speed_str': self.format_speed(speed).strip(),
                '_total_bytes_str': _format_bytes('total_bytes'),
                '_elapsed_str': self.format_seconds(s.get('elapsed')),
                '_percent_str': self.format_percent(100),
            })
            # Only the parts whose data is available end up in the summary
            self._report_progress_status(s, join_nonempty(
                '100%%',
                with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
                with_fields(('elapsed', 'in %(_elapsed_str)s')),
                with_fields(('speed', 'at %(_speed_str)s')),
                delim=' '))

        if s['status'] != 'downloading':
            return

        if update_delta := self.params.get('progress_delta'):
            # Skip this update until the next scheduled output time is reached;
            # the lock and timestamp are created in __init__.
            with self._progress_delta_lock:
                if time.monotonic() < self._progress_delta_time:
                    return
                self._progress_delta_time += update_delta

        s.update({
            '_eta_str': self.format_eta(s.get('eta')).strip(),
            '_speed_str': self.format_speed(s.get('speed')),
            # Candidates are listed in order of preference: exact total,
            # estimated total, then 0 when nothing has been downloaded.
            '_percent_str': self.format_percent(try_call(
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
                lambda: s['downloaded_bytes'] == 0 and 0)),
            '_total_bytes_str': _format_bytes('total_bytes'),
            '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
            '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
        })

        # Pick the most informative line the available fields allow
        msg_template = with_fields(
            ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
            ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
            ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
            ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
            default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

        # Append fragment progress when the status carries it
        msg_template += with_fields(
            ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
            ('fragment_index', ' (frag %(fragment_index)s)'))
        self._report_progress_status(s, msg_template)
  348. def report_resuming_byte(self, resume_len):
  349. """Report attempt to resume at given byte."""
  350. self.to_screen(f'[download] Resuming download at byte {resume_len}')
  351. def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
  352. """Report retry"""
  353. is_frag = False if frag_index is NO_DEFAULT else 'fragment'
  354. RetryManager.report_retry(
  355. err, count, retries, info=self.__to_screen,
  356. warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
  357. error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
  358. sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
  359. suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)
  360. def report_unable_to_resume(self):
  361. """Report it was impossible to resume download."""
  362. self.to_screen('[download] Unable to resume')
  363. @staticmethod
  364. def supports_manifest(manifest):
  365. """ Whether the downloader can download the fragments from the manifest.
  366. Redefine in subclasses if needed. """
  367. pass
  368. def download(self, filename, info_dict, subtitle=False):
  369. """Download to a filename using the info from info_dict
  370. Return True on success and False otherwise
  371. """
  372. nooverwrites_and_exists = (
  373. not self.params.get('overwrites', True)
  374. and os.path.exists(filename)
  375. )
  376. if not hasattr(filename, 'write'):
  377. continuedl_and_exists = (
  378. self.params.get('continuedl', True)
  379. and os.path.isfile(filename)
  380. and not self.params.get('nopart', False)
  381. )
  382. # Check file already present
  383. if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
  384. self.report_file_already_downloaded(filename)
  385. self._hook_progress({
  386. 'filename': filename,
  387. 'status': 'finished',
  388. 'total_bytes': os.path.getsize(filename),
  389. }, info_dict)
  390. self._finish_multiline_status()
  391. return True, False
  392. if subtitle:
  393. sleep_interval = self.params.get('sleep_interval_subtitles') or 0
  394. else:
  395. min_sleep_interval = self.params.get('sleep_interval') or 0
  396. sleep_interval = random.uniform(
  397. min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
  398. if sleep_interval > 0:
  399. self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
  400. time.sleep(sleep_interval)
  401. ret = self.real_download(filename, info_dict)
  402. self._finish_multiline_status()
  403. return ret, True
  404. def real_download(self, filename, info_dict):
  405. """Real download process. Redefine in subclasses."""
  406. raise NotImplementedError('This method must be implemented by subclasses')
  407. def _hook_progress(self, status, info_dict):
  408. # Ideally we want to make a copy of the dict, but that is too slow
  409. status['info_dict'] = info_dict
  410. # youtube-dl passes the same status object to all the hooks.
  411. # Some third party scripts seems to be relying on this.
  412. # So keep this behavior if possible
  413. for ph in self._progress_hooks:
  414. ph(status)
  415. def add_progress_hook(self, ph):
  416. # See YoutubeDl.py (search for progress_hooks) for a description of
  417. # this interface
  418. self._progress_hooks.append(ph)
  419. def _debug_cmd(self, args, exe=None):
  420. if not self.params.get('verbose', False):
  421. return
  422. if exe is None:
  423. exe = os.path.basename(args[0])
  424. self.write_debug(f'{exe} command line: {shell_quote(args)}')