logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git

external.py (21625B)


  1. from __future__ import unicode_literals
  2. import os
  3. import re
  4. import subprocess
  5. import sys
  6. import tempfile
  7. import time
  8. from .common import FileDownloader
  9. from ..compat import (
  10. compat_setenv,
  11. compat_str,
  12. compat_subprocess_Popen,
  13. )
  14. try:
  15. from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
  16. except ImportError:
  17. FFmpegPostProcessor = None
  18. from ..utils import (
  19. cli_option,
  20. cli_valueless_option,
  21. cli_bool_option,
  22. cli_configuration_args,
  23. encodeFilename,
  24. encodeArgument,
  25. handle_youtubedl_headers,
  26. check_executable,
  27. is_outdated_version,
  28. process_communicate_or_kill,
  29. T,
  30. traverse_obj,
  31. )
  32. class ExternalFD(FileDownloader):
  33. def real_download(self, filename, info_dict):
  34. self.report_destination(filename)
  35. tmpfilename = self.temp_name(filename)
  36. self._cookies_tempfile = None
  37. try:
  38. started = time.time()
  39. retval = self._call_downloader(tmpfilename, info_dict)
  40. except KeyboardInterrupt:
  41. if not info_dict.get('is_live'):
  42. raise
  43. # Live stream downloading cancellation should be considered as
  44. # correct and expected termination thus all postprocessing
  45. # should take place
  46. retval = 0
  47. self.to_screen('[%s] Interrupted by user' % self.get_basename())
  48. finally:
  49. if self._cookies_tempfile and os.path.isfile(self._cookies_tempfile):
  50. try:
  51. os.remove(self._cookies_tempfile)
  52. except OSError:
  53. self.report_warning(
  54. 'Unable to delete temporary cookies file "{0}"'.format(self._cookies_tempfile))
  55. if retval == 0:
  56. status = {
  57. 'filename': filename,
  58. 'status': 'finished',
  59. 'elapsed': time.time() - started,
  60. }
  61. if filename != '-':
  62. fsize = os.path.getsize(encodeFilename(tmpfilename))
  63. self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
  64. self.try_rename(tmpfilename, filename)
  65. status.update({
  66. 'downloaded_bytes': fsize,
  67. 'total_bytes': fsize,
  68. })
  69. self._hook_progress(status)
  70. return True
  71. else:
  72. self.to_stderr('\n')
  73. self.report_error('%s exited with code %d' % (
  74. self.get_basename(), retval))
  75. return False
  76. @classmethod
  77. def get_basename(cls):
  78. return cls.__name__[:-2].lower()
  79. @property
  80. def exe(self):
  81. return self.params.get('external_downloader')
  82. @classmethod
  83. def available(cls):
  84. return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
  85. @classmethod
  86. def supports(cls, info_dict):
  87. return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
  88. @classmethod
  89. def can_download(cls, info_dict):
  90. return cls.available() and cls.supports(info_dict)
  91. def _option(self, command_option, param):
  92. return cli_option(self.params, command_option, param)
  93. def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
  94. return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
  95. def _valueless_option(self, command_option, param, expected_value=True):
  96. return cli_valueless_option(self.params, command_option, param, expected_value)
  97. def _configuration_args(self, default=[]):
  98. return cli_configuration_args(self.params, 'external_downloader_args', default)
  99. def _write_cookies(self):
  100. if not self.ydl.cookiejar.filename:
  101. tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
  102. tmp_cookies.close()
  103. self._cookies_tempfile = tmp_cookies.name
  104. self.to_screen('[download] Writing temporary cookies file to "{0}"'.format(self._cookies_tempfile))
  105. # real_download resets _cookies_tempfile; if it's None, save() will write to cookiejar.filename
  106. self.ydl.cookiejar.save(self._cookies_tempfile, ignore_discard=True, ignore_expires=True)
  107. return self.ydl.cookiejar.filename or self._cookies_tempfile
  108. def _call_downloader(self, tmpfilename, info_dict):
  109. """ Either overwrite this or implement _make_cmd """
  110. cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
  111. self._debug_cmd(cmd)
  112. p = subprocess.Popen(
  113. cmd, stderr=subprocess.PIPE)
  114. _, stderr = process_communicate_or_kill(p)
  115. if p.returncode != 0:
  116. self.to_stderr(stderr.decode('utf-8', 'replace'))
  117. return p.returncode
  118. @staticmethod
  119. def _header_items(info_dict):
  120. return traverse_obj(
  121. info_dict, ('http_headers', T(dict.items), Ellipsis))
  122. class CurlFD(ExternalFD):
  123. AVAILABLE_OPT = '-V'
  124. def _make_cmd(self, tmpfilename, info_dict):
  125. cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
  126. cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
  127. if cookie_header:
  128. cmd += ['--cookie', cookie_header]
  129. for key, val in self._header_items(info_dict):
  130. cmd += ['--header', '%s: %s' % (key, val)]
  131. cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
  132. cmd += self._valueless_option('--silent', 'noprogress')
  133. cmd += self._valueless_option('--verbose', 'verbose')
  134. cmd += self._option('--limit-rate', 'ratelimit')
  135. retry = self._option('--retry', 'retries')
  136. if len(retry) == 2:
  137. if retry[1] in ('inf', 'infinite'):
  138. retry[1] = '2147483647'
  139. cmd += retry
  140. cmd += self._option('--max-filesize', 'max_filesize')
  141. cmd += self._option('--interface', 'source_address')
  142. cmd += self._option('--proxy', 'proxy')
  143. cmd += self._valueless_option('--insecure', 'nocheckcertificate')
  144. cmd += self._configuration_args()
  145. cmd += ['--', info_dict['url']]
  146. return cmd
  147. def _call_downloader(self, tmpfilename, info_dict):
  148. cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
  149. self._debug_cmd(cmd)
  150. # curl writes the progress to stderr so don't capture it.
  151. p = subprocess.Popen(cmd)
  152. process_communicate_or_kill(p)
  153. return p.returncode
  154. class AxelFD(ExternalFD):
  155. AVAILABLE_OPT = '-V'
  156. def _make_cmd(self, tmpfilename, info_dict):
  157. cmd = [self.exe, '-o', tmpfilename]
  158. for key, val in self._header_items(info_dict):
  159. cmd += ['-H', '%s: %s' % (key, val)]
  160. cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
  161. if cookie_header:
  162. cmd += ['-H', 'Cookie: {0}'.format(cookie_header), '--max-redirect=0']
  163. cmd += self._configuration_args()
  164. cmd += ['--', info_dict['url']]
  165. return cmd
  166. class WgetFD(ExternalFD):
  167. AVAILABLE_OPT = '--version'
  168. def _make_cmd(self, tmpfilename, info_dict):
  169. cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
  170. if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
  171. cmd += ['--load-cookies', self._write_cookies()]
  172. for key, val in self._header_items(info_dict):
  173. cmd += ['--header', '%s: %s' % (key, val)]
  174. cmd += self._option('--limit-rate', 'ratelimit')
  175. retry = self._option('--tries', 'retries')
  176. if len(retry) == 2:
  177. if retry[1] in ('inf', 'infinite'):
  178. retry[1] = '0'
  179. cmd += retry
  180. cmd += self._option('--bind-address', 'source_address')
  181. proxy = self.params.get('proxy')
  182. if proxy:
  183. for var in ('http_proxy', 'https_proxy'):
  184. cmd += ['--execute', '%s=%s' % (var, proxy)]
  185. cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
  186. cmd += self._configuration_args()
  187. cmd += ['--', info_dict['url']]
  188. return cmd
  189. class Aria2cFD(ExternalFD):
  190. AVAILABLE_OPT = '-v'
  191. @staticmethod
  192. def _aria2c_filename(fn):
  193. return fn if os.path.isabs(fn) else os.path.join('.', fn)
  194. def _make_cmd(self, tmpfilename, info_dict):
  195. cmd = [self.exe, '-c',
  196. '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
  197. '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
  198. if 'fragments' in info_dict:
  199. cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
  200. else:
  201. cmd += ['--min-split-size', '1M']
  202. if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
  203. cmd += ['--load-cookies={0}'.format(self._write_cookies())]
  204. for key, val in self._header_items(info_dict):
  205. cmd += ['--header', '%s: %s' % (key, val)]
  206. cmd += self._configuration_args(['--max-connection-per-server', '4'])
  207. cmd += ['--out', os.path.basename(tmpfilename)]
  208. cmd += self._option('--max-overall-download-limit', 'ratelimit')
  209. cmd += self._option('--interface', 'source_address')
  210. cmd += self._option('--all-proxy', 'proxy')
  211. cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
  212. cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
  213. cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
  214. cmd += self._configuration_args()
  215. # aria2c strips out spaces from the beginning/end of filenames and paths.
  216. # We work around this issue by adding a "./" to the beginning of the
  217. # filename and relative path, and adding a "/" at the end of the path.
  218. # See: https://github.com/yt-dlp/yt-dlp/issues/276
  219. # https://github.com/ytdl-org/youtube-dl/issues/20312
  220. # https://github.com/aria2/aria2/issues/1373
  221. dn = os.path.dirname(tmpfilename)
  222. if dn:
  223. cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
  224. if 'fragments' not in info_dict:
  225. cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
  226. cmd += ['--auto-file-renaming=false']
  227. if 'fragments' in info_dict:
  228. cmd += ['--file-allocation=none', '--uri-selector=inorder']
  229. url_list_file = '%s.frag.urls' % (tmpfilename, )
  230. url_list = []
  231. for frag_index, fragment in enumerate(info_dict['fragments']):
  232. fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
  233. url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
  234. stream, _ = self.sanitize_open(url_list_file, 'wb')
  235. stream.write('\n'.join(url_list).encode())
  236. stream.close()
  237. cmd += ['-i', self._aria2c_filename(url_list_file)]
  238. else:
  239. cmd += ['--', info_dict['url']]
  240. return cmd
  241. class Aria2pFD(ExternalFD):
  242. ''' Aria2pFD class
  243. This class support to use aria2p as downloader.
  244. (Aria2p, a command-line tool and Python library to interact with an aria2c daemon process
  245. through JSON-RPC.)
  246. It can help you to get download progress more easily.
  247. To use aria2p as downloader, you need to install aria2c and aria2p, aria2p can download with pip.
  248. Then run aria2c in the background and enable with the --enable-rpc option.
  249. '''
  250. try:
  251. import aria2p
  252. __avail = True
  253. except ImportError:
  254. __avail = False
  255. @classmethod
  256. def available(cls):
  257. return cls.__avail
  258. def _call_downloader(self, tmpfilename, info_dict):
  259. aria2 = self.aria2p.API(
  260. self.aria2p.Client(
  261. host='http://localhost',
  262. port=6800,
  263. secret=''
  264. )
  265. )
  266. options = {
  267. 'min-split-size': '1M',
  268. 'max-connection-per-server': 4,
  269. 'auto-file-renaming': 'false',
  270. }
  271. options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
  272. options['out'] = os.path.basename(tmpfilename)
  273. if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
  274. options['load-cookies'] = self._write_cookies()
  275. options['header'] = []
  276. for key, val in self._header_items(info_dict):
  277. options['header'].append('{0}: {1}'.format(key, val))
  278. download = aria2.add_uris([info_dict['url']], options)
  279. status = {
  280. 'status': 'downloading',
  281. 'tmpfilename': tmpfilename,
  282. }
  283. started = time.time()
  284. while download.status in ['active', 'waiting']:
  285. download = aria2.get_download(download.gid)
  286. status.update({
  287. 'downloaded_bytes': download.completed_length,
  288. 'total_bytes': download.total_length,
  289. 'elapsed': time.time() - started,
  290. 'eta': download.eta.total_seconds(),
  291. 'speed': download.download_speed,
  292. })
  293. self._hook_progress(status)
  294. time.sleep(.5)
  295. return download.status != 'complete'
  296. class HttpieFD(ExternalFD):
  297. @classmethod
  298. def available(cls):
  299. return check_executable('http', ['--version'])
  300. def _make_cmd(self, tmpfilename, info_dict):
  301. cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
  302. for key, val in self._header_items(info_dict):
  303. cmd += ['%s:%s' % (key, val)]
  304. # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
  305. # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
  306. # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
  307. # 2: https://httpie.io/docs/cli/sessions
  308. cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
  309. if cookie_header:
  310. cmd += ['Cookie:%s' % cookie_header]
  311. return cmd
  312. class FFmpegFD(ExternalFD):
  313. @classmethod
  314. def supports(cls, info_dict):
  315. return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms', 'http_dash_segments')
  316. @classmethod
  317. def available(cls):
  318. # actual availability can only be confirmed for an instance
  319. return bool(FFmpegPostProcessor)
  320. def _call_downloader(self, tmpfilename, info_dict):
  321. # `downloader` means the parent `YoutubeDL`
  322. ffpp = FFmpegPostProcessor(downloader=self.ydl)
  323. if not ffpp.available:
  324. self.report_error('ffmpeg required for download but no ffmpeg (nor avconv) executable could be found. Please install one.')
  325. return False
  326. ffpp.check_version()
  327. args = [ffpp.executable, '-y']
  328. for log_level in ('quiet', 'verbose'):
  329. if self.params.get(log_level, False):
  330. args += ['-loglevel', log_level]
  331. break
  332. seekable = info_dict.get('_seekable')
  333. if seekable is not None:
  334. # setting -seekable prevents ffmpeg from guessing if the server
  335. # supports seeking(by adding the header `Range: bytes=0-`), which
  336. # can cause problems in some cases
  337. # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
  338. # http://trac.ffmpeg.org/ticket/6125#comment:10
  339. args += ['-seekable', '1' if seekable else '0']
  340. args += self._configuration_args()
  341. # start_time = info_dict.get('start_time') or 0
  342. # if start_time:
  343. # args += ['-ss', compat_str(start_time)]
  344. # end_time = info_dict.get('end_time')
  345. # if end_time:
  346. # args += ['-t', compat_str(end_time - start_time)]
  347. url = info_dict['url']
  348. cookies = self.ydl.cookiejar.get_cookies_for_url(url)
  349. if cookies:
  350. args.extend(['-cookies', ''.join(
  351. '{0}={1}; path={2}; domain={3};\r\n'.format(
  352. cookie.name, cookie.value, cookie.path, cookie.domain)
  353. for cookie in cookies)])
  354. if info_dict.get('http_headers') and re.match(r'^https?://', url):
  355. # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
  356. # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
  357. headers = handle_youtubedl_headers(info_dict['http_headers'])
  358. args += [
  359. '-headers',
  360. ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
  361. env = None
  362. proxy = self.params.get('proxy')
  363. if proxy:
  364. if not re.match(r'^[\da-zA-Z]+://', proxy):
  365. proxy = 'http://%s' % proxy
  366. if proxy.startswith('socks'):
  367. self.report_warning(
  368. '%s does not support SOCKS proxies. Downloading is likely to fail. '
  369. 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
  370. # Since December 2015 ffmpeg supports -http_proxy option (see
  371. # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
  372. # We could switch to the following code if we are able to detect version properly
  373. # args += ['-http_proxy', proxy]
  374. env = os.environ.copy()
  375. compat_setenv('HTTP_PROXY', proxy, env=env)
  376. compat_setenv('http_proxy', proxy, env=env)
  377. protocol = info_dict.get('protocol')
  378. if protocol == 'rtmp':
  379. player_url = info_dict.get('player_url')
  380. page_url = info_dict.get('page_url')
  381. app = info_dict.get('app')
  382. play_path = info_dict.get('play_path')
  383. tc_url = info_dict.get('tc_url')
  384. flash_version = info_dict.get('flash_version')
  385. live = info_dict.get('rtmp_live', False)
  386. conn = info_dict.get('rtmp_conn')
  387. if player_url is not None:
  388. args += ['-rtmp_swfverify', player_url]
  389. if page_url is not None:
  390. args += ['-rtmp_pageurl', page_url]
  391. if app is not None:
  392. args += ['-rtmp_app', app]
  393. if play_path is not None:
  394. args += ['-rtmp_playpath', play_path]
  395. if tc_url is not None:
  396. args += ['-rtmp_tcurl', tc_url]
  397. if flash_version is not None:
  398. args += ['-rtmp_flashver', flash_version]
  399. if live:
  400. args += ['-rtmp_live', 'live']
  401. if isinstance(conn, list):
  402. for entry in conn:
  403. args += ['-rtmp_conn', entry]
  404. elif isinstance(conn, compat_str):
  405. args += ['-rtmp_conn', conn]
  406. args += ['-i', url, '-c', 'copy']
  407. if self.params.get('test', False):
  408. args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
  409. if protocol in ('m3u8', 'm3u8_native'):
  410. if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
  411. args += ['-f', 'mpegts']
  412. else:
  413. args += ['-f', 'mp4']
  414. if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
  415. args += ['-bsf:a', 'aac_adtstoasc']
  416. elif protocol == 'rtmp':
  417. args += ['-f', 'flv']
  418. else:
  419. args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
  420. args = [encodeArgument(opt) for opt in args]
  421. args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
  422. self._debug_cmd(args)
  423. # From [1], a PIPE opened in Popen() should be closed, unless
  424. # .communicate() is called. Avoid leaking any PIPEs by using Popen
  425. # as a context manager (newer Python 3.x and compat)
  426. # Fixes "Resource Warning" in test/test_downloader_external.py
  427. # [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
  428. with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc:
  429. try:
  430. retval = proc.wait()
  431. except BaseException as e:
  432. # subprocess.run would send the SIGKILL signal to ffmpeg and the
  433. # mp4 file couldn't be played, but if we ask ffmpeg to quit it
  434. # produces a file that is playable (this is mostly useful for live
  435. # streams). Note that Windows is not affected and produces playable
  436. # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
  437. if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
  438. process_communicate_or_kill(proc, b'q')
  439. else:
  440. proc.kill()
  441. raise
  442. return retval
  443. class AVconvFD(FFmpegFD):
  444. pass
  445. _BY_NAME = dict(
  446. (klass.get_basename(), klass)
  447. for name, klass in globals().items()
  448. if name.endswith('FD') and name != 'ExternalFD'
  449. )
  450. def list_external_downloaders():
  451. return sorted(_BY_NAME.keys())
  452. def get_external_downloader(external_downloader):
  453. """ Given the name of the executable, see whether we support the given
  454. downloader . """
  455. # Drop .exe extension on Windows
  456. bn = os.path.splitext(os.path.basename(external_downloader))[0]
  457. return _BY_NAME[bn]