logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

crunchyroll.py (28745B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. import zlib
  6. from hashlib import sha1
  7. from math import pow, sqrt, floor
  8. from .common import InfoExtractor
  9. from .vrv import VRVIE
  10. from ..compat import (
  11. compat_b64decode,
  12. compat_etree_Element,
  13. compat_etree_fromstring,
  14. compat_str,
  15. compat_urllib_parse_urlencode,
  16. compat_urllib_request,
  17. compat_urlparse,
  18. )
  19. from ..utils import (
  20. ExtractorError,
  21. bytes_to_intlist,
  22. extract_attributes,
  23. float_or_none,
  24. intlist_to_bytes,
  25. int_or_none,
  26. lowercase_escape,
  27. merge_dicts,
  28. remove_end,
  29. sanitized_Request,
  30. urlencode_postdata,
  31. xpath_text,
  32. )
  33. from ..aes import (
  34. aes_cbc_decrypt,
  35. )
  36. class CrunchyrollBaseIE(InfoExtractor):
  37. _LOGIN_URL = 'https://www.crunchyroll.com/login'
  38. _LOGIN_FORM = 'login_form'
  39. _NETRC_MACHINE = 'crunchyroll'
  40. def _call_rpc_api(self, method, video_id, note=None, data=None):
  41. data = data or {}
  42. data['req'] = 'RpcApi' + method
  43. data = compat_urllib_parse_urlencode(data).encode('utf-8')
  44. return self._download_xml(
  45. 'https://www.crunchyroll.com/xml/',
  46. video_id, note, fatal=False, data=data, headers={
  47. 'Content-Type': 'application/x-www-form-urlencoded',
  48. })
  49. def _login(self):
  50. username, password = self._get_login_info()
  51. if username is None:
  52. return
  53. login_page = self._download_webpage(
  54. self._LOGIN_URL, None, 'Downloading login page')
  55. def is_logged(webpage):
  56. return 'href="/logout"' in webpage
  57. # Already logged in
  58. if is_logged(login_page):
  59. return
  60. login_form_str = self._search_regex(
  61. r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
  62. login_page, 'login form', group='form')
  63. post_url = extract_attributes(login_form_str).get('action')
  64. if not post_url:
  65. post_url = self._LOGIN_URL
  66. elif not post_url.startswith('http'):
  67. post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
  68. login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
  69. login_form.update({
  70. 'login_form[name]': username,
  71. 'login_form[password]': password,
  72. })
  73. response = self._download_webpage(
  74. post_url, None, 'Logging in', 'Wrong login info',
  75. data=urlencode_postdata(login_form),
  76. headers={'Content-Type': 'application/x-www-form-urlencoded'})
  77. # Successful login
  78. if is_logged(response):
  79. return
  80. error = self._html_search_regex(
  81. '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
  82. response, 'error message', default=None)
  83. if error:
  84. raise ExtractorError('Unable to login: %s' % error, expected=True)
  85. raise ExtractorError('Unable to log in')
  86. def _real_initialize(self):
  87. self._login()
  88. @staticmethod
  89. def _add_skip_wall(url):
  90. parsed_url = compat_urlparse.urlparse(url)
  91. qs = compat_urlparse.parse_qs(parsed_url.query)
  92. # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
  93. # > This content may be inappropriate for some people.
  94. # > Are you sure you want to continue?
  95. # since it's not disabled by default in crunchyroll account's settings.
  96. # See https://github.com/ytdl-org/youtube-dl/issues/7202.
  97. qs['skip_wall'] = ['1']
  98. return compat_urlparse.urlunparse(
  99. parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
  100. class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
  101. IE_NAME = 'crunchyroll'
  102. _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
  103. _TESTS = [{
  104. 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
  105. 'info_dict': {
  106. 'id': '645513',
  107. 'ext': 'mp4',
  108. 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
  109. 'description': 'md5:2d17137920c64f2f49981a7797d275ef',
  110. 'thumbnail': r're:^https?://.*\.jpg$',
  111. 'uploader': 'Yomiuri Telecasting Corporation (YTV)',
  112. 'upload_date': '20131013',
  113. 'url': 're:(?!.*&amp)',
  114. },
  115. 'params': {
  116. # rtmp
  117. 'skip_download': True,
  118. },
  119. 'skip': 'Video gone',
  120. }, {
  121. 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
  122. 'info_dict': {
  123. 'id': '589804',
  124. 'ext': 'flv',
  125. 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
  126. 'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
  127. 'thumbnail': r're:^https?://.*\.jpg$',
  128. 'uploader': 'Danny Choo Network',
  129. 'upload_date': '20120213',
  130. },
  131. 'params': {
  132. # rtmp
  133. 'skip_download': True,
  134. },
  135. 'skip': 'Video gone',
  136. }, {
  137. 'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
  138. 'info_dict': {
  139. 'id': '702409',
  140. 'ext': 'mp4',
  141. 'title': compat_str,
  142. 'description': compat_str,
  143. 'thumbnail': r're:^https?://.*\.jpg$',
  144. 'uploader': 'Re:Zero Partners',
  145. 'timestamp': 1462098900,
  146. 'upload_date': '20160501',
  147. },
  148. 'params': {
  149. # m3u8 download
  150. 'skip_download': True,
  151. },
  152. }, {
  153. 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
  154. 'info_dict': {
  155. 'id': '727589',
  156. 'ext': 'mp4',
  157. 'title': compat_str,
  158. 'description': compat_str,
  159. 'thumbnail': r're:^https?://.*\.jpg$',
  160. 'uploader': 'Kadokawa Pictures Inc.',
  161. 'timestamp': 1484130900,
  162. 'upload_date': '20170111',
  163. 'series': compat_str,
  164. 'season': "KONOSUBA -God's blessing on this wonderful world! 2",
  165. 'season_number': 2,
  166. 'episode': 'Give Me Deliverance From This Judicial Injustice!',
  167. 'episode_number': 1,
  168. },
  169. 'params': {
  170. # m3u8 download
  171. 'skip_download': True,
  172. },
  173. }, {
  174. 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
  175. 'only_matching': True,
  176. }, {
  177. # geo-restricted (US), 18+ maturity wall, non-premium available
  178. 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
  179. 'only_matching': True,
  180. }, {
  181. # A description with double quotes
  182. 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
  183. 'info_dict': {
  184. 'id': '535080',
  185. 'ext': 'mp4',
  186. 'title': compat_str,
  187. 'description': compat_str,
  188. 'uploader': 'Marvelous AQL Inc.',
  189. 'timestamp': 1255512600,
  190. 'upload_date': '20091014',
  191. },
  192. 'params': {
  193. # Just test metadata extraction
  194. 'skip_download': True,
  195. },
  196. }, {
  197. # make sure we can extract an uploader name that's not a link
  198. 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
  199. 'info_dict': {
  200. 'id': '606899',
  201. 'ext': 'mp4',
  202. 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
  203. 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
  204. 'uploader': 'Geneon Entertainment',
  205. 'upload_date': '20120717',
  206. },
  207. 'params': {
  208. # just test metadata extraction
  209. 'skip_download': True,
  210. },
  211. 'skip': 'Video gone',
  212. }, {
  213. # A video with a vastly different season name compared to the series name
  214. 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
  215. 'info_dict': {
  216. 'id': '590532',
  217. 'ext': 'mp4',
  218. 'title': compat_str,
  219. 'description': compat_str,
  220. 'uploader': 'TV TOKYO',
  221. 'timestamp': 1330956000,
  222. 'upload_date': '20120305',
  223. 'series': 'Nyarko-san: Another Crawling Chaos',
  224. 'season': 'Haiyoru! Nyaruani (ONA)',
  225. },
  226. 'params': {
  227. # Just test metadata extraction
  228. 'skip_download': True,
  229. },
  230. }, {
  231. 'url': 'http://www.crunchyroll.com/media-723735',
  232. 'only_matching': True,
  233. }, {
  234. 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
  235. 'only_matching': True,
  236. }]
  237. _FORMAT_IDS = {
  238. '360': ('60', '106'),
  239. '480': ('61', '106'),
  240. '720': ('62', '106'),
  241. '1080': ('80', '108'),
  242. }
  243. def _download_webpage(self, url_or_request, *args, **kwargs):
  244. request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
  245. else sanitized_Request(url_or_request))
  246. # Accept-Language must be set explicitly to accept any language to avoid issues
  247. # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
  248. # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
  249. # should be imposed or not (from what I can see it just takes the first language
  250. # ignoring the priority and requires it to correspond the IP). By the way this causes
  251. # Crunchyroll to not work in georestriction cases in some browsers that don't place
  252. # the locale lang first in header. However allowing any language seems to workaround the issue.
  253. request.add_header('Accept-Language', '*')
  254. return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
  255. def _decrypt_subtitles(self, data, iv, id):
  256. data = bytes_to_intlist(compat_b64decode(data))
  257. iv = bytes_to_intlist(compat_b64decode(iv))
  258. id = int(id)
  259. def obfuscate_key_aux(count, modulo, start):
  260. output = list(start)
  261. for _ in range(count):
  262. output.append(output[-1] + output[-2])
  263. # cut off start values
  264. output = output[2:]
  265. output = list(map(lambda x: x % modulo + 33, output))
  266. return output
  267. def obfuscate_key(key):
  268. num1 = int(floor(pow(2, 25) * sqrt(6.9)))
  269. num2 = (num1 ^ key) << 5
  270. num3 = key ^ num1
  271. num4 = num3 ^ (num3 >> 3) ^ num2
  272. prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
  273. shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
  274. # Extend 160 Bit hash to 256 Bit
  275. return shaHash + [0] * 12
  276. key = obfuscate_key(id)
  277. decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
  278. return zlib.decompress(decrypted_data)
  279. def _convert_subtitles_to_srt(self, sub_root):
  280. output = ''
  281. for i, event in enumerate(sub_root.findall('./events/event'), 1):
  282. start = event.attrib['start'].replace('.', ',')
  283. end = event.attrib['end'].replace('.', ',')
  284. text = event.attrib['text'].replace('\\N', '\n')
  285. output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
  286. return output
  287. def _convert_subtitles_to_ass(self, sub_root):
  288. output = ''
  289. def ass_bool(strvalue):
  290. assvalue = '0'
  291. if strvalue == '1':
  292. assvalue = '-1'
  293. return assvalue
  294. output = '[Script Info]\n'
  295. output += 'Title: %s\n' % sub_root.attrib['title']
  296. output += 'ScriptType: v4.00+\n'
  297. output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
  298. output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
  299. output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
  300. output += """
  301. [V4+ Styles]
  302. Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
  303. """
  304. for style in sub_root.findall('./styles/style'):
  305. output += 'Style: ' + style.attrib['name']
  306. output += ',' + style.attrib['font_name']
  307. output += ',' + style.attrib['font_size']
  308. output += ',' + style.attrib['primary_colour']
  309. output += ',' + style.attrib['secondary_colour']
  310. output += ',' + style.attrib['outline_colour']
  311. output += ',' + style.attrib['back_colour']
  312. output += ',' + ass_bool(style.attrib['bold'])
  313. output += ',' + ass_bool(style.attrib['italic'])
  314. output += ',' + ass_bool(style.attrib['underline'])
  315. output += ',' + ass_bool(style.attrib['strikeout'])
  316. output += ',' + style.attrib['scale_x']
  317. output += ',' + style.attrib['scale_y']
  318. output += ',' + style.attrib['spacing']
  319. output += ',' + style.attrib['angle']
  320. output += ',' + style.attrib['border_style']
  321. output += ',' + style.attrib['outline']
  322. output += ',' + style.attrib['shadow']
  323. output += ',' + style.attrib['alignment']
  324. output += ',' + style.attrib['margin_l']
  325. output += ',' + style.attrib['margin_r']
  326. output += ',' + style.attrib['margin_v']
  327. output += ',' + style.attrib['encoding']
  328. output += '\n'
  329. output += """
  330. [Events]
  331. Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
  332. """
  333. for event in sub_root.findall('./events/event'):
  334. output += 'Dialogue: 0'
  335. output += ',' + event.attrib['start']
  336. output += ',' + event.attrib['end']
  337. output += ',' + event.attrib['style']
  338. output += ',' + event.attrib['name']
  339. output += ',' + event.attrib['margin_l']
  340. output += ',' + event.attrib['margin_r']
  341. output += ',' + event.attrib['margin_v']
  342. output += ',' + event.attrib['effect']
  343. output += ',' + event.attrib['text']
  344. output += '\n'
  345. return output
  346. def _extract_subtitles(self, subtitle):
  347. sub_root = compat_etree_fromstring(subtitle)
  348. return [{
  349. 'ext': 'srt',
  350. 'data': self._convert_subtitles_to_srt(sub_root),
  351. }, {
  352. 'ext': 'ass',
  353. 'data': self._convert_subtitles_to_ass(sub_root),
  354. }]
  355. def _get_subtitles(self, video_id, webpage):
  356. subtitles = {}
  357. for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
  358. sub_doc = self._call_rpc_api(
  359. 'Subtitle_GetXml', video_id,
  360. 'Downloading subtitles for ' + sub_name, data={
  361. 'subtitle_script_id': sub_id,
  362. })
  363. if not isinstance(sub_doc, compat_etree_Element):
  364. continue
  365. sid = sub_doc.get('id')
  366. iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
  367. data = xpath_text(sub_doc, 'data', 'subtitle data')
  368. if not sid or not iv or not data:
  369. continue
  370. subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
  371. lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
  372. if not lang_code:
  373. continue
  374. subtitles[lang_code] = self._extract_subtitles(subtitle)
  375. return subtitles
  376. def _real_extract(self, url):
  377. mobj = re.match(self._VALID_URL, url)
  378. video_id = mobj.group('video_id')
  379. if mobj.group('prefix') == 'm':
  380. mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
  381. webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
  382. else:
  383. webpage_url = 'http://www.' + mobj.group('url')
  384. webpage = self._download_webpage(
  385. self._add_skip_wall(webpage_url), video_id,
  386. headers=self.geo_verification_headers())
  387. note_m = self._html_search_regex(
  388. r'<div class="showmedia-trailer-notice">(.+?)</div>',
  389. webpage, 'trailer-notice', default='')
  390. if note_m:
  391. raise ExtractorError(note_m)
  392. mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
  393. if mobj:
  394. msg = json.loads(mobj.group('msg'))
  395. if msg.get('type') == 'error':
  396. raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
  397. if 'To view this, please log in to verify you are 18 or older.' in webpage:
  398. self.raise_login_required()
  399. media = self._parse_json(self._search_regex(
  400. r'vilos\.config\.media\s*=\s*({.+?});',
  401. webpage, 'vilos media', default='{}'), video_id)
  402. media_metadata = media.get('metadata') or {}
  403. language = self._search_regex(
  404. r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
  405. webpage, 'language', default=None, group='lang')
  406. video_title = self._html_search_regex(
  407. (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
  408. r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
  409. webpage, 'video_title', default=None)
  410. if not video_title:
  411. video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
  412. video_title = re.sub(r' {2,}', ' ', video_title)
  413. video_description = (self._parse_json(self._html_search_regex(
  414. r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
  415. webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
  416. if video_description:
  417. video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
  418. video_uploader = self._html_search_regex(
  419. # try looking for both an uploader that's a link and one that's not
  420. [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
  421. webpage, 'video_uploader', default=False)
  422. formats = []
  423. for stream in media.get('streams', []):
  424. audio_lang = stream.get('audio_lang')
  425. hardsub_lang = stream.get('hardsub_lang')
  426. vrv_formats = self._extract_vrv_formats(
  427. stream.get('url'), video_id, stream.get('format'),
  428. audio_lang, hardsub_lang)
  429. for f in vrv_formats:
  430. if not hardsub_lang:
  431. f['preference'] = 1
  432. language_preference = 0
  433. if audio_lang == language:
  434. language_preference += 1
  435. if hardsub_lang == language:
  436. language_preference += 1
  437. if language_preference:
  438. f['language_preference'] = language_preference
  439. formats.extend(vrv_formats)
  440. if not formats:
  441. available_fmts = []
  442. for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
  443. attrs = extract_attributes(a)
  444. href = attrs.get('href')
  445. if href and '/freetrial' in href:
  446. continue
  447. available_fmts.append(fmt)
  448. if not available_fmts:
  449. for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
  450. available_fmts = re.findall(p, webpage)
  451. if available_fmts:
  452. break
  453. if not available_fmts:
  454. available_fmts = self._FORMAT_IDS.keys()
  455. video_encode_ids = []
  456. for fmt in available_fmts:
  457. stream_quality, stream_format = self._FORMAT_IDS[fmt]
  458. video_format = fmt + 'p'
  459. stream_infos = []
  460. streamdata = self._call_rpc_api(
  461. 'VideoPlayer_GetStandardConfig', video_id,
  462. 'Downloading media info for %s' % video_format, data={
  463. 'media_id': video_id,
  464. 'video_format': stream_format,
  465. 'video_quality': stream_quality,
  466. 'current_page': url,
  467. })
  468. if isinstance(streamdata, compat_etree_Element):
  469. stream_info = streamdata.find('./{default}preload/stream_info')
  470. if stream_info is not None:
  471. stream_infos.append(stream_info)
  472. stream_info = self._call_rpc_api(
  473. 'VideoEncode_GetStreamInfo', video_id,
  474. 'Downloading stream info for %s' % video_format, data={
  475. 'media_id': video_id,
  476. 'video_format': stream_format,
  477. 'video_encode_quality': stream_quality,
  478. })
  479. if isinstance(stream_info, compat_etree_Element):
  480. stream_infos.append(stream_info)
  481. for stream_info in stream_infos:
  482. video_encode_id = xpath_text(stream_info, './video_encode_id')
  483. if video_encode_id in video_encode_ids:
  484. continue
  485. video_encode_ids.append(video_encode_id)
  486. video_file = xpath_text(stream_info, './file')
  487. if not video_file:
  488. continue
  489. if video_file.startswith('http'):
  490. formats.extend(self._extract_m3u8_formats(
  491. video_file, video_id, 'mp4', entry_protocol='m3u8_native',
  492. m3u8_id='hls', fatal=False))
  493. continue
  494. video_url = xpath_text(stream_info, './host')
  495. if not video_url:
  496. continue
  497. metadata = stream_info.find('./metadata')
  498. format_info = {
  499. 'format': video_format,
  500. 'height': int_or_none(xpath_text(metadata, './height')),
  501. 'width': int_or_none(xpath_text(metadata, './width')),
  502. }
  503. if '.fplive.net/' in video_url:
  504. video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
  505. parsed_video_url = compat_urlparse.urlparse(video_url)
  506. direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
  507. netloc='v.lvlt.crcdn.net',
  508. path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
  509. if self._is_valid_url(direct_video_url, video_id, video_format):
  510. format_info.update({
  511. 'format_id': 'http-' + video_format,
  512. 'url': direct_video_url,
  513. })
  514. formats.append(format_info)
  515. continue
  516. format_info.update({
  517. 'format_id': 'rtmp-' + video_format,
  518. 'url': video_url,
  519. 'play_path': video_file,
  520. 'ext': 'flv',
  521. })
  522. formats.append(format_info)
  523. self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
  524. metadata = self._call_rpc_api(
  525. 'VideoPlayer_GetMediaMetadata', video_id,
  526. note='Downloading media info', data={
  527. 'media_id': video_id,
  528. })
  529. subtitles = {}
  530. for subtitle in media.get('subtitles', []):
  531. subtitle_url = subtitle.get('url')
  532. if not subtitle_url:
  533. continue
  534. subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
  535. 'url': subtitle_url,
  536. 'ext': subtitle.get('format', 'ass'),
  537. })
  538. if not subtitles:
  539. subtitles = self.extract_subtitles(video_id, webpage)
  540. # webpage provide more accurate data than series_title from XML
  541. series = self._html_search_regex(
  542. r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
  543. webpage, 'series', fatal=False)
  544. season = episode = episode_number = duration = thumbnail = None
  545. if isinstance(metadata, compat_etree_Element):
  546. season = xpath_text(metadata, 'series_title')
  547. episode = xpath_text(metadata, 'episode_title')
  548. episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
  549. duration = float_or_none(media_metadata.get('duration'), 1000)
  550. thumbnail = xpath_text(metadata, 'episode_image_url')
  551. if not episode:
  552. episode = media_metadata.get('title')
  553. if not episode_number:
  554. episode_number = int_or_none(media_metadata.get('episode_number'))
  555. if not thumbnail:
  556. thumbnail = media_metadata.get('thumbnail', {}).get('url')
  557. season_number = int_or_none(self._search_regex(
  558. r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
  559. webpage, 'season number', default=None))
  560. info = self._search_json_ld(webpage, video_id, default={})
  561. return merge_dicts({
  562. 'id': video_id,
  563. 'title': video_title,
  564. 'description': video_description,
  565. 'duration': duration,
  566. 'thumbnail': thumbnail,
  567. 'uploader': video_uploader,
  568. 'series': series,
  569. 'season': season,
  570. 'season_number': season_number,
  571. 'episode': episode,
  572. 'episode_number': episode_number,
  573. 'subtitles': subtitles,
  574. 'formats': formats,
  575. }, info)
  576. class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
  577. IE_NAME = 'crunchyroll:playlist'
  578. _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
  579. _TESTS = [{
  580. 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
  581. 'info_dict': {
  582. 'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
  583. 'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
  584. },
  585. 'playlist_count': 13,
  586. }, {
  587. # geo-restricted (US), 18+ maturity wall, non-premium available
  588. 'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
  589. 'info_dict': {
  590. 'id': 'cosplay-complex-ova',
  591. 'title': 'Cosplay Complex OVA'
  592. },
  593. 'playlist_count': 3,
  594. 'skip': 'Georestricted',
  595. }, {
  596. # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
  597. 'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
  598. 'only_matching': True,
  599. }]
  600. def _real_extract(self, url):
  601. show_id = self._match_id(url)
  602. webpage = self._download_webpage(
  603. self._add_skip_wall(url), show_id,
  604. headers=self.geo_verification_headers())
  605. title = self._html_search_meta('name', webpage, default=None)
  606. episode_paths = re.findall(
  607. r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
  608. webpage)
  609. entries = [
  610. self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
  611. for ep_id, ep in episode_paths
  612. ]
  613. entries.reverse()
  614. return {
  615. '_type': 'playlist',
  616. 'id': show_id,
  617. 'title': title,
  618. 'entries': entries,
  619. }