logo

oasis-root

Compiled tree of Oasis Linux, based on my own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

iqiyi.py (31008B)


  1. import hashlib
  2. import itertools
  3. import re
  4. import time
  5. import urllib.parse
  6. from .common import InfoExtractor
  7. from .openload import PhantomJSwrapper
  8. from ..utils import (
  9. ExtractorError,
  10. clean_html,
  11. decode_packed_codes,
  12. float_or_none,
  13. format_field,
  14. get_element_by_attribute,
  15. get_element_by_id,
  16. int_or_none,
  17. js_to_json,
  18. ohdave_rsa_encrypt,
  19. parse_age_limit,
  20. parse_duration,
  21. parse_iso8601,
  22. parse_resolution,
  23. qualities,
  24. remove_start,
  25. str_or_none,
  26. traverse_obj,
  27. urljoin,
  28. )
  29. def md5_text(text):
  30. return hashlib.md5(text.encode()).hexdigest()
  31. class IqiyiSDK:
  32. def __init__(self, target, ip, timestamp):
  33. self.target = target
  34. self.ip = ip
  35. self.timestamp = timestamp
  36. @staticmethod
  37. def split_sum(data):
  38. return str(sum(int(p, 16) for p in data))
  39. @staticmethod
  40. def digit_sum(num):
  41. if isinstance(num, int):
  42. num = str(num)
  43. return str(sum(map(int, num)))
  44. def even_odd(self):
  45. even = self.digit_sum(str(self.timestamp)[::2])
  46. odd = self.digit_sum(str(self.timestamp)[1::2])
  47. return even, odd
  48. def preprocess(self, chunksize):
  49. self.target = md5_text(self.target)
  50. chunks = []
  51. for i in range(32 // chunksize):
  52. chunks.append(self.target[chunksize * i:chunksize * (i + 1)])
  53. if 32 % chunksize:
  54. chunks.append(self.target[32 - 32 % chunksize:])
  55. return chunks, list(map(int, self.ip.split('.')))
  56. def mod(self, modulus):
  57. chunks, ip = self.preprocess(32)
  58. self.target = chunks[0] + ''.join(str(p % modulus) for p in ip)
  59. def split(self, chunksize):
  60. modulus_map = {
  61. 4: 256,
  62. 5: 10,
  63. 8: 100,
  64. }
  65. chunks, ip = self.preprocess(chunksize)
  66. ret = ''
  67. for i in range(len(chunks)):
  68. ip_part = str(ip[i] % modulus_map[chunksize]) if i < 4 else ''
  69. if chunksize == 8:
  70. ret += ip_part + chunks[i]
  71. else:
  72. ret += chunks[i] + ip_part
  73. self.target = ret
  74. def handle_input16(self):
  75. self.target = md5_text(self.target)
  76. self.target = self.split_sum(self.target[:16]) + self.target + self.split_sum(self.target[16:])
  77. def handle_input8(self):
  78. self.target = md5_text(self.target)
  79. ret = ''
  80. for i in range(4):
  81. part = self.target[8 * i:8 * (i + 1)]
  82. ret += self.split_sum(part) + part
  83. self.target = ret
  84. def handleSum(self):
  85. self.target = md5_text(self.target)
  86. self.target = self.split_sum(self.target) + self.target
  87. def date(self, scheme):
  88. self.target = md5_text(self.target)
  89. d = time.localtime(self.timestamp)
  90. strings = {
  91. 'y': str(d.tm_year),
  92. 'm': '%02d' % d.tm_mon,
  93. 'd': '%02d' % d.tm_mday,
  94. }
  95. self.target += ''.join(strings[c] for c in scheme)
  96. def split_time_even_odd(self):
  97. even, odd = self.even_odd()
  98. self.target = odd + md5_text(self.target) + even
  99. def split_time_odd_even(self):
  100. even, odd = self.even_odd()
  101. self.target = even + md5_text(self.target) + odd
  102. def split_ip_time_sum(self):
  103. chunks, ip = self.preprocess(32)
  104. self.target = str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp)
  105. def split_time_ip_sum(self):
  106. chunks, ip = self.preprocess(32)
  107. self.target = self.digit_sum(self.timestamp) + chunks[0] + str(sum(ip))
  108. class IqiyiSDKInterpreter:
  109. def __init__(self, sdk_code):
  110. self.sdk_code = sdk_code
  111. def run(self, target, ip, timestamp):
  112. self.sdk_code = decode_packed_codes(self.sdk_code)
  113. functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)
  114. sdk = IqiyiSDK(target, ip, timestamp)
  115. other_functions = {
  116. 'handleSum': sdk.handleSum,
  117. 'handleInput8': sdk.handle_input8,
  118. 'handleInput16': sdk.handle_input16,
  119. 'splitTimeEvenOdd': sdk.split_time_even_odd,
  120. 'splitTimeOddEven': sdk.split_time_odd_even,
  121. 'splitIpTimeSum': sdk.split_ip_time_sum,
  122. 'splitTimeIpSum': sdk.split_time_ip_sum,
  123. }
  124. for function in functions:
  125. if re.match(r'mod\d+', function):
  126. sdk.mod(int(function[3:]))
  127. elif re.match(r'date[ymd]{3}', function):
  128. sdk.date(function[4:])
  129. elif re.match(r'split\d+', function):
  130. sdk.split(int(function[5:]))
  131. elif function in other_functions:
  132. other_functions[function]()
  133. else:
  134. raise ExtractorError(f'Unknown function {function}')
  135. return sdk.target
class IqiyiIE(InfoExtractor):
    """Extractor for mainland-China iqiyi.com (and pps.tv) video pages."""
    IE_NAME = 'iqiyi'
    IE_DESC = '爱奇艺'
    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
    _NETRC_MACHINE = 'iqiyi'
    _TESTS = [{
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        # MD5 checksum differs on my machine and Travis CI
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'ext': 'mp4',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
        },
    }, {
        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
        'md5': 'b7dc800a4004b1b57749d9abae0472da',
        'info_dict': {
            'id': 'e3f585b550a280af23c98b6cb2be19fb',
            'ext': 'mp4',
            # This can be either Simplified Chinese or Traditional Chinese
            'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
        },
        'skip': 'Geo-restricted to China',
    }, {
        'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
        'only_matching': True,
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
        'only_matching': True,
    }, {
        'url': 'http://yule.iqiyi.com/pcb.html',
        'info_dict': {
            'id': '4a0af228fddb55ec96398a364248ed7f',
            'ext': 'mp4',
            'title': '第2017-04-21期 女艺人频遭极端粉丝骚扰',
        },
    }, {
        # VIP-only video. The first 2 parts (6 minutes) are available without login
        # MD5 sums omitted as values are different on Travis CI and my machine
        'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
        'info_dict': {
            'id': 'f3cf468b39dddb30d676f89a91200dc1',
            'ext': 'mp4',
            'title': '泰坦尼克号',
        },
        'skip': 'Geo-restricted to China',
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
        'info_dict': {
            'id': '202918101',
            'title': '灌篮高手 国语版',
        },
        'playlist_count': 101,
    }, {
        'url': 'http://www.pps.tv/w_19rrbav0ph.html',
        'only_matching': True,
    }]

    # Maps the site's "vd" stream id to a relative quality rank (higher = better).
    _FORMATS_MAP = {
        '96': 1,  # 216p, 240p
        '1': 2,  # 336p, 360p
        '2': 3,  # 480p, 504p
        '21': 4,  # 504p
        '4': 5,  # 720p
        '17': 5,  # 720p
        '5': 6,  # 1072p, 1080p
        '18': 7,  # 1080p
    }

    @staticmethod
    def _rsa_fun(data):
        """RSA-encrypt *data* (bytes) with the site's hard-coded public key."""
        # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
        N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
        e = 65537
        return ohdave_rsa_encrypt(data, e, N)

    def _perform_login(self, username, password):
        """Log in via the kylin token/validate endpoints.

        Returns True on success; on failure emits a warning and returns False.
        """
        data = self._download_json(
            'http://kylin.iqiyi.com/get_token', None,
            note='Get token for logging', errnote='Unable to get token for logging')
        sdk = data['sdk']
        timestamp = int(time.time())
        # The login query string itself is the value that gets signed below.
        target = (
            f'/apis/reglogin/login.action?lang=zh_TW&area_code=null&email={username}'
            f'&passwd={self._rsa_fun(password.encode())}&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1')
        interp = IqiyiSDKInterpreter(sdk)
        sign = interp.run(target, data['ip'], timestamp)
        validation_params = {
            'target': target,
            'server': 'BEA3AA1908656AABCCFF76582C4C6660',
            'token': data['token'],
            'bird_src': 'f8d91d57af224da7893dd397d52d811a',
            'sign': sign,
            'bird_t': timestamp,
        }
        validation_result = self._download_json(
            'http://kylin.iqiyi.com/validate?' + urllib.parse.urlencode(validation_params), None,
            note='Validate credentials', errnote='Unable to validate credentials')

        # Known site error codes mapped to human-readable messages.
        MSG_MAP = {
            'P00107': 'please login via the web interface and enter the CAPTCHA code',
            'P00117': 'bad username or password',
        }

        code = validation_result['code']
        if code != 'A00000':
            msg = MSG_MAP.get(code)
            if not msg:
                msg = f'error {code}'
                if validation_result.get('msg'):
                    msg += ': ' + validation_result['msg']
            self.report_warning('unable to log in: ' + msg)
            return False
        return True

    def get_raw_data(self, tvid, video_id):
        """Fetch the raw tmts stream-info JSON for a (tvid, video_id) pair."""
        tm = int(time.time() * 1000)

        key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
        # 'sc' is an MD5 request checksum over timestamp + key + tvid.
        sc = md5_text(str(tm) + key + tvid)

        params = {
            'tvid': tvid,
            'vid': video_id,
            'src': '76f90cbd92f94a2e925d83e8ccd22cb7',
            'sc': sc,
            't': tm,
        }

        return self._download_json(
            f'http://cache.m.iqiyi.com/jp/tmts/{tvid}/{video_id}/',
            video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
            query=params, headers=self.geo_verification_headers())

    def _extract_playlist(self, webpage):
        """Extract an album playlist from *webpage*; returns None if the page
        carries no playlist links."""
        PAGE_SIZE = 50

        links = re.findall(
            r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
            webpage)
        if not links:
            return

        album_id = self._search_regex(
            r'albumId\s*:\s*(\d+),', webpage, 'album ID')
        album_title = self._search_regex(
            r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)

        entries = list(map(self.url_result, links))

        # Start from 2 because links in the first page are already on webpage
        for page_num in itertools.count(2):
            pagelist_page = self._download_webpage(
                f'http://cache.video.qiyi.com/jp/avlist/{album_id}/{page_num}/{PAGE_SIZE}/',
                album_id,
                note=f'Download playlist page {page_num}',
                errnote=f'Failed to download playlist page {page_num}')
            pagelist = self._parse_json(
                remove_start(pagelist_page, 'var tvInfoJs='), album_id)
            vlist = pagelist['data']['vlist']
            for item in vlist:
                entries.append(self.url_result(item['vurl']))
            # A short page means we have reached the last one.
            if len(vlist) < PAGE_SIZE:
                break

        return self.playlist_result(entries, album_id, album_title)

    def _real_extract(self, url):
        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')

        # There's no simple way to determine whether an URL is a playlist or not
        # Sometimes there are playlist links in individual videos, so treat it
        # as a single video first
        tvid = self._search_regex(
            r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
        if tvid is None:
            playlist_result = self._extract_playlist(webpage)
            if playlist_result:
                return playlist_result
            raise ExtractorError('Can\'t find any video')

        video_id = self._search_regex(
            r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')

        formats = []
        # Retry up to 5 times: the tmts endpoint may not return usable
        # streams immediately.
        for _ in range(5):
            raw_data = self.get_raw_data(tvid, video_id)

            if raw_data['code'] != 'A00000':
                if raw_data['code'] == 'A00111':
                    self.raise_geo_restricted()
                raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])

            data = raw_data['data']

            for stream in data['vidl']:
                if 'm3utx' not in stream:
                    continue
                vd = str(stream['vd'])
                formats.append({
                    'url': stream['m3utx'],
                    'format_id': vd,
                    'ext': 'mp4',
                    'quality': self._FORMATS_MAP.get(vd, -1),
                    'protocol': 'm3u8_native',
                })

            if formats:
                break

            self._sleep(5, video_id)

        # Title lives in different elements depending on page layout.
        title = (get_element_by_id('widget-videotitle', webpage)
                 or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
                 or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
class IqIE(InfoExtractor):
    """Extractor for iq.com, the international version of iQiyi.

    Format URLs are signed client-side; the signing function is pulled out of
    the site's webpack player JS and executed through PhantomJS.
    """
    IE_NAME = 'iq.com'
    IE_DESC = 'International version of iQiyi'
    _VALID_URL = r'https?://(?:www\.)?iq\.com/play/(?:[\w%-]*-)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.iq.com/play/one-piece-episode-1000-1ma1i6ferf4',
        'md5': '2d7caf6eeca8a32b407094b33b757d39',
        'info_dict': {
            'ext': 'mp4',
            'id': '1ma1i6ferf4',
            'title': '航海王 第1000集',
            'description': 'Subtitle available on Sunday 4PM(GMT+8).',
            'duration': 1430,
            'timestamp': 1637488203,
            'upload_date': '20211121',
            'episode_number': 1000,
            'episode': 'Episode 1000',
            'series': 'One Piece',
            'age_limit': 13,
            'average_rating': float,
        },
        'params': {
            'format': '500',
        },
        'expected_warnings': ['format is restricted'],
    }, {
        # VIP-restricted video
        'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4',
        'only_matching': True,
    }]

    # Known bid (bitrate id) → human-readable quality tag; may be refreshed
    # at runtime by _update_bid_tags().
    _BID_TAGS = {
        '100': '240P',
        '200': '360P',
        '300': '480P',
        '500': '720P',
        '600': '1080P',
        '610': '1080P50',
        '700': '2K',
        '800': '4K',
    }
    # Subtitle language id → language code.
    _LID_TAGS = {
        '1': 'zh_CN',
        '2': 'zh_TW',
        '3': 'en',
        '4': 'ko',
        '5': 'ja',
        '18': 'th',
        '21': 'my',
        '23': 'vi',
        '24': 'id',
        '26': 'es',
        '27': 'pt',
        '28': 'ar',
    }

    # PhantomJS script template: builds one signed /dash query path per bid
    # using the site's cmd5x signing function (interpolated via %(...)s).
    _DASH_JS = '''
        console.log(page.evaluate(function() {
            var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s";
            var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s";
            var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime();
            var cmd5x_func = %(cmd5x_func)s; var cmd5x_exporter = {}; cmd5x_func({}, cmd5x_exporter, {}); var cmd5x = cmd5x_exporter.cmd5x;
            var authKey = cmd5x(cmd5x('') + tm + '' + tvid);
            var k_uid = Array.apply(null, Array(32)).map(function() {return Math.floor(Math.random() * 15).toString(16)}).join('');
            var dash_paths = {};
            bid_list.forEach(function(bid) {
                var query = {
                    'tvid': tvid,
                    'bid': bid,
                    'ds': 1,
                    'vid': vid,
                    'src': src,
                    'vt': 0,
                    'rs': 1,
                    'uid': uid,
                    'ori': 'pcw',
                    'ps': 1,
                    'k_uid': k_uid,
                    'pt': 0,
                    'd': 0,
                    's': '',
                    'lid': '',
                    'slid': 0,
                    'cf': '',
                    'ct': '',
                    'authKey': authKey,
                    'k_tag': 1,
                    'ost': 0,
                    'ppt': 0,
                    'dfp': dfp,
                    'prio': JSON.stringify({
                        'ff': 'f4v',
                        'code': 2
                    }),
                    'k_err_retries': 0,
                    'up': '',
                    'su': 2,
                    'applang': lang,
                    'sver': 2,
                    'X-USER-MODE': mode,
                    'qd_v': 2,
                    'tm': tm,
                    'qdy': 'a',
                    'qds': 0,
                    'k_ft1': '143486267424900',
                    'k_ft4': '1572868',
                    'k_ft7': '4',
                    'k_ft5': '1',
                    'bop': JSON.stringify({
                        'version': '10.0',
                        'dfp': dfp
                    }),
                };
                var enc_params = [];
                for (var prop in query) {
                    enc_params.push(encodeURIComponent(prop) + '=' + encodeURIComponent(query[prop]));
                }
                ut_list.forEach(function(ut) {
                    enc_params.push('ut=' + ut);
                })
                var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path);
                dash_paths[bid] = dash_path;
            });
            return JSON.stringify(dash_paths);
        }));
        saveAndExit();
    '''

    def _extract_vms_player_js(self, webpage, video_id):
        """Locate and download the webpack module containing the VMS player
        (identified by the literal 'vms request'); result is cached."""
        player_js_cache = self.cache.load('iq', 'player_js')
        if player_js_cache:
            return player_js_cache
        webpack_js_url = self._proto_relative_url(self._search_regex(
            r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
        webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
        # webpack_map: module index → content-hash used in the chunk filename.
        webpack_map = self._search_json(
            r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
            contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
            end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
        # replacement_map: module index → alternate module name, when present.
        replacement_map = self._search_json(
            r'["\']\s*\+\(\s*', webpack_js, 'replacement map', video_id,
            contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\w.-]+["\']\s*,?\s*)+}',
            end_pattern=r'\[\w+\]\|\|\w+\)\+["\']\.', transform_source=js_to_json,
            fatal=False) or {}

        for module_index in reversed(webpack_map):
            real_module = replacement_map.get(module_index) or module_index
            module_js = self._download_webpage(
                f'https://stc.iqiyipic.com/_next/static/chunks/{real_module}.{webpack_map[module_index]}.js',
                video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
            if 'vms request' in module_js:
                self.cache.store('iq', 'player_js', module_js)
                return module_js
        raise ExtractorError('Unable to extract player JS')

    def _extract_cmd5x_function(self, webpage, video_id):
        """Pull the cmd5x signing function source out of the player JS."""
        return self._search_regex(r',\s*(function\s*\([^\)]*\)\s*{\s*var _qda.+_qdc\(\)\s*})\s*,',
                                  self._extract_vms_player_js(webpage, video_id), 'signature function')

    def _update_bid_tags(self, webpage, video_id):
        """Refresh _BID_TAGS from the quality table embedded in the player JS
        (best effort; silently keeps the defaults on failure)."""
        extracted_bid_tags = self._search_json(
            r'function\s*\([^)]*\)\s*\{\s*"use strict";?\s*var \w\s*=\s*',
            self._extract_vms_player_js(webpage, video_id), 'video tags', video_id,
            contains_pattern=r'{\s*\d+\s*:\s*\{\s*nbid\s*:.+}\s*}',
            end_pattern=r'\s*,\s*\w\s*=\s*\{\s*getNewVd', fatal=False, transform_source=js_to_json)
        if not extracted_bid_tags:
            return
        self._BID_TAGS = {
            bid: traverse_obj(extracted_bid_tags, (bid, 'value'), expected_type=str, default=self._BID_TAGS.get(bid))
            for bid in extracted_bid_tags
        }

    def _get_cookie(self, name, default=None):
        """Return the value of cookie *name* for iq.com, or *default*."""
        cookie = self._get_cookies('https://iq.com/').get(name)
        return cookie.value if cookie else default

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        self._update_bid_tags(webpage, video_id)

        next_props = self._search_nextjs_data(webpage, video_id)['props']
        page_data = next_props['initialState']['play']
        video_info = page_data['curVideoInfo']

        # Logged-in user id comes URL-encoded inside the I00002 cookie.
        uid = traverse_obj(
            self._parse_json(
                self._get_cookie('I00002', '{}'), video_id, transform_source=urllib.parse.unquote, fatal=False),
            ('data', 'uid'), default=0)

        if uid:
            vip_data = self._download_json(
                'https://pcw-api.iq.com/api/vtype', video_id, note='Downloading VIP data', errnote='Unable to download VIP data', query={
                    'batch': 1,
                    'platformId': 3,
                    'modeCode': self._get_cookie('mod', 'intl'),
                    'langCode': self._get_cookie('lang', 'en_us'),
                    'deviceId': self._get_cookie('QC005', ''),
                }, fatal=False)
            ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none)
        else:
            ut_list = ['0']

        # bid 0 as an initial format checker
        dash_paths = self._parse_json(PhantomJSwrapper(self, timeout=120_000).get(
            url, note2='Executing signature code (this may take a couple minutes)',
            html='<!DOCTYPE html>', video_id=video_id, jscode=self._DASH_JS % {
                'tvid': video_info['tvId'],
                'vid': video_info['vid'],
                'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
                                    expected_type=str, default='04022001010011000000'),
                'uid': uid,
                'dfp': self._get_cookie('dfp', ''),
                'mode': self._get_cookie('mod', 'intl'),
                'lang': self._get_cookie('lang', 'en_us'),
                'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
                'ut_list': '[' + ','.join(ut_list) + ']',
                'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
            })[1].strip(), video_id)

        formats, subtitles = [], {}
        initial_format_data = self._download_json(
            urljoin('https://cache-video.iq.com', dash_paths['0']), video_id,
            note='Downloading initial video format info', errnote='Unable to download initial video format info')['data']

        preview_time = traverse_obj(
            initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
        if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
            self.report_warning('This preview video is limited{}'.format(format_field(preview_time, None, ' to %s seconds')))

        # TODO: Extract audio-only formats
        for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none)):
            dash_path = dash_paths.get(bid)
            if not dash_path:
                self.report_warning(f'Unknown format id: {bid}. It is currently not being extracted')
                continue
            format_data = traverse_obj(self._download_json(
                urljoin('https://cache-video.iq.com', dash_path), video_id,
                note=f'Downloading format data for {self._BID_TAGS[bid]}', errnote='Unable to download format data',
                fatal=False), 'data', expected_type=dict)

            video_format = traverse_obj(format_data, ('program', 'video', lambda _, v: str(v['bid']) == bid),
                                        expected_type=dict, get_all=False) or {}
            extracted_formats = []
            if video_format.get('m3u8Url'):
                extracted_formats.extend(self._extract_m3u8_formats(
                    urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['m3u8Url']),
                    'mp4', m3u8_id=bid, fatal=False))
            if video_format.get('mpdUrl'):
                # TODO: Properly extract mpd hostname
                extracted_formats.extend(self._extract_mpd_formats(
                    urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['mpdUrl']),
                    mpd_id=bid, fatal=False))
            if video_format.get('m3u8'):
                # Inline manifest: 'ff' tells us whether it is HLS ('ts')
                # or an embedded MPD payload ('m4s').
                ff = video_format.get('ff', 'ts')
                if ff == 'ts':
                    m3u8_formats, _ = self._parse_m3u8_formats_and_subtitles(
                        video_format['m3u8'], ext='mp4', m3u8_id=bid, fatal=False)
                    extracted_formats.extend(m3u8_formats)
                elif ff == 'm4s':
                    mpd_data = traverse_obj(
                        self._parse_json(video_format['m3u8'], video_id, fatal=False), ('payload', ..., 'data'), expected_type=str)
                    if not mpd_data:
                        continue
                    mpd_formats, _ = self._parse_mpd_formats_and_subtitles(
                        mpd_data, bid, format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'))
                    extracted_formats.extend(mpd_formats)
                else:
                    self.report_warning(f'{ff} formats are currently not supported')

            if not extracted_formats:
                if video_format.get('s'):
                    self.report_warning(f'{self._BID_TAGS[bid]} format is restricted')
                else:
                    self.report_warning(f'Unable to extract {self._BID_TAGS[bid]} format')
            for f in extracted_formats:
                f.update({
                    'quality': qualities(list(self._BID_TAGS.keys()))(bid),
                    'format_note': self._BID_TAGS[bid],
                    **parse_resolution(video_format.get('scrsz')),
                })
            formats.extend(extracted_formats)

        for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict):
            lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name'))
            subtitles.setdefault(lang, []).extend([{
                'ext': format_ext,
                'url': urljoin(initial_format_data.get('dstl', 'http://meta.video.iqiyi.com'), sub_format[format_key]),
            } for format_key, format_ext in [('srt', 'srt'), ('webvtt', 'vtt')] if sub_format.get(format_key)])

        # Album pages carry richer metadata than individual videos.
        extra_metadata = page_data.get('albumInfo') if video_info.get('albumId') and page_data.get('albumInfo') else video_info
        return {
            'id': video_id,
            'title': video_info['name'],
            'formats': formats,
            'subtitles': subtitles,
            'description': video_info.get('mergeDesc'),
            'duration': parse_duration(video_info.get('len')),
            'age_limit': parse_age_limit(video_info.get('rating')),
            'average_rating': traverse_obj(page_data, ('playScoreInfo', 'score'), expected_type=float_or_none),
            'timestamp': parse_iso8601(video_info.get('isoUploadDate')),
            'categories': traverse_obj(extra_metadata, ('videoTagMap', ..., ..., 'name'), expected_type=str),
            'cast': traverse_obj(extra_metadata, ('actorArr', ..., 'name'), expected_type=str),
            'episode_number': int_or_none(video_info.get('order')) or None,
            'series': video_info.get('albumName'),
        }
  619. class IqAlbumIE(InfoExtractor):
  620. IE_NAME = 'iq.com:album'
  621. _VALID_URL = r'https?://(?:www\.)?iq\.com/album/(?:[\w%-]*-)?(?P<id>\w+)'
  622. _TESTS = [{
  623. 'url': 'https://www.iq.com/album/one-piece-1999-1bk9icvr331',
  624. 'info_dict': {
  625. 'id': '1bk9icvr331',
  626. 'title': 'One Piece',
  627. 'description': 'Subtitle available on Sunday 4PM(GMT+8).',
  628. },
  629. 'playlist_mincount': 238,
  630. }, {
  631. # Movie/single video
  632. 'url': 'https://www.iq.com/album/九龙城寨-2021-22yjnij099k',
  633. 'info_dict': {
  634. 'ext': 'mp4',
  635. 'id': '22yjnij099k',
  636. 'title': '九龙城寨',
  637. 'description': 'md5:8a09f50b8ba0db4dc69bc7c844228044',
  638. 'duration': 5000,
  639. 'timestamp': 1641911371,
  640. 'upload_date': '20220111',
  641. 'series': '九龙城寨',
  642. 'cast': ['Shi Yan Neng', 'Yu Lang', 'Peter lv', 'Sun Zi Jun', 'Yang Xiao Bo'],
  643. 'age_limit': 13,
  644. 'average_rating': float,
  645. },
  646. 'expected_warnings': ['format is restricted'],
  647. }]
  648. def _entries(self, album_id_num, page_ranges, album_id=None, mode_code='intl', lang_code='en_us'):
  649. for page_range in page_ranges:
  650. page = self._download_json(
  651. f'https://pcw-api.iq.com/api/episodeListSource/{album_id_num}', album_id,
  652. note=f'Downloading video list episodes {page_range.get("msg", "")}',
  653. errnote='Unable to download video list', query={
  654. 'platformId': 3,
  655. 'modeCode': mode_code,
  656. 'langCode': lang_code,
  657. 'endOrder': page_range['to'],
  658. 'startOrder': page_range['from'],
  659. })
  660. for video in page['data']['epg']:
  661. yield self.url_result('https://www.iq.com/play/%s' % (video.get('playLocSuffix') or video['qipuIdStr']),
  662. IqIE.ie_key(), video.get('qipuIdStr'), video.get('name'))
  663. def _real_extract(self, url):
  664. album_id = self._match_id(url)
  665. webpage = self._download_webpage(url, album_id)
  666. next_data = self._search_nextjs_data(webpage, album_id)
  667. album_data = next_data['props']['initialState']['album']['videoAlbumInfo']
  668. if album_data.get('videoType') == 'singleVideo':
  669. return self.url_result(f'https://www.iq.com/play/{album_id}', IqIE.ie_key())
  670. return self.playlist_result(
  671. self._entries(album_data['albumId'], album_data['totalPageRange'], album_id,
  672. traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'modeCode')),
  673. traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'langCode'))),
  674. album_id, album_data.get('name'), album_data.get('desc'))