logo

oasis-root

Compiled tree of Oasis Linux, based on the author's own branch at <https://hacktivis.me/git/oasis/>. To clone: git clone https://anongit.hacktivis.me/git/oasis-root.git

vk.py (35929B)


  1. import collections
  2. import hashlib
  3. import re
  4. from .common import InfoExtractor
  5. from .dailymotion import DailymotionIE
  6. from .odnoklassniki import OdnoklassnikiIE
  7. from .pladform import PladformIE
  8. from .sibnet import SibnetEmbedIE
  9. from .vimeo import VimeoIE
  10. from .youtube import YoutubeIE
  11. from ..utils import (
  12. ExtractorError,
  13. UserNotLive,
  14. clean_html,
  15. get_element_by_class,
  16. get_element_html_by_id,
  17. int_or_none,
  18. join_nonempty,
  19. parse_qs,
  20. parse_resolution,
  21. str_or_none,
  22. str_to_int,
  23. try_call,
  24. unescapeHTML,
  25. unified_timestamp,
  26. update_url_query,
  27. url_or_none,
  28. urlencode_postdata,
  29. urljoin,
  30. )
  31. from ..utils.traversal import require, traverse_obj
  32. class VKBaseIE(InfoExtractor):
  33. _NETRC_MACHINE = 'vk'
  34. def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
  35. response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
  36. challenge_url, cookie = response[1].url if response else '', None
  37. if challenge_url.startswith('https://vk.com/429.html?'):
  38. cookie = self._get_cookies(challenge_url).get('hash429')
  39. if not cookie:
  40. return response
  41. hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
  42. self._request_webpage(
  43. update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
  44. note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
  45. return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=True, **kwargs)
  46. def _perform_login(self, username, password):
  47. login_page, url_handle = self._download_webpage_handle(
  48. 'https://vk.com', None, 'Downloading login page')
  49. login_form = self._hidden_inputs(login_page)
  50. login_form.update({
  51. 'email': username.encode('cp1251'),
  52. 'pass': password.encode('cp1251'),
  53. })
  54. # vk serves two same remixlhk cookies in Set-Cookie header and expects
  55. # first one to be actually set
  56. self._apply_first_set_cookie_header(url_handle, 'remixlhk')
  57. login_page = self._download_webpage(
  58. 'https://vk.com/login', None,
  59. note='Logging in',
  60. data=urlencode_postdata(login_form))
  61. if re.search(r'onLoginFailed', login_page):
  62. raise ExtractorError(
  63. 'Unable to login, incorrect username and/or password', expected=True)
  64. def _download_payload(self, path, video_id, data, fatal=True):
  65. endpoint = f'https://vk.com/{path}.php'
  66. data['al'] = 1
  67. code, payload = self._download_json(
  68. endpoint, video_id, data=urlencode_postdata(data), fatal=fatal,
  69. headers={
  70. 'Referer': endpoint,
  71. 'X-Requested-With': 'XMLHttpRequest',
  72. })['payload']
  73. if code == '3':
  74. self.raise_login_required()
  75. elif code == '8':
  76. raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
  77. return payload
  78. class VKIE(VKBaseIE):
  79. IE_NAME = 'vk'
  80. IE_DESC = 'VK'
  81. _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk(?:(?:video)?\.ru|\.com)/video_ext\.php.+?)\1']
  82. _VALID_URL = r'''(?x)
  83. https?://
  84. (?:
  85. (?:
  86. (?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/video_|
  87. (?:www\.)?daxab\.com/
  88. )
  89. ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
  90. (?:
  91. (?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/(?:.+?\?.*?z=)?(?:video|clip)|
  92. (?:www\.)?daxab\.com/embed/
  93. )
  94. (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
  95. )
  96. '''
  97. _TESTS = [
  98. {
  99. 'url': 'https://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
  100. 'info_dict': {
  101. 'id': '-77521_162222515',
  102. 'ext': 'mp4',
  103. 'title': 'ProtivoGunz - Хуёвая песня',
  104. 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
  105. 'uploader_id': '39545378',
  106. 'duration': 195,
  107. 'timestamp': 1329049880,
  108. 'upload_date': '20120212',
  109. 'comment_count': int,
  110. 'like_count': int,
  111. 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
  112. },
  113. 'params': {'skip_download': 'm3u8'},
  114. },
  115. {
  116. 'url': 'https://vk.com/video205387401_165548505',
  117. 'info_dict': {
  118. 'id': '205387401_165548505',
  119. 'ext': 'mp4',
  120. 'title': 'No name',
  121. 'uploader': 'Tom Cruise',
  122. 'uploader_id': '205387401',
  123. 'duration': 9,
  124. 'timestamp': 1374364108,
  125. 'upload_date': '20130720',
  126. 'comment_count': int,
  127. 'like_count': int,
  128. 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
  129. },
  130. },
  131. {
  132. 'note': 'Embedded video',
  133. 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
  134. 'info_dict': {
  135. 'id': '-77521_162222515',
  136. 'ext': 'mp4',
  137. 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
  138. 'title': 'ProtivoGunz - Хуёвая песня',
  139. 'duration': 195,
  140. 'upload_date': '20120212',
  141. 'timestamp': 1329049880,
  142. 'uploader_id': '39545378',
  143. 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
  144. },
  145. 'params': {'skip_download': 'm3u8'},
  146. },
  147. {
  148. 'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
  149. 'info_dict': {
  150. 'id': '-93049196_456239755',
  151. 'ext': 'mp4',
  152. 'title': '8 серия (озвучка)',
  153. 'duration': 8383,
  154. 'comment_count': int,
  155. 'uploader': 'Dizi2021',
  156. 'like_count': int,
  157. 'timestamp': 1640162189,
  158. 'upload_date': '20211222',
  159. 'uploader_id': '-93049196',
  160. 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
  161. },
  162. },
  163. {
  164. 'note': 'youtube embed',
  165. 'url': 'https://vk.com/video276849682_170681728',
  166. 'info_dict': {
  167. 'id': 'V3K4mi0SYkc',
  168. 'ext': 'mp4',
  169. 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
  170. 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
  171. 'duration': 179,
  172. 'upload_date': '20130117',
  173. 'uploader': "Children's Joy Foundation Inc.",
  174. 'uploader_id': '@CJFIofficial',
  175. 'view_count': int,
  176. 'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
  177. 'availability': 'public',
  178. 'like_count': int,
  179. 'live_status': 'not_live',
  180. 'playable_in_embed': True,
  181. 'channel': 'Children\'s Joy Foundation Inc.',
  182. 'uploader_url': 'https://www.youtube.com/@CJFIofficial',
  183. 'thumbnail': r're:https?://.+\.jpg$',
  184. 'tags': 'count:27',
  185. 'start_time': 0.0,
  186. 'categories': ['Nonprofits & Activism'],
  187. 'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
  188. 'channel_follower_count': int,
  189. 'age_limit': 0,
  190. 'timestamp': 1358394935,
  191. },
  192. },
  193. {
  194. 'note': 'dailymotion embed',
  195. 'url': 'https://vk.com/video-95168827_456239103?list=cca524a0f0d5557e16',
  196. 'info_dict': {
  197. 'id': 'x8gfli0',
  198. 'ext': 'mp4',
  199. 'title': 'md5:45410f60ccd4b2760da98cb5fc777d70',
  200. 'description': 'md5:2e71c5c9413735cfa06cf1a166f16c84',
  201. 'uploader': 'Movies and cinema.',
  202. 'upload_date': '20221218',
  203. 'uploader_id': 'x1jdavv',
  204. 'timestamp': 1671387617,
  205. 'age_limit': 0,
  206. 'duration': 2918,
  207. 'like_count': int,
  208. 'view_count': int,
  209. 'thumbnail': r're:https?://.+x1080$',
  210. 'tags': list,
  211. },
  212. 'skip': 'This video has been deleted and is no longer available.',
  213. },
  214. {
  215. 'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
  216. 'info_dict': {
  217. 'id': '-74006511_456247211',
  218. 'ext': 'mp4',
  219. 'comment_count': int,
  220. 'duration': 9,
  221. 'like_count': int,
  222. 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
  223. 'timestamp': 1664995597,
  224. 'title': 'Clip by @madempress',
  225. 'upload_date': '20221005',
  226. 'uploader': 'Шальная Императрица',
  227. 'uploader_id': '-74006511',
  228. },
  229. },
  230. {
  231. # video key is extra_data not url\d+
  232. 'url': 'https://vk.com/video-110305615_171782105',
  233. 'md5': 'e13fcda136f99764872e739d13fac1d1',
  234. 'info_dict': {
  235. 'id': '-110305615_171782105',
  236. 'ext': 'mp4',
  237. 'title': 'S-Dance, репетиции к The way show',
  238. 'uploader': 'THE WAY SHOW | 17 апреля',
  239. 'uploader_id': '-110305615',
  240. 'timestamp': 1454859345,
  241. 'upload_date': '20160207',
  242. },
  243. 'skip': 'Removed',
  244. },
  245. {
  246. 'note': 'finished live stream, postlive_mp4',
  247. 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
  248. 'info_dict': {
  249. 'id': '-387766_456242764',
  250. 'ext': 'mp4',
  251. 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
  252. 'uploader': 'Игромания',
  253. 'duration': 5239,
  254. 'upload_date': '20160929',
  255. 'uploader_id': '-387766',
  256. 'timestamp': 1475137527,
  257. 'thumbnail': r're:https?://.+\.jpg$',
  258. 'comment_count': int,
  259. 'like_count': int,
  260. },
  261. 'params': {
  262. 'skip_download': True,
  263. },
  264. 'skip': 'No formats found',
  265. },
  266. {
  267. # live stream, hls and rtmp links, most likely already finished live
  268. # stream by the time you are reading this comment
  269. 'url': 'https://vk.com/video-140332_456239111',
  270. 'only_matching': True,
  271. },
  272. {
  273. # removed video, just testing that we match the pattern
  274. 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
  275. 'only_matching': True,
  276. },
  277. {
  278. # age restricted video, requires vk account credentials
  279. 'url': 'https://vk.com/video205387401_164765225',
  280. 'only_matching': True,
  281. },
  282. {
  283. # pladform embed
  284. 'url': 'https://vk.com/video-76116461_171554880',
  285. 'only_matching': True,
  286. },
  287. {
  288. 'url': 'http://new.vk.com/video205387401_165548505',
  289. 'only_matching': True,
  290. },
  291. {
  292. # This video is no longer available, because its author has been blocked.
  293. 'url': 'https://vk.com/video-10639516_456240611',
  294. 'only_matching': True,
  295. },
  296. {
  297. # The video is not available in your region.
  298. 'url': 'https://vk.com/video-51812607_171445436',
  299. 'only_matching': True,
  300. },
  301. {
  302. 'url': 'https://vk.com/clip30014565_456240946',
  303. 'only_matching': True,
  304. },
  305. {
  306. 'url': 'https://vkvideo.ru/video-127553155_456242961',
  307. 'only_matching': True,
  308. },
  309. {
  310. 'url': 'https://vk.ru/video-220754053_456242564',
  311. 'only_matching': True,
  312. },
  313. ]
  314. def _real_extract(self, url):
  315. mobj = self._match_valid_url(url)
  316. video_id = mobj.group('videoid')
  317. mv_data = {}
  318. if video_id:
  319. data = {
  320. 'act': 'show',
  321. 'video': video_id,
  322. }
  323. # Some videos (removed?) can only be downloaded with list id specified
  324. list_id = mobj.group('list_id')
  325. if list_id:
  326. data['list'] = list_id
  327. payload = self._download_payload('al_video', video_id, data)
  328. info_page = payload[1]
  329. opts = payload[-1]
  330. mv_data = opts.get('mvData') or {}
  331. player = opts.get('player') or {}
  332. else:
  333. video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id'))
  334. info_page = self._download_webpage(
  335. 'https://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
  336. error_message = self._html_search_regex(
  337. [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
  338. r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
  339. info_page, 'error message', default=None)
  340. if error_message:
  341. raise ExtractorError(error_message, expected=True)
  342. if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
  343. raise ExtractorError(
  344. 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
  345. expected=True)
  346. ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
  347. ERRORS = {
  348. r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
  349. ERROR_COPYRIGHT,
  350. r'>The video .*? was removed from public access by request of the copyright holder.<':
  351. ERROR_COPYRIGHT,
  352. r'<!>Please log in or <':
  353. 'Video %s is only available for registered users, '
  354. 'use --username and --password options to provide account credentials.',
  355. r'<!>Unknown error':
  356. 'Video %s does not exist.',
  357. r'<!>Видео временно недоступно':
  358. 'Video %s is temporarily unavailable.',
  359. r'<!>Access denied':
  360. 'Access denied to video %s.',
  361. r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
  362. 'Video %s is no longer available, because its author has been blocked.',
  363. r'<!>This video is no longer available, because its author has been blocked.':
  364. 'Video %s is no longer available, because its author has been blocked.',
  365. r'<!>This video is no longer available, because it has been deleted.':
  366. 'Video %s is no longer available, because it has been deleted.',
  367. r'<!>The video .+? is not available in your region.':
  368. 'Video %s is not available in your region.',
  369. }
  370. for error_re, error_msg in ERRORS.items():
  371. if re.search(error_re, info_page):
  372. raise ExtractorError(error_msg % video_id, expected=True)
  373. player = self._parse_json(self._search_regex(
  374. r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
  375. info_page, 'player params'), video_id)
  376. youtube_url = YoutubeIE._extract_url(info_page)
  377. if youtube_url:
  378. return self.url_result(youtube_url, YoutubeIE.ie_key())
  379. vimeo_url = VimeoIE._extract_url(url, info_page)
  380. if vimeo_url is not None:
  381. return self.url_result(vimeo_url, VimeoIE.ie_key())
  382. pladform_url = PladformIE._extract_url(info_page)
  383. if pladform_url:
  384. return self.url_result(pladform_url, PladformIE.ie_key())
  385. m_rutube = re.search(
  386. r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
  387. if m_rutube is not None:
  388. rutube_url = self._proto_relative_url(
  389. m_rutube.group(1).replace('\\', ''))
  390. return self.url_result(rutube_url)
  391. dailymotion_url = next(DailymotionIE._extract_embed_urls(url, info_page), None)
  392. if dailymotion_url:
  393. return self.url_result(dailymotion_url, DailymotionIE.ie_key())
  394. odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
  395. if odnoklassniki_url:
  396. return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
  397. sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None)
  398. if sibnet_url:
  399. return self.url_result(sibnet_url)
  400. m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
  401. if m_opts:
  402. m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
  403. if m_opts_url:
  404. opts_url = m_opts_url.group(1)
  405. if opts_url.startswith('//'):
  406. opts_url = 'https:' + opts_url
  407. return self.url_result(opts_url)
  408. data = player['params'][0]
  409. title = unescapeHTML(data['md_title'])
  410. # 2 = live
  411. # 3 = post live (finished live)
  412. is_live = data.get('live') == 2
  413. timestamp = unified_timestamp(self._html_search_regex(
  414. r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
  415. 'upload date', default=None)) or int_or_none(data.get('date'))
  416. view_count = str_to_int(self._search_regex(
  417. r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
  418. info_page, 'view count', default=None))
  419. formats = []
  420. subtitles = {}
  421. for format_id, format_url in data.items():
  422. format_url = url_or_none(format_url)
  423. if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
  424. continue
  425. if (format_id.startswith(('url', 'cache'))
  426. or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
  427. height = int_or_none(self._search_regex(
  428. r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
  429. formats.append({
  430. 'format_id': format_id,
  431. 'url': format_url,
  432. 'ext': 'mp4',
  433. 'source_preference': 1,
  434. 'height': height,
  435. })
  436. elif format_id.startswith('hls') and format_id != 'hls_live_playback':
  437. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  438. format_url, video_id, 'mp4', 'm3u8_native',
  439. m3u8_id=format_id, fatal=False, live=is_live)
  440. formats.extend(fmts)
  441. self._merge_subtitles(subs, target=subtitles)
  442. elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'):
  443. fmts, subs = self._extract_mpd_formats_and_subtitles(
  444. format_url, video_id, mpd_id=format_id, fatal=False)
  445. formats.extend(fmts)
  446. self._merge_subtitles(subs, target=subtitles)
  447. elif format_id == 'rtmp':
  448. formats.append({
  449. 'format_id': format_id,
  450. 'url': format_url,
  451. 'ext': 'flv',
  452. })
  453. for sub in data.get('subs') or {}:
  454. subtitles.setdefault(sub.get('lang', 'en'), []).append({
  455. 'ext': sub.get('title', '.srt').split('.')[-1],
  456. 'url': url_or_none(sub.get('url')),
  457. })
  458. return {
  459. 'id': video_id,
  460. 'formats': formats,
  461. 'title': title,
  462. 'thumbnail': data.get('jpg'),
  463. 'uploader': data.get('md_author'),
  464. 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
  465. 'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
  466. 'timestamp': timestamp,
  467. 'view_count': view_count,
  468. 'like_count': int_or_none(mv_data.get('likes')),
  469. 'comment_count': int_or_none(mv_data.get('commcount')),
  470. 'is_live': is_live,
  471. 'subtitles': subtitles,
  472. '_format_sort_fields': ('res', 'source'),
  473. }
  474. class VKUserVideosIE(VKBaseIE):
  475. IE_NAME = 'vk:uservideos'
  476. IE_DESC = "VK - User's Videos"
  477. _BASE_URL_RE = r'https?://(?:(?:m|new)\.)?vk(?:video\.ru|\.com/video)'
  478. _VALID_URL = [
  479. rf'{_BASE_URL_RE}/playlist/(?P<id>-?\d+_\d+)',
  480. rf'{_BASE_URL_RE}/(?P<id>@[^/?#]+)(?:/all)?/?(?!\?.*\bz=video)(?:[?#]|$)',
  481. ]
  482. _TESTS = [{
  483. 'url': 'https://vk.com/video/@mobidevices',
  484. 'info_dict': {
  485. 'id': '-17892518_all',
  486. },
  487. 'playlist_mincount': 1355,
  488. }, {
  489. 'url': 'https://vk.com/video/@mobidevices?section=uploaded',
  490. 'info_dict': {
  491. 'id': '-17892518_uploaded',
  492. },
  493. 'playlist_mincount': 182,
  494. }, {
  495. 'url': 'https://vkvideo.ru/playlist/-204353299_426',
  496. 'info_dict': {
  497. 'id': '-204353299_playlist_426',
  498. },
  499. 'playlist_mincount': 33,
  500. }, {
  501. 'url': 'https://vk.com/video/@gorkyfilmstudio/all',
  502. 'only_matching': True,
  503. }, {
  504. 'url': 'https://vkvideo.ru/@mobidevices',
  505. 'only_matching': True,
  506. }, {
  507. 'url': 'https://vk.com/video/playlist/-174476437_2',
  508. 'only_matching': True,
  509. }]
  510. _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
  511. def _entries(self, page_id, section):
  512. video_list_json = self._download_payload('al_video', page_id, {
  513. 'act': 'load_videos_silent',
  514. 'offset': 0,
  515. 'oid': page_id,
  516. 'section': section,
  517. })[0][section]
  518. count = video_list_json['count']
  519. total = video_list_json['total']
  520. video_list = video_list_json['list']
  521. while True:
  522. for video in video_list:
  523. v = self._VIDEO._make(video[:2])
  524. video_id = '%d_%d' % (v.owner_id, v.id)
  525. yield self.url_result(
  526. 'https://vk.com/video' + video_id, VKIE.ie_key(), video_id)
  527. if count >= total:
  528. break
  529. video_list_json = self._download_payload('al_video', page_id, {
  530. 'act': 'load_videos_silent',
  531. 'offset': count,
  532. 'oid': page_id,
  533. 'section': section,
  534. })[0][section]
  535. new_count = video_list_json['count']
  536. if not new_count:
  537. self.to_screen(f'{page_id}: Skipping {total - count} unavailable videos')
  538. break
  539. count += new_count
  540. video_list = video_list_json['list']
  541. def _real_extract(self, url):
  542. u_id = self._match_id(url)
  543. webpage = self._download_webpage(url, u_id)
  544. if u_id.startswith('@'):
  545. page_id = traverse_obj(
  546. self._search_json(r'\bvar newCur\s*=', webpage, 'cursor data', u_id),
  547. ('oid', {int}, {str_or_none}, {require('page id')}))
  548. section = traverse_obj(parse_qs(url), ('section', 0)) or 'all'
  549. else:
  550. page_id, _, section = u_id.partition('_')
  551. section = f'playlist_{section}'
  552. playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
  553. return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title)
  554. class VKWallPostIE(VKBaseIE):
  555. IE_NAME = 'vk:wallpost'
  556. _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
  557. _TESTS = [{
  558. # public page URL, audio playlist
  559. 'url': 'https://vk.com/bs.official?w=wall-23538238_35',
  560. 'info_dict': {
  561. 'id': '-23538238_35',
  562. 'title': 'Black Shadow - Wall post -23538238_35',
  563. 'description': 'md5:190c78f905a53e0de793d83933c6e67f',
  564. },
  565. 'playlist': [{
  566. 'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
  567. 'info_dict': {
  568. 'id': '135220665_111806521',
  569. 'ext': 'm4a',
  570. 'title': 'Black Shadow - Слепое Верование',
  571. 'duration': 370,
  572. 'uploader': 'Black Shadow',
  573. 'artist': 'Black Shadow',
  574. 'track': 'Слепое Верование',
  575. },
  576. }, {
  577. 'md5': '4cc7e804579122b17ea95af7834c9233',
  578. 'info_dict': {
  579. 'id': '135220665_111802303',
  580. 'ext': 'm4a',
  581. 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
  582. 'duration': 423,
  583. 'uploader': 'Black Shadow',
  584. 'artist': 'Black Shadow',
  585. 'track': 'Война - Негасимое Бездны Пламя!',
  586. },
  587. }],
  588. 'params': {
  589. 'skip_download': True,
  590. },
  591. }, {
  592. # single YouTube embed with irrelevant reaction videos
  593. 'url': 'https://vk.com/wall-32370614_7173954',
  594. 'info_dict': {
  595. 'id': '-32370614_7173954',
  596. 'title': 'md5:9f93c405bbc00061d34007d78c75e3bc',
  597. 'description': 'md5:953b811f26fa9f21ee5856e2ea8e68fc',
  598. },
  599. 'playlist_count': 1,
  600. }, {
  601. # wall page URL
  602. 'url': 'https://vk.com/wall-23538238_35',
  603. 'only_matching': True,
  604. }, {
  605. # mobile wall page URL
  606. 'url': 'https://m.vk.com/wall-23538238_35',
  607. 'only_matching': True,
  608. }]
  609. _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
  610. _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
  611. def _decode(self, enc):
  612. dec = ''
  613. e = n = 0
  614. for c in enc:
  615. r = self._BASE64_CHARS.index(c)
  616. cond = n % 4
  617. e = 64 * e + r if cond else r
  618. n += 1
  619. if cond:
  620. dec += chr(255 & e >> (-2 * n & 6))
  621. return dec
  622. def _unmask_url(self, mask_url, vk_id):
  623. if 'audio_api_unavailable' in mask_url:
  624. extra = mask_url.split('?extra=')[1].split('#')
  625. func, base = self._decode(extra[1]).split(chr(11))
  626. mask_url = list(self._decode(extra[0]))
  627. url_len = len(mask_url)
  628. indexes = [None] * url_len
  629. index = int(base) ^ vk_id
  630. for n in range(url_len - 1, -1, -1):
  631. index = (url_len * (n + 1) ^ index + n) % url_len
  632. indexes[n] = index
  633. for n in range(1, url_len):
  634. c = mask_url[n]
  635. index = indexes[url_len - 1 - n]
  636. mask_url[n] = mask_url[index]
  637. mask_url[index] = c
  638. mask_url = ''.join(mask_url)
  639. return mask_url
  640. def _real_extract(self, url):
  641. post_id = self._match_id(url)
  642. webpage = self._download_payload('wkview', post_id, {
  643. 'act': 'show',
  644. 'w': 'wall' + post_id,
  645. })[1]
  646. uploader = clean_html(get_element_by_class('PostHeaderTitle__authorName', webpage))
  647. entries = []
  648. for audio in re.findall(r'data-audio="([^"]+)', webpage):
  649. audio = self._parse_json(unescapeHTML(audio), post_id)
  650. if not audio['url']:
  651. continue
  652. title = unescapeHTML(audio.get('title'))
  653. artist = unescapeHTML(audio.get('artist'))
  654. entries.append({
  655. 'id': f'{audio["owner_id"]}_{audio["id"]}',
  656. 'title': join_nonempty(artist, title, delim=' - '),
  657. 'thumbnails': try_call(lambda: [{'url': u} for u in audio['coverUrl'].split(',')]),
  658. 'duration': int_or_none(audio.get('duration')),
  659. 'uploader': uploader,
  660. 'artist': artist,
  661. 'track': title,
  662. 'formats': [{
  663. 'url': audio['url'],
  664. 'ext': 'm4a',
  665. 'vcodec': 'none',
  666. 'acodec': 'mp3',
  667. 'container': 'm4a_dash',
  668. }],
  669. })
  670. entries.extend(self.url_result(urljoin(url, entry), VKIE) for entry in set(re.findall(
  671. r'<a[^>]+href=(?:["\'])(/video(?:-?[\d_]+)[^"\']*)',
  672. get_element_html_by_id('wl_post_body', webpage))))
  673. return self.playlist_result(
  674. entries, post_id, join_nonempty(uploader, f'Wall post {post_id}', delim=' - '),
  675. clean_html(get_element_by_class('wall_post_text', webpage)))
  676. class VKPlayBaseIE(InfoExtractor):
  677. _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vk(?:play|video)\.ru)/'
  678. _RESOLUTIONS = {
  679. 'tiny': '256x144',
  680. 'lowest': '426x240',
  681. 'low': '640x360',
  682. 'medium': '852x480',
  683. 'high': '1280x720',
  684. 'full_hd': '1920x1080',
  685. 'quad_hd': '2560x1440',
  686. }
  687. def _extract_from_initial_state(self, url, video_id, path):
  688. webpage = self._download_webpage(url, video_id)
  689. video_info = traverse_obj(self._search_json(
  690. r'<script[^>]+\bid="initial-state"[^>]*>', webpage, 'initial state', video_id),
  691. path, expected_type=dict)
  692. if not video_info:
  693. raise ExtractorError('Unable to extract video info from html inline initial state')
  694. return video_info
  695. def _extract_formats(self, stream_info, video_id):
  696. formats = []
  697. for stream in traverse_obj(stream_info, (
  698. 'data', 0, 'playerUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
  699. url = stream['url']
  700. format_id = str_or_none(stream['type'])
  701. if format_id in ('hls', 'live_hls', 'live_playback_hls') or '.m3u8' in url:
  702. formats.extend(self._extract_m3u8_formats(url, video_id, m3u8_id=format_id, fatal=False))
  703. elif format_id == 'dash':
  704. formats.extend(self._extract_mpd_formats(url, video_id, mpd_id=format_id, fatal=False))
  705. elif format_id in ('live_dash', 'live_playback_dash'):
  706. self.write_debug(f'Not extracting unsupported format "{format_id}"')
  707. else:
  708. formats.append({
  709. 'url': url,
  710. 'ext': 'mp4',
  711. 'format_id': format_id,
  712. **parse_resolution(self._RESOLUTIONS.get(format_id)),
  713. })
  714. return formats
  715. def _extract_common_meta(self, stream_info):
  716. return traverse_obj(stream_info, {
  717. 'id': ('id', {str_or_none}),
  718. 'title': ('title', {str}),
  719. 'release_timestamp': ('startTime', {int_or_none}),
  720. 'thumbnail': ('previewUrl', {url_or_none}),
  721. 'view_count': ('count', 'views', {int_or_none}),
  722. 'like_count': ('count', 'likes', {int_or_none}),
  723. 'categories': ('category', 'title', {str}, {lambda x: [x] if x else None}),
  724. 'uploader': (('user', ('blog', 'owner')), 'nick', {str}),
  725. 'uploader_id': (('user', ('blog', 'owner')), 'id', {str_or_none}),
  726. 'duration': ('duration', {int_or_none}),
  727. 'is_live': ('isOnline', {bool}),
  728. 'concurrent_view_count': ('count', 'viewers', {int_or_none}),
  729. }, get_all=False)
  730. class VKPlayIE(VKPlayBaseIE):
  731. _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<username>[^/#?]+)/record/(?P<id>[\da-f-]+)'
  732. _TESTS = [{
  733. 'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
  734. 'info_dict': {
  735. 'id': 'f5e6e3b5-dc52-4d14-965d-0680dd2882da',
  736. 'ext': 'mp4',
  737. 'title': 'Atomic Heart (пробуем!) спасибо подписчику EKZO!',
  738. 'uploader': 'ZitsmanN',
  739. 'uploader_id': '13159830',
  740. 'release_timestamp': 1683461378,
  741. 'release_date': '20230507',
  742. 'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview',
  743. 'duration': 10608,
  744. 'view_count': int,
  745. 'like_count': int,
  746. 'categories': ['Atomic Heart'],
  747. },
  748. 'params': {'skip_download': 'm3u8'},
  749. }, {
  750. 'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
  751. 'only_matching': True,
  752. }, {
  753. 'url': 'https://live.vkvideo.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
  754. 'only_matching': True,
  755. }]
  756. def _real_extract(self, url):
  757. username, video_id = self._match_valid_url(url).groups()
  758. record_info = traverse_obj(self._download_json(
  759. f'https://api.vkplay.live/v1/blog/{username}/public_video_stream/record/{video_id}', video_id, fatal=False),
  760. ('data', 'record', {dict}))
  761. if not record_info:
  762. record_info = self._extract_from_initial_state(url, video_id, ('record', 'currentRecord', 'data'))
  763. return {
  764. **self._extract_common_meta(record_info),
  765. 'id': video_id,
  766. 'formats': self._extract_formats(record_info, video_id),
  767. }
  768. class VKPlayLiveIE(VKPlayBaseIE):
  769. _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<id>[^/#?]+)/?(?:[#?]|$)'
  770. _TESTS = [{
  771. 'url': 'https://vkplay.live/bayda',
  772. 'info_dict': {
  773. 'id': 'f02c321e-427b-408d-b12f-ae34e53e0ea2',
  774. 'ext': 'mp4',
  775. 'title': r're:эскапизм крута .*',
  776. 'uploader': 'Bayda',
  777. 'uploader_id': '12279401',
  778. 'release_timestamp': 1687209962,
  779. 'release_date': '20230619',
  780. 'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview',
  781. 'view_count': int,
  782. 'concurrent_view_count': int,
  783. 'like_count': int,
  784. 'categories': ['EVE Online'],
  785. 'live_status': 'is_live',
  786. },
  787. 'skip': 'livestream',
  788. 'params': {'skip_download': True},
  789. }, {
  790. 'url': 'https://live.vkplay.ru/lebwa',
  791. 'only_matching': True,
  792. }, {
  793. 'url': 'https://live.vkvideo.ru/panterka',
  794. 'only_matching': True,
  795. }]
  796. def _real_extract(self, url):
  797. username = self._match_id(url)
  798. stream_info = self._download_json(
  799. f'https://api.vkplay.live/v1/blog/{username}/public_video_stream', username, fatal=False)
  800. if not stream_info:
  801. stream_info = self._extract_from_initial_state(url, username, ('stream', 'stream', 'data', 'stream'))
  802. formats = self._extract_formats(stream_info, username)
  803. if not formats and not traverse_obj(stream_info, ('isOnline', {bool})):
  804. raise UserNotLive(video_id=username)
  805. return {
  806. **self._extract_common_meta(stream_info),
  807. 'formats': formats,
  808. }