logo

oasis-root

Compiled tree of Oasis Linux, based on the author's own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

rutube.py (17291B)


  1. import itertools
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. UnsupportedError,
  5. bool_or_none,
  6. determine_ext,
  7. int_or_none,
  8. js_to_json,
  9. parse_qs,
  10. str_or_none,
  11. try_get,
  12. unified_timestamp,
  13. url_or_none,
  14. )
  15. from ..utils.traversal import (
  16. subs_list_to_dict,
  17. traverse_obj,
  18. )
  19. class RutubeBaseIE(InfoExtractor):
  20. def _download_api_info(self, video_id, query=None):
  21. if not query:
  22. query = {}
  23. query['format'] = 'json'
  24. return self._download_json(
  25. f'https://rutube.ru/api/video/{video_id}/',
  26. video_id, 'Downloading video JSON',
  27. 'Unable to download video JSON', query=query)
  28. def _extract_info(self, video, video_id=None, require_title=True):
  29. title = video['title'] if require_title else video.get('title')
  30. age_limit = video.get('is_adult')
  31. if age_limit is not None:
  32. age_limit = 18 if age_limit is True else 0
  33. uploader_id = try_get(video, lambda x: x['author']['id'])
  34. category = try_get(video, lambda x: x['category']['name'])
  35. description = video.get('description')
  36. duration = int_or_none(video.get('duration'))
  37. return {
  38. 'id': video.get('id') or video_id if video_id else video['id'],
  39. 'title': title,
  40. 'description': description,
  41. 'thumbnail': video.get('thumbnail_url'),
  42. 'duration': duration,
  43. 'uploader': try_get(video, lambda x: x['author']['name']),
  44. 'uploader_id': str(uploader_id) if uploader_id else None,
  45. 'timestamp': unified_timestamp(video.get('created_ts')),
  46. 'categories': [category] if category else None,
  47. 'age_limit': age_limit,
  48. 'view_count': int_or_none(video.get('hits')),
  49. 'comment_count': int_or_none(video.get('comments_count')),
  50. 'is_live': bool_or_none(video.get('is_livestream')),
  51. 'chapters': self._extract_chapters_from_description(description, duration),
  52. }
  53. def _download_and_extract_info(self, video_id, query=None):
  54. return self._extract_info(
  55. self._download_api_info(video_id, query=query), video_id)
  56. def _download_api_options(self, video_id, query=None):
  57. if not query:
  58. query = {}
  59. query['format'] = 'json'
  60. return self._download_json(
  61. f'https://rutube.ru/api/play/options/{video_id}/',
  62. video_id, 'Downloading options JSON',
  63. 'Unable to download options JSON',
  64. headers=self.geo_verification_headers(), query=query)
  65. def _extract_formats_and_subtitles(self, options, video_id):
  66. formats = []
  67. subtitles = {}
  68. for format_id, format_url in options['video_balancer'].items():
  69. ext = determine_ext(format_url)
  70. if ext == 'm3u8':
  71. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  72. format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
  73. formats.extend(fmts)
  74. self._merge_subtitles(subs, target=subtitles)
  75. elif ext == 'f4m':
  76. formats.extend(self._extract_f4m_formats(
  77. format_url, video_id, f4m_id=format_id, fatal=False))
  78. else:
  79. formats.append({
  80. 'url': format_url,
  81. 'format_id': format_id,
  82. })
  83. for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
  84. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  85. hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls')
  86. formats.extend(fmts)
  87. self._merge_subtitles(subs, target=subtitles)
  88. self._merge_subtitles(traverse_obj(options, ('captions', ..., {
  89. 'id': 'code',
  90. 'url': 'file',
  91. 'name': ('langTitle', {str}),
  92. }, all, {subs_list_to_dict(lang='ru')})), target=subtitles)
  93. return formats, subtitles
  94. def _download_and_extract_formats_and_subtitles(self, video_id, query=None):
  95. return self._extract_formats_and_subtitles(
  96. self._download_api_options(video_id, query=query), video_id)
  97. class RutubeIE(RutubeBaseIE):
  98. IE_NAME = 'rutube'
  99. IE_DESC = 'Rutube videos'
  100. _VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
  101. _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
  102. _TESTS = [{
  103. 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
  104. 'md5': '3d73fdfe5bb81b9aef139e22ef3de26a',
  105. 'info_dict': {
  106. 'id': '3eac3b4561676c17df9132a9a1e62e3e',
  107. 'ext': 'mp4',
  108. 'title': 'Раненный кенгуру забежал в аптеку',
  109. 'description': 'http://www.ntdtv.ru ',
  110. 'duration': 81,
  111. 'uploader': 'NTDRussian',
  112. 'uploader_id': '29790',
  113. 'timestamp': 1381943602,
  114. 'upload_date': '20131016',
  115. 'age_limit': 0,
  116. 'view_count': int,
  117. 'thumbnail': 'https://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
  118. 'categories': ['Новости и СМИ'],
  119. 'chapters': [],
  120. },
  121. }, {
  122. 'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
  123. 'only_matching': True,
  124. }, {
  125. 'url': 'https://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
  126. 'only_matching': True,
  127. }, {
  128. 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
  129. 'only_matching': True,
  130. }, {
  131. 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
  132. 'only_matching': True,
  133. }, {
  134. 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
  135. 'md5': '4fce7b4fcc7b1bcaa3f45eb1e1ad0dd7',
  136. 'info_dict': {
  137. 'id': '884fb55f07a97ab673c7d654553e0f48',
  138. 'ext': 'mp4',
  139. 'title': 'Яцуноками, Nioh2',
  140. 'description': 'Nioh2: финал сражения с боссом Яцуноками',
  141. 'duration': 15,
  142. 'uploader': 'mexus',
  143. 'uploader_id': '24222106',
  144. 'timestamp': 1670646232,
  145. 'upload_date': '20221210',
  146. 'age_limit': 0,
  147. 'view_count': int,
  148. 'thumbnail': 'https://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
  149. 'categories': ['Видеоигры'],
  150. 'chapters': [],
  151. },
  152. }, {
  153. 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/',
  154. 'info_dict': {
  155. 'id': 'c65b465ad0c98c89f3b25cb03dcc87c6',
  156. 'ext': 'mp4',
  157. 'chapters': 'count:4',
  158. 'categories': ['Бизнес и предпринимательство'],
  159. 'description': 'md5:252feac1305257d8c1bab215cedde75d',
  160. 'thumbnail': 'https://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
  161. 'duration': 782,
  162. 'age_limit': 0,
  163. 'uploader_id': '23491359',
  164. 'timestamp': 1677153329,
  165. 'view_count': int,
  166. 'upload_date': '20230223',
  167. 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1',
  168. 'uploader': 'Стас Быков',
  169. },
  170. }, {
  171. 'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
  172. 'info_dict': {
  173. 'id': 'c58f502c7bb34a8fcdd976b221fca292',
  174. 'ext': 'mp4',
  175. 'categories': ['Телепередачи'],
  176. 'description': '',
  177. 'thumbnail': 'https://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
  178. 'live_status': 'is_live',
  179. 'age_limit': 0,
  180. 'uploader_id': '23460655',
  181. 'timestamp': 1652972968,
  182. 'view_count': int,
  183. 'upload_date': '20220519',
  184. 'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
  185. 'uploader': 'Первый канал',
  186. },
  187. }, {
  188. 'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/',
  189. 'info_dict': {
  190. 'id': '03a9cb54bac3376af4c5cb0f18444e01',
  191. 'ext': 'mp4',
  192. 'age_limit': 0,
  193. 'description': '',
  194. 'title': 'Церемония начала торгов акциями ПАО «ЕвроТранс»',
  195. 'chapters': [],
  196. 'upload_date': '20240829',
  197. 'duration': 293,
  198. 'uploader': 'MOEX - Московская биржа',
  199. 'timestamp': 1724946628,
  200. 'thumbnail': 'https://pic.rutubelist.ru/video/2e/24/2e241fddb459baf0fa54acfca44874f4.jpg',
  201. 'view_count': int,
  202. 'uploader_id': '38420507',
  203. 'categories': ['Интервью'],
  204. },
  205. }, {
  206. 'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
  207. 'only_matching': True,
  208. }, {
  209. 'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
  210. 'only_matching': True,
  211. }]
  212. def _real_extract(self, url):
  213. video_id = self._match_id(url)
  214. query = parse_qs(url)
  215. info = self._download_and_extract_info(video_id, query)
  216. formats, subtitles = self._download_and_extract_formats_and_subtitles(video_id, query)
  217. return {
  218. **info,
  219. 'formats': formats,
  220. 'subtitles': subtitles,
  221. }
  222. class RutubeEmbedIE(RutubeBaseIE):
  223. IE_NAME = 'rutube:embed'
  224. IE_DESC = 'Rutube embedded videos'
  225. _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)(?:[?#/]|$)'
  226. _TESTS = [{
  227. 'url': 'https://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
  228. 'info_dict': {
  229. 'id': 'a10e53b86e8f349080f718582ce4c661',
  230. 'ext': 'mp4',
  231. 'timestamp': 1387830582,
  232. 'upload_date': '20131223',
  233. 'uploader_id': '297833',
  234. 'uploader': 'subziro89 ILya',
  235. 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
  236. 'age_limit': 0,
  237. 'duration': 1395,
  238. 'chapters': [],
  239. 'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b',
  240. 'view_count': int,
  241. 'thumbnail': 'https://pic.rutubelist.ru/video/d3/03/d3031f4670a6e6170d88fb3607948418.jpg',
  242. 'categories': ['Сериалы'],
  243. },
  244. 'params': {
  245. 'skip_download': True,
  246. },
  247. }, {
  248. 'url': 'https://rutube.ru/play/embed/8083783',
  249. 'only_matching': True,
  250. }, {
  251. # private video
  252. 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
  253. 'only_matching': True,
  254. }]
  255. def _real_extract(self, url):
  256. embed_id = self._match_id(url)
  257. # Query may contain private videos token and should be passed to API
  258. # requests (see #19163)
  259. query = parse_qs(url)
  260. options = self._download_api_options(embed_id, query)
  261. video_id = options['effective_video']
  262. formats, subtitles = self._extract_formats_and_subtitles(options, video_id)
  263. info = self._download_and_extract_info(video_id, query)
  264. info.update({
  265. 'extractor_key': 'Rutube',
  266. 'formats': formats,
  267. 'subtitles': subtitles,
  268. })
  269. return info
  270. class RutubePlaylistBaseIE(RutubeBaseIE):
  271. def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
  272. return self._PAGE_TEMPLATE % (playlist_id, page_num)
  273. def _entries(self, playlist_id, *args, **kwargs):
  274. next_page_url = None
  275. for pagenum in itertools.count(1):
  276. page = self._download_json(
  277. next_page_url or self._next_page_url(
  278. pagenum, playlist_id, *args, **kwargs),
  279. playlist_id, f'Downloading page {pagenum}')
  280. results = page.get('results')
  281. if not results or not isinstance(results, list):
  282. break
  283. for result in results:
  284. video_url = url_or_none(result.get('video_url'))
  285. if not video_url:
  286. continue
  287. entry = self._extract_info(result, require_title=False)
  288. entry.update({
  289. '_type': 'url',
  290. 'url': video_url,
  291. 'ie_key': RutubeIE.ie_key(),
  292. })
  293. yield entry
  294. next_page_url = page.get('next')
  295. if not next_page_url or not page.get('has_next'):
  296. break
  297. def _extract_playlist(self, playlist_id, *args, **kwargs):
  298. return self.playlist_result(
  299. self._entries(playlist_id, *args, **kwargs),
  300. playlist_id, kwargs.get('playlist_name'))
  301. def _real_extract(self, url):
  302. return self._extract_playlist(self._match_id(url))
  303. class RutubeTagsIE(RutubePlaylistBaseIE):
  304. IE_NAME = 'rutube:tags'
  305. IE_DESC = 'Rutube tags'
  306. _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
  307. _TESTS = [{
  308. 'url': 'https://rutube.ru/tags/video/1800/',
  309. 'info_dict': {
  310. 'id': '1800',
  311. },
  312. 'playlist_mincount': 68,
  313. }]
  314. _PAGE_TEMPLATE = 'https://rutube.ru/api/tags/video/%s/?page=%s&format=json'
  315. class RutubeMovieIE(RutubePlaylistBaseIE):
  316. IE_NAME = 'rutube:movie'
  317. IE_DESC = 'Rutube movies'
  318. _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
  319. _MOVIE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/?format=json'
  320. _PAGE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
  321. def _real_extract(self, url):
  322. movie_id = self._match_id(url)
  323. movie = self._download_json(
  324. self._MOVIE_TEMPLATE % movie_id, movie_id,
  325. 'Downloading movie JSON')
  326. return self._extract_playlist(
  327. movie_id, playlist_name=movie.get('name'))
  328. class RutubePersonIE(RutubePlaylistBaseIE):
  329. IE_NAME = 'rutube:person'
  330. IE_DESC = 'Rutube person videos'
  331. _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
  332. _TESTS = [{
  333. 'url': 'https://rutube.ru/video/person/313878/',
  334. 'info_dict': {
  335. 'id': '313878',
  336. },
  337. 'playlist_mincount': 36,
  338. }]
  339. _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json'
  340. class RutubePlaylistIE(RutubePlaylistBaseIE):
  341. IE_NAME = 'rutube:playlist'
  342. IE_DESC = 'Rutube playlists'
  343. _VALID_URL = r'https?://rutube\.ru/plst/(?P<id>\d+)'
  344. _TESTS = [{
  345. 'url': 'https://rutube.ru/plst/308547/',
  346. 'info_dict': {
  347. 'id': '308547',
  348. },
  349. 'playlist_mincount': 22,
  350. }]
  351. _PAGE_TEMPLATE = 'https://rutube.ru/api/playlist/custom/%s/videos?page=%s&format=json'
  352. class RutubeChannelIE(RutubePlaylistBaseIE):
  353. IE_NAME = 'rutube:channel'
  354. IE_DESC = 'Rutube channel'
  355. _VALID_URL = r'https?://rutube\.ru/(?:channel/(?P<id>\d+)|u/(?P<slug>\w+))(?:/(?P<section>videos|shorts|playlists))?'
  356. _TESTS = [{
  357. 'url': 'https://rutube.ru/channel/639184/videos/',
  358. 'info_dict': {
  359. 'id': '639184_videos',
  360. },
  361. 'playlist_mincount': 129,
  362. }, {
  363. 'url': 'https://rutube.ru/channel/25902603/shorts/',
  364. 'info_dict': {
  365. 'id': '25902603_shorts',
  366. },
  367. 'playlist_mincount': 277,
  368. }, {
  369. 'url': 'https://rutube.ru/channel/25902603/',
  370. 'info_dict': {
  371. 'id': '25902603',
  372. },
  373. 'playlist_mincount': 406,
  374. }, {
  375. 'url': 'https://rutube.ru/u/rutube/videos/',
  376. 'info_dict': {
  377. 'id': '23704195_videos',
  378. },
  379. 'playlist_mincount': 113,
  380. }]
  381. _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json&origin__type=%s'
  382. def _next_page_url(self, page_num, playlist_id, section):
  383. origin_type = {
  384. 'videos': 'rtb,rst,ifrm,rspa',
  385. 'shorts': 'rshorts',
  386. None: '',
  387. }.get(section)
  388. return self._PAGE_TEMPLATE % (playlist_id, page_num, origin_type)
  389. def _real_extract(self, url):
  390. playlist_id, slug, section = self._match_valid_url(url).group('id', 'slug', 'section')
  391. if section == 'playlists':
  392. raise UnsupportedError(url)
  393. if slug:
  394. webpage = self._download_webpage(url, slug)
  395. redux_state = self._search_json(
  396. r'window\.reduxState\s*=', webpage, 'redux state', slug, transform_source=js_to_json)
  397. playlist_id = traverse_obj(redux_state, (
  398. 'api', 'queries', lambda k, _: k.startswith('channelIdBySlug'),
  399. 'data', 'channel_id', {int}, {str_or_none}, any))
  400. playlist = self._extract_playlist(playlist_id, section=section)
  401. if section:
  402. playlist['id'] = f'{playlist_id}_{section}'
  403. return playlist