logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git

smotri.py (16016B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. import hashlib
  6. import uuid
  7. from .common import InfoExtractor
  8. from ..utils import (
  9. ExtractorError,
  10. int_or_none,
  11. sanitized_Request,
  12. unified_strdate,
  13. urlencode_postdata,
  14. xpath_text,
  15. )
  16. class SmotriIE(InfoExtractor):
  17. IE_DESC = 'Smotri.com'
  18. IE_NAME = 'smotri'
  19. _VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
  20. _NETRC_MACHINE = 'smotri'
  21. _TESTS = [
  22. # real video id 2610366
  23. {
  24. 'url': 'http://smotri.com/video/view/?id=v261036632ab',
  25. 'md5': '02c0dfab2102984e9c5bb585cc7cc321',
  26. 'info_dict': {
  27. 'id': 'v261036632ab',
  28. 'ext': 'mp4',
  29. 'title': 'катастрофа с камер видеонаблюдения',
  30. 'uploader': 'rbc2008',
  31. 'uploader_id': 'rbc08',
  32. 'upload_date': '20131118',
  33. 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
  34. },
  35. },
  36. # real video id 57591
  37. {
  38. 'url': 'http://smotri.com/video/view/?id=v57591cb20',
  39. 'md5': '830266dfc21f077eac5afd1883091bcd',
  40. 'info_dict': {
  41. 'id': 'v57591cb20',
  42. 'ext': 'flv',
  43. 'title': 'test',
  44. 'uploader': 'Support Photofile@photofile',
  45. 'uploader_id': 'support-photofile',
  46. 'upload_date': '20070704',
  47. 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
  48. },
  49. },
  50. # video-password, not approved by moderator
  51. {
  52. 'url': 'http://smotri.com/video/view/?id=v1390466a13c',
  53. 'md5': 'f6331cef33cad65a0815ee482a54440b',
  54. 'info_dict': {
  55. 'id': 'v1390466a13c',
  56. 'ext': 'mp4',
  57. 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
  58. 'uploader': 'timoxa40',
  59. 'uploader_id': 'timoxa40',
  60. 'upload_date': '20100404',
  61. 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
  62. },
  63. 'params': {
  64. 'videopassword': 'qwerty',
  65. },
  66. 'skip': 'Video is not approved by moderator',
  67. },
  68. # video-password
  69. {
  70. 'url': 'http://smotri.com/video/view/?id=v6984858774#',
  71. 'md5': 'f11e01d13ac676370fc3b95b9bda11b0',
  72. 'info_dict': {
  73. 'id': 'v6984858774',
  74. 'ext': 'mp4',
  75. 'title': 'Дача Солженицина ПАРОЛЬ 223322',
  76. 'uploader': 'psavari1',
  77. 'uploader_id': 'psavari1',
  78. 'upload_date': '20081103',
  79. 'thumbnail': r're:^https?://.*\.jpg$',
  80. },
  81. 'params': {
  82. 'videopassword': '223322',
  83. },
  84. },
  85. # age limit + video-password, not approved by moderator
  86. {
  87. 'url': 'http://smotri.com/video/view/?id=v15408898bcf',
  88. 'md5': '91e909c9f0521adf5ee86fbe073aad70',
  89. 'info_dict': {
  90. 'id': 'v15408898bcf',
  91. 'ext': 'flv',
  92. 'title': 'этот ролик не покажут по ТВ',
  93. 'uploader': 'zzxxx',
  94. 'uploader_id': 'ueggb',
  95. 'upload_date': '20101001',
  96. 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
  97. 'age_limit': 18,
  98. },
  99. 'params': {
  100. 'videopassword': '333'
  101. },
  102. 'skip': 'Video is not approved by moderator',
  103. },
  104. # age limit + video-password
  105. {
  106. 'url': 'http://smotri.com/video/view/?id=v7780025814',
  107. 'md5': 'b4599b068422559374a59300c5337d72',
  108. 'info_dict': {
  109. 'id': 'v7780025814',
  110. 'ext': 'mp4',
  111. 'title': 'Sexy Beach (пароль 123)',
  112. 'uploader': 'вАся',
  113. 'uploader_id': 'asya_prosto',
  114. 'upload_date': '20081218',
  115. 'thumbnail': r're:^https?://.*\.jpg$',
  116. 'age_limit': 18,
  117. },
  118. 'params': {
  119. 'videopassword': '123'
  120. },
  121. },
  122. # swf player
  123. {
  124. 'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
  125. 'md5': '31099eeb4bc906712c5f40092045108d',
  126. 'info_dict': {
  127. 'id': 'v9188090500',
  128. 'ext': 'mp4',
  129. 'title': 'Shakira - Don\'t Bother',
  130. 'uploader': 'HannahL',
  131. 'uploader_id': 'lisaha95',
  132. 'upload_date': '20090331',
  133. 'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
  134. },
  135. },
  136. ]
  137. @classmethod
  138. def _extract_url(cls, webpage):
  139. mobj = re.search(
  140. r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
  141. webpage)
  142. if mobj is not None:
  143. return mobj.group('url')
  144. mobj = re.search(
  145. r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
  146. <div\s+class="video_image">[^<]+</div>\s*
  147. <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
  148. if mobj is not None:
  149. return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
  150. def _search_meta(self, name, html, display_name=None):
  151. if display_name is None:
  152. display_name = name
  153. return self._html_search_meta(name, html, display_name)
  154. def _real_extract(self, url):
  155. video_id = self._match_id(url)
  156. video_form = {
  157. 'ticket': video_id,
  158. 'video_url': '1',
  159. 'frame_url': '1',
  160. 'devid': 'LoadupFlashPlayer',
  161. 'getvideoinfo': '1',
  162. }
  163. video_password = self._downloader.params.get('videopassword')
  164. if video_password:
  165. video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
  166. video = self._download_json(
  167. 'http://smotri.com/video/view/url/bot/',
  168. video_id, 'Downloading video JSON',
  169. data=urlencode_postdata(video_form),
  170. headers={'Content-Type': 'application/x-www-form-urlencoded'})
  171. video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
  172. if not video_url:
  173. if video.get('_moderate_no'):
  174. raise ExtractorError(
  175. 'Video %s has not been approved by moderator' % video_id, expected=True)
  176. if video.get('error'):
  177. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  178. if video.get('_pass_protected') == 1:
  179. msg = ('Invalid video password' if video_password
  180. else 'This video is protected by a password, use the --video-password option')
  181. raise ExtractorError(msg, expected=True)
  182. title = video['title']
  183. thumbnail = video.get('_imgURL')
  184. upload_date = unified_strdate(video.get('added'))
  185. uploader = video.get('userNick')
  186. uploader_id = video.get('userLogin')
  187. duration = int_or_none(video.get('duration'))
  188. # Video JSON does not provide enough meta data
  189. # We will extract some from the video web page instead
  190. webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
  191. webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
  192. # Warning if video is unavailable
  193. warning = self._html_search_regex(
  194. r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
  195. 'warning message', default=None)
  196. if warning is not None:
  197. self._downloader.report_warning(
  198. 'Video %s may not be available; smotri said: %s ' %
  199. (video_id, warning))
  200. # Adult content
  201. if 'EroConfirmText">' in webpage:
  202. self.report_age_confirmation()
  203. confirm_string = self._html_search_regex(
  204. r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
  205. webpage, 'confirm string')
  206. confirm_url = webpage_url + '&confirm=%s' % confirm_string
  207. webpage = self._download_webpage(
  208. confirm_url, video_id,
  209. 'Downloading video page (age confirmed)')
  210. adult_content = True
  211. else:
  212. adult_content = False
  213. view_count = self._html_search_regex(
  214. r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
  215. webpage, 'view count', fatal=False)
  216. return {
  217. 'id': video_id,
  218. 'url': video_url,
  219. 'title': title,
  220. 'thumbnail': thumbnail,
  221. 'uploader': uploader,
  222. 'upload_date': upload_date,
  223. 'uploader_id': uploader_id,
  224. 'duration': duration,
  225. 'view_count': int_or_none(view_count),
  226. 'age_limit': 18 if adult_content else 0,
  227. }
  228. class SmotriCommunityIE(InfoExtractor):
  229. IE_DESC = 'Smotri.com community videos'
  230. IE_NAME = 'smotri:community'
  231. _VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
  232. _TEST = {
  233. 'url': 'http://smotri.com/community/video/kommuna',
  234. 'info_dict': {
  235. 'id': 'kommuna',
  236. },
  237. 'playlist_mincount': 4,
  238. }
  239. def _real_extract(self, url):
  240. community_id = self._match_id(url)
  241. rss = self._download_xml(
  242. 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
  243. community_id, 'Downloading community RSS')
  244. entries = [
  245. self.url_result(video_url.text, SmotriIE.ie_key())
  246. for video_url in rss.findall('./channel/item/link')]
  247. return self.playlist_result(entries, community_id)
  248. class SmotriUserIE(InfoExtractor):
  249. IE_DESC = 'Smotri.com user videos'
  250. IE_NAME = 'smotri:user'
  251. _VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
  252. _TESTS = [{
  253. 'url': 'http://smotri.com/user/inspector',
  254. 'info_dict': {
  255. 'id': 'inspector',
  256. 'title': 'Inspector',
  257. },
  258. 'playlist_mincount': 9,
  259. }]
  260. def _real_extract(self, url):
  261. user_id = self._match_id(url)
  262. rss = self._download_xml(
  263. 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
  264. user_id, 'Downloading user RSS')
  265. entries = [self.url_result(video_url.text, 'Smotri')
  266. for video_url in rss.findall('./channel/item/link')]
  267. description_text = xpath_text(rss, './channel/description') or ''
  268. user_nickname = self._search_regex(
  269. '^Видео режиссера (.+)$', description_text,
  270. 'user nickname', fatal=False)
  271. return self.playlist_result(entries, user_id, user_nickname)
  272. class SmotriBroadcastIE(InfoExtractor):
  273. IE_DESC = 'Smotri.com broadcasts'
  274. IE_NAME = 'smotri:broadcast'
  275. _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
  276. _NETRC_MACHINE = 'smotri'
  277. def _real_extract(self, url):
  278. mobj = re.match(self._VALID_URL, url)
  279. broadcast_id = mobj.group('id')
  280. broadcast_url = 'http://' + mobj.group('url')
  281. broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
  282. if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
  283. raise ExtractorError(
  284. 'Broadcast %s does not exist' % broadcast_id, expected=True)
  285. # Adult content
  286. if re.search('EroConfirmText">', broadcast_page) is not None:
  287. (username, password) = self._get_login_info()
  288. if username is None:
  289. self.raise_login_required(
  290. 'Erotic broadcasts allowed only for registered users')
  291. login_form = {
  292. 'login-hint53': '1',
  293. 'confirm_erotic': '1',
  294. 'login': username,
  295. 'password': password,
  296. }
  297. request = sanitized_Request(
  298. broadcast_url + '/?no_redirect=1', urlencode_postdata(login_form))
  299. request.add_header('Content-Type', 'application/x-www-form-urlencoded')
  300. broadcast_page = self._download_webpage(
  301. request, broadcast_id, 'Logging in and confirming age')
  302. if '>Неверный логин или пароль<' in broadcast_page:
  303. raise ExtractorError(
  304. 'Unable to log in: bad username or password', expected=True)
  305. adult_content = True
  306. else:
  307. adult_content = False
  308. ticket = self._html_search_regex(
  309. (r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1',
  310. r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"),
  311. broadcast_page, 'broadcast ticket', group='ticket')
  312. broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
  313. broadcast_password = self._downloader.params.get('videopassword')
  314. if broadcast_password:
  315. broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
  316. broadcast_json_page = self._download_webpage(
  317. broadcast_url, broadcast_id, 'Downloading broadcast JSON')
  318. try:
  319. broadcast_json = json.loads(broadcast_json_page)
  320. protected_broadcast = broadcast_json['_pass_protected'] == 1
  321. if protected_broadcast and not broadcast_password:
  322. raise ExtractorError(
  323. 'This broadcast is protected by a password, use the --video-password option',
  324. expected=True)
  325. broadcast_offline = broadcast_json['is_play'] == 0
  326. if broadcast_offline:
  327. raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
  328. rtmp_url = broadcast_json['_server']
  329. mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
  330. if not mobj:
  331. raise ExtractorError('Unexpected broadcast rtmp URL')
  332. broadcast_playpath = broadcast_json['_streamName']
  333. broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
  334. broadcast_thumbnail = broadcast_json.get('_imgURL')
  335. broadcast_title = self._live_title(broadcast_json['title'])
  336. broadcast_description = broadcast_json.get('description')
  337. broadcaster_nick = broadcast_json.get('nick')
  338. broadcaster_login = broadcast_json.get('login')
  339. rtmp_conn = 'S:%s' % uuid.uuid4().hex
  340. except KeyError:
  341. if protected_broadcast:
  342. raise ExtractorError('Bad broadcast password', expected=True)
  343. raise ExtractorError('Unexpected broadcast JSON')
  344. return {
  345. 'id': broadcast_id,
  346. 'url': rtmp_url,
  347. 'title': broadcast_title,
  348. 'thumbnail': broadcast_thumbnail,
  349. 'description': broadcast_description,
  350. 'uploader': broadcaster_nick,
  351. 'uploader_id': broadcaster_login,
  352. 'age_limit': 18 if adult_content else 0,
  353. 'ext': 'flv',
  354. 'play_path': broadcast_playpath,
  355. 'player_url': 'http://pics.smotri.com/broadcast_play.swf',
  356. 'app': broadcast_app,
  357. 'rtmp_live': True,
  358. 'rtmp_conn': rtmp_conn,
  359. 'is_live': True,
  360. }