logo

youtube-dl

[mirror] Download/Watch videos from video hosters — git clone https://hacktivis.me/git/mirror/youtube-dl.git

thisvid.py (8452B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import itertools
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_urlparse,
  8. )
  9. from ..utils import (
  10. clean_html,
  11. get_element_by_class,
  12. int_or_none,
  13. merge_dicts,
  14. url_or_none,
  15. urljoin,
  16. )
  class ThisVidIE(InfoExtractor):
      """Extractor for single ThisVid video pages and their embed variants.

      The media itself is not resolved here: the page is scraped for metadata
      (title, uploader, age limit) and the original URL is handed to the
      Generic extractor as a ``url_transparent`` result.
      """

      _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
      _TESTS = [{
          'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
          'md5': '839becb572995687e11a69dc4358a386',
          'info_dict': {
              'id': '3533241',
              'ext': 'mp4',
              'title': 'Sitting on ball tight jeans',
              'description': 'md5:372353bb995883d1b65fddf507489acd',
              'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
              'uploader_id': '150629',
              'uploader': 'jeanslevisjeans',
              'age_limit': 18,
          }
      }, {
          'url': 'https://thisvid.com/embed/3533241/',
          'md5': '839becb572995687e11a69dc4358a386',
          'info_dict': {
              'id': '3533241',
              'ext': 'mp4',
              'title': 'Sitting on ball tight jeans',
              'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
              'uploader_id': '150629',
              'uploader': 'jeanslevisjeans',
              'age_limit': 18,
          }
      }]

      def _real_extract(self, url):
          """Scrape page metadata and delegate the media lookup to Generic.

          :param url: a URL matching ``_VALID_URL`` (video or embed page)
          :returns: a ``url_transparent`` info dict merged with the Generic
              ``url_result`` for the same URL
          """
          mobj = re.match(self._VALID_URL, url)
          main_id = mobj.group('id')
          page_kind = mobj.group('type')
          webpage = self._download_webpage(url, main_id)

          title = self._html_search_regex(
              r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
              webpage, 'title')

          if page_kind == 'embed':
              # The embed page may name the full video page in `video_alt_url`;
              # when it does, the remaining metadata is scraped from that page.
              alt_url_re = r'''video_alt_url\s*:\s+'(%s/)',''' % (self._VALID_URL, )
              main_page_url = url_or_none(self._search_regex(
                  alt_url_re, webpage, 'video_alt_url', default=None))
              if main_page_url and main_page_url != url:
                  main_page = self._download_webpage(
                      main_page_url, main_id,
                      note='Redirecting embed to main page', fatal=False)
                  # non-fatal download: keep the embed page if it failed
                  if main_page:
                      webpage = main_page

          holder_html = get_element_by_class('video-holder', webpage) or ''
          if '>This video is a private video' in holder_html:
              # report only the first line of the site's own notice
              notice = clean_html(holder_html) or 'Private video'
              self.raise_login_required(notice.split('\n', 1)[0])

          # The capture runs from the numeric member id, through the rest of
          # the <a> tag, up to the link text; splitting on the end of the href
          # attribute separates the id from the display name.
          member = self._html_search_regex(
              r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
              webpage, 'uploader', default='')
          pieces = re.split(r'''/["'][^>]*>\s*''', member)

          uploader_id = uploader = None
          if len(pieces) == 2:
              # id must be non-empty, uploader could be ''
              uploader_id = pieces[0]
              uploader = pieces[1] or None

          scraped = {
              '_type': 'url_transparent',
              'title': title,
              'age_limit': 18,
              'uploader': uploader,
              'uploader_id': uploader_id,
          }
          return merge_dicts(scraped, self.url_result(url, ie='Generic'))
  class ThisVidMemberIE(InfoExtractor):
      """Extractor for ThisVid member pages, returned as a playlist of videos.

      Subclasses can reuse ``_real_extract`` and override ``_urls`` to change
      how video links are recognised in a listing page.
      """

      _VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)'
      _TESTS = [{
          'url': 'https://thisvid.com/members/2140501/',
          'info_dict': {
              'id': '2140501',
              'title': 'Rafflesia\'s Profile',
          },
          'playlist_mincount': 16,
      }, {
          'url': 'https://thisvid.com/members/2140501/favourite_videos/',
          'info_dict': {
              'id': '2140501',
              'title': 'Rafflesia\'s Favourite Videos',
          },
          'playlist_mincount': 15,
      }, {
          'url': 'https://thisvid.com/members/636468/public_videos/',
          'info_dict': {
              'id': '636468',
              'title': 'Happymouth\'s Public Videos',
          },
          'playlist_mincount': 196,
      },
      ]

      def _urls(self, html):
          """Yield every video-page URL linked from ``html``.

          A link counts when its ``href`` matches ``ThisVidIE._VALID_URL``.
          """
          link_re = r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (ThisVidIE._VALID_URL, )
          for link in re.finditer(link_re, html):
              yield link.group('url')

      def _real_extract(self, url):
          """Build a playlist from a listing page and all its follow-up pages.

          :param url: a URL matching ``_VALID_URL``
          :returns: the result of ``playlist_from_matches`` over the lazily
              generated video URLs, with ``ie='ThisVid'``
          """
          pl_id = self._match_id(url)
          webpage = self._download_webpage(url, pl_id)

          # Prefer the og:title, fall back to <title>; both lookups are
          # non-fatal, so the title may end up as None.
          raw_title = (
              self._og_search_title(webpage, default=None)
              or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False)
              or '')
          title = re.split(r'(?i)\s*\|\s*ThisVid\.com\s*$', raw_title, 1)[0] or None

          def numbered_successor(page_url):
              """Return ``page_url`` with its trailing page number bumped.

              A path without a numeric last segment is treated as page 1, so
              its successor ends in ``/2``. Returns None if nothing changed.
              """
              parts = compat_urlparse.urlparse(page_url)
              stem, tail = parts.path.rsplit('/', 1)
              number = int_or_none(tail)
              if number is None:
                  stem, number = parts.path.rstrip('/'), 1
              parts = parts._replace(path=stem + ('/%d' % (number + 1, )))
              successor = compat_urlparse.urlunparse(parts)
              return None if successor == page_url else successor

          def entries(page_url, html=None):
              """Generate video URLs page by page, starting at ``page_url``.

              ``html`` may carry the already downloaded first page.
              """
              for page_num in itertools.count(1):
                  if not html:
                      html = self._download_webpage(
                          page_url, pl_id, note='Downloading page %d' % (page_num, ),
                          fatal=False) or ''
                  for video_url in self._urls(html):
                      yield video_url

                  pager = get_element_by_class('pagination-next', html) or ''
                  if not pager:
                      # no "next" element at all: this was the last page
                      break

                  # member list page: follow the explicit link, ignoring
                  # fragment-only (`#...`) hrefs
                  next_page = urljoin(url, self._search_regex(
                      r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
                      pager, 'next page link', group='url', default=None))
                  if next_page is None:
                      # playlist page (or a pager without a usable link):
                      # derive the next URL from the page number instead
                      next_page = numbered_successor(page_url)
                  if not next_page:
                      break
                  page_url, html = next_page, None

          return self.playlist_from_matches(
              entries(url, webpage), playlist_id=pl_id, playlist_title=title, ie='ThisVid')
  class ThisVidPlaylistIE(ThisVidMemberIE):
      """Extractor for ThisVid playlist URLs (``/playlist/<id>/video/<slug>``).

      Pagination and playlist assembly are inherited from ``ThisVidMemberIE``;
      this class only changes how links are recognised and honours the
      ``noplaylist`` option by returning just the featured video.
      """

      _VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
      _TESTS = [{
          'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
          'info_dict': {
              'id': '6615',
              'title': 'Underwear Stuff',
          },
          'playlist_mincount': 200,
      }, {
          'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
          'info_dict': {
              'id': '1072387',
              'ext': 'mp4',
              'title': 'Big Italian Booty 28',
              'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
              'uploader_id': '367912',
              'uploader': 'Jcmusclefun',
              'age_limit': 18,
          },
          'params': {
              'noplaylist': True,
          },
      }]

      def _get_video_url(self, pl_url):
          """Map a playlist-item URL to the plain ``/videos/<slug>/`` URL."""
          video_id = re.match(self._VALID_URL, pl_url).group('video_id')
          return urljoin(pl_url, '/videos/%s/' % (video_id, ))

      def _urls(self, html):
          """Yield the video-page URL for every playlist-item link in ``html``."""
          link_re = r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (self._VALID_URL, )
          for m in re.finditer(link_re, html):
              yield self._get_video_url(m.group('url'))

      def _real_extract(self, url):
          """Return the whole playlist, or only the featured video.

          :param url: a URL matching ``_VALID_URL``
          :returns: a ``url_result`` for the featured video when the
              ``noplaylist`` param is set, otherwise the inherited playlist
              result with a doubled title collapsed
          """
          pl_id = self._match_id(url)

          if self._downloader.params.get('noplaylist'):
              self.to_screen('Downloading just the featured video because of --no-playlist')
              return self.url_result(self._get_video_url(url), 'ThisVid')

          self.to_screen(
              'Downloading playlist %s - add --no-playlist to download just the featured video' % (pl_id, ))
          result = super(ThisVidPlaylistIE, self)._real_extract(url)

          # rework title returned as `the title - the title`
          # The inherited extraction treats the title as optional (it may be
          # None), so a missing or empty title must not abort the playlist.
          # NOTE(review): presumably the base playlist helper omits the
          # 'title' key in that case - confirm against InfoExtractor.
          title = result.get('title')
          if not title:
              return result

          t_len = len(title)
          # a doubled title has odd length, with the '-' exactly in the middle
          if t_len > 5 and t_len % 2 != 0:
              middle = t_len // 2
              if title[middle] == '-':
                  halves = [t.strip() for t in (title[:middle], title[middle + 1:])]
                  if halves[0] and halves[0] == halves[1]:
                      result['title'] = halves[0]
          return result
  193. return result