logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

motherless.py (9867B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import datetime
  4. import re
  5. from .common import InfoExtractor
  6. from ..compat import compat_urlparse
  7. from ..utils import (
  8. ExtractorError,
  9. InAdvancePagedList,
  10. orderedSet,
  11. str_to_int,
  12. unified_strdate,
  13. )
  14. class MotherlessIE(InfoExtractor):
  15. _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
  16. _TESTS = [{
  17. 'url': 'http://motherless.com/AC3FFE1',
  18. 'md5': '310f62e325a9fafe64f68c0bccb6e75f',
  19. 'info_dict': {
  20. 'id': 'AC3FFE1',
  21. 'ext': 'mp4',
  22. 'title': 'Fucked in the ass while playing PS3',
  23. 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
  24. 'upload_date': '20100913',
  25. 'uploader_id': 'famouslyfuckedup',
  26. 'thumbnail': r're:https?://.*\.jpg',
  27. 'age_limit': 18,
  28. }
  29. }, {
  30. 'url': 'http://motherless.com/532291B',
  31. 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
  32. 'info_dict': {
  33. 'id': '532291B',
  34. 'ext': 'mp4',
  35. 'title': 'Amazing girl playing the omegle game, PERFECT!',
  36. 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
  37. 'game', 'hairy'],
  38. 'upload_date': '20140622',
  39. 'uploader_id': 'Sulivana7x',
  40. 'thumbnail': r're:https?://.*\.jpg',
  41. 'age_limit': 18,
  42. },
  43. 'skip': '404',
  44. }, {
  45. 'url': 'http://motherless.com/g/cosplay/633979F',
  46. 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
  47. 'info_dict': {
  48. 'id': '633979F',
  49. 'ext': 'mp4',
  50. 'title': 'Turtlette',
  51. 'categories': ['superheroine heroine superher'],
  52. 'upload_date': '20140827',
  53. 'uploader_id': 'shade0230',
  54. 'thumbnail': r're:https?://.*\.jpg',
  55. 'age_limit': 18,
  56. }
  57. }, {
  58. # no keywords
  59. 'url': 'http://motherless.com/8B4BBC1',
  60. 'only_matching': True,
  61. }, {
  62. # see https://motherless.com/videos/recent for recent videos with
  63. # uploaded date in "ago" format
  64. 'url': 'https://motherless.com/3C3E2CF',
  65. 'info_dict': {
  66. 'id': '3C3E2CF',
  67. 'ext': 'mp4',
  68. 'title': 'a/ Hot Teens',
  69. 'categories': list,
  70. 'upload_date': '20210104',
  71. 'uploader_id': 'anonymous',
  72. 'thumbnail': r're:https?://.*\.jpg',
  73. 'age_limit': 18,
  74. },
  75. 'params': {
  76. 'skip_download': True,
  77. },
  78. }]
  79. def _real_extract(self, url):
  80. video_id = self._match_id(url)
  81. webpage = self._download_webpage(url, video_id)
  82. if any(p in webpage for p in (
  83. '<title>404 - MOTHERLESS.COM<',
  84. ">The page you're looking for cannot be found.<")):
  85. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  86. if '>The content you are trying to view is for friends only.' in webpage:
  87. raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
  88. title = self._html_search_regex(
  89. (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
  90. r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
  91. video_url = (self._html_search_regex(
  92. (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
  93. r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
  94. webpage, 'video URL', default=None, group='url')
  95. or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
  96. age_limit = self._rta_search(webpage)
  97. view_count = str_to_int(self._html_search_regex(
  98. (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
  99. webpage, 'view count', fatal=False))
  100. like_count = str_to_int(self._html_search_regex(
  101. (r'>([\d,.]+)\s+Favorites<',
  102. r'<strong>Favorited</strong>\s+([^<]+)<'),
  103. webpage, 'like count', fatal=False))
  104. upload_date = unified_strdate(self._search_regex(
  105. r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
  106. 'upload date', default=None))
  107. if not upload_date:
  108. uploaded_ago = self._search_regex(
  109. r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
  110. default=None)
  111. if uploaded_ago:
  112. delta = int(uploaded_ago[:-1])
  113. _AGO_UNITS = {
  114. 'h': 'hours',
  115. 'd': 'days',
  116. }
  117. kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
  118. upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
  119. comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
  120. uploader_id = self._html_search_regex(
  121. (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
  122. r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
  123. webpage, 'uploader_id')
  124. categories = self._html_search_meta('keywords', webpage, default=None)
  125. if categories:
  126. categories = [cat.strip() for cat in categories.split(',')]
  127. return {
  128. 'id': video_id,
  129. 'title': title,
  130. 'upload_date': upload_date,
  131. 'uploader_id': uploader_id,
  132. 'thumbnail': self._og_search_thumbnail(webpage),
  133. 'categories': categories,
  134. 'view_count': view_count,
  135. 'like_count': like_count,
  136. 'comment_count': comment_count,
  137. 'age_limit': age_limit,
  138. 'url': video_url,
  139. }
  140. class MotherlessGroupIE(InfoExtractor):
  141. _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
  142. _TESTS = [{
  143. 'url': 'http://motherless.com/g/movie_scenes',
  144. 'info_dict': {
  145. 'id': 'movie_scenes',
  146. 'title': 'Movie Scenes',
  147. 'description': 'Hot and sexy scenes from "regular" movies... '
  148. 'Beautiful actresses fully nude... A looot of '
  149. 'skin! :)Enjoy!',
  150. },
  151. 'playlist_mincount': 662,
  152. }, {
  153. 'url': 'http://motherless.com/gv/sex_must_be_funny',
  154. 'info_dict': {
  155. 'id': 'sex_must_be_funny',
  156. 'title': 'Sex must be funny',
  157. 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
  158. 'any kind!'
  159. },
  160. 'playlist_mincount': 0,
  161. 'expected_warnings': [
  162. 'This group has no videos.',
  163. ]
  164. }, {
  165. 'url': 'https://motherless.com/g/beautiful_cock',
  166. 'info_dict': {
  167. 'id': 'beautiful_cock',
  168. 'title': 'Beautiful Cock',
  169. 'description': 'Group for lovely cocks yours, mine, a friends anything human',
  170. },
  171. 'playlist_mincount': 2500,
  172. }]
  173. @classmethod
  174. def suitable(cls, url):
  175. return (False if MotherlessIE.suitable(url)
  176. else super(MotherlessGroupIE, cls).suitable(url))
  177. def _extract_entries(self, webpage, base):
  178. entries = []
  179. for mobj in re.finditer(
  180. r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
  181. webpage):
  182. video_url = compat_urlparse.urljoin(base, mobj.group('href'))
  183. if not MotherlessIE.suitable(video_url):
  184. continue
  185. video_id = MotherlessIE._match_id(video_url)
  186. title = mobj.group('title')
  187. entries.append(self.url_result(
  188. video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
  189. video_title=title))
  190. # Alternative fallback
  191. if not entries:
  192. entries = [
  193. self.url_result(
  194. compat_urlparse.urljoin(base, '/' + entry_id),
  195. ie=MotherlessIE.ie_key(), video_id=entry_id)
  196. for entry_id in orderedSet(re.findall(
  197. r'data-codename=["\']([A-Z0-9]+)', webpage))]
  198. return entries
  199. def _real_extract(self, url):
  200. group_id = self._match_id(url)
  201. page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
  202. webpage = self._download_webpage(page_url, group_id)
  203. title = self._search_regex(
  204. r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
  205. description = self._html_search_meta(
  206. 'description', webpage, fatal=False)
  207. page_count = str_to_int(self._search_regex(
  208. r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
  209. webpage, 'page_count', default=0))
  210. if not page_count:
  211. message = self._search_regex(
  212. r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
  213. webpage, 'error_msg', default=None) or 'This group has no videos.'
  214. self.report_warning(message, group_id)
  215. page_count = 1
  216. PAGE_SIZE = 80
  217. def _get_page(idx):
  218. if idx > 0:
  219. webpage = self._download_webpage(
  220. page_url, group_id, query={'page': idx + 1},
  221. note='Downloading page %d/%d' % (idx + 1, page_count)
  222. )
  223. for entry in self._extract_entries(webpage, url):
  224. yield entry
  225. playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
  226. return {
  227. '_type': 'playlist',
  228. 'id': group_id,
  229. 'title': title,
  230. 'description': description,
  231. 'entries': playlist
  232. }