logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git

youporn.py (27267B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import itertools
  4. import re
  5. from time import sleep
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. clean_html,
  9. extract_attributes,
  10. ExtractorError,
  11. get_element_by_class,
  12. get_element_by_id,
  13. int_or_none,
  14. merge_dicts,
  15. parse_count,
  16. parse_qs,
  17. T,
  18. traverse_obj,
  19. unified_strdate,
  20. url_or_none,
  21. urljoin,
  22. )
  23. class YouPornIE(InfoExtractor):
  24. _VALID_URL = (
  25. r'youporn:(?P<id>\d+)',
  26. r'''(?x)
  27. https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)
  28. (?:/(?:(?P<display_id>[^/?#&]+)/?)?)?(?:[#?]|$)
  29. '''
  30. )
  31. _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
  32. _TESTS = [{
  33. 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
  34. 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
  35. 'info_dict': {
  36. 'id': '505835',
  37. 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily',
  38. 'ext': 'mp4',
  39. 'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
  40. 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
  41. 'thumbnail': r're:^https?://.*\.jpg$',
  42. 'duration': 210,
  43. 'uploader': 'Ask Dan And Jennifer',
  44. 'upload_date': '20101217',
  45. 'average_rating': int,
  46. 'view_count': int,
  47. 'categories': list,
  48. 'tags': list,
  49. 'age_limit': 18,
  50. },
  51. 'skip': 'This video has been deactivated',
  52. }, {
  53. # Unknown uploader
  54. 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
  55. 'info_dict': {
  56. 'id': '561726',
  57. 'display_id': 'big-tits-awesome-brunette-on-amazing-webcam-show',
  58. 'ext': 'mp4',
  59. 'title': 'Big Tits Awesome Brunette On amazing webcam show',
  60. 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
  61. 'thumbnail': r're:^https?://.*\.jpg$',
  62. 'uploader': 'Unknown',
  63. 'upload_date': '20110418',
  64. 'average_rating': int,
  65. 'view_count': int,
  66. 'categories': list,
  67. 'tags': list,
  68. 'age_limit': 18,
  69. },
  70. 'params': {
  71. 'skip_download': True,
  72. },
  73. 'skip': '404',
  74. }, {
  75. 'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
  76. 'only_matching': True,
  77. }, {
  78. 'url': 'http://www.youporn.com/watch/505835',
  79. 'only_matching': True,
  80. }, {
  81. 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
  82. 'only_matching': True,
  83. }, {
  84. 'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
  85. 'info_dict': {
  86. 'id': '16290308',
  87. 'age_limit': 18,
  88. 'categories': [],
  89. 'description': None, # SEO spam using title removed
  90. 'display_id': 'tinderspecial-trailer1',
  91. 'duration': 298.0,
  92. 'ext': 'mp4',
  93. 'upload_date': '20201123',
  94. 'uploader': 'Ersties',
  95. 'tags': [],
  96. 'thumbnail': 'https://fi1.ypncdn.com/m=eaSaaTbWx/202011/23/16290308/original/3.jpg',
  97. 'timestamp': 1606147564,
  98. 'title': 'Tinder In Real Life',
  99. 'view_count': int,
  100. }
  101. }]
  102. @classmethod
  103. def _extract_urls(cls, webpage):
  104. def yield_urls():
  105. for p in cls._EMBED_REGEX:
  106. for m in re.finditer(p, webpage):
  107. yield m.group('url')
  108. return list(yield_urls())
  109. def _real_extract(self, url):
  110. # A different video ID (data-video-id) is hidden in the page but
  111. # never seems to be used
  112. video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
  113. url = 'http://www.youporn.com/watch/%s' % (video_id,)
  114. webpage = self._download_webpage(
  115. url, video_id, headers={'Cookie': 'age_verified=1'})
  116. watchable = self._search_regex(
  117. r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
  118. webpage, 'watchability', default=None)
  119. if not watchable:
  120. msg = re.split(r'\s{4}', clean_html(get_element_by_id(
  121. 'mainContent', webpage)) or '')[0]
  122. raise ExtractorError(
  123. ('%s says: %s' % (self.IE_NAME, msg))
  124. if msg else 'Video unavailable: no reason found',
  125. expected=True)
  126. # internal ID ?
  127. # video_id = extract_attributes(watchable).get('data-video-id')
  128. playervars = self._search_json(
  129. r'\bplayervars\s*:', webpage, 'playervars', video_id)
  130. def get_fmt(x):
  131. v_url = url_or_none(x.get('videoUrl'))
  132. if v_url:
  133. x['videoUrl'] = v_url
  134. return (x['format'], x)
  135. defs_by_format = dict(traverse_obj(playervars, (
  136. 'mediaDefinitions', lambda _, v: v.get('format'), T(get_fmt))))
  137. def get_format_data(f):
  138. if f not in defs_by_format:
  139. return []
  140. return self._download_json(
  141. defs_by_format[f]['videoUrl'], video_id, '{0}-formats'.format(f))
  142. formats = []
  143. # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
  144. for hls_url in traverse_obj(
  145. get_format_data('hls'),
  146. (lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'),
  147. (Ellipsis, 'videoUrl')):
  148. formats.extend(self._extract_m3u8_formats(
  149. hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls',
  150. entry_protocol='m3u8_native'))
  151. for f in traverse_obj(get_format_data('mp4'), (
  152. lambda _, v: v.get('videoUrl'), {
  153. 'url': ('videoUrl', T(url_or_none)),
  154. 'filesize': ('videoSize', T(int_or_none)),
  155. 'height': ('quality', T(int_or_none)),
  156. }, T(lambda x: x.get('videoUrl') and x))):
  157. # Video URL's path looks like this:
  158. # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
  159. # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
  160. # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
  161. # We will benefit from it by extracting some metadata
  162. mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', f['videoUrl'])
  163. if mobj:
  164. if not f.get('height'):
  165. f['height'] = int(mobj.group('height'))
  166. f['tbr'] = int(mobj.group('bitrate'))
  167. f['format_id'] = '%dp-%dk' % (f['height'], f['tbr'])
  168. formats.append(f)
  169. self._sort_formats(formats)
  170. title = self._html_search_regex(
  171. r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
  172. webpage, 'title', default=None) or self._og_search_title(
  173. webpage, default=None) or self._html_search_meta(
  174. 'title', webpage, fatal=True)
  175. description = self._html_search_regex(
  176. r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
  177. webpage, 'description',
  178. default=None) or self._og_search_description(
  179. webpage, default=None)
  180. thumbnail = self._search_regex(
  181. r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
  182. webpage, 'thumbnail', fatal=False, group='thumbnail')
  183. duration = traverse_obj(playervars, ('duration', T(int_or_none)))
  184. if duration is None:
  185. duration = int_or_none(self._html_search_meta(
  186. 'video:duration', webpage, 'duration', fatal=False))
  187. uploader = self._html_search_regex(
  188. r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
  189. webpage, 'uploader', fatal=False)
  190. upload_date = unified_strdate(self._html_search_regex(
  191. (r'UPLOADED:\s*<span>([^<]+)',
  192. r'Date\s+[Aa]dded:\s*<span>([^<]+)',
  193. r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
  194. r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
  195. webpage, 'upload date', fatal=False))
  196. age_limit = self._rta_search(webpage)
  197. view_count = None
  198. views = self._search_regex(
  199. r'(<div\s[^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
  200. webpage, 'views', default=None)
  201. if views:
  202. view_count = parse_count(extract_attributes(views).get('data-value'))
  203. comment_count = parse_count(self._search_regex(
  204. r'>All [Cc]omments? \(([\d,.]+)\)',
  205. webpage, 'comment count', default=None))
  206. def extract_tag_box(regex, title):
  207. tag_box = self._search_regex(regex, webpage, title, default=None)
  208. if not tag_box:
  209. return []
  210. return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
  211. categories = extract_tag_box(
  212. r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories')
  213. tags = extract_tag_box(
  214. r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
  215. 'tags')
  216. data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) or {}
  217. data.pop('url', None)
  218. result = merge_dicts(data, {
  219. 'id': video_id,
  220. 'display_id': display_id,
  221. 'title': title,
  222. 'description': description,
  223. 'thumbnail': thumbnail,
  224. 'duration': duration,
  225. 'uploader': uploader,
  226. 'upload_date': upload_date,
  227. 'view_count': view_count,
  228. 'comment_count': comment_count,
  229. 'categories': categories,
  230. 'tags': tags,
  231. 'age_limit': age_limit,
  232. 'formats': formats,
  233. })
  234. # Remove promotional non-description
  235. if result.get('description', '').startswith(
  236. 'Watch %s online' % (result['title'],)):
  237. del result['description']
  238. return result
  239. class YouPornListBase(InfoExtractor):
  240. # pattern in '.title-text' element of page section containing videos
  241. _PLAYLIST_TITLEBAR_RE = r'\s+[Vv]ideos\s*$'
  242. _PAGE_RETRY_COUNT = 0 # ie, no retry
  243. _PAGE_RETRY_DELAY = 2 # seconds
  244. def _get_next_url(self, url, pl_id, html):
  245. return urljoin(url, self._search_regex(
  246. r'''<a\s[^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
  247. get_element_by_id('next', html) or '', 'next page',
  248. group='url', default=None))
  249. @classmethod
  250. def _get_title_from_slug(cls, title_slug):
  251. return re.sub(r'[_-]', ' ', title_slug)
  252. def _entries(self, url, pl_id, html=None, page_num=None):
  253. # separates page sections
  254. PLAYLIST_SECTION_RE = (
  255. r'''<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?title-bar(?:\s+[\w$-]+|\s)*\1[^>]*>'''
  256. )
  257. # contains video link
  258. VIDEO_URL_RE = r'''(?x)
  259. <div\s[^>]*\bdata-video-id\s*=\s*('|")\d+\1[^>]*>\s*
  260. (?:<div\b[\s\S]+?</div>\s*)*
  261. <a\s[^>]*\bhref\s*=\s*('|")(?P<url>(?:(?!\2)[^>])+)\2
  262. '''
  263. def yield_pages(url, html=html, page_num=page_num):
  264. fatal = not html
  265. for pnum in itertools.count(start=page_num or 1):
  266. if not html:
  267. html = self._download_webpage(
  268. url, pl_id, note='Downloading page %d' % pnum,
  269. fatal=fatal)
  270. if not html:
  271. break
  272. fatal = False
  273. yield (url, html, pnum)
  274. # explicit page: extract just that page
  275. if page_num is not None:
  276. break
  277. next_url = self._get_next_url(url, pl_id, html)
  278. if not next_url or next_url == url:
  279. break
  280. url, html = next_url, None
  281. def retry_page(msg, tries_left, page_data):
  282. if tries_left <= 0:
  283. return
  284. self.report_warning(msg, pl_id)
  285. sleep(self._PAGE_RETRY_DELAY)
  286. return next(
  287. yield_pages(page_data[0], page_num=page_data[2]), None)
  288. def yield_entries(html):
  289. for frag in re.split(PLAYLIST_SECTION_RE, html):
  290. if not frag:
  291. continue
  292. t_text = get_element_by_class('title-text', frag or '')
  293. if not (t_text and re.search(self._PLAYLIST_TITLEBAR_RE, t_text)):
  294. continue
  295. for m in re.finditer(VIDEO_URL_RE, frag):
  296. video_url = urljoin(url, m.group('url'))
  297. if video_url:
  298. yield self.url_result(video_url)
  299. last_first_url = None
  300. for page_data in yield_pages(url, html=html, page_num=page_num):
  301. # page_data: url, html, page_num
  302. first_url = None
  303. tries_left = self._PAGE_RETRY_COUNT + 1
  304. while tries_left > 0:
  305. tries_left -= 1
  306. for from_ in yield_entries(page_data[1]):
  307. # may get the same page twice instead of empty page
  308. # or (site bug) intead of actual next page
  309. if not first_url:
  310. first_url = from_['url']
  311. if first_url == last_first_url:
  312. # sometimes (/porntags/) the site serves the previous page
  313. # instead but may provide the correct page after a delay
  314. page_data = retry_page(
  315. 'Retrying duplicate page...', tries_left, page_data)
  316. if page_data:
  317. first_url = None
  318. break
  319. continue
  320. yield from_
  321. else:
  322. if not first_url and 'no-result-paragarph1' in page_data[1]:
  323. page_data = retry_page(
  324. 'Retrying empty page...', tries_left, page_data)
  325. if page_data:
  326. continue
  327. else:
  328. # success/failure
  329. break
  330. # may get an infinite (?) sequence of empty pages
  331. if not first_url:
  332. break
  333. last_first_url = first_url
  334. def _real_extract(self, url, html=None):
  335. # exceptionally, id may be None
  336. m_dict = self._match_valid_url(url).groupdict()
  337. pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
  338. qs = parse_qs(url)
  339. for q, v in qs.items():
  340. if v:
  341. qs[q] = v[-1]
  342. else:
  343. del qs[q]
  344. base_id = pl_id or 'YouPorn'
  345. title = self._get_title_from_slug(base_id)
  346. if page_type:
  347. title = '%s %s' % (page_type.capitalize(), title)
  348. base_id = [base_id.lower()]
  349. if sort is None:
  350. title += ' videos'
  351. else:
  352. title = '%s videos by %s' % (title, re.sub(r'[_-]', ' ', sort))
  353. base_id.append(sort)
  354. if qs:
  355. ps = ['%s=%s' % item for item in sorted(qs.items())]
  356. title += ' (%s)' % ','.join(ps)
  357. base_id.extend(ps)
  358. pl_id = '/'.join(base_id)
  359. return self.playlist_result(
  360. self._entries(url, pl_id, html=html,
  361. page_num=int_or_none(qs.get('page'))),
  362. playlist_id=pl_id, playlist_title=title)
  363. class YouPornCategoryIE(YouPornListBase):
  364. IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
  365. _VALID_URL = r'''(?x)
  366. https?://(?:www\.)?youporn\.com/
  367. (?P<type>category)/(?P<id>[^/?#&]+)
  368. (?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
  369. '''
  370. _TESTS = [{
  371. 'note': 'Full list with pagination',
  372. 'url': 'https://www.youporn.com/category/lingerie/popular/',
  373. 'info_dict': {
  374. 'id': 'lingerie/popular',
  375. 'title': 'Category lingerie videos by popular',
  376. },
  377. 'playlist_mincount': 39,
  378. }, {
  379. 'note': 'Filtered paginated list with single page result',
  380. 'url': 'https://www.youporn.com/category/lingerie/duration/?min_minutes=10',
  381. 'info_dict': {
  382. 'id': 'lingerie/duration/min_minutes=10',
  383. 'title': 'Category lingerie videos by duration (min_minutes=10)',
  384. },
  385. 'playlist_maxcount': 30,
  386. }, {
  387. 'note': 'Single page of full list',
  388. 'url': 'https://www.youporn.com/category/lingerie/popular?page=1',
  389. 'info_dict': {
  390. 'id': 'lingerie/popular/page=1',
  391. 'title': 'Category lingerie videos by popular (page=1)',
  392. },
  393. 'playlist_count': 30,
  394. }]
  395. class YouPornChannelIE(YouPornListBase):
  396. IE_DESC = 'YouPorn channel, with sorting and pagination'
  397. _VALID_URL = r'''(?x)
  398. https?://(?:www\.)?youporn\.com/
  399. (?P<type>channel)/(?P<id>[^/?#&]+)
  400. (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
  401. '''
  402. _TESTS = [{
  403. 'note': 'Full list with pagination',
  404. 'url': 'https://www.youporn.com/channel/x-feeds/',
  405. 'info_dict': {
  406. 'id': 'x-feeds',
  407. 'title': 'Channel X-Feeds videos',
  408. },
  409. 'playlist_mincount': 37,
  410. }, {
  411. 'note': 'Single page of full list (no filters here)',
  412. 'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
  413. 'info_dict': {
  414. 'id': 'x-feeds/duration/page=1',
  415. 'title': 'Channel X-Feeds videos by duration (page=1)',
  416. },
  417. 'playlist_count': 24,
  418. }]
  419. @staticmethod
  420. def _get_title_from_slug(title_slug):
  421. return re.sub(r'_', ' ', title_slug).title()
  422. class YouPornCollectionIE(YouPornListBase):
  423. IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
  424. _VALID_URL = r'''(?x)
  425. https?://(?:www\.)?youporn\.com/
  426. (?P<type>collection)s/videos/(?P<id>\d+)
  427. (?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
  428. '''
  429. _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+in\s'
  430. _TESTS = [{
  431. 'note': 'Full list with pagination',
  432. 'url': 'https://www.youporn.com/collections/videos/33044251/',
  433. 'info_dict': {
  434. 'id': '33044251',
  435. 'title': 'Collection Sexy Lips videos',
  436. 'uploader': 'ph-littlewillyb',
  437. },
  438. 'playlist_mincount': 50,
  439. }, {
  440. 'note': 'Single page of full list (no filters here)',
  441. 'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
  442. 'info_dict': {
  443. 'id': '33044251/time/page=1',
  444. 'title': 'Collection Sexy Lips videos by time (page=1)',
  445. 'uploader': 'ph-littlewillyb',
  446. },
  447. 'playlist_count': 20,
  448. }]
  449. def _real_extract(self, url):
  450. pl_id = self._match_id(url)
  451. html = self._download_webpage(url, pl_id)
  452. playlist = super(YouPornCollectionIE, self)._real_extract(url, html=html)
  453. infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
  454. 'collection-infos', html)) or '')
  455. title, uploader = self._search_regex(
  456. r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
  457. infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
  458. return merge_dicts({
  459. 'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
  460. 'uploader': uploader,
  461. }, playlist) if title else playlist
  462. class YouPornTagIE(YouPornListBase):
  463. IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
  464. _VALID_URL = r'''(?x)
  465. https?://(?:www\.)?youporn\.com/
  466. porn(?P<type>tag)s/(?P<id>[^/?#&]+)
  467. (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
  468. '''
  469. _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+tagged\s'
  470. _PAGE_RETRY_COUNT = 1
  471. _TESTS = [{
  472. 'note': 'Full list with pagination',
  473. 'url': 'https://www.youporn.com/porntags/austrian',
  474. 'info_dict': {
  475. 'id': 'austrian',
  476. 'title': 'Tag austrian videos',
  477. },
  478. 'playlist_mincount': 35,
  479. 'expected_warnings': ['Retrying duplicate page'],
  480. }, {
  481. 'note': 'Filtered paginated list with single page result',
  482. 'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
  483. 'info_dict': {
  484. 'id': 'austrian/duration/min_minutes=10',
  485. 'title': 'Tag austrian videos by duration (min_minutes=10)',
  486. },
  487. # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
  488. # or more, varying with number of ads; let's set max as 9x4
  489. # NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
  490. 'playlist_maxcount': 32,
  491. 'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
  492. }, {
  493. 'note': 'Single page of full list',
  494. 'url': 'https://www.youporn.com/porntags/austrian/?page=1',
  495. 'info_dict': {
  496. 'id': 'austrian/page=1',
  497. 'title': 'Tag austrian videos (page=1)',
  498. },
  499. 'playlist_mincount': 32,
  500. 'playlist_maxcount': 34,
  501. 'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
  502. }]
  503. # YP tag navigation is broken, loses sort
  504. def _get_next_url(self, url, pl_id, html):
  505. next_url = super(YouPornTagIE, self)._get_next_url(url, pl_id, html)
  506. if next_url:
  507. n = self._match_valid_url(next_url)
  508. if n:
  509. s = n.groupdict().get('sort')
  510. if s:
  511. u = self._match_valid_url(url)
  512. if u:
  513. u = u.groupdict().get('sort')
  514. if s and not u:
  515. n = n.end('sort')
  516. next_url = next_url[:n] + '/' + u + next_url[n:]
  517. return next_url
  518. class YouPornStarIE(YouPornListBase):
  519. IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
  520. _VALID_URL = r'''(?x)
  521. https?://(?:www\.)?youporn\.com/
  522. (?P<type>pornstar)/(?P<id>[^/?#&]+)
  523. (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
  524. '''
  525. _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+[fF]eaturing\s'
  526. _TESTS = [{
  527. 'note': 'Full list with pagination',
  528. 'url': 'https://www.youporn.com/pornstar/daynia/',
  529. 'info_dict': {
  530. 'id': 'daynia',
  531. 'title': 'Pornstar Daynia videos',
  532. 'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
  533. },
  534. 'playlist_mincount': 45,
  535. }, {
  536. 'note': 'Single page of full list (no filters here)',
  537. 'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
  538. 'info_dict': {
  539. 'id': 'daynia/page=1',
  540. 'title': 'Pornstar Daynia videos (page=1)',
  541. 'description': 're:.{180,}',
  542. },
  543. 'playlist_count': 26,
  544. }]
  545. @staticmethod
  546. def _get_title_from_slug(title_slug):
  547. return re.sub(r'_', ' ', title_slug).title()
  548. def _real_extract(self, url):
  549. pl_id = self._match_id(url)
  550. html = self._download_webpage(url, pl_id)
  551. playlist = super(YouPornStarIE, self)._real_extract(url, html=html)
  552. INFO_ELEMENT_RE = r'''(?x)
  553. <div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
  554. (?P<info>[\s\S]+?)(?:</div>\s*){6,}
  555. '''
  556. infos = self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default='')
  557. if infos:
  558. infos = re.sub(
  559. r'(?:\s*nl=nl)+\s*', ' ',
  560. re.sub(r'(?u)\s+', ' ', clean_html(
  561. re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '')
  562. return merge_dicts({
  563. 'description': infos.strip() or None,
  564. }, playlist)
  565. class YouPornVideosIE(YouPornListBase):
  566. IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
  567. _VALID_URL = r'''(?x)
  568. https?://(?:www\.)?youporn\.com/
  569. (?:(?P<id>browse)/)?
  570. (?P<sort>(?(id)
  571. (?:duration|rating|time|views)|
  572. (?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
  573. (?:[/#?]|$)
  574. '''
  575. _PLAYLIST_TITLEBAR_RE = r'\s+(?:[Vv]ideos|VIDEOS)\s*$'
  576. _TESTS = [{
  577. 'note': 'Full list with pagination (too long for test)',
  578. 'url': 'https://www.youporn.com/',
  579. 'info_dict': {
  580. 'id': 'youporn',
  581. 'title': 'YouPorn videos',
  582. },
  583. 'only_matching': True,
  584. }, {
  585. 'note': 'Full list with pagination (too long for test)',
  586. 'url': 'https://www.youporn.com/recommended',
  587. 'info_dict': {
  588. 'id': 'youporn/recommended',
  589. 'title': 'YouPorn videos by recommended',
  590. },
  591. 'only_matching': True,
  592. }, {
  593. 'note': 'Full list with pagination (too long for test)',
  594. 'url': 'https://www.youporn.com/top_rated',
  595. 'info_dict': {
  596. 'id': 'youporn/top_rated',
  597. 'title': 'YouPorn videos by top rated',
  598. },
  599. 'only_matching': True,
  600. }, {
  601. 'note': 'Full list with pagination (too long for test)',
  602. 'url': 'https://www.youporn.com/browse/time',
  603. 'info_dict': {
  604. 'id': 'browse/time',
  605. 'title': 'YouPorn videos by time',
  606. },
  607. 'only_matching': True,
  608. }, {
  609. 'note': 'Filtered paginated list with single page result',
  610. 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
  611. 'info_dict': {
  612. 'id': 'youporn/most_favorited/max_minutes=2/res=VR',
  613. 'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
  614. },
  615. 'playlist_mincount': 10,
  616. 'playlist_maxcount': 28,
  617. }, {
  618. 'note': 'Filtered paginated list with several pages',
  619. 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
  620. 'info_dict': {
  621. 'id': 'youporn/most_favorited/max_minutes=5/res=VR',
  622. 'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
  623. },
  624. 'playlist_mincount': 45,
  625. }, {
  626. 'note': 'Single page of full list',
  627. 'url': 'https://www.youporn.com/browse/time?page=1',
  628. 'info_dict': {
  629. 'id': 'browse/time/page=1',
  630. 'title': 'YouPorn videos by time (page=1)',
  631. },
  632. 'playlist_count': 36,
  633. }]
  634. @staticmethod
  635. def _get_title_from_slug(title_slug):
  636. return 'YouPorn' if title_slug == 'browse' else title_slug