commit: 46cc54ca8f13c7b823c1a12446cdd76d060c74b6
parent 1e1c1960aa154a6e257e83e94e86ee6dc8b0b362
Author: Sergey M․ <dstftw@gmail.com>
Date: Tue, 3 Mar 2020 06:23:39 +0700
[pornhub] Improve title extraction (closes #24184)
Diffstat:
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
@@ -189,10 +189,10 @@ class PornHubIE(PornHubBaseIE):
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
title = self._html_search_meta(
- 'twitter:title', webpage, default=None) or self._search_regex(
- (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
- r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
- r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
+ 'twitter:title', webpage, default=None) or self._html_search_regex(
+ (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+ r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
webpage, 'title', group='title')
video_urls = []