commit: fa8f6d858064cf07b9167b73647545b3007c6b21
parent 3bb7769c405e02fc1078252cafbbd982913fe50c
Author: Remita Amine <remitamine@gmail.com>
Date: Thu, 21 Jan 2021 23:53:09 +0100
[comedycentral] fix extraction (closes #27905)
Diffstat:
4 files changed, 38 insertions(+), 146 deletions(-)
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
@@ -1,142 +1,51 @@
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
-from .common import InfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
- (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
- /(?P<title>.*)'''
+ _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
- 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
- 'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
+ 'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
+ 'md5': 'b8acb347177c680ff18a292aa2166f80',
'info_dict': {
- 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+ 'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
'ext': 'mp4',
- 'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
- 'description': 'After a certain point, breastfeeding becomes c**kblocking.',
- 'timestamp': 1376798400,
- 'upload_date': '20130818',
+ 'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
+ 'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
+ 'timestamp': 1598670000,
+ 'upload_date': '20200829',
},
}, {
- 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
+ 'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
'only_matching': True,
- }]
-
-
-class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
- (?:full-episodes|shows(?=/[^/]+/full-episodes))
- /(?P<id>[^?]+)'''
- _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
-
- _TESTS = [{
- 'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
- 'info_dict': {
- 'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
- 'title': 'November 28, 2016 - Ryan Speedo Green',
- },
- 'playlist_count': 4,
}, {
- 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
- videos_info = self._get_videos_info(mgid)
- return videos_info
-
-
-class ToshIE(MTVServicesInfoExtractor):
- IE_DESC = 'Tosh.0'
- _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
- _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
-
- _TESTS = [{
- 'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
- 'info_dict': {
- 'description': 'Tosh asked fans to share their summer plans.',
- 'title': 'Twitter Users Share Summer Plans',
- },
- 'playlist': [{
- 'md5': 'f269e88114c1805bb6d7653fecea9e06',
- 'info_dict': {
- 'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
- 'ext': 'mp4',
- 'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
- 'description': 'Tosh asked fans to share their summer plans.',
- 'thumbnail': r're:^https?://.*\.jpg',
- # It's really reported to be published on year 2077
- 'upload_date': '20770610',
- 'timestamp': 3390510600,
- 'subtitles': {
- 'en': 'mincount:3',
- },
- },
- }]
- }, {
- 'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
+ 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
'only_matching': True,
}]
class ComedyCentralTVIE(MTVServicesInfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
_TESTS = [{
- 'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
+ 'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
'info_dict': {
- 'id': 'local_playlist-f99b626bdfe13568579a',
- 'ext': 'flv',
- 'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
+ 'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Josh Investigates',
+ 'description': 'Steht uns das Ende der Welt bevor?',
},
- }, {
- 'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
- 'only_matching': True,
- }, {
- 'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
- 'only_matching': True,
}]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- mrss_url = self._search_regex(
- r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'mrss url', group='url')
-
- return self._get_videos_info_from_url(mrss_url, video_id)
-
-
-class ComedyCentralShortnameIE(InfoExtractor):
- _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
- _TESTS = [{
- 'url': ':tds',
- 'only_matching': True,
- }, {
- 'url': ':thedailyshow',
- 'only_matching': True,
- }, {
- 'url': ':theopposition',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- shortcut_map = {
- 'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
- 'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+ _GEO_COUNTRIES = ['DE']
+
+ def _get_feed_query(self, uri):
+ return {
+ 'accountOverride': 'intl.mtvi.com',
+ 'arcEp': 'web.cc.tv',
+ 'ep': 'b9032c3a',
+ 'imageEp': 'web.cc.tv',
+ 'mgid': uri,
}
- return self.url_result(shortcut_map[video_id])
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -235,11 +235,8 @@ from .cnn import (
)
from .coub import CoubIE
from .comedycentral import (
- ComedyCentralFullEpisodesIE,
ComedyCentralIE,
- ComedyCentralShortnameIE,
ComedyCentralTVIE,
- ToshIE,
)
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import (
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
@@ -253,6 +253,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
+ @staticmethod
+ def _extract_child_with_type(parent, t):
+ return next(c for c in parent['children'] if c.get('type') == t)
+
def _extract_mgid(self, webpage):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
@@ -278,6 +282,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
if not mgid:
mgid = self._extract_triforce_mgid(webpage)
+ if not mgid:
+ data = self._parse_json(self._search_regex(
+ r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
+ main_container = self._extract_child_with_type(data, 'MainContainer')
+ video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
+ mgid = video_player['props']['media']['video']['config']['uri']
+
return mgid
def _real_extract(self, url):
@@ -349,18 +360,6 @@ class MTVIE(MTVServicesInfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def extract_child_with_type(parent, t):
- children = parent['children']
- return next(c for c in children if c.get('type') == t)
-
- def _extract_mgid(self, webpage):
- data = self._parse_json(self._search_regex(
- r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
- main_container = self.extract_child_with_type(data, 'MainContainer')
- video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
- return video_player['props']['media']['video']['config']['uri']
-
class MTVJapanIE(MTVServicesInfoExtractor):
IE_NAME = 'mtvjapan'
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
@@ -20,9 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']
- def _extract_mgid(self, webpage):
- return self._extract_triforce_mgid(webpage)
-
class ParamountNetworkIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
@@ -46,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
def _get_feed_query(self, uri):
return {
'arcEp': 'paramountnetwork.com',
+ 'imageEp': 'paramountnetwork.com',
'mgid': uri,
}
-
- def _extract_mgid(self, webpage):
- root_data = self._parse_json(self._search_regex(
- r'window\.__DATA__\s*=\s*({.+})',
- webpage, 'data'), None)
-
- def find_sub_data(data, data_type):
- return next(c for c in data['children'] if c.get('type') == data_type)
-
- c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
- return c['props']['media']['video']['config']['uri']