commit: 7c2ecbc1cc34fcad5194448dbc6b06af200d065f
parent d562cac9dc67bfa2306c6225c261390162527d9e
Author: Sergey M․ <dstftw@gmail.com>
Date: Sun, 7 Apr 2019 21:05:50 +0700
[tiktok] Add support for new URL schema (closes #20573)
Diffstat:
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py
@@ -65,8 +65,15 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE):
- _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:m\.)?tiktok\.com/v|
+ (?:www\.)?tiktok\.com/share/video
+ )
+ /(?P<id>\d+)
+ '''
+ _TESTS = [{
'url': 'https://m.tiktok.com/v/6606727368545406213.html',
'md5': 'd584b572e92fcd48888051f238022420',
'info_dict': {
@@ -81,25 +88,39 @@ class TikTokIE(TikTokBaseIE):
'comment_count': int,
'repost_count': int,
}
- }
+ }, {
+ 'url': 'https://www.tiktok.com/share/video/6606727368545406213',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ 'https://m.tiktok.com/v/%s.html' % video_id, video_id)
data = self._parse_json(self._search_regex(
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
return self._extract_aweme(data)
class TikTokUserIE(TikTokBaseIE):
- _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:m\.)?tiktok\.com/h5/share/usr|
+ (?:www\.)?tiktok\.com/share/user
+ )
+ /(?P<id>\d+)
+ '''
+ _TESTS = [{
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
'info_dict': {
'id': '188294915489964032',
},
'playlist_mincount': 24,
- }
+ }, {
+ 'url': 'https://www.tiktok.com/share/user/188294915489964032',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
user_id = self._match_id(url)