logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git

nba.py (16675B)


  1. from __future__ import unicode_literals
  2. import functools
  3. import re
  4. from .turner import TurnerBaseIE
  5. from ..compat import (
  6. compat_parse_qs,
  7. compat_str,
  8. compat_urllib_parse_unquote,
  9. compat_urllib_parse_urlparse,
  10. )
  11. from ..utils import (
  12. int_or_none,
  13. merge_dicts,
  14. OnDemandPagedList,
  15. parse_duration,
  16. parse_iso8601,
  17. try_get,
  18. update_url_query,
  19. urljoin,
  20. )
  21. class NBACVPBaseIE(TurnerBaseIE):
  22. def _extract_nba_cvp_info(self, path, video_id, fatal=False):
  23. return self._extract_cvp_info(
  24. 'http://secure.nba.com/%s' % path, video_id, {
  25. 'default': {
  26. 'media_src': 'http://nba.cdn.turner.com/nba/big',
  27. },
  28. 'm3u8': {
  29. 'media_src': 'http://nbavod-f.akamaihd.net',
  30. },
  31. }, fatal=fatal)
  32. class NBAWatchBaseIE(NBACVPBaseIE):
  33. _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'
  34. def _extract_video(self, filter_key, filter_value):
  35. video = self._download_json(
  36. 'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
  37. filter_value, query={
  38. 'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
  39. 'q': filter_key + ':' + filter_value,
  40. 'wt': 'json',
  41. })['response']['docs'][0]
  42. video_id = str(video['pid'])
  43. title = video['name']
  44. formats = []
  45. m3u8_url = (self._download_json(
  46. 'https://watch.nba.com/service/publishpoint', video_id, query={
  47. 'type': 'video',
  48. 'format': 'json',
  49. 'id': video_id,
  50. }, headers={
  51. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
  52. }, fatal=False) or {}).get('path')
  53. if m3u8_url:
  54. m3u8_formats = self._extract_m3u8_formats(
  55. re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
  56. 'm3u8_native', m3u8_id='hls', fatal=False)
  57. formats.extend(m3u8_formats)
  58. for f in m3u8_formats:
  59. http_f = f.copy()
  60. http_f.update({
  61. 'format_id': http_f['format_id'].replace('hls-', 'http-'),
  62. 'protocol': 'http',
  63. 'url': http_f['url'].replace('.m3u8', ''),
  64. })
  65. formats.append(http_f)
  66. info = {
  67. 'id': video_id,
  68. 'title': title,
  69. 'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
  70. 'description': video.get('description'),
  71. 'duration': int_or_none(video.get('runtime')),
  72. 'timestamp': parse_iso8601(video.get('releaseDate')),
  73. 'tags': video.get('tags'),
  74. }
  75. seo_name = video.get('seoName')
  76. if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
  77. base_path = ''
  78. if seo_name.startswith('teams/'):
  79. base_path += seo_name.split('/')[1] + '/'
  80. base_path += 'video/'
  81. cvp_info = self._extract_nba_cvp_info(
  82. base_path + seo_name + '.xml', video_id, False)
  83. if cvp_info:
  84. formats.extend(cvp_info['formats'])
  85. info = merge_dicts(info, cvp_info)
  86. self._sort_formats(formats)
  87. info['formats'] = formats
  88. return info
  89. class NBAWatchEmbedIE(NBAWatchBaseIE):
  90. IENAME = 'nba:watch:embed'
  91. _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
  92. _TESTS = [{
  93. 'url': 'http://watch.nba.com/embed?id=659395',
  94. 'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
  95. 'info_dict': {
  96. 'id': '659395',
  97. 'ext': 'mp4',
  98. 'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
  99. 'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
  100. 'timestamp': 1492228800,
  101. 'upload_date': '20170415',
  102. },
  103. }]
  104. def _real_extract(self, url):
  105. video_id = self._match_id(url)
  106. return self._extract_video('pid', video_id)
  107. class NBAWatchIE(NBAWatchBaseIE):
  108. IE_NAME = 'nba:watch'
  109. _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
  110. _TESTS = [{
  111. 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
  112. 'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
  113. 'info_dict': {
  114. 'id': '70946',
  115. 'ext': 'mp4',
  116. 'title': 'Thunder vs. Nets',
  117. 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
  118. 'duration': 181,
  119. 'timestamp': 1354597200,
  120. 'upload_date': '20121204',
  121. },
  122. }, {
  123. 'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
  124. 'only_matching': True,
  125. }, {
  126. 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
  127. 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
  128. 'info_dict': {
  129. 'id': '330865',
  130. 'ext': 'mp4',
  131. 'title': 'Hawks vs. Cavaliers Game 1',
  132. 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
  133. 'duration': 228,
  134. 'timestamp': 1432094400,
  135. 'upload_date': '20150521',
  136. },
  137. }, {
  138. 'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
  139. 'only_matching': True,
  140. }, {
  141. # only CVP mp4 format available
  142. 'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
  143. 'only_matching': True,
  144. }, {
  145. 'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
  146. 'only_matching': True,
  147. }]
  148. def _real_extract(self, url):
  149. display_id = self._match_id(url)
  150. collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
  151. if collection_id:
  152. if self._downloader.params.get('noplaylist'):
  153. self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
  154. else:
  155. self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
  156. return self.url_result(
  157. 'https://www.nba.com/watch/list/collection/' + collection_id,
  158. NBAWatchCollectionIE.ie_key(), collection_id)
  159. return self._extract_video('seoName', display_id)
  160. class NBAWatchCollectionIE(NBAWatchBaseIE):
  161. IE_NAME = 'nba:watch:collection'
  162. _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
  163. _TESTS = [{
  164. 'url': 'https://watch.nba.com/list/collection/season-preview-2020',
  165. 'info_dict': {
  166. 'id': 'season-preview-2020',
  167. },
  168. 'playlist_mincount': 43,
  169. }]
  170. _PAGE_SIZE = 100
  171. def _fetch_page(self, collection_id, page):
  172. page += 1
  173. videos = self._download_json(
  174. 'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
  175. collection_id, 'Downloading page %d JSON metadata' % page, query={
  176. 'count': self._PAGE_SIZE,
  177. 'page': page,
  178. })['results']['videos']
  179. for video in videos:
  180. program = video.get('program') or {}
  181. seo_name = program.get('seoName') or program.get('slug')
  182. if not seo_name:
  183. continue
  184. yield {
  185. '_type': 'url',
  186. 'id': program.get('id'),
  187. 'title': program.get('title') or video.get('title'),
  188. 'url': 'https://www.nba.com/watch/video/' + seo_name,
  189. 'thumbnail': video.get('image'),
  190. 'description': program.get('description') or video.get('description'),
  191. 'duration': parse_duration(program.get('runtimeHours')),
  192. 'timestamp': parse_iso8601(video.get('releaseDate')),
  193. }
  194. def _real_extract(self, url):
  195. collection_id = self._match_id(url)
  196. entries = OnDemandPagedList(
  197. functools.partial(self._fetch_page, collection_id),
  198. self._PAGE_SIZE)
  199. return self.playlist_result(entries, collection_id)
  200. class NBABaseIE(NBACVPBaseIE):
  201. _VALID_URL_BASE = r'''(?x)
  202. https?://(?:www\.)?nba\.com/
  203. (?P<team>
  204. blazers|
  205. bucks|
  206. bulls|
  207. cavaliers|
  208. celtics|
  209. clippers|
  210. grizzlies|
  211. hawks|
  212. heat|
  213. hornets|
  214. jazz|
  215. kings|
  216. knicks|
  217. lakers|
  218. magic|
  219. mavericks|
  220. nets|
  221. nuggets|
  222. pacers|
  223. pelicans|
  224. pistons|
  225. raptors|
  226. rockets|
  227. sixers|
  228. spurs|
  229. suns|
  230. thunder|
  231. timberwolves|
  232. warriors|
  233. wizards
  234. )
  235. (?:/play\#)?/'''
  236. _CHANNEL_PATH_REGEX = r'video/channel|series'
  237. def _embed_url_result(self, team, content_id):
  238. return self.url_result(update_url_query(
  239. 'https://secure.nba.com/assets/amp/include/video/iframe.html', {
  240. 'contentId': content_id,
  241. 'team': team,
  242. }), NBAEmbedIE.ie_key())
  243. def _call_api(self, team, content_id, query, resource):
  244. return self._download_json(
  245. 'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
  246. content_id, 'Download %s JSON metadata' % resource,
  247. query=query, headers={
  248. 'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
  249. })['response']['result']
  250. def _extract_video(self, video, team, extract_all=True):
  251. video_id = compat_str(video['nid'])
  252. team = video['brand']
  253. info = {
  254. 'id': video_id,
  255. 'title': video.get('title') or video.get('headline') or video['shortHeadline'],
  256. 'description': video.get('description'),
  257. 'timestamp': parse_iso8601(video.get('published')),
  258. }
  259. subtitles = {}
  260. captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
  261. for caption_url in captions.values():
  262. subtitles.setdefault('en', []).append({'url': caption_url})
  263. formats = []
  264. mp4_url = video.get('mp4')
  265. if mp4_url:
  266. formats.append({
  267. 'url': mp4_url,
  268. })
  269. if extract_all:
  270. source_url = video.get('videoSource')
  271. if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
  272. formats.append({
  273. 'format_id': 'source',
  274. 'url': source_url,
  275. 'preference': 1,
  276. })
  277. m3u8_url = video.get('m3u8')
  278. if m3u8_url:
  279. if '.akamaihd.net/i/' in m3u8_url:
  280. formats.extend(self._extract_akamai_formats(
  281. m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
  282. else:
  283. formats.extend(self._extract_m3u8_formats(
  284. m3u8_url, video_id, 'mp4',
  285. 'm3u8_native', m3u8_id='hls', fatal=False))
  286. content_xml = video.get('contentXml')
  287. if team and content_xml:
  288. cvp_info = self._extract_nba_cvp_info(
  289. team + content_xml, video_id, fatal=False)
  290. if cvp_info:
  291. formats.extend(cvp_info['formats'])
  292. subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
  293. info = merge_dicts(info, cvp_info)
  294. self._sort_formats(formats)
  295. else:
  296. info.update(self._embed_url_result(team, video['videoId']))
  297. info.update({
  298. 'formats': formats,
  299. 'subtitles': subtitles,
  300. })
  301. return info
  302. def _real_extract(self, url):
  303. team, display_id = re.match(self._VALID_URL, url).groups()
  304. if '/play#/' in url:
  305. display_id = compat_urllib_parse_unquote(display_id)
  306. else:
  307. webpage = self._download_webpage(url, display_id)
  308. display_id = self._search_regex(
  309. self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
  310. return self._extract_url_results(team, display_id)
  311. class NBAEmbedIE(NBABaseIE):
  312. IENAME = 'nba:embed'
  313. _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
  314. _TESTS = [{
  315. 'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
  316. 'only_matching': True,
  317. }, {
  318. 'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
  319. 'only_matching': True,
  320. }]
  321. def _real_extract(self, url):
  322. qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
  323. content_id = qs['contentId'][0]
  324. team = qs.get('team', [None])[0]
  325. if not team:
  326. return self.url_result(
  327. 'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
  328. video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
  329. return self._extract_video(video, team)
  330. class NBAIE(NBABaseIE):
  331. IENAME = 'nba'
  332. _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
  333. _TESTS = [{
  334. 'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
  335. 'info_dict': {
  336. 'id': '45039',
  337. 'ext': 'mp4',
  338. 'title': 'AND WE BACK.',
  339. 'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
  340. 'duration': 94,
  341. 'timestamp': 1607112000,
  342. 'upload_date': '20201218',
  343. },
  344. }, {
  345. 'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
  346. 'only_matching': True,
  347. }, {
  348. 'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
  349. 'only_matching': True,
  350. }]
  351. _CONTENT_ID_REGEX = r'videoID'
  352. def _extract_url_results(self, team, content_id):
  353. return self._embed_url_result(team, content_id)
  354. class NBAChannelIE(NBABaseIE):
  355. IENAME = 'nba:channel'
  356. _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
  357. _TESTS = [{
  358. 'url': 'https://www.nba.com/blazers/video/channel/summer_league',
  359. 'info_dict': {
  360. 'title': 'Summer League',
  361. },
  362. 'playlist_mincount': 138,
  363. }, {
  364. 'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
  365. 'only_matching': True,
  366. }]
  367. _CONTENT_ID_REGEX = r'videoSubCategory'
  368. _PAGE_SIZE = 100
  369. def _fetch_page(self, team, channel, page):
  370. results = self._call_api(team, channel, {
  371. 'channels': channel,
  372. 'count': self._PAGE_SIZE,
  373. 'offset': page * self._PAGE_SIZE,
  374. }, 'page %d' % (page + 1))
  375. for video in results:
  376. yield self._extract_video(video, team, False)
  377. def _extract_url_results(self, team, content_id):
  378. entries = OnDemandPagedList(
  379. functools.partial(self._fetch_page, team, content_id),
  380. self._PAGE_SIZE)
  381. return self.playlist_result(entries, playlist_title=content_id)