logo

oasis-root

Compiled tree of Oasis Linux, based on own branch at <https://hacktivis.me/git/oasis/>. To clone: git clone https://anongit.hacktivis.me/git/oasis-root.git

brightcove.py (42590B)


  1. import base64
  2. import re
  3. import struct
  4. import urllib.parse
  5. import xml.etree.ElementTree
  6. from .adobepass import AdobePassIE
  7. from .common import InfoExtractor
  8. from ..compat import compat_etree_fromstring
  9. from ..networking.exceptions import HTTPError
  10. from ..utils import (
  11. ExtractorError,
  12. UnsupportedError,
  13. clean_html,
  14. dict_get,
  15. extract_attributes,
  16. find_xpath_attr,
  17. fix_xml_ampersands,
  18. float_or_none,
  19. int_or_none,
  20. join_nonempty,
  21. js_to_json,
  22. mimetype2ext,
  23. parse_iso8601,
  24. parse_qs,
  25. smuggle_url,
  26. str_or_none,
  27. try_get,
  28. unescapeHTML,
  29. unsmuggle_url,
  30. update_url_query,
  31. url_or_none,
  32. )
  33. from ..utils.traversal import traverse_obj
  34. class BrightcoveLegacyIE(InfoExtractor):
  35. IE_NAME = 'brightcove:legacy'
  36. _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
  37. _TESTS = [
  38. {
  39. # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
  40. 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
  41. 'md5': '5423e113865d26e40624dce2e4b45d95',
  42. 'note': 'Test Brightcove downloads and detection in GenericIE',
  43. 'info_dict': {
  44. 'id': '2371591881001',
  45. 'ext': 'mp4',
  46. 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
  47. 'uploader': '8TV',
  48. 'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
  49. 'timestamp': 1368213670,
  50. 'upload_date': '20130510',
  51. 'uploader_id': '1589608506001',
  52. },
  53. 'skip': 'The player has been deactivated by the content owner',
  54. },
  55. {
  56. # From http://medianetwork.oracle.com/video/player/1785452137001
  57. 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
  58. 'info_dict': {
  59. 'id': '1785452137001',
  60. 'ext': 'flv',
  61. 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
  62. 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
  63. 'uploader': 'Oracle',
  64. 'timestamp': 1344975024,
  65. 'upload_date': '20120814',
  66. 'uploader_id': '1460825906',
  67. },
  68. 'skip': 'video not playable',
  69. },
  70. {
  71. # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
  72. 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
  73. 'info_dict': {
  74. 'id': '2750934548001',
  75. 'ext': 'mp4',
  76. 'title': 'This Bracelet Acts as a Personal Thermostat',
  77. 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
  78. # 'uploader': 'Mashable',
  79. 'timestamp': 1382041798,
  80. 'upload_date': '20131017',
  81. 'uploader_id': '1130468786001',
  82. },
  83. },
  84. {
  85. # test that the default referer works
  86. # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
  87. 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
  88. 'info_dict': {
  89. 'id': '2878862109001',
  90. 'ext': 'mp4',
  91. 'title': 'Lost in Motion II',
  92. 'description': 'md5:363109c02998fee92ec02211bd8000df',
  93. 'uploader': 'National Ballet of Canada',
  94. },
  95. 'skip': 'Video gone',
  96. },
  97. {
  98. # test flv videos served by akamaihd.net
  99. # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
  100. 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
  101. # The md5 checksum changes on each download
  102. 'info_dict': {
  103. 'id': '3750436379001',
  104. 'ext': 'flv',
  105. 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
  106. 'uploader': 'RBTV Old (do not use)',
  107. 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
  108. 'timestamp': 1409122195,
  109. 'upload_date': '20140827',
  110. 'uploader_id': '710858724001',
  111. },
  112. 'skip': 'Video gone',
  113. },
  114. {
  115. # playlist with 'videoList'
  116. # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
  117. 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
  118. 'info_dict': {
  119. 'title': 'Sealife',
  120. 'id': '3550319591001',
  121. },
  122. 'playlist_mincount': 7,
  123. 'skip': 'Unsupported URL',
  124. },
  125. {
  126. # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
  127. 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
  128. 'info_dict': {
  129. 'id': '1522758701001',
  130. 'title': 'Lesson 08',
  131. },
  132. 'playlist_mincount': 10,
  133. 'skip': 'Unsupported URL',
  134. },
  135. {
  136. # playerID inferred from bcpid
  137. # from http://www.un.org/chinese/News/story.asp?NewsID=27724
  138. 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
  139. 'only_matching': True, # Tested in GenericIE
  140. },
  141. ]
  142. _WEBPAGE_TESTS = [{
  143. # embedded brightcove video
  144. # it also tests brightcove videos that need to set the 'Referer'
  145. # in the http requests
  146. 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  147. 'info_dict': {
  148. 'id': '2765128793001',
  149. 'ext': 'mp4',
  150. 'title': 'Le cours de bourse : l’analyse technique',
  151. 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  152. 'uploader': 'BFM BUSINESS',
  153. },
  154. 'params': {
  155. 'skip_download': True,
  156. },
  157. 'skip': '404 Not Found',
  158. }, {
  159. # embedded with itemprop embedURL and video id spelled as `idVideo`
  160. 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
  161. 'info_dict': {
  162. 'id': '5255628253001',
  163. 'ext': 'mp4',
  164. 'title': 'md5:37c519b1128915607601e75a87995fc0',
  165. 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
  166. 'uploader': 'BFM BUSINESS',
  167. 'uploader_id': '876450612001',
  168. 'timestamp': 1482255315,
  169. 'upload_date': '20161220',
  170. },
  171. 'params': {
  172. 'skip_download': True,
  173. },
  174. 'skip': 'Redirects, page gone',
  175. }, {
  176. # https://github.com/ytdl-org/youtube-dl/issues/2253
  177. 'url': 'http://bcove.me/i6nfkrc3',
  178. 'md5': '0ba9446db037002366bab3b3eb30c88c',
  179. 'info_dict': {
  180. 'id': '3101154703001',
  181. 'ext': 'mp4',
  182. 'title': 'Still no power',
  183. 'uploader': 'thestar.com',
  184. 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
  185. },
  186. 'skip': 'video gone',
  187. }, {
  188. # https://github.com/ytdl-org/youtube-dl/issues/3541
  189. 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
  190. 'info_dict': {
  191. 'id': '3866516442001',
  192. 'ext': 'mp4',
  193. 'title': 'Leer mij vrouwen kennen: Aflevering 1',
  194. 'description': 'Leer mij vrouwen kennen: Aflevering 1',
  195. 'uploader': 'SBS Broadcasting',
  196. },
  197. 'skip': 'Restricted to Netherlands, 404 Not Found',
  198. 'params': {
  199. 'skip_download': True, # m3u8 download
  200. },
  201. }, {
  202. # Brightcove video in <iframe>
  203. 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
  204. 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
  205. 'info_dict': {
  206. 'id': '5360463607001',
  207. 'ext': 'mp4',
  208. 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
  209. 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
  210. 'uploader': 'United Nations',
  211. 'uploader_id': '1362235914001',
  212. 'timestamp': 1489593889,
  213. 'upload_date': '20170315',
  214. },
  215. 'skip': '404 Not Found',
  216. }, {
  217. # Brightcove with UUID in videoPlayer
  218. 'url': 'http://www8.hp.com/cn/zh/home.html',
  219. 'info_dict': {
  220. 'id': '5255815316001',
  221. 'ext': 'mp4',
  222. 'title': 'Sprocket Video - China',
  223. 'description': 'Sprocket Video - China',
  224. 'uploader': 'HP-Video Gallery',
  225. 'timestamp': 1482263210,
  226. 'upload_date': '20161220',
  227. 'uploader_id': '1107601872001',
  228. },
  229. 'params': {
  230. 'skip_download': True, # m3u8 download
  231. },
  232. 'skip': 'video rotates...weekly?',
  233. }, {
  234. # Multiple brightcove videos
  235. # https://github.com/ytdl-org/youtube-dl/issues/2283
  236. 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
  237. 'info_dict': {
  238. 'id': 'always-never',
  239. 'title': 'Always / Never - The New Yorker',
  240. },
  241. 'playlist_count': 3,
  242. 'params': {
  243. 'extract_flat': False,
  244. 'skip_download': True,
  245. },
  246. 'skip': 'Redirects, page gone',
  247. }, {
  248. # BrightcoveInPageEmbed embed
  249. 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
  250. 'info_dict': {
  251. 'id': '4238694884001',
  252. 'ext': 'flv',
  253. 'title': 'Tabletop: Dread, Last Thoughts',
  254. 'description': 'Tabletop: Dread, Last Thoughts',
  255. 'duration': 51690,
  256. },
  257. 'skip': 'Redirects, page gone',
  258. }, {
  259. # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
  260. # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
  261. 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
  262. 'info_dict': {
  263. 'id': '4785848093001',
  264. 'ext': 'mp4',
  265. 'title': 'The Cardinal Pell Interview',
  266. 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
  267. 'uploader': 'GlobeCast Australia - GlobeStream',
  268. 'uploader_id': '2733773828001',
  269. 'upload_date': '20160304',
  270. 'timestamp': 1457083087,
  271. },
  272. 'params': {
  273. # m3u8 downloads
  274. 'skip_download': True,
  275. },
  276. 'skip': '404 Not Found',
  277. }, {
  278. # Brightcove embed with whitespace around attribute names
  279. 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
  280. 'info_dict': {
  281. 'id': '3167554373001',
  282. 'ext': 'mp4',
  283. 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
  284. 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
  285. 'uploader_id': '1079349493',
  286. 'upload_date': '20140207',
  287. 'timestamp': 1391810548,
  288. },
  289. 'params': {
  290. 'skip_download': True,
  291. },
  292. 'skip': '410 Gone',
  293. }]
  294. @classmethod
  295. def _build_brightcove_url(cls, object_str):
  296. """
  297. Build a Brightcove url from a xml string containing
  298. <object class="BrightcoveExperience">{params}</object>
  299. """
  300. # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
  301. object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
  302. lambda m: m.group(1) + '/>', object_str)
  303. # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
  304. object_str = object_str.replace('<--', '<!--')
  305. # remove namespace to simplify extraction
  306. object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
  307. object_str = fix_xml_ampersands(object_str)
  308. try:
  309. object_doc = compat_etree_fromstring(object_str.encode())
  310. except xml.etree.ElementTree.ParseError:
  311. return
  312. fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
  313. if fv_el is not None:
  314. flashvars = dict(
  315. (k, v[0])
  316. for k, v in urllib.parse.parse_qs(fv_el.attrib['value']).items())
  317. else:
  318. flashvars = {}
  319. data_url = object_doc.attrib.get('data', '')
  320. data_url_params = parse_qs(data_url)
  321. def find_param(name):
  322. if name in flashvars:
  323. return flashvars[name]
  324. node = find_xpath_attr(object_doc, './param', 'name', name)
  325. if node is not None:
  326. return node.attrib['value']
  327. return data_url_params.get(name)
  328. params = {}
  329. player_id = find_param('playerID') or find_param('playerId')
  330. if player_id is None:
  331. raise ExtractorError('Cannot find player ID')
  332. params['playerID'] = player_id
  333. player_key = find_param('playerKey')
  334. # Not all pages define this value
  335. if player_key is not None:
  336. params['playerKey'] = player_key
  337. # These fields hold the id of the video
  338. video_player = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
  339. if video_player is not None:
  340. if isinstance(video_player, list):
  341. video_player = video_player[0]
  342. video_player = video_player.strip()
  343. # UUID is also possible for videoPlayer (e.g.
  344. # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
  345. # or http://www8.hp.com/cn/zh/home.html)
  346. if not (re.match(
  347. r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
  348. video_player) or video_player.startswith('ref:')):
  349. return None
  350. params['@videoPlayer'] = video_player
  351. link_base = find_param('linkBaseURL')
  352. if link_base is not None:
  353. params['linkBaseURL'] = link_base
  354. return cls._make_brightcove_url(params)
  355. @classmethod
  356. def _build_brightcove_url_from_js(cls, object_js):
  357. # The layout of JS is as follows:
  358. # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
  359. # // build Brightcove <object /> XML
  360. # }
  361. m = re.search(
  362. r'''(?x)customBC\.createVideo\(
  363. .*? # skipping width and height
  364. ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
  365. ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
  366. # in length, however it's appended to itself
  367. # in places, so truncate
  368. ["\'](?P<videoID>\d+)["\'] # @videoPlayer
  369. ''', object_js)
  370. if m:
  371. return cls._make_brightcove_url(m.groupdict())
  372. @classmethod
  373. def _make_brightcove_url(cls, params):
  374. return update_url_query(
  375. 'https://c.brightcove.com/services/viewer/htmlFederated', params)
  376. @classmethod
  377. def _extract_brightcove_url(cls, webpage):
  378. """Try to extract the brightcove url from the webpage, returns None
  379. if it can't be found
  380. """
  381. urls = cls._extract_brightcove_urls(webpage)
  382. return urls[0] if urls else None
  383. @classmethod
  384. def _extract_brightcove_urls(cls, webpage):
  385. """Return a list of all Brightcove URLs from the webpage """
  386. url_m = re.search(
  387. r'''(?x)
  388. <meta\s+
  389. (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
  390. content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
  391. ''', webpage)
  392. if url_m:
  393. url = unescapeHTML(url_m.group('url'))
  394. # Some sites don't add it, we can't download with this url, for example:
  395. # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
  396. if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
  397. return [url]
  398. matches = re.findall(
  399. r'''(?sx)<object
  400. (?:
  401. [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
  402. [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
  403. ).+?>\s*</object>''',
  404. webpage)
  405. if matches:
  406. return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
  407. matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
  408. if matches:
  409. return list(filter(None, [
  410. cls._build_brightcove_url_from_js(custom_bc)
  411. for custom_bc in matches]))
  412. return [src for _, src in re.findall(
  413. r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
  414. def _extract_from_webpage(self, url, webpage):
  415. bc_urls = self._extract_brightcove_urls(webpage)
  416. for bc_url in bc_urls:
  417. yield self.url_result(smuggle_url(bc_url, {'Referer': url}), BrightcoveLegacyIE)
  418. def _real_extract(self, url):
  419. url, smuggled_data = unsmuggle_url(url, {})
  420. # Change the 'videoId' and others field to '@videoPlayer'
  421. url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
  422. # Change bckey (used by bcove.me urls) to playerKey
  423. url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
  424. mobj = self._match_valid_url(url)
  425. query_str = mobj.group('query')
  426. query = urllib.parse.parse_qs(query_str)
  427. video_player = query.get('@videoPlayer')
  428. if video_player:
  429. # We set the original url as the default 'Referer' header
  430. referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
  431. video_id = video_player[0]
  432. if 'playerID' not in query:
  433. mobj = re.search(r'/bcpid(\d+)', url)
  434. if mobj is not None:
  435. query['playerID'] = [mobj.group(1)]
  436. publisher_id = query.get('publisherId')
  437. if publisher_id and publisher_id[0].isdigit():
  438. publisher_id = publisher_id[0]
  439. if not publisher_id:
  440. player_key = query.get('playerKey')
  441. if player_key and ',' in player_key[0]:
  442. player_key = player_key[0]
  443. else:
  444. player_id = query.get('playerID')
  445. if player_id and player_id[0].isdigit():
  446. headers = {}
  447. if referer:
  448. headers['Referer'] = referer
  449. player_page = self._download_webpage(
  450. 'https://link.brightcove.com/services/player/bcpid' + player_id[0],
  451. video_id, headers=headers, fatal=False)
  452. if player_page:
  453. player_key = self._search_regex(
  454. r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
  455. player_page, 'player key', fatal=False)
  456. if player_key:
  457. enc_pub_id = player_key.split(',')[1].replace('~', '=')
  458. publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
  459. if publisher_id:
  460. brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
  461. if referer:
  462. brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
  463. return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
  464. # TODO: figure out if it's possible to extract playlistId from playerKey
  465. # elif 'playerKey' in query:
  466. # player_key = query['playerKey']
  467. # return self._get_playlist_info(player_key[0])
  468. raise UnsupportedError(url)
  469. class BrightcoveNewBaseIE(AdobePassIE):
  470. def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
  471. title = json_data['name'].strip()
  472. formats, subtitles = [], {}
  473. sources = json_data.get('sources') or []
  474. for source in sources:
  475. container = source.get('container')
  476. ext = mimetype2ext(source.get('type'))
  477. src = source.get('src')
  478. if ext == 'm3u8' or container == 'M2TS':
  479. if not src:
  480. continue
  481. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  482. src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
  483. subtitles = self._merge_subtitles(subtitles, subs)
  484. elif ext == 'mpd':
  485. if not src:
  486. continue
  487. fmts, subs = self._extract_mpd_formats_and_subtitles(src, video_id, 'dash', fatal=False)
  488. subtitles = self._merge_subtitles(subtitles, subs)
  489. else:
  490. streaming_src = source.get('streaming_src')
  491. stream_name, app_name = source.get('stream_name'), source.get('app_name')
  492. if not src and not streaming_src and (not stream_name or not app_name):
  493. continue
  494. tbr = float_or_none(source.get('avg_bitrate'), 1000)
  495. height = int_or_none(source.get('height'))
  496. width = int_or_none(source.get('width'))
  497. f = {
  498. 'tbr': tbr,
  499. 'filesize': int_or_none(source.get('size')),
  500. 'container': container,
  501. 'ext': ext or container.lower(),
  502. }
  503. if width == 0 and height == 0:
  504. f.update({
  505. 'vcodec': 'none',
  506. })
  507. else:
  508. f.update({
  509. 'width': width,
  510. 'height': height,
  511. 'vcodec': source.get('codec'),
  512. })
  513. def build_format_id(kind):
  514. return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
  515. if src or streaming_src:
  516. f.update({
  517. 'url': src or streaming_src,
  518. 'format_id': build_format_id('http' if src else 'http-streaming'),
  519. 'source_preference': 0 if src else -1,
  520. })
  521. else:
  522. f.update({
  523. 'url': app_name,
  524. 'play_path': stream_name,
  525. 'format_id': build_format_id('rtmp'),
  526. })
  527. fmts = [f]
  528. # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
  529. if container == 'WVM' or source.get('key_systems') or ext == 'ism':
  530. for f in fmts:
  531. f['has_drm'] = True
  532. formats.extend(fmts)
  533. if not formats:
  534. errors = json_data.get('errors')
  535. if errors:
  536. error = errors[0]
  537. self.raise_no_formats(
  538. error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
  539. headers.pop('Authorization', None) # or else http formats will give error 400
  540. for f in formats:
  541. f.setdefault('http_headers', {}).update(headers)
  542. for text_track in json_data.get('text_tracks', []):
  543. if text_track.get('kind') != 'captions':
  544. continue
  545. text_track_url = url_or_none(text_track.get('src'))
  546. if not text_track_url:
  547. continue
  548. lang = (str_or_none(text_track.get('srclang'))
  549. or str_or_none(text_track.get('label')) or 'en').lower()
  550. subtitles.setdefault(lang, []).append({
  551. 'url': text_track_url,
  552. })
  553. is_live = False
  554. duration = float_or_none(json_data.get('duration'), 1000)
  555. if duration is not None and duration <= 0:
  556. is_live = True
  557. common_res = [(160, 90), (320, 180), (480, 720), (640, 360), (768, 432), (1024, 576), (1280, 720), (1366, 768), (1920, 1080)]
  558. thumb_base_url = dict_get(json_data, ('poster', 'thumbnail'))
  559. thumbnails = [{
  560. 'url': re.sub(r'\d+x\d+', f'{w}x{h}', thumb_base_url),
  561. 'width': w,
  562. 'height': h,
  563. } for w, h in common_res] if thumb_base_url else None
  564. return {
  565. 'id': video_id,
  566. 'title': title,
  567. 'description': clean_html(json_data.get('description')),
  568. 'thumbnails': thumbnails,
  569. 'duration': duration,
  570. 'timestamp': parse_iso8601(json_data.get('published_at')),
  571. 'uploader_id': json_data.get('account_id'),
  572. 'formats': formats,
  573. 'subtitles': subtitles,
  574. 'tags': json_data.get('tags', []),
  575. 'is_live': is_live,
  576. }
  577. class BrightcoveNewIE(BrightcoveNewBaseIE):
  578. IE_NAME = 'brightcove:new'
  579. _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
  580. _TESTS = [{
  581. 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
  582. 'md5': 'c8100925723840d4b0d243f7025703be',
  583. 'info_dict': {
  584. 'id': '4463358922001',
  585. 'ext': 'mp4',
  586. 'title': 'Meet the man behind Popcorn Time',
  587. 'description': 'md5:eac376a4fe366edc70279bfb681aea16',
  588. 'duration': 165.768,
  589. 'timestamp': 1441391203,
  590. 'upload_date': '20150904',
  591. 'uploader_id': '929656772001',
  592. 'formats': 'mincount:20',
  593. },
  594. 'skip': '404 Not Found',
  595. }, {
  596. # with rtmp streams
  597. 'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
  598. 'info_dict': {
  599. 'id': '4279049078001',
  600. 'ext': 'mp4',
  601. 'title': 'Titansgrave: Chapter 0',
  602. 'description': 'Titansgrave: Chapter 0',
  603. 'duration': 1242.058,
  604. 'timestamp': 1433556729,
  605. 'upload_date': '20150606',
  606. 'uploader_id': '4036320279001',
  607. 'formats': 'mincount:39',
  608. },
  609. 'params': {
  610. # m3u8 download
  611. 'skip_download': True,
  612. },
  613. }, {
  614. # playlist stream
  615. 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
  616. 'info_dict': {
  617. 'id': '5718313430001',
  618. 'title': 'No Audio Playlist',
  619. },
  620. 'playlist_count': 7,
  621. 'params': {
  622. # m3u8 download
  623. 'skip_download': True,
  624. },
  625. }, {
  626. 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
  627. 'only_matching': True,
  628. }, {
  629. # ref: prefixed video id
  630. 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
  631. 'only_matching': True,
  632. }, {
  633. # non numeric ref: prefixed video id
  634. 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
  635. 'only_matching': True,
  636. }, {
  637. # unavailable video without message but with error_code
  638. 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
  639. 'only_matching': True,
  640. }]
  641. _WEBPAGE_TESTS = [{
  642. # brightcove player url embed
  643. 'url': 'https://nbc-2.com/weather/forecast/2022/11/16/forecast-warmest-day-of-the-week/',
  644. 'md5': '2934d5372b354d27083ccf8575dbfee2',
  645. 'info_dict': {
  646. 'id': '6315650313112',
  647. 'title': 'First Alert Forecast: November 15, 2022',
  648. 'ext': 'mp4',
  649. 'tags': ['nbc2', 'forecast'],
  650. 'uploader_id': '6146886170001',
  651. 'thumbnail': r're:^https?://.*\.jpg$',
  652. 'timestamp': 1668574571,
  653. 'duration': 233.375,
  654. 'upload_date': '20221116',
  655. },
  656. }, {
  657. # embedded with video tag only
  658. 'url': 'https://www.gooddishtv.com/tiktok-rapping-chef-mr-pyrex',
  659. 'info_dict': {
  660. 'id': 'tiktok-rapping-chef-mr-pyrex',
  661. 'title': 'TikTok\'s Rapping Chef Makes Jambalaya for the Hosts',
  662. 'thumbnail': r're:^https?://.*\.jpg$',
  663. 'age_limit': 0,
  664. 'description': 'Just in time for Mardi Gras',
  665. },
  666. 'playlist': [{
  667. 'info_dict': {
  668. 'id': '6299189544001',
  669. 'ext': 'mp4',
  670. 'title': 'TGD_01-032_5',
  671. 'thumbnail': r're:^https?://.*\.jpg$',
  672. 'tags': [],
  673. 'timestamp': 1646078943,
  674. 'uploader_id': '1569565978001',
  675. 'upload_date': '20220228',
  676. 'duration': 217.195,
  677. },
  678. }, {
  679. 'info_dict': {
  680. 'id': '6305565995112',
  681. 'ext': 'mp4',
  682. 'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5',
  683. 'thumbnail': r're:^https?://.*\.jpg$',
  684. 'tags': [],
  685. 'timestamp': 1651604591,
  686. 'uploader_id': '1569565978001',
  687. 'upload_date': '20220503',
  688. 'duration': 310.421,
  689. },
  690. }],
  691. }, {
  692. # Brightcove:new type [2].
  693. 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
  694. 'md5': '2b35148fcf48da41c9fb4591650784f3',
  695. 'info_dict': {
  696. 'id': '5348741021001',
  697. 'ext': 'mp4',
  698. 'upload_date': '20170306',
  699. 'uploader_id': '4191638492001',
  700. 'timestamp': 1488769918,
  701. 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
  702. },
  703. 'skip': '404 Not Found',
  704. }, {
  705. # Alternative brightcove <video> attributes
  706. 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
  707. 'info_dict': {
  708. 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
  709. 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
  710. },
  711. 'playlist': [{
  712. 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
  713. 'info_dict': {
  714. 'id': '5311302538001',
  715. 'ext': 'mp4',
  716. 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
  717. 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
  718. 'timestamp': 1486321708,
  719. 'upload_date': '20170205',
  720. 'uploader_id': '800000640001',
  721. },
  722. 'only_matching': True,
  723. }],
  724. 'skip': '404 Not Found',
  725. }, {
  726. # Brightcove URL in single quotes
  727. 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
  728. 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
  729. 'info_dict': {
  730. 'id': '4255764656001',
  731. 'ext': 'mp4',
  732. 'title': 'SN Presents: Russell Martin, World Citizen',
  733. 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
  734. 'uploader': 'Rogers Sportsnet',
  735. 'uploader_id': '1704050871',
  736. 'upload_date': '20150525',
  737. 'timestamp': 1432570283,
  738. },
  739. 'skip': 'Page no longer has URL, now has javascript',
  740. }]
  741. @staticmethod
  742. def _extract_url(ie, webpage):
  743. urls = BrightcoveNewIE._extract_brightcove_urls(ie, webpage)
  744. return urls[0] if urls else None
  745. @staticmethod
  746. def _extract_brightcove_urls(ie, webpage):
  747. # Reference:
  748. # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
  749. # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
  750. # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
  751. # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
  752. # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
  753. entries = []
  754. # Look for iframe embeds [1]
  755. for _, url in re.findall(
  756. r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
  757. entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
  758. # Look for <video> tags [2] and embed_in_page embeds [3]
  759. # [2] looks like:
  760. for video, script_tag, account_id, player_id, embed in re.findall(
  761. r'''(?isx)
  762. (<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
  763. (?:.*?
  764. (<script[^>]+
  765. src=["\'](?:https?:)?//players\.brightcove\.net/
  766. (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
  767. )
  768. )?
  769. ''', webpage):
  770. attrs = extract_attributes(video)
  771. # According to examples from [4] it's unclear whether video id
  772. # may be optional and what to do when it is
  773. video_id = attrs.get('data-video-id')
  774. if not video_id:
  775. continue
  776. account_id = account_id or attrs.get('data-account')
  777. if not account_id:
  778. continue
  779. player_id = player_id or attrs.get('data-player') or 'default'
  780. embed = embed or attrs.get('data-embed') or 'default'
  781. bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
  782. # Some brightcove videos may be embedded with video tag only and
  783. # without script tag or any mentioning of brightcove at all. Such
  784. # embeds are considered ambiguous since they are matched based only
  785. # on data-video-id and data-account attributes and in the wild may
  786. # not be brightcove embeds at all. Let's check reconstructed
  787. # brightcove URLs in case of such embeds and only process valid
  788. # ones. By this we ensure there is indeed a brightcove embed.
  789. if not script_tag and not ie._is_valid_url(
  790. bc_url, video_id, 'possible brightcove video'):
  791. continue
  792. entries.append(bc_url)
  793. return entries
  794. def _extract_from_webpage(self, url, webpage):
  795. bc_urls = self._extract_brightcove_urls(self, webpage)
  796. for bc_url in bc_urls:
  797. yield self.url_result(smuggle_url(bc_url, {'referrer': url}), BrightcoveNewIE)
def _real_extract(self, url):
    """Fetch playback metadata for a Brightcove video or playlist URL.

    Optional data smuggled into *url* is honoured: ``geo_countries`` /
    ``geo_ip_blocks`` (geo bypass), ``token`` (bearer token, switches to
    the ``edge-auth`` API host), ``referrer`` (sent as Referer/Origin)
    and ``source_url`` (needed only for TVE authentication).

    Returns the result of ``_parse_brightcove_metadata`` for a single
    video, or a ``playlist_result`` when the URL's content type is
    ``playlist``.

    Raises ExtractorError (expected) with the API's message on a 401/403
    response that is neither a geo restriction nor a recoverable stale
    policy key; other download errors propagate unchanged.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    self._initialize_geo_bypass({
        'countries': smuggled_data.get('geo_countries'),
        'ip_blocks': smuggled_data.get('geo_ip_blocks'),
    })

    # The five values come from the capture groups of the class's URL
    # pattern, which is defined outside this view.
    account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()

    # Policy keys are cached per account/player pair
    policy_key_id = f'{account_id}_{player_id}'
    policy_key = self.cache.load('brightcove', policy_key_id)
    # Set once a key has been freshly extracted during this call, so that
    # a rejected fresh key is not retried (see the request loop below).
    policy_key_extracted = False
    store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)

    def extract_policy_key():
        # Locate the player's policy key, store it in the cache and return it
        base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
        # First choice: the player's config.json (a failed download is tolerated)
        config = self._download_json(
            base_url + 'config.json', video_id, fatal=False) or {}
        policy_key = try_get(
            config, lambda x: x['video_cloud']['policy_key'])
        if not policy_key:
            # Fallback: dig the key out of the player's JavaScript
            webpage = self._download_webpage(
                base_url + 'index.min.js', video_id)

            # Try the argument object of a catalog(...) call first
            catalog = self._search_regex(
                r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
            if catalog:
                catalog = self._parse_json(
                    js_to_json(catalog), video_id, fatal=False)
                if catalog:
                    policy_key = catalog.get('policyKey')

            if not policy_key:
                # Last resort: a bare `policyKey: '...'` assignment.
                # No default is passed here, so presumably a miss is
                # fatal -- TODO confirm against _search_regex.
                policy_key = self._search_regex(
                    r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
                    webpage, 'policy key', group='pk')

        store_pk(policy_key)
        return policy_key

    token = smuggled_data.get('token')
    # With a token the edge-auth host is used and the token is sent as a
    # bearer credential; otherwise the plain edge host with no auth header.
    api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
    headers = {'Authorization': f'Bearer {token}'} if token else {}
    referrer = smuggled_data.get('referrer')  # XXX: notice the spelling/case of the key
    if referrer:
        # Origin is the scheme://host prefix of the referrer.
        # NOTE(review): re.search() returns None for a referrer without
        # an http(s) prefix, which would make .group(0) fail -- confirm
        # callers always smuggle absolute http(s) URLs.
        headers.update({
            'Referer': referrer,
            'Origin': re.search(r'https?://[^/]+', referrer).group(0),
        })

    # At most two attempts: the second pass happens only when a cached
    # policy key was rejected and has to be re-extracted.
    for _ in range(2):
        if not policy_key:
            policy_key = extract_policy_key()
            policy_key_extracted = True
        # The policy key is passed via the Accept header
        headers['Accept'] = f'application/json;pk={policy_key}'
        try:
            json_data = self._download_json(api_url, video_id, headers=headers)
            break
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
                # The error body is parsed as JSON and its first element used
                json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
                message = json_data.get('message') or json_data['error_code']
                if json_data.get('error_subcode') == 'CLIENT_GEO':
                    self.raise_geo_restricted(msg=message)
                elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
                    # The key came from the cache and is stale: clear it
                    # both locally and in the cache, then retry once.
                    policy_key = None
                    store_pk(None)
                    continue
                raise ExtractorError(message, expected=True)
            # Anything other than a 401/403 HTTP error is re-raised as is
            raise

    errors = json_data.get('errors')
    if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
        # The response asks for TV-provider authentication: obtain a
        # token using the requestor/resource ids from the video's custom
        # fields and repeat the request with it. This path requires
        # 'source_url' to have been smuggled in.
        custom_fields = json_data['custom_fields']
        tve_token = self._extract_mvpd_auth(
            smuggled_data['source_url'], video_id,
            custom_fields['bcadobepassrequestorid'],
            custom_fields['bcadobepassresourceid'])
        # NOTE(review): this request sends only the Accept header and
        # does not reuse `headers` (Referer/Origin/Authorization) --
        # confirm that is intended.
        json_data = self._download_json(
            api_url, video_id, headers={
                'Accept': f'application/json;pk={policy_key}',
            }, query={
                'tveToken': tve_token,
            })

    if content_type == 'playlist':
        # Entries are produced lazily, one per item under 'videos';
        # the lambda presumably filters out items without a usable
        # 'id' -- TODO confirm against traverse_obj semantics.
        return self.playlist_result(
            (self._parse_brightcove_metadata(vid, vid['id'], headers)
             for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))),
            json_data.get('id'), json_data.get('name'),
            json_data.get('description'))

    return self._parse_brightcove_metadata(
        json_data, video_id, headers=headers)