logo

oasis-root

Compiled tree of Oasis Linux based on own branch at <https://hacktivis.me/git/oasis/> git clone https://anongit.hacktivis.me/git/oasis-root.git

bfmtv.py (6400B)


  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import ExtractorError, extract_attributes
  4. class BFMTVBaseIE(InfoExtractor):
  5. _VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
  6. _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
  7. _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
  8. _VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
  9. BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
  10. def _extract_video(self, video_block):
  11. video_element = self._search_regex(
  12. self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)
  13. if video_element:
  14. video_element_attrs = extract_attributes(video_element)
  15. video_id = video_element_attrs.get('data-video-id')
  16. if not video_id:
  17. return
  18. account_id = video_element_attrs.get('data-account') or '876450610001'
  19. player_id = video_element_attrs.get('adjustplayer') or '19dszYXgm'
  20. else:
  21. video_block_attrs = extract_attributes(video_block)
  22. video_id = video_block_attrs.get('videoid')
  23. if not video_id:
  24. return
  25. account_id = video_block_attrs.get('accountid') or '876630703001'
  26. player_id = video_block_attrs.get('playerid') or 'KbPwEbuHx'
  27. return self.url_result(
  28. self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
  29. 'BrightcoveNew', video_id)
  30. class BFMTVIE(BFMTVBaseIE):
  31. IE_NAME = 'bfmtv'
  32. _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
  33. _TESTS = [{
  34. 'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
  35. 'info_dict': {
  36. 'id': '6196747868001',
  37. 'ext': 'mp4',
  38. 'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"',
  39. 'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
  40. 'uploader_id': '876450610001',
  41. 'upload_date': '20201002',
  42. 'timestamp': 1601629620,
  43. 'duration': 44.757,
  44. 'tags': ['bfmactu', 'politique'],
  45. 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876450610001/5041f4c1-bc48-4af8-a256-1b8300ad8ef0/cf2f9114-e8e2-4494-82b4-ab794ea4bc7d/1920x1080/match/image.jpg',
  46. },
  47. }]
  48. def _real_extract(self, url):
  49. bfmtv_id = self._match_id(url)
  50. webpage = self._download_webpage(url, bfmtv_id)
  51. video = self._extract_video(self._search_regex(
  52. self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
  53. if not video:
  54. raise ExtractorError('Failed to extract video')
  55. return video
  56. class BFMTVLiveIE(BFMTVBaseIE):
  57. IE_NAME = 'bfmtv:live'
  58. _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
  59. _TESTS = [{
  60. 'url': 'https://www.bfmtv.com/en-direct/',
  61. 'info_dict': {
  62. 'id': '6346069778112',
  63. 'ext': 'mp4',
  64. 'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
  65. 'uploader_id': '876450610001',
  66. 'upload_date': '20240202',
  67. 'timestamp': 1706887572,
  68. 'live_status': 'is_live',
  69. 'thumbnail': r're:https://.+/image\.jpg',
  70. 'tags': [],
  71. },
  72. 'params': {
  73. 'skip_download': True,
  74. },
  75. }, {
  76. 'url': 'https://www.bfmtv.com/economie/en-direct/',
  77. 'only_matching': True,
  78. }]
  79. def _real_extract(self, url):
  80. bfmtv_id = self._match_id(url)
  81. webpage = self._download_webpage(url, bfmtv_id)
  82. video = self._extract_video(self._search_regex(
  83. self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
  84. if not video:
  85. raise ExtractorError('Failed to extract video')
  86. return video
  87. class BFMTVArticleIE(BFMTVBaseIE):
  88. IE_NAME = 'bfmtv:article'
  89. _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
  90. _TESTS = [{
  91. 'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
  92. 'info_dict': {
  93. 'id': '202101060198',
  94. 'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
  95. 'description': 'md5:947974089c303d3ac6196670ae262843',
  96. },
  97. 'playlist_count': 2,
  98. }, {
  99. 'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
  100. 'only_matching': True,
  101. }, {
  102. 'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
  103. 'only_matching': True,
  104. }, {
  105. 'url': 'https://rmc.bfmtv.com/actualites/societe/transports/ce-n-est-plus-tout-rentable-le-bioethanol-e85-depasse-1eu-le-litre-des-automobilistes-regrettent_AV-202301100268.html',
  106. 'info_dict': {
  107. 'id': '6318445464112',
  108. 'ext': 'mp4',
  109. 'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe',
  110. 'uploader_id': '876630703001',
  111. 'upload_date': '20230110',
  112. 'timestamp': 1673341692,
  113. 'duration': 109.269,
  114. 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'],
  115. 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg',
  116. },
  117. }]
  118. def _entries(self, webpage):
  119. for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
  120. video = self._extract_video(video_block_el)
  121. if video:
  122. yield video
  123. def _real_extract(self, url):
  124. bfmtv_id = self._match_id(url)
  125. webpage = self._download_webpage(url, bfmtv_id)
  126. return self.playlist_result(
  127. self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
  128. self._html_search_meta(['og:description', 'description'], webpage))