logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

ccma.py (5482B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import calendar
  4. import datetime
  5. import re
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. clean_html,
  9. extract_timezone,
  10. int_or_none,
  11. parse_duration,
  12. parse_resolution,
  13. try_get,
  14. url_or_none,
  15. )
  16. class CCMAIE(InfoExtractor):
  17. _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
  18. _TESTS = [{
  19. 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
  20. 'md5': '7296ca43977c8ea4469e719c609b0871',
  21. 'info_dict': {
  22. 'id': '5630208',
  23. 'ext': 'mp4',
  24. 'title': 'L\'espot de La Marató de TV3',
  25. 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
  26. 'timestamp': 1478608140,
  27. 'upload_date': '20161108',
  28. 'age_limit': 0,
  29. }
  30. }, {
  31. 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
  32. 'md5': 'fa3e38f269329a278271276330261425',
  33. 'info_dict': {
  34. 'id': '943685',
  35. 'ext': 'mp3',
  36. 'title': 'El Consell de Savis analitza el derbi',
  37. 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
  38. 'upload_date': '20170512',
  39. 'timestamp': 1494622500,
  40. 'vcodec': 'none',
  41. 'categories': ['Esports'],
  42. }
  43. }, {
  44. 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
  45. 'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
  46. 'info_dict': {
  47. 'id': '6031387',
  48. 'ext': 'mp4',
  49. 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
  50. 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
  51. 'timestamp': 1582577700,
  52. 'upload_date': '20200224',
  53. 'subtitles': 'mincount:4',
  54. 'age_limit': 16,
  55. 'series': 'Crims',
  56. }
  57. }]
  58. def _real_extract(self, url):
  59. media_type, media_id = re.match(self._VALID_URL, url).groups()
  60. media = self._download_json(
  61. 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
  62. 'media': media_type,
  63. 'idint': media_id,
  64. })
  65. formats = []
  66. media_url = media['media']['url']
  67. if isinstance(media_url, list):
  68. for format_ in media_url:
  69. format_url = url_or_none(format_.get('file'))
  70. if not format_url:
  71. continue
  72. label = format_.get('label')
  73. f = parse_resolution(label)
  74. f.update({
  75. 'url': format_url,
  76. 'format_id': label,
  77. })
  78. formats.append(f)
  79. else:
  80. formats.append({
  81. 'url': media_url,
  82. 'vcodec': 'none' if media_type == 'audio' else None,
  83. })
  84. self._sort_formats(formats)
  85. informacio = media['informacio']
  86. title = informacio['titol']
  87. durada = informacio.get('durada') or {}
  88. duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
  89. tematica = try_get(informacio, lambda x: x['tematica']['text'])
  90. timestamp = None
  91. data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
  92. try:
  93. timezone, data_utc = extract_timezone(data_utc)
  94. timestamp = calendar.timegm((datetime.datetime.strptime(
  95. data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
  96. except TypeError:
  97. pass
  98. subtitles = {}
  99. subtitols = media.get('subtitols') or []
  100. if isinstance(subtitols, dict):
  101. subtitols = [subtitols]
  102. for st in subtitols:
  103. sub_url = st.get('url')
  104. if sub_url:
  105. subtitles.setdefault(
  106. st.get('iso') or st.get('text') or 'ca', []).append({
  107. 'url': sub_url,
  108. })
  109. thumbnails = []
  110. imatges = media.get('imatges', {})
  111. if imatges:
  112. thumbnail_url = imatges.get('url')
  113. if thumbnail_url:
  114. thumbnails = [{
  115. 'url': thumbnail_url,
  116. 'width': int_or_none(imatges.get('amplada')),
  117. 'height': int_or_none(imatges.get('alcada')),
  118. }]
  119. age_limit = None
  120. codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
  121. if codi_etic:
  122. codi_etic_s = codi_etic.split('_')
  123. if len(codi_etic_s) == 2:
  124. if codi_etic_s[1] == 'TP':
  125. age_limit = 0
  126. else:
  127. age_limit = int_or_none(codi_etic_s[1])
  128. return {
  129. 'id': media_id,
  130. 'title': title,
  131. 'description': clean_html(informacio.get('descripcio')),
  132. 'duration': duration,
  133. 'timestamp': timestamp,
  134. 'thumbnails': thumbnails,
  135. 'subtitles': subtitles,
  136. 'formats': formats,
  137. 'age_limit': age_limit,
  138. 'alt_title': informacio.get('titol_complet'),
  139. 'episode_number': int_or_none(informacio.get('capitol')),
  140. 'categories': [tematica] if tematica else None,
  141. 'series': informacio.get('programa'),
  142. }