logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

urplay.py (6174B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. dict_get,
  6. ExtractorError,
  7. int_or_none,
  8. ISO639Utils,
  9. parse_age_limit,
  10. try_get,
  11. unified_timestamp,
  12. )
  13. class URPlayIE(InfoExtractor):
  14. _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
  15. _TESTS = [{
  16. 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
  17. 'md5': 'ff5b0c89928f8083c74bbd5099c9292d',
  18. 'info_dict': {
  19. 'id': '203704',
  20. 'ext': 'mp4',
  21. 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
  22. 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
  23. 'timestamp': 1513292400,
  24. 'upload_date': '20171214',
  25. 'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
  26. 'duration': 2269,
  27. 'categories': ['Vetenskap & teknik'],
  28. 'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
  29. 'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
  30. 'age_limit': 15,
  31. },
  32. }, {
  33. 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
  34. 'info_dict': {
  35. 'id': '190031',
  36. 'ext': 'mp4',
  37. 'title': 'Tripp, Trapp, Träd : Sovkudde',
  38. 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
  39. 'timestamp': 1440086400,
  40. 'upload_date': '20150820',
  41. 'series': 'Tripp, Trapp, Träd',
  42. 'duration': 865,
  43. 'tags': ['Sova'],
  44. 'episode': 'Sovkudde',
  45. },
  46. }, {
  47. 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
  48. 'only_matching': True,
  49. }]
  50. def _real_extract(self, url):
  51. video_id = self._match_id(url)
  52. url = url.replace('skola.se/Produkter', 'play.se/program')
  53. webpage = self._download_webpage(url, video_id)
  54. urplayer_data = self._search_regex(
  55. r'(?s)\bid\s*=\s*"__NEXT_DATA__"[^>]*>\s*({.+?})\s*</script',
  56. webpage, 'urplayer next data', fatal=False) or {}
  57. if urplayer_data:
  58. urplayer_data = self._parse_json(urplayer_data, video_id, fatal=False)
  59. urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
  60. if not urplayer_data:
  61. raise ExtractorError('Unable to parse __NEXT_DATA__')
  62. else:
  63. accessible_episodes = self._parse_json(self._html_search_regex(
  64. r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
  65. webpage, 'urplayer data'), video_id)['accessibleEpisodes']
  66. urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
  67. episode = urplayer_data['title']
  68. raw_streaming_info = urplayer_data['streamingInfo']['raw']
  69. host = self._download_json(
  70. 'http://streaming-loadbalancer.ur.se/loadbalancer.json',
  71. video_id)['redirect']
  72. formats = []
  73. for k, v in raw_streaming_info.items():
  74. if not (k in ('sd', 'hd') and isinstance(v, dict)):
  75. continue
  76. file_http = v.get('location')
  77. if file_http:
  78. formats.extend(self._extract_wowza_formats(
  79. 'http://%s/%splaylist.m3u8' % (host, file_http),
  80. video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
  81. self._sort_formats(formats)
  82. subtitles = {}
  83. def parse_lang_code(code):
  84. "3-character language code or None (utils candidate)"
  85. if code is None:
  86. return
  87. lang = code.lower()
  88. if not ISO639Utils.long2short(lang):
  89. lang = ISO639Utils.short2long(lang)
  90. return lang or None
  91. for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
  92. if (k in ('sd', 'hd') or not isinstance(v, dict)):
  93. continue
  94. lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
  95. if not sttl_url:
  96. continue
  97. lang = parse_lang_code(lang)
  98. if not lang:
  99. continue
  100. sttl = subtitles.get(lang) or []
  101. sttl.append({'ext': k, 'url': sttl_url, })
  102. subtitles[lang] = sttl
  103. image = urplayer_data.get('image') or {}
  104. thumbnails = []
  105. for k, v in image.items():
  106. t = {
  107. 'id': k,
  108. 'url': v,
  109. }
  110. wh = k.split('x')
  111. if len(wh) == 2:
  112. t.update({
  113. 'width': int_or_none(wh[0]),
  114. 'height': int_or_none(wh[1]),
  115. })
  116. thumbnails.append(t)
  117. series = urplayer_data.get('series') or {}
  118. series_title = dict_get(series, ('seriesTitle', 'title')) or dict_get(urplayer_data, ('seriesTitle', 'mainTitle'))
  119. return {
  120. 'id': video_id,
  121. 'title': '%s : %s' % (series_title, episode) if series_title else episode,
  122. 'description': urplayer_data.get('description'),
  123. 'thumbnails': thumbnails,
  124. 'timestamp': unified_timestamp(urplayer_data.get('publishedAt')),
  125. 'series': series_title,
  126. 'formats': formats,
  127. 'duration': int_or_none(urplayer_data.get('duration')),
  128. 'categories': urplayer_data.get('categories'),
  129. 'tags': urplayer_data.get('keywords'),
  130. 'season': series.get('label'),
  131. 'episode': episode,
  132. 'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
  133. 'age_limit': parse_age_limit(min(try_get(a, lambda x: x['from'], int) or 0
  134. for a in urplayer_data.get('ageRanges', []))),
  135. 'subtitles': subtitles,
  136. }