logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git

tvn24.py (3902B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. int_or_none,
  6. NO_DEFAULT,
  7. unescapeHTML,
  8. )
  9. class TVN24IE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
  11. _TESTS = [{
  12. 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
  13. 'md5': 'fbdec753d7bc29d96036808275f2130c',
  14. 'info_dict': {
  15. 'id': '1584444',
  16. 'ext': 'mp4',
  17. 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
  18. 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
  19. 'thumbnail': 're:https?://.*[.]jpeg',
  20. }
  21. }, {
  22. # different layout
  23. 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
  24. 'info_dict': {
  25. 'id': '1771763',
  26. 'ext': 'mp4',
  27. 'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
  28. 'thumbnail': 're:https?://.*',
  29. },
  30. 'params': {
  31. 'skip_download': True,
  32. },
  33. }, {
  34. 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
  35. 'only_matching': True,
  36. }, {
  37. 'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
  38. 'only_matching': True,
  39. }, {
  40. 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
  41. 'only_matching': True,
  42. }, {
  43. 'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
  44. 'only_matching': True,
  45. }]
  46. def _real_extract(self, url):
  47. display_id = self._match_id(url)
  48. webpage = self._download_webpage(url, display_id)
  49. title = self._og_search_title(
  50. webpage, default=None) or self._search_regex(
  51. r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
  52. webpage, 'title')
  53. def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
  54. return self._parse_json(
  55. self._search_regex(
  56. r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
  57. name, group='json', default=default, fatal=fatal) or '{}',
  58. display_id, transform_source=unescapeHTML, fatal=fatal)
  59. quality_data = extract_json('data-quality', 'formats')
  60. formats = []
  61. for format_id, url in quality_data.items():
  62. formats.append({
  63. 'url': url,
  64. 'format_id': format_id,
  65. 'height': int_or_none(format_id.rstrip('p')),
  66. })
  67. self._sort_formats(formats)
  68. description = self._og_search_description(webpage, default=None)
  69. thumbnail = self._og_search_thumbnail(
  70. webpage, default=None) or self._html_search_regex(
  71. r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
  72. 'thumbnail', group='url')
  73. video_id = None
  74. share_params = extract_json(
  75. 'data-share-params', 'share params', default=None)
  76. if isinstance(share_params, dict):
  77. video_id = share_params.get('id')
  78. if not video_id:
  79. video_id = self._search_regex(
  80. r'data-vid-id=["\'](\d+)', webpage, 'video id',
  81. default=None) or self._search_regex(
  82. r',(\d+)\.html', url, 'video id', default=display_id)
  83. return {
  84. 'id': video_id,
  85. 'title': title,
  86. 'description': description,
  87. 'thumbnail': thumbnail,
  88. 'formats': formats,
  89. }