logo

youtube-dl

[mirror] Download/Watch videos from video hosters — git clone https://hacktivis.me/git/mirror/youtube-dl.git

reddit.py (5331B)


  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. int_or_none,
  7. float_or_none,
  8. try_get,
  9. unescapeHTML,
  10. url_or_none,
  11. )
  12. class RedditIE(InfoExtractor):
  13. _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
  14. _TEST = {
  15. # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
  16. 'url': 'https://v.redd.it/zv89llsvexdz',
  17. 'md5': '0a070c53eba7ec4534d95a5a1259e253',
  18. 'info_dict': {
  19. 'id': 'zv89llsvexdz',
  20. 'ext': 'mp4',
  21. 'title': 'zv89llsvexdz',
  22. },
  23. 'params': {
  24. 'format': 'bestvideo',
  25. },
  26. }
  27. def _real_extract(self, url):
  28. video_id = self._match_id(url)
  29. formats = self._extract_m3u8_formats(
  30. 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
  31. 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
  32. formats.extend(self._extract_mpd_formats(
  33. 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
  34. mpd_id='dash', fatal=False))
  35. self._sort_formats(formats)
  36. return {
  37. 'id': video_id,
  38. 'title': video_id,
  39. 'formats': formats,
  40. }
  41. class RedditRIE(InfoExtractor):
  42. _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
  43. _TESTS = [{
  44. 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
  45. 'info_dict': {
  46. 'id': 'zv89llsvexdz',
  47. 'ext': 'mp4',
  48. 'title': 'That small heart attack.',
  49. 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
  50. 'thumbnails': 'count:4',
  51. 'timestamp': 1501941939,
  52. 'upload_date': '20170805',
  53. 'uploader': 'Antw87',
  54. 'duration': 12,
  55. 'like_count': int,
  56. 'dislike_count': int,
  57. 'comment_count': int,
  58. 'age_limit': 0,
  59. },
  60. 'params': {
  61. 'format': 'bestvideo',
  62. 'skip_download': True,
  63. },
  64. }, {
  65. 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
  66. 'only_matching': True,
  67. }, {
  68. # imgur
  69. 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
  70. 'only_matching': True,
  71. }, {
  72. # imgur @ old reddit
  73. 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
  74. 'only_matching': True,
  75. }, {
  76. # streamable
  77. 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
  78. 'only_matching': True,
  79. }, {
  80. # youtube
  81. 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
  82. 'only_matching': True,
  83. }, {
  84. # reddit video @ nm reddit
  85. 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
  86. 'only_matching': True,
  87. }]
  88. def _real_extract(self, url):
  89. mobj = re.match(self._VALID_URL, url)
  90. url, video_id = mobj.group('url', 'id')
  91. video_id = self._match_id(url)
  92. data = self._download_json(
  93. url + '/.json', video_id)[0]['data']['children'][0]['data']
  94. video_url = data['url']
  95. # Avoid recursing into the same reddit URL
  96. if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
  97. raise ExtractorError('No media found', expected=True)
  98. over_18 = data.get('over_18')
  99. if over_18 is True:
  100. age_limit = 18
  101. elif over_18 is False:
  102. age_limit = 0
  103. else:
  104. age_limit = None
  105. thumbnails = []
  106. def add_thumbnail(src):
  107. if not isinstance(src, dict):
  108. return
  109. thumbnail_url = url_or_none(src.get('url'))
  110. if not thumbnail_url:
  111. return
  112. thumbnails.append({
  113. 'url': unescapeHTML(thumbnail_url),
  114. 'width': int_or_none(src.get('width')),
  115. 'height': int_or_none(src.get('height')),
  116. })
  117. for image in try_get(data, lambda x: x['preview']['images']) or []:
  118. if not isinstance(image, dict):
  119. continue
  120. add_thumbnail(image.get('source'))
  121. resolutions = image.get('resolutions')
  122. if isinstance(resolutions, list):
  123. for resolution in resolutions:
  124. add_thumbnail(resolution)
  125. return {
  126. '_type': 'url_transparent',
  127. 'url': video_url,
  128. 'title': data.get('title'),
  129. 'thumbnails': thumbnails,
  130. 'timestamp': float_or_none(data.get('created_utc')),
  131. 'uploader': data.get('author'),
  132. 'duration': int_or_none(try_get(
  133. data,
  134. (lambda x: x['media']['reddit_video']['duration'],
  135. lambda x: x['secure_media']['reddit_video']['duration']))),
  136. 'like_count': int_or_none(data.get('ups')),
  137. 'dislike_count': int_or_none(data.get('downs')),
  138. 'comment_count': int_or_none(data.get('num_comments')),
  139. 'age_limit': age_limit,
  140. }