logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

mixcloud.py (12454B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import itertools
  4. import re
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_b64decode,
  8. compat_chr,
  9. compat_ord,
  10. compat_str,
  11. compat_urllib_parse_unquote,
  12. compat_zip as zip,
  13. )
  14. from ..utils import (
  15. int_or_none,
  16. parse_iso8601,
  17. strip_or_none,
  18. try_get,
  19. )
  20. class MixcloudBaseIE(InfoExtractor):
  21. def _call_api(self, object_type, object_fields, display_id, username, slug=None):
  22. lookup_key = object_type + 'Lookup'
  23. return self._download_json(
  24. 'https://app.mixcloud.com/graphql', display_id, query={
  25. 'query': '''{
  26. %s(lookup: {username: "%s"%s}) {
  27. %s
  28. }
  29. }''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields)
  30. })['data'][lookup_key]
  31. class MixcloudIE(MixcloudBaseIE):
  32. _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
  33. IE_NAME = 'mixcloud'
  34. _TESTS = [{
  35. 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
  36. 'info_dict': {
  37. 'id': 'dholbach_cryptkeeper',
  38. 'ext': 'm4a',
  39. 'title': 'Cryptkeeper',
  40. 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
  41. 'uploader': 'dholbach', # was: 'Daniel Holbach',
  42. 'uploader_id': 'dholbach',
  43. 'thumbnail': r're:https?://.*\.jpg',
  44. 'view_count': int,
  45. 'timestamp': 1321359578,
  46. 'upload_date': '20111115',
  47. },
  48. }, {
  49. 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
  50. 'info_dict': {
  51. 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
  52. 'ext': 'mp3',
  53. 'title': 'Caribou 7 inch Vinyl Mix & Chat',
  54. 'description': r're:Last week Dan Snaith aka Caribou swung by the Brownswood.{136}',
  55. 'uploader': 'Gilles Peterson Worldwide',
  56. 'uploader_id': 'gillespeterson',
  57. 'thumbnail': 're:https?://.*',
  58. 'view_count': int,
  59. 'timestamp': 1422987057,
  60. 'upload_date': '20150203',
  61. },
  62. 'params': {
  63. 'skip_download': '404 not found',
  64. },
  65. }, {
  66. 'url': 'https://www.mixcloud.com/gillespeterson/carnival-m%C3%BAsica-popular-brasileira-mix/',
  67. 'info_dict': {
  68. 'id': 'gillespeterson_carnival-música-popular-brasileira-mix',
  69. 'ext': 'm4a',
  70. 'title': 'Carnival Música Popular Brasileira Mix',
  71. 'description': r're:Gilles was recently in Brazil to play at Boiler Room.{208}',
  72. 'timestamp': 1454347174,
  73. 'upload_date': '20160201',
  74. 'uploader': 'Gilles Peterson Worldwide',
  75. 'uploader_id': 'gillespeterson',
  76. 'thumbnail': 're:https?://.*',
  77. 'view_count': int,
  78. },
  79. }, {
  80. 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
  81. 'only_matching': True,
  82. }]
  83. _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'
  84. @staticmethod
  85. def _decrypt_xor_cipher(key, ciphertext):
  86. """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
  87. return ''.join([
  88. compat_chr(compat_ord(ch) ^ compat_ord(k))
  89. for ch, k in zip(ciphertext, itertools.cycle(key))])
  90. def _real_extract(self, url):
  91. username, slug = self._match_valid_url(url).groups()
  92. username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
  93. track_id = '%s_%s' % (username, slug)
  94. cloudcast = self._call_api('cloudcast', '''audioLength
  95. comments(first: 100) {
  96. edges {
  97. node {
  98. comment
  99. created
  100. user {
  101. displayName
  102. username
  103. }
  104. }
  105. }
  106. totalCount
  107. }
  108. description
  109. favorites {
  110. totalCount
  111. }
  112. featuringArtistList
  113. isExclusive
  114. name
  115. owner {
  116. displayName
  117. url
  118. username
  119. }
  120. picture(width: 1024, height: 1024) {
  121. url
  122. }
  123. plays
  124. publishDate
  125. reposts {
  126. totalCount
  127. }
  128. streamInfo {
  129. dashUrl
  130. hlsUrl
  131. url
  132. }
  133. tags {
  134. tag {
  135. name
  136. }
  137. }''', track_id, username, slug)
  138. title = cloudcast['name']
  139. stream_info = cloudcast['streamInfo']
  140. formats = []
  141. for url_key in ('url', 'hlsUrl', 'dashUrl'):
  142. format_url = stream_info.get(url_key)
  143. if not format_url:
  144. continue
  145. decrypted = self._decrypt_xor_cipher(
  146. self._DECRYPTION_KEY, compat_b64decode(format_url))
  147. if url_key == 'hlsUrl':
  148. formats.extend(self._extract_m3u8_formats(
  149. decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
  150. m3u8_id='hls', fatal=False))
  151. elif url_key == 'dashUrl':
  152. formats.extend(self._extract_mpd_formats(
  153. decrypted, track_id, mpd_id='dash', fatal=False))
  154. else:
  155. formats.append({
  156. 'format_id': 'http',
  157. 'url': decrypted,
  158. 'downloader_options': {
  159. # Mixcloud starts throttling at >~5M
  160. 'http_chunk_size': 5242880,
  161. },
  162. })
  163. if not formats and cloudcast.get('isExclusive'):
  164. self.raise_login_required()
  165. self._sort_formats(formats)
  166. comments = []
  167. for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
  168. node = edge.get('node') or {}
  169. text = strip_or_none(node.get('comment'))
  170. if not text:
  171. continue
  172. user = node.get('user') or {}
  173. comments.append({
  174. 'author': user.get('displayName'),
  175. 'author_id': user.get('username'),
  176. 'text': text,
  177. 'timestamp': parse_iso8601(node.get('created')),
  178. })
  179. tags = []
  180. for t in cloudcast.get('tags'):
  181. tag = try_get(t, lambda x: x['tag']['name'], compat_str)
  182. if not tag:
  183. tags.append(tag)
  184. get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
  185. owner = cloudcast.get('owner') or {}
  186. return {
  187. 'id': track_id,
  188. 'title': title,
  189. 'formats': formats,
  190. 'description': cloudcast.get('description'),
  191. 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
  192. 'uploader': owner.get('displayName'),
  193. 'timestamp': parse_iso8601(cloudcast.get('publishDate')),
  194. 'uploader_id': owner.get('username'),
  195. 'uploader_url': owner.get('url'),
  196. 'duration': int_or_none(cloudcast.get('audioLength')),
  197. 'view_count': int_or_none(cloudcast.get('plays')),
  198. 'like_count': get_count('favorites'),
  199. 'repost_count': get_count('reposts'),
  200. 'comment_count': get_count('comments'),
  201. 'comments': comments,
  202. 'tags': tags,
  203. 'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
  204. }
  205. class MixcloudPlaylistBaseIE(MixcloudBaseIE):
  206. def _get_cloudcast(self, node):
  207. return node
  208. def _get_playlist_title(self, title, slug):
  209. return title
  210. def _real_extract(self, url):
  211. username, slug = re.match(self._VALID_URL, url).groups()
  212. username = compat_urllib_parse_unquote(username)
  213. if not slug:
  214. slug = 'uploads'
  215. else:
  216. slug = compat_urllib_parse_unquote(slug)
  217. playlist_id = '%s_%s' % (username, slug)
  218. is_playlist_type = self._ROOT_TYPE == 'playlist'
  219. playlist_type = 'items' if is_playlist_type else slug
  220. list_filter = ''
  221. has_next_page = True
  222. entries = []
  223. while has_next_page:
  224. playlist = self._call_api(
  225. self._ROOT_TYPE, '''%s
  226. %s
  227. %s(first: 100%s) {
  228. edges {
  229. node {
  230. %s
  231. }
  232. }
  233. pageInfo {
  234. endCursor
  235. hasNextPage
  236. }
  237. }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
  238. playlist_id, username, slug if is_playlist_type else None)
  239. items = playlist.get(playlist_type) or {}
  240. for edge in items.get('edges', []):
  241. cloudcast = self._get_cloudcast(edge.get('node') or {})
  242. cloudcast_url = cloudcast.get('url')
  243. if not cloudcast_url:
  244. continue
  245. slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
  246. owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
  247. video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
  248. entries.append(self.url_result(
  249. cloudcast_url, MixcloudIE.ie_key(), video_id))
  250. page_info = items['pageInfo']
  251. has_next_page = page_info['hasNextPage']
  252. list_filter = ', after: "%s"' % page_info['endCursor']
  253. return self.playlist_result(
  254. entries, playlist_id,
  255. self._get_playlist_title(playlist[self._TITLE_KEY], slug),
  256. playlist.get(self._DESCRIPTION_KEY))
  257. class MixcloudUserIE(MixcloudPlaylistBaseIE):
  258. _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
  259. IE_NAME = 'mixcloud:user'
  260. _TESTS = [{
  261. 'url': 'http://www.mixcloud.com/dholbach/',
  262. 'info_dict': {
  263. 'id': 'dholbach_uploads',
  264. 'title': 'Daniel Holbach (uploads)',
  265. 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
  266. },
  267. 'playlist_mincount': 36,
  268. }, {
  269. 'url': 'http://www.mixcloud.com/dholbach/uploads/',
  270. 'info_dict': {
  271. 'id': 'dholbach_uploads',
  272. 'title': 'Daniel Holbach (uploads)',
  273. 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
  274. },
  275. 'playlist_mincount': 36,
  276. }, {
  277. 'url': 'http://www.mixcloud.com/dholbach/favorites/',
  278. 'info_dict': {
  279. 'id': 'dholbach_favorites',
  280. 'title': 'Daniel Holbach (favorites)',
  281. 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
  282. },
  283. # 'params': {
  284. # 'playlist_items': '1-100',
  285. # },
  286. 'playlist_mincount': 396,
  287. }, {
  288. 'url': 'http://www.mixcloud.com/dholbach/listens/',
  289. 'info_dict': {
  290. 'id': 'dholbach_listens',
  291. 'title': 'Daniel Holbach (listens)',
  292. 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
  293. },
  294. # 'params': {
  295. # 'playlist_items': '1-100',
  296. # },
  297. 'playlist_mincount': 1623,
  298. 'skip': 'Large list',
  299. }, {
  300. 'url': 'https://www.mixcloud.com/FirstEar/stream/',
  301. 'info_dict': {
  302. 'id': 'FirstEar_stream',
  303. 'title': 'First Ear (stream)',
  304. 'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
  305. },
  306. 'playlist_mincount': 271,
  307. }]
  308. _TITLE_KEY = 'displayName'
  309. _DESCRIPTION_KEY = 'biog'
  310. _ROOT_TYPE = 'user'
  311. _NODE_TEMPLATE = '''slug
  312. url
  313. owner { username }'''
  314. def _get_playlist_title(self, title, slug):
  315. return '%s (%s)' % (title, slug)
  316. class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
  317. _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
  318. IE_NAME = 'mixcloud:playlist'
  319. _TESTS = [{
  320. 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
  321. 'info_dict': {
  322. 'id': 'maxvibes_jazzcat-on-ness-radio',
  323. 'title': 'Ness Radio sessions',
  324. },
  325. 'playlist_mincount': 59,
  326. }]
  327. _TITLE_KEY = 'name'
  328. _DESCRIPTION_KEY = 'description'
  329. _ROOT_TYPE = 'playlist'
  330. _NODE_TEMPLATE = '''cloudcast {
  331. slug
  332. url
  333. owner { username }
  334. }'''
  335. def _get_cloudcast(self, node):
  336. return node.get('cloudcast') or {}