logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

yandexmusic.py (17743B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import hashlib
  4. import itertools
  5. import re
  6. from .common import InfoExtractor
  7. from ..compat import compat_str
  8. from ..utils import (
  9. ExtractorError,
  10. int_or_none,
  11. float_or_none,
  12. try_get,
  13. )
  14. class YandexMusicBaseIE(InfoExtractor):
  15. _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'
  16. @staticmethod
  17. def _handle_error(response):
  18. if isinstance(response, dict):
  19. error = response.get('error')
  20. if error:
  21. raise ExtractorError(error, expected=True)
  22. if response.get('type') == 'captcha' or 'captcha' in response:
  23. YandexMusicBaseIE._raise_captcha()
  24. @staticmethod
  25. def _raise_captcha():
  26. raise ExtractorError(
  27. 'YandexMusic has considered youtube-dl requests automated and '
  28. 'asks you to solve a CAPTCHA. You can either wait for some '
  29. 'time until unblocked and optionally use --sleep-interval '
  30. 'in future or alternatively you can go to https://music.yandex.ru/ '
  31. 'solve CAPTCHA, then export cookies and pass cookie file to '
  32. 'youtube-dl with --cookies',
  33. expected=True)
  34. def _download_webpage_handle(self, *args, **kwargs):
  35. webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
  36. if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
  37. self._raise_captcha()
  38. return webpage
  39. def _download_json(self, *args, **kwargs):
  40. response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
  41. self._handle_error(response)
  42. return response
  43. def _call_api(self, ep, tld, url, item_id, note, query):
  44. return self._download_json(
  45. 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
  46. item_id, note,
  47. fatal=False,
  48. headers={
  49. 'Referer': url,
  50. 'X-Requested-With': 'XMLHttpRequest',
  51. 'X-Retpath-Y': url,
  52. },
  53. query=query)
  54. class YandexMusicTrackIE(YandexMusicBaseIE):
  55. IE_NAME = 'yandexmusic:track'
  56. IE_DESC = 'Яндекс.Музыка - Трек'
  57. _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
  58. _TESTS = [{
  59. 'url': 'http://music.yandex.ru/album/540508/track/4878838',
  60. 'md5': 'dec8b661f12027ceaba33318787fff76',
  61. 'info_dict': {
  62. 'id': '4878838',
  63. 'ext': 'mp3',
  64. 'title': 'md5:c63e19341fdbe84e43425a30bc777856',
  65. 'filesize': int,
  66. 'duration': 193.04,
  67. 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
  68. 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
  69. 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
  70. 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
  71. 'release_year': 2009,
  72. },
  73. # 'skip': 'Travis CI servers blocked by YandexMusic',
  74. }, {
  75. # multiple disks
  76. 'url': 'http://music.yandex.ru/album/3840501/track/705105',
  77. 'md5': '82a54e9e787301dd45aba093cf6e58c0',
  78. 'info_dict': {
  79. 'id': '705105',
  80. 'ext': 'mp3',
  81. 'title': 'md5:f86d4a9188279860a83000277024c1a6',
  82. 'filesize': int,
  83. 'duration': 239.27,
  84. 'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
  85. 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
  86. 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
  87. 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
  88. 'release_year': 2016,
  89. 'genre': 'pop',
  90. 'disc_number': 2,
  91. 'track_number': 9,
  92. },
  93. # 'skip': 'Travis CI servers blocked by YandexMusic',
  94. }, {
  95. 'url': 'http://music.yandex.com/album/540508/track/4878838',
  96. 'only_matching': True,
  97. }]
  98. def _real_extract(self, url):
  99. mobj = re.match(self._VALID_URL, url)
  100. tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
  101. track = self._call_api(
  102. 'track', tld, url, track_id, 'Downloading track JSON',
  103. {'track': '%s:%s' % (track_id, album_id)})['track']
  104. track_title = track['title']
  105. download_data = self._download_json(
  106. 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
  107. track_id, 'Downloading track location url JSON',
  108. headers={'X-Retpath-Y': url})
  109. fd_data = self._download_json(
  110. download_data['src'], track_id,
  111. 'Downloading track location JSON',
  112. query={'format': 'json'})
  113. key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
  114. f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
  115. thumbnail = None
  116. cover_uri = track.get('albums', [{}])[0].get('coverUri')
  117. if cover_uri:
  118. thumbnail = cover_uri.replace('%%', 'orig')
  119. if not thumbnail.startswith('http'):
  120. thumbnail = 'http://' + thumbnail
  121. track_info = {
  122. 'id': track_id,
  123. 'ext': 'mp3',
  124. 'url': f_url,
  125. 'filesize': int_or_none(track.get('fileSize')),
  126. 'duration': float_or_none(track.get('durationMs'), 1000),
  127. 'thumbnail': thumbnail,
  128. 'track': track_title,
  129. 'acodec': download_data.get('codec'),
  130. 'abr': int_or_none(download_data.get('bitrate')),
  131. }
  132. def extract_artist_name(artist):
  133. decomposed = artist.get('decomposed')
  134. if not isinstance(decomposed, list):
  135. return artist['name']
  136. parts = [artist['name']]
  137. for element in decomposed:
  138. if isinstance(element, dict) and element.get('name'):
  139. parts.append(element['name'])
  140. elif isinstance(element, compat_str):
  141. parts.append(element)
  142. return ''.join(parts)
  143. def extract_artist(artist_list):
  144. if artist_list and isinstance(artist_list, list):
  145. artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
  146. if artists_names:
  147. return ', '.join(artists_names)
  148. albums = track.get('albums')
  149. if albums and isinstance(albums, list):
  150. album = albums[0]
  151. if isinstance(album, dict):
  152. year = album.get('year')
  153. disc_number = int_or_none(try_get(
  154. album, lambda x: x['trackPosition']['volume']))
  155. track_number = int_or_none(try_get(
  156. album, lambda x: x['trackPosition']['index']))
  157. track_info.update({
  158. 'album': album.get('title'),
  159. 'album_artist': extract_artist(album.get('artists')),
  160. 'release_year': int_or_none(year),
  161. 'genre': album.get('genre'),
  162. 'disc_number': disc_number,
  163. 'track_number': track_number,
  164. })
  165. track_artist = extract_artist(track.get('artists'))
  166. if track_artist:
  167. track_info.update({
  168. 'artist': track_artist,
  169. 'title': '%s - %s' % (track_artist, track_title),
  170. })
  171. else:
  172. track_info['title'] = track_title
  173. return track_info
  174. class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
  175. def _extract_tracks(self, source, item_id, url, tld):
  176. tracks = source['tracks']
  177. track_ids = [compat_str(track_id) for track_id in source['trackIds']]
  178. # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
  179. # missing tracks should be retrieved manually.
  180. if len(tracks) < len(track_ids):
  181. present_track_ids = set([
  182. compat_str(track['id'])
  183. for track in tracks if track.get('id')])
  184. missing_track_ids = [
  185. track_id for track_id in track_ids
  186. if track_id not in present_track_ids]
  187. # Request missing tracks in chunks to avoid exceeding max HTTP header size,
  188. # see https://github.com/ytdl-org/youtube-dl/issues/27355
  189. _TRACKS_PER_CHUNK = 250
  190. for chunk_num in itertools.count(0):
  191. start = chunk_num * _TRACKS_PER_CHUNK
  192. end = start + _TRACKS_PER_CHUNK
  193. missing_track_ids_req = missing_track_ids[start:end]
  194. assert missing_track_ids_req
  195. missing_tracks = self._call_api(
  196. 'track-entries', tld, url, item_id,
  197. 'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
  198. 'entries': ','.join(missing_track_ids_req),
  199. 'lang': tld,
  200. 'external-domain': 'music.yandex.%s' % tld,
  201. 'overembed': 'false',
  202. 'strict': 'true',
  203. })
  204. if missing_tracks:
  205. tracks.extend(missing_tracks)
  206. if end >= len(missing_track_ids):
  207. break
  208. return tracks
  209. def _build_playlist(self, tracks):
  210. entries = []
  211. for track in tracks:
  212. track_id = track.get('id') or track.get('realId')
  213. if not track_id:
  214. continue
  215. albums = track.get('albums')
  216. if not albums or not isinstance(albums, list):
  217. continue
  218. album = albums[0]
  219. if not isinstance(album, dict):
  220. continue
  221. album_id = album.get('id')
  222. if not album_id:
  223. continue
  224. entries.append(self.url_result(
  225. 'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
  226. ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
  227. return entries
  228. class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
  229. IE_NAME = 'yandexmusic:album'
  230. IE_DESC = 'Яндекс.Музыка - Альбом'
  231. _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
  232. _TESTS = [{
  233. 'url': 'http://music.yandex.ru/album/540508',
  234. 'info_dict': {
  235. 'id': '540508',
  236. 'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
  237. },
  238. 'playlist_count': 50,
  239. # 'skip': 'Travis CI servers blocked by YandexMusic',
  240. }, {
  241. 'url': 'https://music.yandex.ru/album/3840501',
  242. 'info_dict': {
  243. 'id': '3840501',
  244. 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
  245. },
  246. 'playlist_count': 33,
  247. # 'skip': 'Travis CI servers blocked by YandexMusic',
  248. }, {
  249. # empty artists
  250. 'url': 'https://music.yandex.ru/album/9091882',
  251. 'info_dict': {
  252. 'id': '9091882',
  253. 'title': 'ТЕД на русском',
  254. },
  255. 'playlist_count': 187,
  256. }]
  257. @classmethod
  258. def suitable(cls, url):
  259. return False if YandexMusicTrackIE.suitable(url) else super(YandexMusicAlbumIE, cls).suitable(url)
  260. def _real_extract(self, url):
  261. mobj = re.match(self._VALID_URL, url)
  262. tld = mobj.group('tld')
  263. album_id = mobj.group('id')
  264. album = self._call_api(
  265. 'album', tld, url, album_id, 'Downloading album JSON',
  266. {'album': album_id})
  267. entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
  268. title = album['title']
  269. artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
  270. if artist:
  271. title = '%s - %s' % (artist, title)
  272. year = album.get('year')
  273. if year:
  274. title += ' (%s)' % year
  275. return self.playlist_result(entries, compat_str(album['id']), title)
  276. class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
  277. IE_NAME = 'yandexmusic:playlist'
  278. IE_DESC = 'Яндекс.Музыка - Плейлист'
  279. _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
  280. _TESTS = [{
  281. 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
  282. 'info_dict': {
  283. 'id': '1245',
  284. 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
  285. 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
  286. },
  287. 'playlist_count': 5,
  288. # 'skip': 'Travis CI servers blocked by YandexMusic',
  289. }, {
  290. 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
  291. 'only_matching': True,
  292. }, {
  293. # playlist exceeding the limit of 150 tracks (see
  294. # https://github.com/ytdl-org/youtube-dl/issues/6666)
  295. 'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
  296. 'info_dict': {
  297. 'id': '1364',
  298. 'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
  299. },
  300. 'playlist_mincount': 437,
  301. # 'skip': 'Travis CI servers blocked by YandexMusic',
  302. }]
  303. def _real_extract(self, url):
  304. mobj = re.match(self._VALID_URL, url)
  305. tld = mobj.group('tld')
  306. user = mobj.group('user')
  307. playlist_id = mobj.group('id')
  308. playlist = self._call_api(
  309. 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
  310. 'owner': user,
  311. 'kinds': playlist_id,
  312. 'light': 'true',
  313. 'lang': tld,
  314. 'external-domain': 'music.yandex.%s' % tld,
  315. 'overembed': 'false',
  316. })['playlist']
  317. tracks = self._extract_tracks(playlist, playlist_id, url, tld)
  318. return self.playlist_result(
  319. self._build_playlist(tracks),
  320. compat_str(playlist_id),
  321. playlist.get('title'), playlist.get('description'))
  322. class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
  323. def _call_artist(self, tld, url, artist_id):
  324. return self._call_api(
  325. 'artist', tld, url, artist_id,
  326. 'Downloading artist %s JSON' % self._ARTIST_WHAT, {
  327. 'artist': artist_id,
  328. 'what': self._ARTIST_WHAT,
  329. 'sort': self._ARTIST_SORT or '',
  330. 'dir': '',
  331. 'period': '',
  332. 'lang': tld,
  333. 'external-domain': 'music.yandex.%s' % tld,
  334. 'overembed': 'false',
  335. })
  336. def _real_extract(self, url):
  337. mobj = re.match(self._VALID_URL, url)
  338. tld = mobj.group('tld')
  339. artist_id = mobj.group('id')
  340. data = self._call_artist(tld, url, artist_id)
  341. tracks = self._extract_tracks(data, artist_id, url, tld)
  342. title = try_get(data, lambda x: x['artist']['name'], compat_str)
  343. return self.playlist_result(
  344. self._build_playlist(tracks), artist_id, title)
  345. class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
  346. IE_NAME = 'yandexmusic:artist:tracks'
  347. IE_DESC = 'Яндекс.Музыка - Артист - Треки'
  348. _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE
  349. _TESTS = [{
  350. 'url': 'https://music.yandex.ru/artist/617526/tracks',
  351. 'info_dict': {
  352. 'id': '617526',
  353. 'title': 'md5:131aef29d45fd5a965ca613e708c040b',
  354. },
  355. 'playlist_count': 507,
  356. # 'skip': 'Travis CI servers blocked by YandexMusic',
  357. }]
  358. _ARTIST_SORT = ''
  359. _ARTIST_WHAT = 'tracks'
  360. def _real_extract(self, url):
  361. mobj = re.match(self._VALID_URL, url)
  362. tld = mobj.group('tld')
  363. artist_id = mobj.group('id')
  364. data = self._call_artist(tld, url, artist_id)
  365. tracks = self._extract_tracks(data, artist_id, url, tld)
  366. artist = try_get(data, lambda x: x['artist']['name'], compat_str)
  367. title = '%s - %s' % (artist or artist_id, 'Треки')
  368. return self.playlist_result(
  369. self._build_playlist(tracks), artist_id, title)
  370. class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
  371. IE_NAME = 'yandexmusic:artist:albums'
  372. IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
  373. _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE
  374. _TESTS = [{
  375. 'url': 'https://music.yandex.ru/artist/617526/albums',
  376. 'info_dict': {
  377. 'id': '617526',
  378. 'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
  379. },
  380. 'playlist_count': 8,
  381. # 'skip': 'Travis CI servers blocked by YandexMusic',
  382. }]
  383. _ARTIST_SORT = 'year'
  384. _ARTIST_WHAT = 'albums'
  385. def _real_extract(self, url):
  386. mobj = re.match(self._VALID_URL, url)
  387. tld = mobj.group('tld')
  388. artist_id = mobj.group('id')
  389. data = self._call_artist(tld, url, artist_id)
  390. entries = []
  391. for album in data['albums']:
  392. if not isinstance(album, dict):
  393. continue
  394. album_id = album.get('id')
  395. if not album_id:
  396. continue
  397. entries.append(self.url_result(
  398. 'http://music.yandex.ru/album/%s' % album_id,
  399. ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
  400. artist = try_get(data, lambda x: x['artist']['name'], compat_str)
  401. title = '%s - %s' % (artist or artist_id, 'Альбомы')
  402. return self.playlist_result(entries, artist_id, title)