logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

yandexmusic.py (18661B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import hashlib
  4. import itertools
  5. import re
  6. from .common import InfoExtractor
  7. from ..compat import compat_str
  8. from ..utils import (
  9. ExtractorError,
  10. int_or_none,
  11. float_or_none,
  12. try_get,
  13. )
  14. class YandexMusicBaseIE(InfoExtractor):
  15. _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'
  16. @staticmethod
  17. def _handle_error(response):
  18. if isinstance(response, dict):
  19. error = response.get('error')
  20. if error:
  21. raise ExtractorError(error, expected=True)
  22. if response.get('type') == 'captcha' or 'captcha' in response:
  23. YandexMusicBaseIE._raise_captcha()
  24. @staticmethod
  25. def _raise_captcha():
  26. raise ExtractorError(
  27. 'YandexMusic has considered youtube-dl requests automated and '
  28. 'asks you to solve a CAPTCHA. You can either wait for some '
  29. 'time until unblocked and optionally use --sleep-interval '
  30. 'in future or alternatively you can go to https://music.yandex.ru/ '
  31. 'solve CAPTCHA, then export cookies and pass cookie file to '
  32. 'youtube-dl with --cookies',
  33. expected=True)
  34. def _download_webpage_handle(self, *args, **kwargs):
  35. webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
  36. if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
  37. self._raise_captcha()
  38. return webpage
  39. def _download_json(self, *args, **kwargs):
  40. response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
  41. self._handle_error(response)
  42. return response
  43. def _call_api(self, ep, tld, url, item_id, note, query):
  44. return self._download_json(
  45. 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
  46. item_id, note,
  47. fatal=False,
  48. headers={
  49. 'Referer': url,
  50. 'X-Requested-With': 'XMLHttpRequest',
  51. 'X-Retpath-Y': url,
  52. },
  53. query=query)
  54. class YandexMusicTrackIE(YandexMusicBaseIE):
  55. IE_NAME = 'yandexmusic:track'
  56. IE_DESC = 'Яндекс.Музыка - Трек'
  57. _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
  58. _TESTS = [{
  59. 'url': 'http://music.yandex.ru/album/540508/track/4878838',
  60. 'md5': 'dec8b661f12027ceaba33318787fff76',
  61. 'info_dict': {
  62. 'id': '4878838',
  63. 'ext': 'mp3',
  64. 'title': 'md5:c63e19341fdbe84e43425a30bc777856',
  65. 'filesize': int,
  66. 'duration': 193.04,
  67. 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
  68. 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
  69. 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
  70. 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
  71. 'release_year': 2009,
  72. },
  73. # 'skip': 'Travis CI servers blocked by YandexMusic',
  74. }, {
  75. # multiple disks
  76. 'url': 'http://music.yandex.ru/album/3840501/track/705105',
  77. 'md5': '82a54e9e787301dd45aba093cf6e58c0',
  78. 'info_dict': {
  79. 'id': '705105',
  80. 'ext': 'mp3',
  81. 'title': 'md5:f86d4a9188279860a83000277024c1a6',
  82. 'filesize': int,
  83. 'duration': 239.27,
  84. 'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
  85. 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
  86. 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
  87. 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
  88. 'release_year': 2016,
  89. 'genre': 'pop',
  90. 'disc_number': 2,
  91. 'track_number': 9,
  92. },
  93. # 'skip': 'Travis CI servers blocked by YandexMusic',
  94. }, {
  95. 'url': 'http://music.yandex.com/album/540508/track/4878838',
  96. 'only_matching': True,
  97. }, {
  98. 'url': 'https://music.yandex.ru/album/16302456/track/85430762',
  99. 'md5': '11b8d50ab03b57738deeaadf661a0a48',
  100. 'info_dict': {
  101. 'id': '85430762',
  102. 'ext': 'mp3',
  103. 'abr': 128,
  104. 'title': 'Haddadi Von Engst, Phonic Youth, Super Flu - Til The End (Super Flu Remix)',
  105. 'filesize': int,
  106. 'duration': 431.14,
  107. 'track': 'Til The End (Super Flu Remix)',
  108. 'album': 'Til The End',
  109. 'album_artist': 'Haddadi Von Engst, Phonic Youth',
  110. 'artist': 'Haddadi Von Engst, Phonic Youth, Super Flu',
  111. 'release_year': 2021,
  112. 'genre': 'house',
  113. 'disc_number': 1,
  114. 'track_number': 2,
  115. }
  116. }]
  117. def _real_extract(self, url):
  118. mobj = re.match(self._VALID_URL, url)
  119. tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
  120. track = self._call_api(
  121. 'track', tld, url, track_id, 'Downloading track JSON',
  122. {'track': '%s:%s' % (track_id, album_id)})['track']
  123. track_title = track['title']
  124. track_version = track.get('version')
  125. if track_version:
  126. track_title = '%s (%s)' % (track_title, track_version)
  127. download_data = self._download_json(
  128. 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
  129. track_id, 'Downloading track location url JSON',
  130. query={'hq': 1},
  131. headers={'X-Retpath-Y': url})
  132. fd_data = self._download_json(
  133. download_data['src'], track_id,
  134. 'Downloading track location JSON',
  135. query={'format': 'json'})
  136. key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
  137. f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
  138. thumbnail = None
  139. cover_uri = track.get('albums', [{}])[0].get('coverUri')
  140. if cover_uri:
  141. thumbnail = cover_uri.replace('%%', 'orig')
  142. if not thumbnail.startswith('http'):
  143. thumbnail = 'http://' + thumbnail
  144. track_info = {
  145. 'id': track_id,
  146. 'ext': 'mp3',
  147. 'url': f_url,
  148. 'filesize': int_or_none(track.get('fileSize')),
  149. 'duration': float_or_none(track.get('durationMs'), 1000),
  150. 'thumbnail': thumbnail,
  151. 'track': track_title,
  152. 'acodec': download_data.get('codec'),
  153. 'abr': int_or_none(download_data.get('bitrate')),
  154. }
  155. def extract_artist_name(artist):
  156. decomposed = artist.get('decomposed')
  157. if not isinstance(decomposed, list):
  158. return artist['name']
  159. parts = [artist['name']]
  160. for element in decomposed:
  161. if isinstance(element, dict) and element.get('name'):
  162. parts.append(element['name'])
  163. elif isinstance(element, compat_str):
  164. parts.append(element)
  165. return ''.join(parts)
  166. def extract_artist(artist_list):
  167. if artist_list and isinstance(artist_list, list):
  168. artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
  169. if artists_names:
  170. return ', '.join(artists_names)
  171. albums = track.get('albums')
  172. if albums and isinstance(albums, list):
  173. album = albums[0]
  174. if isinstance(album, dict):
  175. year = album.get('year')
  176. disc_number = int_or_none(try_get(
  177. album, lambda x: x['trackPosition']['volume']))
  178. track_number = int_or_none(try_get(
  179. album, lambda x: x['trackPosition']['index']))
  180. track_info.update({
  181. 'album': album.get('title'),
  182. 'album_artist': extract_artist(album.get('artists')),
  183. 'release_year': int_or_none(year),
  184. 'genre': album.get('genre'),
  185. 'disc_number': disc_number,
  186. 'track_number': track_number,
  187. })
  188. track_artist = extract_artist(track.get('artists'))
  189. if track_artist:
  190. track_info.update({
  191. 'artist': track_artist,
  192. 'title': '%s - %s' % (track_artist, track_title),
  193. })
  194. else:
  195. track_info['title'] = track_title
  196. return track_info
  197. class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
  198. def _extract_tracks(self, source, item_id, url, tld):
  199. tracks = source['tracks']
  200. track_ids = [compat_str(track_id) for track_id in source['trackIds']]
  201. # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
  202. # missing tracks should be retrieved manually.
  203. if len(tracks) < len(track_ids):
  204. present_track_ids = set([
  205. compat_str(track['id'])
  206. for track in tracks if track.get('id')])
  207. missing_track_ids = [
  208. track_id for track_id in track_ids
  209. if track_id not in present_track_ids]
  210. # Request missing tracks in chunks to avoid exceeding max HTTP header size,
  211. # see https://github.com/ytdl-org/youtube-dl/issues/27355
  212. _TRACKS_PER_CHUNK = 250
  213. for chunk_num in itertools.count(0):
  214. start = chunk_num * _TRACKS_PER_CHUNK
  215. end = start + _TRACKS_PER_CHUNK
  216. missing_track_ids_req = missing_track_ids[start:end]
  217. assert missing_track_ids_req
  218. missing_tracks = self._call_api(
  219. 'track-entries', tld, url, item_id,
  220. 'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
  221. 'entries': ','.join(missing_track_ids_req),
  222. 'lang': tld,
  223. 'external-domain': 'music.yandex.%s' % tld,
  224. 'overembed': 'false',
  225. 'strict': 'true',
  226. })
  227. if missing_tracks:
  228. tracks.extend(missing_tracks)
  229. if end >= len(missing_track_ids):
  230. break
  231. return tracks
  232. def _build_playlist(self, tracks):
  233. entries = []
  234. for track in tracks:
  235. track_id = track.get('id') or track.get('realId')
  236. if not track_id:
  237. continue
  238. albums = track.get('albums')
  239. if not albums or not isinstance(albums, list):
  240. continue
  241. album = albums[0]
  242. if not isinstance(album, dict):
  243. continue
  244. album_id = album.get('id')
  245. if not album_id:
  246. continue
  247. entries.append(self.url_result(
  248. 'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
  249. ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
  250. return entries
  251. class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
  252. IE_NAME = 'yandexmusic:album'
  253. IE_DESC = 'Яндекс.Музыка - Альбом'
  254. _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
  255. _TESTS = [{
  256. 'url': 'http://music.yandex.ru/album/540508',
  257. 'info_dict': {
  258. 'id': '540508',
  259. 'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
  260. },
  261. 'playlist_count': 50,
  262. # 'skip': 'Travis CI servers blocked by YandexMusic',
  263. }, {
  264. 'url': 'https://music.yandex.ru/album/3840501',
  265. 'info_dict': {
  266. 'id': '3840501',
  267. 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
  268. },
  269. 'playlist_count': 33,
  270. # 'skip': 'Travis CI servers blocked by YandexMusic',
  271. }, {
  272. # empty artists
  273. 'url': 'https://music.yandex.ru/album/9091882',
  274. 'info_dict': {
  275. 'id': '9091882',
  276. 'title': 'ТЕД на русском',
  277. },
  278. 'playlist_count': 187,
  279. }]
  280. @classmethod
  281. def suitable(cls, url):
  282. return False if YandexMusicTrackIE.suitable(url) else super(YandexMusicAlbumIE, cls).suitable(url)
  283. def _real_extract(self, url):
  284. mobj = re.match(self._VALID_URL, url)
  285. tld = mobj.group('tld')
  286. album_id = mobj.group('id')
  287. album = self._call_api(
  288. 'album', tld, url, album_id, 'Downloading album JSON',
  289. {'album': album_id})
  290. entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
  291. title = album['title']
  292. artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
  293. if artist:
  294. title = '%s - %s' % (artist, title)
  295. year = album.get('year')
  296. if year:
  297. title += ' (%s)' % year
  298. return self.playlist_result(entries, compat_str(album['id']), title)
  299. class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
  300. IE_NAME = 'yandexmusic:playlist'
  301. IE_DESC = 'Яндекс.Музыка - Плейлист'
  302. _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
  303. _TESTS = [{
  304. 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
  305. 'info_dict': {
  306. 'id': '1245',
  307. 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
  308. 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
  309. },
  310. 'playlist_count': 5,
  311. # 'skip': 'Travis CI servers blocked by YandexMusic',
  312. }, {
  313. 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
  314. 'only_matching': True,
  315. }, {
  316. # playlist exceeding the limit of 150 tracks (see
  317. # https://github.com/ytdl-org/youtube-dl/issues/6666)
  318. 'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
  319. 'info_dict': {
  320. 'id': '1364',
  321. 'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
  322. },
  323. 'playlist_mincount': 437,
  324. # 'skip': 'Travis CI servers blocked by YandexMusic',
  325. }]
  326. def _real_extract(self, url):
  327. mobj = re.match(self._VALID_URL, url)
  328. tld = mobj.group('tld')
  329. user = mobj.group('user')
  330. playlist_id = mobj.group('id')
  331. playlist = self._call_api(
  332. 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
  333. 'owner': user,
  334. 'kinds': playlist_id,
  335. 'light': 'true',
  336. 'lang': tld,
  337. 'external-domain': 'music.yandex.%s' % tld,
  338. 'overembed': 'false',
  339. })['playlist']
  340. tracks = self._extract_tracks(playlist, playlist_id, url, tld)
  341. return self.playlist_result(
  342. self._build_playlist(tracks),
  343. compat_str(playlist_id),
  344. playlist.get('title'), playlist.get('description'))
  345. class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
  346. def _call_artist(self, tld, url, artist_id):
  347. return self._call_api(
  348. 'artist', tld, url, artist_id,
  349. 'Downloading artist %s JSON' % self._ARTIST_WHAT, {
  350. 'artist': artist_id,
  351. 'what': self._ARTIST_WHAT,
  352. 'sort': self._ARTIST_SORT or '',
  353. 'dir': '',
  354. 'period': '',
  355. 'lang': tld,
  356. 'external-domain': 'music.yandex.%s' % tld,
  357. 'overembed': 'false',
  358. })
  359. def _real_extract(self, url):
  360. mobj = re.match(self._VALID_URL, url)
  361. tld = mobj.group('tld')
  362. artist_id = mobj.group('id')
  363. data = self._call_artist(tld, url, artist_id)
  364. tracks = self._extract_tracks(data, artist_id, url, tld)
  365. title = try_get(data, lambda x: x['artist']['name'], compat_str)
  366. return self.playlist_result(
  367. self._build_playlist(tracks), artist_id, title)
  368. class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
  369. IE_NAME = 'yandexmusic:artist:tracks'
  370. IE_DESC = 'Яндекс.Музыка - Артист - Треки'
  371. _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE
  372. _TESTS = [{
  373. 'url': 'https://music.yandex.ru/artist/617526/tracks',
  374. 'info_dict': {
  375. 'id': '617526',
  376. 'title': 'md5:131aef29d45fd5a965ca613e708c040b',
  377. },
  378. 'playlist_count': 507,
  379. # 'skip': 'Travis CI servers blocked by YandexMusic',
  380. }]
  381. _ARTIST_SORT = ''
  382. _ARTIST_WHAT = 'tracks'
  383. def _real_extract(self, url):
  384. mobj = re.match(self._VALID_URL, url)
  385. tld = mobj.group('tld')
  386. artist_id = mobj.group('id')
  387. data = self._call_artist(tld, url, artist_id)
  388. tracks = self._extract_tracks(data, artist_id, url, tld)
  389. artist = try_get(data, lambda x: x['artist']['name'], compat_str)
  390. title = '%s - %s' % (artist or artist_id, 'Треки')
  391. return self.playlist_result(
  392. self._build_playlist(tracks), artist_id, title)
  393. class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
  394. IE_NAME = 'yandexmusic:artist:albums'
  395. IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
  396. _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE
  397. _TESTS = [{
  398. 'url': 'https://music.yandex.ru/artist/617526/albums',
  399. 'info_dict': {
  400. 'id': '617526',
  401. 'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
  402. },
  403. 'playlist_count': 8,
  404. # 'skip': 'Travis CI servers blocked by YandexMusic',
  405. }]
  406. _ARTIST_SORT = 'year'
  407. _ARTIST_WHAT = 'albums'
  408. def _real_extract(self, url):
  409. mobj = re.match(self._VALID_URL, url)
  410. tld = mobj.group('tld')
  411. artist_id = mobj.group('id')
  412. data = self._call_artist(tld, url, artist_id)
  413. entries = []
  414. for album in data['albums']:
  415. if not isinstance(album, dict):
  416. continue
  417. album_id = album.get('id')
  418. if not album_id:
  419. continue
  420. entries.append(self.url_result(
  421. 'http://music.yandex.ru/album/%s' % album_id,
  422. ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
  423. artist = try_get(data, lambda x: x['artist']['name'], compat_str)
  424. title = '%s - %s' % (artist or artist_id, 'Альбомы')
  425. return self.playlist_result(entries, artist_id, title)