logo

oasis-root

Compiled tree of Oasis Linux, based on my own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

abematv.py (19063B)


  1. import base64
  2. import binascii
  3. import functools
  4. import hashlib
  5. import hmac
  6. import io
  7. import json
  8. import re
  9. import time
  10. import urllib.parse
  11. import uuid
  12. from .common import InfoExtractor
  13. from ..aes import aes_ecb_decrypt
  14. from ..networking import RequestHandler, Response
  15. from ..networking.exceptions import TransportError
  16. from ..utils import (
  17. ExtractorError,
  18. OnDemandPagedList,
  19. decode_base_n,
  20. int_or_none,
  21. time_seconds,
  22. traverse_obj,
  23. update_url_query,
  24. )
class AbemaLicenseRH(RequestHandler):
    """Request handler for the custom ``abematv-license://`` URL scheme.

    HLS manifests from AbemaTV reference decryption keys via
    ``abematv-license://<ticket>`` URIs.  This handler intercepts those
    requests inside yt-dlp's request director and answers them with the
    decrypted 16-byte AES video key instead of performing a real network
    round trip for the key URL itself.
    """
    _SUPPORTED_URL_SCHEMES = ('abematv-license',)
    _SUPPORTED_PROXY_SCHEMES = None
    _SUPPORTED_FEATURES = None
    RH_NAME = 'abematv_license'

    # 58-character alphabet used by decode_base_n() to decode the license
    # server's key blob (digits/letters minus easily-confused characters).
    _STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    # Hex-encoded HMAC-SHA256 key; unhexlified before use in
    # _get_videokey_from_ticket() to derive the key-decryption key.
    _HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
        super().__init__(**kwargs)
        # Extractor reference: provides the media token, device id and
        # _download_json used by _get_videokey_from_ticket().
        self.ie = ie

    def _send(self, request):
        """Serve the decrypted video key for an ``abematv-license://<ticket>`` request.

        Extraction/lookup failures are re-raised as TransportError so the
        networking layer treats them like any other transport failure.
        """
        url = request.url
        # The "host" part of the pseudo-URL carries the license ticket.
        ticket = urllib.parse.urlparse(url).netloc

        try:
            response_data = self._get_videokey_from_ticket(ticket)
        except ExtractorError as e:
            raise TransportError(cause=e.cause) from e
        except (IndexError, KeyError, TypeError) as e:
            raise TransportError(cause=repr(e)) from e

        return Response(
            io.BytesIO(response_data), url,
            headers={'Content-Length': str(len(response_data))})

    def _get_videokey_from_ticket(self, ticket):
        """Exchange a license *ticket* for the decrypted AES-128 video key.

        Requests a license from license.abema.io, decodes the returned key
        blob, derives the key-decryption key as
        HMAC-SHA256(unhex(_HKEY), cid + device id), and AES-ECB-decrypts
        the blob with it.  Returns the key as bytes.
        """
        # Only show the request in the log when yt-dlp runs in verbose mode.
        to_show = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=to_show)

        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
            query={'t': media_token},
            data=json.dumps({
                'kv': 'a',
                'lt': ticket,
            }).encode(),
            headers={
                'Content-Type': 'application/json',
            })

        # 'k' is a base-N-encoded integer; expand it to a 16-byte big-endian
        # buffer (the encrypted video key).
        res = decode_base_n(license_response['k'], table=self._STRTABLE)
        encvideokey = list(res.to_bytes(16, 'big'))

        # Key-decryption key = HMAC-SHA256(unhex(_HKEY), cid + device id).
        h = hmac.new(
            binascii.unhexlify(self._HKEY),
            (license_response['cid'] + self.ie._DEVICE_ID).encode(),
            digestmod=hashlib.sha256)
        enckey = list(h.digest())

        return bytes(aes_ecb_decrypt(encvideokey, enckey))
  68. class AbemaTVBaseIE(InfoExtractor):
  69. _NETRC_MACHINE = 'abematv'
  70. _USERTOKEN = None
  71. _DEVICE_ID = None
  72. _MEDIATOKEN = None
  73. _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
  74. @classmethod
  75. def _generate_aks(cls, deviceid):
  76. deviceid = deviceid.encode()
  77. # add 1 hour and then drop minute and secs
  78. ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
  79. time_struct = time.gmtime(ts_1hour)
  80. ts_1hour_str = str(ts_1hour).encode()
  81. tmp = None
  82. def mix_once(nonce):
  83. nonlocal tmp
  84. h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
  85. h.update(nonce)
  86. tmp = h.digest()
  87. def mix_tmp(count):
  88. nonlocal tmp
  89. for _ in range(count):
  90. mix_once(tmp)
  91. def mix_twist(nonce):
  92. nonlocal tmp
  93. mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
  94. mix_once(cls._SECRETKEY)
  95. mix_tmp(time_struct.tm_mon)
  96. mix_twist(deviceid)
  97. mix_tmp(time_struct.tm_mday % 5)
  98. mix_twist(ts_1hour_str)
  99. mix_tmp(time_struct.tm_hour % 5)
  100. return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
  101. def _get_device_token(self):
  102. if self._USERTOKEN:
  103. return self._USERTOKEN
  104. self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
  105. username, _ = self._get_login_info()
  106. auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
  107. AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
  108. if AbemaTVBaseIE._USERTOKEN:
  109. # try authentication with locally stored token
  110. try:
  111. AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
  112. self._get_media_token(True)
  113. return
  114. except ExtractorError as e:
  115. self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})')
  116. AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
  117. aks = self._generate_aks(self._DEVICE_ID)
  118. user_data = self._download_json(
  119. 'https://api.abema.io/v1/users', None, note='Authorizing',
  120. data=json.dumps({
  121. 'deviceId': self._DEVICE_ID,
  122. 'applicationKeySecret': aks,
  123. }).encode(),
  124. headers={
  125. 'Content-Type': 'application/json',
  126. })
  127. AbemaTVBaseIE._USERTOKEN = user_data['token']
  128. return self._USERTOKEN
  129. def _get_media_token(self, invalidate=False, to_show=True):
  130. if not invalidate and self._MEDIATOKEN:
  131. return self._MEDIATOKEN
  132. AbemaTVBaseIE._MEDIATOKEN = self._download_json(
  133. 'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
  134. query={
  135. 'osName': 'android',
  136. 'osVersion': '6.0.1',
  137. 'osLang': 'ja_JP',
  138. 'osTimezone': 'Asia/Tokyo',
  139. 'appId': 'tv.abema',
  140. 'appVersion': '3.27.1',
  141. }, headers={
  142. 'Authorization': f'bearer {self._get_device_token()}',
  143. })['token']
  144. return self._MEDIATOKEN
  145. def _perform_login(self, username, password):
  146. self._get_device_token()
  147. if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
  148. self.write_debug('Skipping logging in')
  149. return
  150. if '@' in username: # don't strictly check if it's email address or not
  151. ep, method = 'user/email', 'email'
  152. else:
  153. ep, method = 'oneTimePassword', 'userId'
  154. login_response = self._download_json(
  155. f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
  156. data=json.dumps({
  157. method: username,
  158. 'password': password,
  159. }).encode(), headers={
  160. 'Authorization': f'bearer {self._get_device_token()}',
  161. 'Origin': 'https://abema.tv',
  162. 'Referer': 'https://abema.tv/',
  163. 'Content-Type': 'application/json',
  164. })
  165. AbemaTVBaseIE._USERTOKEN = login_response['token']
  166. self._get_media_token(True)
  167. auth_cache = {
  168. 'device_id': AbemaTVBaseIE._DEVICE_ID,
  169. 'usertoken': AbemaTVBaseIE._USERTOKEN,
  170. }
  171. self.cache.store(self._NETRC_MACHINE, username, auth_cache)
  172. def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
  173. return self._download_json(
  174. f'https://api.abema.io/{endpoint}', video_id, query=query or {},
  175. note=note,
  176. headers={
  177. 'Authorization': f'bearer {self._get_device_token()}',
  178. })
  179. def _extract_breadcrumb_list(self, webpage, video_id):
  180. for jld in re.finditer(
  181. r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
  182. webpage):
  183. jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
  184. if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
  185. continue
  186. items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
  187. if items:
  188. return items
  189. return []
class AbemaTVIE(AbemaTVBaseIE):
    """Extractor for individual AbemaTV URLs: live channels (now-on-air),
    VOD episodes, and timeshift slots."""
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」 「モーニング再び」',
            'series': '異世界食堂2',
            'season': 'シーズン2',
            'season_number': 2,
            'episode': '第1話 「チーズケーキ」 「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON2',
            'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'season_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            # NOTE(review): this 'id' looks copy-pasted from the slot test
            # above and does not match the URL's episode id — verify upstream.
            'id': 'E8tvAnMJ7a9a5d',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]

    # Class-level cache for the (large) timetable JSON; see _real_extract.
    _TIMETABLE = None

    def _real_extract(self, url):
        # starting download using infojson from this extractor is undefined behavior,
        # and never be fixed in the future; you must trigger downloads by directly specifying URL.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        # 'channels/.../slots' -> 'slots'; other types pass through unchanged.
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        # Title resolution cascade: episode title block -> thumbnail JSON-LD
        # caption -> (live only) timetable -> breadcrumb -> API response.
        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it goes to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            # Timetable timestamps are JST (UTC+9); find the slot airing now.
            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now and now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            # breadcrumb list translates to: (e.g. 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'),
            webpage, 'description', default=None, group=1)
        if not description:
            # Fall back to meta descriptions, trimming site boilerplate.
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA!| # anime
                        等、.+ # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain season and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            # float('inf') default makes the < comparisons below reject
            # unparseable values instead of raising.
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['season_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        availability = 'public'
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                # for-else: no channel matched the requested id.
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('label', 'free', {bool})):
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')
                availability = 'premium_only'
            info.update(traverse_obj(api_response, {
                'series': ('series', 'title'),
                'season': ('season', 'name'),
                'season_number': ('season', 'sequence'),
                'episode_number': ('episode', 'number'),
            }))
            if not title:
                title = traverse_obj(api_response, ('episode', 'title'))
            if not description:
                description = traverse_obj(api_response, ('episode', 'content'))

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')
                availability = 'premium_only'

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            # _VALID_URL only admits the three types handled above.
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
            'availability': availability,
        })

        return info
  370. class AbemaTVTitleIE(AbemaTVBaseIE):
  371. _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
  372. _PAGE_SIZE = 25
  373. _TESTS = [{
  374. 'url': 'https://abema.tv/video/title/90-1597',
  375. 'info_dict': {
  376. 'id': '90-1597',
  377. 'title': 'シャッフルアイランド',
  378. },
  379. 'playlist_mincount': 2,
  380. }, {
  381. 'url': 'https://abema.tv/video/title/193-132',
  382. 'info_dict': {
  383. 'id': '193-132',
  384. 'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
  385. },
  386. 'playlist_mincount': 16,
  387. }, {
  388. 'url': 'https://abema.tv/video/title/25-102',
  389. 'info_dict': {
  390. 'id': '25-102',
  391. 'title': 'ソードアート・オンライン アリシゼーション',
  392. },
  393. 'playlist_mincount': 24,
  394. }]
  395. def _fetch_page(self, playlist_id, series_version, page):
  396. programs = self._call_api(
  397. f'v1/video/series/{playlist_id}/programs', playlist_id,
  398. note=f'Downloading page {page + 1}',
  399. query={
  400. 'seriesVersion': series_version,
  401. 'offset': str(page * self._PAGE_SIZE),
  402. 'order': 'seq',
  403. 'limit': str(self._PAGE_SIZE),
  404. })
  405. yield from (
  406. self.url_result(f'https://abema.tv/video/episode/{x}')
  407. for x in traverse_obj(programs, ('programs', ..., 'id')))
  408. def _entries(self, playlist_id, series_version):
  409. return OnDemandPagedList(
  410. functools.partial(self._fetch_page, playlist_id, series_version),
  411. self._PAGE_SIZE)
  412. def _real_extract(self, url):
  413. playlist_id = self._match_id(url)
  414. series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
  415. return self.playlist_result(
  416. self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
  417. playlist_title=series_info.get('title'),
  418. playlist_description=series_info.get('content'))