logo

oasis-root

Compiled tree of Oasis Linux based on own branch at <https://hacktivis.me/git/oasis/> git clone https://anongit.hacktivis.me/git/oasis-root.git

douyutv.py (11968B)


  1. import hashlib
  2. import time
  3. import urllib
  4. import uuid
  5. from .common import InfoExtractor
  6. from .openload import PhantomJSwrapper
  7. from ..utils import (
  8. ExtractorError,
  9. UserNotLive,
  10. determine_ext,
  11. int_or_none,
  12. js_to_json,
  13. parse_resolution,
  14. str_or_none,
  15. traverse_obj,
  16. unescapeHTML,
  17. url_or_none,
  18. urlencode_postdata,
  19. urljoin,
  20. )
  21. class DouyuBaseIE(InfoExtractor):
  22. def _download_cryptojs_md5(self, video_id):
  23. for url in [
  24. # XXX: Do NOT use cdn.bootcdn.net; ref: https://sansec.io/research/polyfill-supply-chain-attack
  25. 'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
  26. 'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js',
  27. ]:
  28. js_code = self._download_webpage(
  29. url, video_id, note='Downloading signing dependency', fatal=False)
  30. if js_code:
  31. self.cache.store('douyu', 'crypto-js-md5', js_code)
  32. return js_code
  33. raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
  34. def _get_cryptojs_md5(self, video_id):
  35. return self.cache.load(
  36. 'douyu', 'crypto-js-md5', min_ver='2024.07.04') or self._download_cryptojs_md5(video_id)
  37. def _calc_sign(self, sign_func, video_id, a):
  38. b = uuid.uuid4().hex
  39. c = round(time.time())
  40. js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
  41. phantom = PhantomJSwrapper(self)
  42. result = phantom.execute(js_script, video_id,
  43. note='Executing JS signing script').strip()
  44. return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
  45. def _search_js_sign_func(self, webpage, fatal=True):
  46. # The greedy look-behind ensures last possible script tag is matched
  47. return self._search_regex(
  48. r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
  49. class DouyuTVIE(DouyuBaseIE):
  50. IE_DESC = '斗鱼直播'
  51. _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
  52. _TESTS = [{
  53. 'url': 'https://www.douyu.com/pigff',
  54. 'info_dict': {
  55. 'id': '24422',
  56. 'display_id': 'pigff',
  57. 'ext': 'mp4',
  58. 'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  59. 'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
  60. 'thumbnail': str,
  61. 'uploader': 'pigff',
  62. 'is_live': True,
  63. 'live_status': 'is_live',
  64. },
  65. 'params': {
  66. 'skip_download': True,
  67. },
  68. }, {
  69. 'url': 'http://www.douyutv.com/85982',
  70. 'info_dict': {
  71. 'id': '85982',
  72. 'display_id': '85982',
  73. 'ext': 'flv',
  74. 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  75. 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
  76. 'thumbnail': r're:^https?://.*\.png',
  77. 'uploader': 'douyu小漠',
  78. 'is_live': True,
  79. },
  80. 'params': {
  81. 'skip_download': True,
  82. },
  83. 'skip': 'Room not found',
  84. }, {
  85. 'url': 'http://www.douyutv.com/17732',
  86. 'info_dict': {
  87. 'id': '17732',
  88. 'display_id': '17732',
  89. 'ext': 'flv',
  90. 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  91. 'description': r're:.*m7show@163\.com.*',
  92. 'thumbnail': r're:^https?://.*\.png',
  93. 'uploader': '7师傅',
  94. 'is_live': True,
  95. },
  96. 'params': {
  97. 'skip_download': True,
  98. },
  99. }, {
  100. 'url': 'https://www.douyu.com/topic/ydxc?rid=6560603',
  101. 'info_dict': {
  102. 'id': '6560603',
  103. 'display_id': '6560603',
  104. 'ext': 'flv',
  105. 'title': 're:^阿余:新年快乐恭喜发财! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  106. 'description': 're:.*直播时间.*',
  107. 'thumbnail': r're:^https?://.*\.png',
  108. 'uploader': '阿涛皎月Carry',
  109. 'live_status': 'is_live',
  110. },
  111. 'params': {
  112. 'skip_download': True,
  113. },
  114. }, {
  115. 'url': 'http://www.douyu.com/xiaocang',
  116. 'only_matching': True,
  117. }, {
  118. # \"room_id\"
  119. 'url': 'http://www.douyu.com/t/lpl',
  120. 'only_matching': True,
  121. }]
  122. def _get_sign_func(self, room_id, video_id):
  123. return self._download_json(
  124. f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
  125. note='Getting signing script')['data'][f'room{room_id}']
  126. def _extract_stream_formats(self, stream_formats):
  127. formats = []
  128. for stream_info in traverse_obj(stream_formats, (..., 'data')):
  129. stream_url = urljoin(
  130. traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
  131. if stream_url:
  132. rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
  133. rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
  134. ext = determine_ext(stream_url)
  135. formats.append({
  136. 'url': stream_url,
  137. 'format_id': str_or_none(rate_id),
  138. 'ext': 'mp4' if ext == 'm3u8' else ext,
  139. 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
  140. 'quality': rate_id % -10000 if rate_id is not None else None,
  141. **traverse_obj(rate_info, {
  142. 'format': ('name', {str_or_none}),
  143. 'tbr': ('bit', {int_or_none}),
  144. }),
  145. })
  146. return formats
  147. def _real_extract(self, url):
  148. video_id = self._match_id(url)
  149. webpage = self._download_webpage(url, video_id)
  150. room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
  151. if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
  152. raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
  153. if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
  154. raise UserNotLive(video_id=video_id)
  155. # Grab metadata from API
  156. params = {
  157. 'aid': 'wp',
  158. 'client_sys': 'wp',
  159. 'time': int(time.time()),
  160. }
  161. params['auth'] = hashlib.md5(
  162. f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
  163. room = traverse_obj(self._download_json(
  164. f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
  165. note='Downloading room info', query=params, fatal=False), 'data')
  166. # 1 = live, 2 = offline
  167. if traverse_obj(room, 'show_status') == '2':
  168. raise UserNotLive(video_id=video_id)
  169. js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
  170. form_data = {
  171. 'rate': 0,
  172. **self._calc_sign(js_sign_func, video_id, room_id),
  173. }
  174. stream_formats = [self._download_json(
  175. f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
  176. video_id, note='Downloading livestream format',
  177. data=urlencode_postdata(form_data))]
  178. for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
  179. if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
  180. form_data['rate'] = rate_id
  181. stream_formats.append(self._download_json(
  182. f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
  183. video_id, note=f'Downloading livestream format {rate_id}',
  184. data=urlencode_postdata(form_data)))
  185. return {
  186. 'id': room_id,
  187. 'formats': self._extract_stream_formats(stream_formats),
  188. 'is_live': True,
  189. **traverse_obj(room, {
  190. 'display_id': ('url', {str}, {lambda i: i[1:]}),
  191. 'title': ('room_name', {unescapeHTML}),
  192. 'description': ('show_details', {str}),
  193. 'uploader': ('nickname', {str}),
  194. 'thumbnail': ('room_src', {url_or_none}),
  195. }),
  196. }
  197. class DouyuShowIE(DouyuBaseIE):
  198. _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
  199. _TESTS = [{
  200. 'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
  201. 'info_dict': {
  202. 'id': 'mPyq7oVNe5Yv1gLY',
  203. 'ext': 'mp4',
  204. 'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
  205. 'duration': 633,
  206. 'thumbnail': str,
  207. 'uploader': '美食作家王刚V',
  208. 'uploader_id': 'OVAO4NVx1m7Q',
  209. 'timestamp': 1661850002,
  210. 'upload_date': '20220830',
  211. 'view_count': int,
  212. 'tags': ['美食', '美食综合'],
  213. },
  214. }, {
  215. 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
  216. 'only_matching': True,
  217. }]
  218. _FORMATS = {
  219. 'super': '原画',
  220. 'high': '超清',
  221. 'normal': '高清',
  222. }
  223. _QUALITIES = {
  224. 'super': -1,
  225. 'high': -2,
  226. 'normal': -3,
  227. }
  228. _RESOLUTIONS = {
  229. 'super': '1920x1080',
  230. 'high': '1280x720',
  231. 'normal': '852x480',
  232. }
  233. def _real_extract(self, url):
  234. url = url.replace('vmobile.', 'v.')
  235. video_id = self._match_id(url)
  236. webpage = self._download_webpage(url, video_id)
  237. video_info = self._search_json(
  238. r'<script>\s*window\.\$DATA\s*=', webpage,
  239. 'video info', video_id, transform_source=js_to_json)
  240. js_sign_func = self._search_js_sign_func(webpage)
  241. form_data = {
  242. 'vid': video_id,
  243. **self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
  244. }
  245. url_info = self._download_json(
  246. 'https://v.douyu.com/api/stream/getStreamUrl', video_id,
  247. data=urlencode_postdata(form_data), note='Downloading video formats')
  248. formats = []
  249. for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
  250. video_url = traverse_obj(url, ('url', {url_or_none}))
  251. if video_url:
  252. ext = determine_ext(video_url)
  253. formats.append({
  254. 'format': self._FORMATS.get(name),
  255. 'format_id': name,
  256. 'url': video_url,
  257. 'quality': self._QUALITIES.get(name),
  258. 'ext': 'mp4' if ext == 'm3u8' else ext,
  259. 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
  260. **parse_resolution(self._RESOLUTIONS.get(name)),
  261. })
  262. else:
  263. self.to_screen(
  264. f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
  265. return {
  266. 'id': video_id,
  267. 'formats': formats,
  268. **traverse_obj(video_info, ('DATA', {
  269. 'title': ('content', 'title', {str}),
  270. 'uploader': ('content', 'author', {str}),
  271. 'uploader_id': ('content', 'up_id', {str_or_none}),
  272. 'duration': ('content', 'video_duration', {int_or_none}),
  273. 'thumbnail': ('content', 'video_pic', {url_or_none}),
  274. 'timestamp': ('content', 'create_time', {int_or_none}),
  275. 'view_count': ('content', 'view_num', {int_or_none}),
  276. 'tags': ('videoTag', ..., 'tagName', {str}),
  277. })),
  278. }