
youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

xfileshare.py (10939B)


# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_chr,
    compat_zip as zip,
)
from ..utils import (
    clean_html,
    decode_packed_codes,
    determine_ext,
    ExtractorError,
    get_element_by_id,
    int_or_none,
    merge_dicts,
    T,
    traverse_obj,
    url_or_none,
    urlencode_postdata,
)


# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
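# Descriptive note (added): aa_decode() reverses what appears to be AAEncode-style
# emoticon obfuscation. The input is split on the delimiter below, each chunk's
# emoticon sub-expressions are mapped back to digit characters via symbol_table,
# and the resulting number (hexadecimal if prefixed with 'u'/'U', else decimal)
# is converted to its character.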
def aa_decode(aa_code):
    symbol_table = (
        ('7', '((゚ー゚) + (o^_^o))'),
        ('6', '((o^_^o) +(o^_^o))'),
        ('5', '((゚ー゚) + (゚Θ゚))'),
        ('2', '((o^_^o) - (゚Θ゚))'),
        ('4', '(゚ー゚)'),
        ('3', '(o^_^o)'),
        ('1', '(゚Θ゚)'),
        ('0', '(c^_^o)'),
        ('+', ''),
    )
    delim = '(゚Д゚)[゚ε゚]+'

    def chr_from_code(c):
        for val, pat in symbol_table:
            c = c.replace(pat, val)
        if c.startswith(('u', 'U')):
            base = 16
            c = c[1:]
        else:
            base = 10
        c = int_or_none(c, base=base)
        return '' if c is None else compat_chr(c)

    return ''.join(
        chr_from_code(aa_char)
        for aa_char in aa_code.split(delim))


class XFileShareIE(InfoExtractor):
    _SITES = (
        # status check 2024-02: site availability, G site: search
        (r'aparat\.cam', 'Aparat'),  # Cloudflare says host error 522, apparently changed to wolfstreeam.tv
        (r'filemoon\.sx/.', 'FileMoon'),
        (r'gounlimited\.to', 'GoUnlimited'),  # no media pages listed
        (r'govid\.me', 'GoVid'),  # no media pages listed
        (r'highstream\.tv', 'HighStream'),  # clipwatching.com redirects here
        (r'holavid\.com', 'HolaVid'),  # Cloudflare says host error 522
        # (r'streamty\.com', 'Streamty'),  # no media pages listed, connection timeout
        # (r'thevideobee\.to', 'TheVideoBee'),  # no pages listed, refuses connection
        (r'uqload\.to', 'Uqload'),  # .com, .co redirect here
        (r'(?:vedbam\.xyz|vadbam\.net)', 'V?dB?m'),  # vidbom.com redirects here, but no valid media pages listed
        (r'vidlo\.us', 'vidlo'),  # no valid media pages listed
        (r'vidlocker\.xyz', 'VidLocker'),  # no media pages listed
        (r'(?:w\d\.)?viidshar\.com', 'VidShare'),  # vidshare.tv redirects here
        # (r'vup\.to', 'VUp'),  # domain not found
        (r'wolfstream\.tv', 'WolfStream'),
        (r'xvideosharing\.com', 'XVideoSharing'),  # just started showing 'maintenance mode'
    )

    IE_DESC = 'XFileShare-based sites: %s' % ', '.join(list(zip(*_SITES))[1])

    _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
                  % '|'.join(site for site in list(zip(*_SITES))[0]))
    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]

    _FILE_NOT_FOUND_REGEXES = (
        r'>(?:404 - )?File Not Found<',
        r'>The file was removed by administrator<',
    )
    _TITLE_REGEXES = (
        r'style="z-index: [0-9]+;">([^<]+)</span>',
        r'<td nowrap>([^<]+)</td>',
        r'h4-fine[^>]*>([^<]+)<',
        r'>Watch (.+)[ <]',
        r'<h2 class="video-page-head">([^<]+)</h2>',
        r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<',  # streamin.to (dead)
        r'title\s*:\s*"([^"]+)"',  # govid.me
    )
    _SOURCE_URL_REGEXES = (
        r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
        r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
        r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
        r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
    )
    _THUMBNAIL_REGEXES = (
        r'<video[^>]+poster="([^"]+)"',
        r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
    )
    _TESTS = [{
        'note': 'link in `sources`',
        'url': 'https://uqload.to/dcsu06gdb45o',
        'md5': '7f8db187b254379440bf4fcad094ae86',
        'info_dict': {
            'id': 'dcsu06gdb45o',
            'ext': 'mp4',
            'title': 'f2e31015957e74c8c8427982e161c3fc mp4',
            'thumbnail': r're:https://.*\.jpg'
        },
        'params': {
            'nocheckcertificate': True,
        },
        'expected_warnings': ['Unable to extract JWPlayer data'],
    }, {
        'note': 'link in decoded `sources`',
        'url': 'https://xvideosharing.com/1tlg6agrrdgc',
        'md5': '2608ce41932c1657ae56258a64e647d9',
        'info_dict': {
            'id': '1tlg6agrrdgc',
            'ext': 'mp4',
            'title': '0121',
            'thumbnail': r're:https?://.*\.jpg',
        },
        'skip': 'This server is in maintenance mode.',
    }, {
        'note': 'JWPlayer link in un-p,a,c,k,e,d JS',
        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
        'md5': '5a713742f57ac4aef29b74733e8dda01',
        'info_dict': {
            'id': 'dw40rxrzruqz',
            'title': 'dw40rxrzruqz',
            'ext': 'mp4'
        },
    }, {
        'note': 'JWPlayer link in un-p,a,c,k,e,d JS',
        'url': 'https://vadbam.net/6lnbkci96wly.html',
        'md5': 'a1616800076177e2ac769203957c54bc',
        'info_dict': {
            'id': '6lnbkci96wly',
            'title': 'Heart Crime S01 E03 weciima autos',
            'ext': 'mp4'
        },
    }, {
        'note': 'JWPlayer link in clear',
        'url': 'https://w1.viidshar.com/nnibe0xf0h79.html',
        'md5': 'f0a580ce9df06cc61b4a5c979d672367',
        'info_dict': {
            'id': 'nnibe0xf0h79',
            'title': 'JaGa 68ar',
            'ext': 'mp4'
        },
        'params': {
            'skip_download': 'ffmpeg',
        },
        'expected_warnings': ['hlsnative has detected features it does not support'],
    }, {
        'note': 'JWPlayer link in clear',
        'url': 'https://wolfstream.tv/a3drtehyrg52.html',
        'md5': '1901d86a79c5e0c6a51bdc9a4cfd3769',
        'info_dict': {
            'id': 'a3drtehyrg52',
            'title': 'NFL 2023 W04 DET@GB',
            'ext': 'mp4'
        },
    }, {
        'url': 'https://aparat.cam/n4d6dh0wvlpr',
        'only_matching': True,
    }, {
        'url': 'https://uqload.to/ug5somm0ctnk.html',
        'only_matching': True,
    }, {
        'url': 'https://highstream.tv/2owiyz3sjoux',
        'only_matching': True,
    }, {
        'url': 'https://vedbam.xyz/6lnbkci96wly.html',
        'only_matching': True,
    }]

    @classmethod
    def _extract_urls(cls, webpage):

        def yield_urls():
            for regex in cls._EMBED_REGEX:
                for mobj in re.finditer(regex, webpage):
                    yield mobj.group('url')

        return list(yield_urls())

    def _real_extract(self, url):
        host, video_id = self._match_valid_url(url).group('host', 'id')

        url = 'https://%s/%s' % (
            host,
            'embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
        webpage = self._download_webpage(url, video_id)

        container_div = get_element_by_id('container', webpage) or webpage
        if self._search_regex(
                r'>This server is in maintenance mode\.', container_div,
                'maint error', group=0, default=None):
            raise ExtractorError(clean_html(container_div), expected=True)
        if self._search_regex(
                self._FILE_NOT_FOUND_REGEXES, container_div,
                'missing video error', group=0, default=None):
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
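
        # Descriptive note (added): some hosts interpose a wait/countdown form
        # before the media page; honour the advertised countdown, then re-request
        # the page with the hidden form fields POSTed back.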
        fields = self._hidden_inputs(webpage)

        if fields.get('op') == 'download1':
            countdown = int_or_none(self._search_regex(
                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
                webpage, 'countdown', default=None))
            if countdown:
                self._sleep(countdown, video_id)

            webpage = self._download_webpage(
                url, video_id, 'Downloading video page',
                data=urlencode_postdata(fields), headers={
                    'Referer': url,
                    'Content-type': 'application/x-www-form-urlencoded',
                })

        title = (
            self._search_regex(self._TITLE_REGEXES, webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or video_id).strip()
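
        # Descriptive note (added): peel away script obfuscation layer by layer,
        # replacing each p,a,c,k,e,d-packed or AAEncode'd block with its decoded
        # JavaScript until none is left, so the JWPlayer setup becomes visible
        # to _find_jwplayer_data() below.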
        obf_code = True
        while obf_code:
            for regex, func in (
                    (r'(?s)(?<!-)\b(eval\(function\(p,a,c,k,e,d\)\{(?:(?!</script>).)+\)\))',
                        decode_packed_codes),
                    (r'(゚.+)', aa_decode)):
                obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
                if obf_code:
                    webpage = webpage.replace(obf_code, func(obf_code))
                    break

        jwplayer_data = self._find_jwplayer_data(
            webpage.replace(r'\'', '\''), video_id)
        result = self._parse_jwplayer_data(
            jwplayer_data, video_id, require_title=False,
            m3u8_id='hls', mpd_id='dash')

        if not traverse_obj(result, 'formats'):
            if jwplayer_data:
                self.report_warning(
                    'Failed to extract JWPlayer formats', video_id=video_id)
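
            # Descriptive note (added): fall back to scraping media URLs directly
            # from the (de-obfuscated) page, both via the known file/src/file_link
            # patterns and from a plain-string `sources: [...]` array if present.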
            urls = set()
            for regex in self._SOURCE_URL_REGEXES:
                for mobj in re.finditer(regex, webpage):
                    urls.add(mobj.group('url'))

            sources = self._search_regex(
                r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
            urls.update(traverse_obj(sources, (T(lambda s: self._parse_json(s, video_id)), Ellipsis)))

            formats = []
            for video_url in traverse_obj(urls, (Ellipsis, T(url_or_none))):
                if determine_ext(video_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        video_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls',
                        fatal=False))
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': 'sd',
                    })

            result = {'formats': formats}

        self._sort_formats(result['formats'])

        thumbnail = self._search_regex(
            self._THUMBNAIL_REGEXES, webpage, 'thumbnail', default=None)

        if not (title or result.get('title')):
            title = self._generic_title(url) or video_id

        return merge_dicts(result, {
            'id': video_id,
            'title': title or None,
            'thumbnail': thumbnail,
            'http_headers': {'Referer': url}
        })
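

# Usage sketch (added; not part of the module): the extractor is normally
# exercised through youtube-dl's public API rather than instantiated directly.
# A minimal sketch, assuming the first _TESTS URL is still reachable:
#
#     import youtube_dl
#
#     with youtube_dl.YoutubeDL({'nocheckcertificate': True}) as ydl:
#         info = ydl.extract_info('https://uqload.to/dcsu06gdb45o', download=False)
#         print(info['id'], info.get('title'), info.get('ext'))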