logo

oasis-root

Compiled tree of Oasis Linux based on own branch at <https://hacktivis.me/git/oasis/>

git clone https://anongit.hacktivis.me/git/oasis-root.git

patreon.py (26865B)


  1. import functools
  2. import itertools
  3. import urllib.parse
  4. from .common import InfoExtractor
  5. from .sproutvideo import VidsIoIE
  6. from .vimeo import VimeoIE
  7. from ..networking.exceptions import HTTPError
  8. from ..utils import (
  9. KNOWN_EXTENSIONS,
  10. ExtractorError,
  11. clean_html,
  12. determine_ext,
  13. int_or_none,
  14. mimetype2ext,
  15. parse_iso8601,
  16. smuggle_url,
  17. str_or_none,
  18. url_or_none,
  19. urljoin,
  20. )
  21. from ..utils.traversal import traverse_obj, value
  22. class PatreonBaseIE(InfoExtractor):
  23. @functools.cached_property
  24. def patreon_user_agent(self):
  25. # Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
  26. # Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
  27. if self._get_cookies('https://www.patreon.com/').get('session_id'):
  28. return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
  29. return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
  30. def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
  31. if headers is None:
  32. headers = {}
  33. if 'User-Agent' not in headers:
  34. headers['User-Agent'] = self.patreon_user_agent
  35. if query:
  36. query.update({'json-api-version': 1.0})
  37. try:
  38. return self._download_json(
  39. f'https://www.patreon.com/api/{ep}',
  40. item_id, note=note if note else 'Downloading API JSON',
  41. query=query, fatal=fatal, headers=headers)
  42. except ExtractorError as e:
  43. if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.response.headers.get('Content-Type')) != 'json':
  44. raise
  45. err_json = self._parse_json(self._webpage_read_content(e.cause.response, None, item_id), item_id, fatal=False)
  46. err_message = traverse_obj(err_json, ('errors', ..., 'detail'), get_all=False)
  47. if err_message:
  48. raise ExtractorError(f'Patreon said: {err_message}', expected=True)
  49. raise
  50. class PatreonIE(PatreonBaseIE):
  51. IE_NAME = 'patreon'
  52. _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
  53. _TESTS = [{
  54. 'url': 'http://www.patreon.com/creation?hid=743933',
  55. 'md5': 'e25505eec1053a6e6813b8ed369875cc',
  56. 'info_dict': {
  57. 'id': '743933',
  58. 'ext': 'mp3',
  59. 'title': 'Episode 166: David Smalley of Dogma Debate',
  60. 'description': 'md5:34d207dd29aa90e24f1b3f58841b81c7',
  61. 'uploader': 'Cognitive Dissonance Podcast',
  62. 'thumbnail': 're:^https?://.*$',
  63. 'timestamp': 1406473987,
  64. 'upload_date': '20140727',
  65. 'uploader_id': '87145',
  66. 'like_count': int,
  67. 'comment_count': int,
  68. 'uploader_url': 'https://www.patreon.com/dissonancepod',
  69. 'channel_id': '80642',
  70. 'channel_url': 'https://www.patreon.com/dissonancepod',
  71. 'channel_follower_count': int,
  72. },
  73. }, {
  74. 'url': 'http://www.patreon.com/creation?hid=754133',
  75. 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
  76. 'info_dict': {
  77. 'id': '754133',
  78. 'ext': 'mp3',
  79. 'title': 'CD 167 Extra',
  80. 'uploader': 'Cognitive Dissonance Podcast',
  81. 'thumbnail': 're:^https?://.*$',
  82. 'like_count': int,
  83. 'comment_count': int,
  84. 'uploader_url': 'https://www.patreon.com/dissonancepod',
  85. },
  86. 'skip': 'Patron-only content',
  87. }, {
  88. 'url': 'https://www.patreon.com/creation?hid=1682498',
  89. 'info_dict': {
  90. 'id': 'SU4fj_aEMVw',
  91. 'ext': 'mp4',
  92. 'title': 'I\'m on Patreon!',
  93. 'uploader': 'TraciJHines',
  94. 'thumbnail': 're:^https?://.*$',
  95. 'upload_date': '20150211',
  96. 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
  97. 'uploader_id': '@TraciHinesMusic',
  98. 'categories': ['Entertainment'],
  99. 'duration': 282,
  100. 'view_count': int,
  101. 'tags': 'count:39',
  102. 'age_limit': 0,
  103. 'channel': 'TraciJHines',
  104. 'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
  105. 'live_status': 'not_live',
  106. 'like_count': int,
  107. 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
  108. 'availability': 'public',
  109. 'channel_follower_count': int,
  110. 'playable_in_embed': True,
  111. 'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
  112. 'comment_count': int,
  113. 'channel_is_verified': True,
  114. 'chapters': 'count:4',
  115. 'timestamp': 1423689666,
  116. },
  117. 'params': {
  118. 'noplaylist': True,
  119. 'skip_download': True,
  120. },
  121. }, {
  122. 'url': 'https://www.patreon.com/posts/episode-166-of-743933',
  123. 'only_matching': True,
  124. }, {
  125. 'url': 'https://www.patreon.com/posts/743933',
  126. 'only_matching': True,
  127. }, {
  128. 'url': 'https://www.patreon.com/posts/kitchen-as-seen-51706779',
  129. 'md5': '96656690071f6d64895866008484251b',
  130. 'info_dict': {
  131. 'id': '555089736',
  132. 'ext': 'mp4',
  133. 'title': 'KITCHEN AS SEEN ON DEEZ NUTS EXTENDED!',
  134. 'uploader': 'Cold Ones',
  135. 'thumbnail': 're:^https?://.*$',
  136. 'upload_date': '20210526',
  137. 'description': 'md5:557a409bd79d3898689419094934ba79',
  138. 'uploader_id': '14936315',
  139. },
  140. 'skip': 'Patron-only content',
  141. }, {
  142. # m3u8 video (https://github.com/yt-dlp/yt-dlp/issues/2277)
  143. 'url': 'https://www.patreon.com/posts/video-sketchbook-32452882',
  144. 'info_dict': {
  145. 'id': '32452882',
  146. 'ext': 'mp4',
  147. 'comment_count': int,
  148. 'uploader_id': '4301314',
  149. 'like_count': int,
  150. 'timestamp': 1576696962,
  151. 'upload_date': '20191218',
  152. 'thumbnail': r're:^https?://.*$',
  153. 'uploader_url': 'https://www.patreon.com/loish',
  154. 'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2',
  155. 'title': 'VIDEO // sketchbook flipthrough',
  156. 'uploader': 'Loish ',
  157. 'tags': ['sketchbook', 'video'],
  158. 'channel_id': '1641751',
  159. 'channel_url': 'https://www.patreon.com/loish',
  160. 'channel_follower_count': int,
  161. },
  162. }, {
  163. # bad videos under media (if media is included). Real one is under post_file
  164. 'url': 'https://www.patreon.com/posts/premium-access-70282931',
  165. 'info_dict': {
  166. 'id': '70282931',
  167. 'ext': 'mp4',
  168. 'title': '[Premium Access + Uncut] The Office - 2x6 The Fight - Group Reaction',
  169. 'channel_url': 'https://www.patreon.com/thenormies',
  170. 'channel_id': '573397',
  171. 'uploader_id': '2929435',
  172. 'uploader': 'The Normies',
  173. 'description': 'md5:79c9fd8778e2cef84049a94c058a5e23',
  174. 'comment_count': int,
  175. 'upload_date': '20220809',
  176. 'thumbnail': r're:^https?://.*$',
  177. 'channel_follower_count': int,
  178. 'like_count': int,
  179. 'timestamp': 1660052820,
  180. 'tags': ['The Office', 'early access', 'uncut'],
  181. 'uploader_url': 'https://www.patreon.com/thenormies',
  182. },
  183. 'skip': 'Patron-only content',
  184. }, {
  185. # dead vimeo and embed URLs, need to extract post_file
  186. 'url': 'https://www.patreon.com/posts/hunter-x-hunter-34007913',
  187. 'info_dict': {
  188. 'id': '34007913',
  189. 'ext': 'mp4',
  190. 'title': 'Hunter x Hunter | Kurapika DESTROYS Uvogin!!!',
  191. 'like_count': int,
  192. 'uploader': 'YaBoyRoshi',
  193. 'timestamp': 1581636833,
  194. 'channel_url': 'https://www.patreon.com/yaboyroshi',
  195. 'thumbnail': r're:^https?://.*$',
  196. 'tags': ['Hunter x Hunter'],
  197. 'uploader_id': '14264111',
  198. 'comment_count': int,
  199. 'channel_follower_count': int,
  200. 'description': 'Kurapika is a walking cheat code!',
  201. 'upload_date': '20200213',
  202. 'channel_id': '2147162',
  203. 'uploader_url': 'https://www.patreon.com/yaboyroshi',
  204. },
  205. }, {
  206. # NSFW vimeo embed URL
  207. 'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
  208. 'info_dict': {
  209. 'id': '902250943',
  210. 'ext': 'mp4',
  211. 'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
  212. 'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
  213. 'uploader': 'Npickyeonhwa',
  214. 'uploader_id': '90574422',
  215. 'uploader_url': 'https://www.patreon.com/Yeonhwa726',
  216. 'channel_id': '10237902',
  217. 'channel_url': 'https://www.patreon.com/Yeonhwa726',
  218. 'duration': 70,
  219. 'timestamp': 1705150153,
  220. 'upload_date': '20240113',
  221. 'comment_count': int,
  222. 'like_count': int,
  223. 'thumbnail': r're:^https?://.+',
  224. },
  225. 'params': {'skip_download': 'm3u8'},
  226. 'expected_warnings': ['Failed to parse XML: not well-formed'],
  227. }, {
  228. # multiple attachments/embeds
  229. 'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
  230. 'playlist_count': 3,
  231. 'info_dict': {
  232. 'id': '100601977',
  233. 'title': '"Holy Wars" (Megadeth) Solos Transcription & Lesson/Analysis',
  234. 'description': 'md5:d099ab976edfce6de2a65c2b169a88d3',
  235. 'uploader': 'Bradley Hall',
  236. 'uploader_id': '24401883',
  237. 'uploader_url': 'https://www.patreon.com/bradleyhallguitar',
  238. 'channel_id': '3193932',
  239. 'channel_url': 'https://www.patreon.com/bradleyhallguitar',
  240. 'channel_follower_count': int,
  241. 'timestamp': 1710777855,
  242. 'upload_date': '20240318',
  243. 'like_count': int,
  244. 'comment_count': int,
  245. 'thumbnail': r're:^https?://.+',
  246. },
  247. 'skip': 'Patron-only content',
  248. }, {
  249. # Contains a comment reply in the 'included' section
  250. 'url': 'https://www.patreon.com/posts/114721679',
  251. 'info_dict': {
  252. 'id': '114721679',
  253. 'ext': 'mp4',
  254. 'upload_date': '20241025',
  255. 'uploader': 'Japanalysis',
  256. 'like_count': int,
  257. 'thumbnail': r're:^https?://.+',
  258. 'comment_count': int,
  259. 'title': 'Karasawa Part 2',
  260. 'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
  261. 'uploader_url': 'https://www.patreon.com/japanalysis',
  262. 'uploader_id': '80504268',
  263. 'channel_url': 'https://www.patreon.com/japanalysis',
  264. 'channel_follower_count': int,
  265. 'timestamp': 1729897015,
  266. 'channel_id': '9346307',
  267. },
  268. 'params': {'getcomments': True},
  269. }]
  270. _RETURN_TYPE = 'video'
  271. def _real_extract(self, url):
  272. video_id = self._match_id(url)
  273. post = self._call_api(
  274. f'posts/{video_id}', video_id, query={
  275. 'fields[media]': 'download_url,mimetype,size_bytes',
  276. 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title,current_user_can_view',
  277. 'fields[user]': 'full_name,url',
  278. 'fields[post_tag]': 'value',
  279. 'fields[campaign]': 'url,name,patron_count',
  280. 'json-api-use-default-includes': 'false',
  281. 'include': 'audio,user,user_defined_tags,campaign,attachments_media',
  282. })
  283. attributes = post['data']['attributes']
  284. info = traverse_obj(attributes, {
  285. 'title': ('title', {str.strip}),
  286. 'description': ('content', {clean_html}),
  287. 'thumbnail': ('image', ('large_url', 'url'), {url_or_none}, any),
  288. 'timestamp': ('published_at', {parse_iso8601}),
  289. 'like_count': ('like_count', {int_or_none}),
  290. 'comment_count': ('comment_count', {int_or_none}),
  291. })
  292. entries = []
  293. idx = 0
  294. for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
  295. include_type = include['type']
  296. if include_type == 'media':
  297. media_attributes = traverse_obj(include, ('attributes', {dict})) or {}
  298. download_url = url_or_none(media_attributes.get('download_url'))
  299. ext = mimetype2ext(media_attributes.get('mimetype'))
  300. # if size_bytes is None, this media file is likely unavailable
  301. # See: https://github.com/yt-dlp/yt-dlp/issues/4608
  302. size_bytes = int_or_none(media_attributes.get('size_bytes'))
  303. if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
  304. idx += 1
  305. entries.append({
  306. 'id': f'{video_id}-{idx}',
  307. 'ext': ext,
  308. 'filesize': size_bytes,
  309. 'url': download_url,
  310. })
  311. elif include_type == 'user':
  312. info.update(traverse_obj(include, {
  313. 'uploader': ('attributes', 'full_name', {str}),
  314. 'uploader_id': ('id', {str_or_none}),
  315. 'uploader_url': ('attributes', 'url', {url_or_none}),
  316. }))
  317. elif include_type == 'post_tag':
  318. if post_tag := traverse_obj(include, ('attributes', 'value', {str})):
  319. info.setdefault('tags', []).append(post_tag)
  320. elif include_type == 'campaign':
  321. info.update(traverse_obj(include, {
  322. 'channel': ('attributes', 'title', {str}),
  323. 'channel_id': ('id', {str_or_none}),
  324. 'channel_url': ('attributes', 'url', {url_or_none}),
  325. 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
  326. }))
  327. # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
  328. headers = {'referer': 'https://patreon.com/'}
  329. # handle Vimeo embeds
  330. if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
  331. v_url = urllib.parse.unquote(self._html_search_regex(
  332. r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
  333. traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
  334. if url_or_none(v_url) and self._request_webpage(
  335. v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
  336. fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
  337. entries.append(self.url_result(
  338. VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
  339. VimeoIE, url_transparent=True))
  340. embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
  341. if embed_url and (urlh := self._request_webpage(
  342. embed_url, video_id, 'Checking embed URL', headers=headers,
  343. fatal=False, errnote=False, expected_status=403)):
  344. # Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
  345. # to check for "Sorry, we couldn&amp;rsquo;t find that page" in the meta description tag
  346. meta_description = clean_html(self._html_search_meta(
  347. 'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None))
  348. # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
  349. if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page')
  350. or VidsIoIE.suitable(embed_url)):
  351. entries.append(self.url_result(smuggle_url(embed_url, headers)))
  352. post_file = traverse_obj(attributes, ('post_file', {dict}))
  353. if post_file:
  354. name = post_file.get('name')
  355. ext = determine_ext(name)
  356. if ext in KNOWN_EXTENSIONS:
  357. entries.append({
  358. 'id': video_id,
  359. 'ext': ext,
  360. 'url': post_file['url'],
  361. })
  362. elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
  363. formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
  364. entries.append({
  365. 'id': video_id,
  366. 'formats': formats,
  367. 'subtitles': subtitles,
  368. })
  369. can_view_post = traverse_obj(attributes, 'current_user_can_view')
  370. comments = None
  371. if can_view_post and info.get('comment_count'):
  372. comments = self.extract_comments(video_id)
  373. if not entries and can_view_post is False:
  374. self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
  375. elif not entries:
  376. self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
  377. elif len(entries) == 1:
  378. info.update(entries[0])
  379. else:
  380. for entry in entries:
  381. entry.update(info)
  382. return self.playlist_result(entries, video_id, **info, __post_extractor=comments)
  383. info['id'] = video_id
  384. info['__post_extractor'] = comments
  385. return info
  386. def _get_comments(self, post_id):
  387. cursor = None
  388. count = 0
  389. params = {
  390. 'page[count]': 50,
  391. 'include': 'parent.commenter.campaign,parent.post.user,parent.post.campaign.creator,parent.replies.parent,parent.replies.commenter.campaign,parent.replies.post.user,parent.replies.post.campaign.creator,commenter.campaign,post.user,post.campaign.creator,replies.parent,replies.commenter.campaign,replies.post.user,replies.post.campaign.creator,on_behalf_of_campaign',
  392. 'fields[comment]': 'body,created,is_by_creator',
  393. 'fields[user]': 'image_url,full_name,url',
  394. 'filter[flair]': 'image_tiny_url,name',
  395. 'sort': '-created',
  396. 'json-api-version': 1.0,
  397. 'json-api-use-default-includes': 'false',
  398. }
  399. for page in itertools.count(1):
  400. params.update({'page[cursor]': cursor} if cursor else {})
  401. response = self._call_api(
  402. f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
  403. cursor = None
  404. for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])):
  405. count += 1
  406. author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
  407. yield {
  408. **traverse_obj(comment, {
  409. 'id': ('id', {str_or_none}),
  410. 'text': ('attributes', 'body', {str}),
  411. 'timestamp': ('attributes', 'created', {parse_iso8601}),
  412. 'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any),
  413. 'author_is_uploader': ('attributes', 'is_by_creator', {bool}),
  414. }),
  415. **traverse_obj(response, (
  416. 'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', {
  417. 'author': ('full_name', {str}),
  418. 'author_thumbnail': ('image_url', {url_or_none}),
  419. }), get_all=False),
  420. 'author_id': author_id,
  421. }
  422. if count < traverse_obj(response, ('meta', 'count')):
  423. cursor = traverse_obj(response, ('data', -1, 'id'))
  424. if cursor is None:
  425. break
  426. class PatreonCampaignIE(PatreonBaseIE):
  427. IE_NAME = 'patreon:campaign'
  428. _VALID_URL = r'''(?x)
  429. https?://(?:www\.)?patreon\.com/(?:
  430. (?:m|api/campaigns)/(?P<campaign_id>\d+)|
  431. (?:c/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
  432. )(?:/posts)?/?(?:$|[?#])'''
  433. _TESTS = [{
  434. 'url': 'https://www.patreon.com/dissonancepod/',
  435. 'info_dict': {
  436. 'title': 'Cognitive Dissonance Podcast',
  437. 'channel_url': 'https://www.patreon.com/dissonancepod',
  438. 'id': '80642',
  439. 'description': r're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*',
  440. 'channel_id': '80642',
  441. 'channel': 'Cognitive Dissonance Podcast',
  442. 'age_limit': 0,
  443. 'channel_follower_count': int,
  444. 'uploader_id': '87145',
  445. 'uploader_url': 'https://www.patreon.com/dissonancepod',
  446. 'uploader': 'Cognitive Dissonance Podcast',
  447. 'thumbnail': r're:^https?://.*$',
  448. },
  449. 'playlist_mincount': 68,
  450. }, {
  451. 'url': 'https://www.patreon.com/m/4767637/posts',
  452. 'info_dict': {
  453. 'title': 'Not Just Bikes',
  454. 'id': '4767637',
  455. 'channel_id': '4767637',
  456. 'channel_url': 'https://www.patreon.com/notjustbikes',
  457. 'description': r're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*',
  458. 'age_limit': 0,
  459. 'channel': 'Not Just Bikes',
  460. 'uploader_url': 'https://www.patreon.com/notjustbikes',
  461. 'uploader': 'Jason',
  462. 'uploader_id': '37306634',
  463. 'thumbnail': r're:^https?://.*$',
  464. },
  465. 'playlist_mincount': 71,
  466. }, {
  467. 'url': 'https://www.patreon.com/api/campaigns/4243769/posts',
  468. 'info_dict': {
  469. 'title': 'Second Thought',
  470. 'channel_follower_count': int,
  471. 'id': '4243769',
  472. 'channel_id': '4243769',
  473. 'channel_url': 'https://www.patreon.com/secondthought',
  474. 'description': r're:(?s).*Second Thought is an educational YouTube channel.*',
  475. 'age_limit': 0,
  476. 'channel': 'Second Thought',
  477. 'uploader_url': 'https://www.patreon.com/secondthought',
  478. 'uploader': 'JT Chapman',
  479. 'uploader_id': '32718287',
  480. 'thumbnail': r're:^https?://.*$',
  481. },
  482. 'playlist_mincount': 201,
  483. }, {
  484. 'url': 'https://www.patreon.com/c/OgSog',
  485. 'info_dict': {
  486. 'id': '8504388',
  487. 'title': 'OGSoG',
  488. 'description': r're:(?s)Hello and welcome to our Patreon page. We are Mari, Lasercorn, .+',
  489. 'channel': 'OGSoG',
  490. 'channel_id': '8504388',
  491. 'channel_url': 'https://www.patreon.com/OgSog',
  492. 'uploader_url': 'https://www.patreon.com/OgSog',
  493. 'uploader_id': '72323575',
  494. 'uploader': 'David Moss',
  495. 'thumbnail': r're:https?://.+/.+',
  496. 'channel_follower_count': int,
  497. 'age_limit': 0,
  498. },
  499. 'playlist_mincount': 331,
  500. }, {
  501. 'url': 'https://www.patreon.com/c/OgSog/posts',
  502. 'only_matching': True,
  503. }, {
  504. 'url': 'https://www.patreon.com/dissonancepod/posts',
  505. 'only_matching': True,
  506. }, {
  507. 'url': 'https://www.patreon.com/m/5932659',
  508. 'only_matching': True,
  509. }, {
  510. 'url': 'https://www.patreon.com/api/campaigns/4243769',
  511. 'only_matching': True,
  512. }]
  513. def _entries(self, campaign_id):
  514. cursor = None
  515. params = {
  516. 'fields[post]': 'patreon_url,url',
  517. 'filter[campaign_id]': campaign_id,
  518. 'filter[is_draft]': 'false',
  519. 'sort': '-published_at',
  520. 'json-api-use-default-includes': 'false',
  521. }
  522. for page in itertools.count(1):
  523. params.update({'page[cursor]': cursor} if cursor else {})
  524. posts_json = self._call_api('posts', campaign_id, query=params, note=f'Downloading posts page {page}')
  525. cursor = traverse_obj(posts_json, ('meta', 'pagination', 'cursors', 'next'))
  526. for post_url in traverse_obj(posts_json, ('data', ..., 'attributes', 'patreon_url')):
  527. yield self.url_result(urljoin('https://www.patreon.com/', post_url), PatreonIE)
  528. if cursor is None:
  529. break
  530. def _real_extract(self, url):
  531. campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
  532. if campaign_id is None:
  533. webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
  534. campaign_id = self._search_nextjs_data(
  535. webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
  536. params = {
  537. 'json-api-use-default-includes': 'false',
  538. 'fields[user]': 'full_name,url',
  539. 'fields[campaign]': 'name,summary,url,patron_count,creation_count,is_nsfw,avatar_photo_url',
  540. 'include': 'creator',
  541. }
  542. campaign_response = self._call_api(
  543. f'campaigns/{campaign_id}', campaign_id,
  544. note='Downloading campaign info', fatal=False,
  545. query=params) or {}
  546. campaign_info = campaign_response.get('data') or {}
  547. channel_name = traverse_obj(campaign_info, ('attributes', 'name'))
  548. user_info = traverse_obj(
  549. campaign_response, ('included', lambda _, v: v['type'] == 'user'),
  550. default={}, expected_type=dict, get_all=False)
  551. return {
  552. '_type': 'playlist',
  553. 'id': campaign_id,
  554. 'title': channel_name,
  555. 'entries': self._entries(campaign_id),
  556. 'description': clean_html(traverse_obj(campaign_info, ('attributes', 'summary'))),
  557. 'channel_url': traverse_obj(campaign_info, ('attributes', 'url')),
  558. 'channel_follower_count': int_or_none(traverse_obj(campaign_info, ('attributes', 'patron_count'))),
  559. 'channel_id': campaign_id,
  560. 'channel': channel_name,
  561. 'uploader_url': traverse_obj(user_info, ('attributes', 'url')),
  562. 'uploader_id': str_or_none(user_info.get('id')),
  563. 'uploader': traverse_obj(user_info, ('attributes', 'full_name')),
  564. 'playlist_count': traverse_obj(campaign_info, ('attributes', 'creation_count')),
  565. 'age_limit': 18 if traverse_obj(campaign_info, ('attributes', 'is_nsfw')) else 0,
  566. 'thumbnail': url_or_none(traverse_obj(campaign_info, ('attributes', 'avatar_photo_url'))),
  567. }