logo

oasis-root

Compiled tree of Oasis Linux based on own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

pinterest.py (10494B)


  1. import json
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. determine_ext,
  5. float_or_none,
  6. int_or_none,
  7. str_or_none,
  8. strip_or_none,
  9. traverse_obj,
  10. unified_timestamp,
  11. url_or_none,
  12. )
  13. class PinterestBaseIE(InfoExtractor):
  14. _VALID_URL_BASE = r'''(?x)
  15. https?://(?:[^/]+\.)?pinterest\.(?:
  16. com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|
  17. dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|
  18. co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'''
  19. def _call_api(self, resource, video_id, options):
  20. return self._download_json(
  21. f'https://www.pinterest.com/resource/{resource}Resource/get/',
  22. video_id, f'Download {resource} JSON metadata', query={
  23. 'data': json.dumps({'options': options}),
  24. })['resource_response']
  25. def _extract_video(self, data, extract_formats=True):
  26. video_id = data['id']
  27. thumbnails = []
  28. images = data.get('images')
  29. if isinstance(images, dict):
  30. for thumbnail in images.values():
  31. if not isinstance(thumbnail, dict):
  32. continue
  33. thumbnail_url = url_or_none(thumbnail.get('url'))
  34. if not thumbnail_url:
  35. continue
  36. thumbnails.append({
  37. 'url': thumbnail_url,
  38. 'width': int_or_none(thumbnail.get('width')),
  39. 'height': int_or_none(thumbnail.get('height')),
  40. })
  41. info = {
  42. 'title': strip_or_none(traverse_obj(data, 'title', 'grid_title', default='')),
  43. 'description': traverse_obj(data, 'seo_description', 'description'),
  44. 'timestamp': unified_timestamp(data.get('created_at')),
  45. 'thumbnails': thumbnails,
  46. 'uploader': traverse_obj(data, ('closeup_attribution', 'full_name')),
  47. 'uploader_id': str_or_none(traverse_obj(data, ('closeup_attribution', 'id'))),
  48. 'repost_count': int_or_none(data.get('repin_count')),
  49. 'comment_count': int_or_none(data.get('comment_count')),
  50. 'categories': traverse_obj(data, ('pin_join', 'visual_annotation'), expected_type=list),
  51. 'tags': traverse_obj(data, 'hashtags', expected_type=list),
  52. }
  53. urls = []
  54. formats = []
  55. duration = None
  56. domain = data.get('domain', '')
  57. if domain.lower() != 'uploaded by user' and traverse_obj(data, ('embed', 'src')):
  58. if not info['title']:
  59. info['title'] = None
  60. return {
  61. '_type': 'url_transparent',
  62. 'url': data['embed']['src'],
  63. **info,
  64. }
  65. elif extract_formats:
  66. video_list = traverse_obj(
  67. data, ('videos', 'video_list'),
  68. ('story_pin_data', 'pages', ..., 'blocks', ..., 'video', 'video_list'),
  69. expected_type=dict, get_all=False, default={})
  70. for format_id, format_dict in video_list.items():
  71. if not isinstance(format_dict, dict):
  72. continue
  73. format_url = url_or_none(format_dict.get('url'))
  74. if not format_url or format_url in urls:
  75. continue
  76. urls.append(format_url)
  77. duration = float_or_none(format_dict.get('duration'), scale=1000)
  78. ext = determine_ext(format_url)
  79. if 'hls' in format_id.lower() or ext == 'm3u8':
  80. formats.extend(self._extract_m3u8_formats(
  81. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  82. m3u8_id=format_id, fatal=False))
  83. else:
  84. formats.append({
  85. 'url': format_url,
  86. 'format_id': format_id,
  87. 'width': int_or_none(format_dict.get('width')),
  88. 'height': int_or_none(format_dict.get('height')),
  89. 'duration': duration,
  90. })
  91. return {
  92. 'id': video_id,
  93. 'formats': formats,
  94. 'duration': duration,
  95. 'webpage_url': f'https://www.pinterest.com/pin/{video_id}/',
  96. 'extractor_key': PinterestIE.ie_key(),
  97. 'extractor': PinterestIE.IE_NAME,
  98. **info,
  99. }
  100. class PinterestIE(PinterestBaseIE):
  101. _VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?:[\w-]+--)?(?P<id>\d+)'
  102. _TESTS = [{
  103. # formats found in data['videos']
  104. 'url': 'https://www.pinterest.com/pin/664281013778109217/',
  105. 'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
  106. 'info_dict': {
  107. 'id': '664281013778109217',
  108. 'ext': 'mp4',
  109. 'title': 'Origami',
  110. 'description': 'md5:e29801cab7d741ea8c741bc50c8d00ab',
  111. 'duration': 57.7,
  112. 'timestamp': 1593073622,
  113. 'upload_date': '20200625',
  114. 'repost_count': int,
  115. 'comment_count': int,
  116. 'categories': list,
  117. 'tags': list,
  118. 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
  119. },
  120. }, {
  121. # formats found in data['story_pin_data']
  122. 'url': 'https://www.pinterest.com/pin/1084663891475263837/',
  123. 'md5': '069ac19919ab9e1e13fa60de46290b03',
  124. 'info_dict': {
  125. 'id': '1084663891475263837',
  126. 'ext': 'mp4',
  127. 'title': 'Gadget, Cool products, Amazon product, technology, Kitchen gadgets',
  128. 'description': 'md5:d0a4b6ae996ff0c6eed83bc869598d13',
  129. 'uploader': 'CoolCrazyGadgets',
  130. 'uploader_id': '1084664028912989237',
  131. 'upload_date': '20211003',
  132. 'timestamp': 1633246654.0,
  133. 'duration': 14.9,
  134. 'comment_count': int,
  135. 'repost_count': int,
  136. 'categories': 'count:9',
  137. 'tags': list,
  138. 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
  139. },
  140. }, {
  141. # vimeo.com embed
  142. 'url': 'https://www.pinterest.ca/pin/441282463481903715/',
  143. 'info_dict': {
  144. 'id': '111691128',
  145. 'ext': 'mp4',
  146. 'title': 'Tonite Let\'s All Make Love In London (1967)',
  147. 'description': 'md5:8190f37b3926807809ec57ec21aa77b2',
  148. 'uploader': 'Vimeo',
  149. 'uploader_id': '473792960706651251',
  150. 'upload_date': '20180120',
  151. 'timestamp': 1516409040,
  152. 'duration': 3404,
  153. 'comment_count': int,
  154. 'repost_count': int,
  155. 'categories': 'count:9',
  156. 'tags': [],
  157. 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
  158. 'uploader_url': 'https://vimeo.com/willardandrade',
  159. },
  160. 'params': {
  161. 'skip_download': 'm3u8',
  162. },
  163. }, {
  164. 'url': 'https://co.pinterest.com/pin/824721750502199491/',
  165. 'only_matching': True,
  166. },
  167. {
  168. 'url': 'https://pinterest.com/pin/dive-into-serenity-blue-lagoon-pedi-nails-for-a-tranquil-and-refreshing-spa-experience-video-in-2024--2885187256207927',
  169. 'info_dict': {
  170. 'id': '2885187256207927',
  171. 'ext': 'mp4',
  172. 'title': 'Dive into Serenity: Blue Lagoon Pedi Nails for a Tranquil and Refreshing Spa Experience! 💙💅',
  173. 'description': 'md5:5da41c767d2317e42e49b663b0b2150f',
  174. 'uploader': 'Glamour Artistry |Everyday Outfits, Luxury Fashion & Nail Designs',
  175. 'uploader_id': '1142999717836434688',
  176. 'upload_date': '20240702',
  177. 'timestamp': 1719939156,
  178. 'duration': 7.967,
  179. 'comment_count': int,
  180. 'repost_count': int,
  181. 'categories': 'count:9',
  182. 'tags': ['#BlueLagoonPediNails', '#SpaExperience'],
  183. 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
  184. },
  185. }]
  186. def _real_extract(self, url):
  187. video_id = self._match_id(url)
  188. data = self._call_api(
  189. 'Pin', video_id, {
  190. 'field_set_key': 'unauth_react_main_pin',
  191. 'id': video_id,
  192. })['data']
  193. return self._extract_video(data)
  194. class PinterestCollectionIE(PinterestBaseIE):
  195. _VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/(?P<username>[^/]+)/(?P<id>[^/?#&]+)'
  196. _TESTS = [{
  197. 'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
  198. 'info_dict': {
  199. 'id': '585890301462791043',
  200. 'title': 'cool diys',
  201. },
  202. 'playlist_count': 8,
  203. }, {
  204. 'url': 'https://www.pinterest.ca/fudohub/videos/',
  205. 'info_dict': {
  206. 'id': '682858430939307450',
  207. 'title': 'VIDEOS',
  208. },
  209. 'playlist_mincount': 365,
  210. 'skip': 'Test with extract_formats=False',
  211. }]
  212. @classmethod
  213. def suitable(cls, url):
  214. return False if PinterestIE.suitable(url) else super().suitable(url)
  215. def _real_extract(self, url):
  216. username, slug = self._match_valid_url(url).groups()
  217. board = self._call_api(
  218. 'Board', slug, {
  219. 'slug': slug,
  220. 'username': username,
  221. })['data']
  222. board_id = board['id']
  223. options = {
  224. 'board_id': board_id,
  225. 'page_size': 250,
  226. }
  227. bookmark = None
  228. entries = []
  229. while True:
  230. if bookmark:
  231. options['bookmarks'] = [bookmark]
  232. board_feed = self._call_api('BoardFeed', board_id, options)
  233. for item in (board_feed.get('data') or []):
  234. if not isinstance(item, dict) or item.get('type') != 'pin':
  235. continue
  236. video_id = item.get('id')
  237. if video_id:
  238. # Some pins may not be available anonymously via pin URL
  239. # video = self._extract_video(item, extract_formats=False)
  240. # video.update({
  241. # '_type': 'url_transparent',
  242. # 'url': 'https://www.pinterest.com/pin/%s/' % video_id,
  243. # })
  244. # entries.append(video)
  245. entries.append(self._extract_video(item))
  246. bookmark = board_feed.get('bookmark')
  247. if not bookmark:
  248. break
  249. return self.playlist_result(
  250. entries, playlist_id=board_id, playlist_title=board.get('name'))