patches/net-misc/youtube-dl: Fix bandcamp - etc_portage - Unnamed repository; edit this file 'description' to name the repository.

commit: 45c4bae35045d8a394382128d8d3f5a7ded65396
parent e31f507286b58ec4dcfc281022cecefa66fba56d
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Mon, 26 Oct 2020 05:14:55 +0100

patches/net-misc/youtube-dl: Fix bandcamp

Diffstat:
A patches/net-misc/youtube-dl/youtube-dl-2020.09.20-bandcamp_url_quoted_data.patch 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 129 insertions(+), 0 deletions(-)
diff --git a/patches/net-misc/youtube-dl/youtube-dl-2020.09.20-bandcamp_url_quoted_data.patch b/patches/net-misc/youtube-dl/youtube-dl-2020.09.20-bandcamp_url_quoted_data.patch
@@ -0,0 +1,129 @@
+From f2ecf2b7e6e79d2003c8ef3ddf018430f62eb19f Mon Sep 17 00:00:00 2001
+From: Leonardo Taccari <iamleot@gmail.com>
+Date: Sun, 11 Oct 2020 14:47:25 +0200
+Subject: [PATCH] [bandcamp] Update to handle HTML quoted data
+
+Adjust the extractor to handle JSON data-* attributes by introducing a
+_json_data_extract() method to handle them (and existing existing
+patterns in the code).
+
+Based on Gilles Pietri #26684.
+---
+ youtube_dl/extractor/bandcamp.py | 48 ++++++++++++++------------------
+ 1 file changed, 21 insertions(+), 27 deletions(-)
+
+diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
+index f14b407dc82c..7357b794b275 100644
+--- a/youtube_dl/extractor/bandcamp.py
++++ b/youtube_dl/extractor/bandcamp.py
+@@ -79,6 +79,14 @@ class BandcampIE(InfoExtractor):
+         },
+     }]
+ 
++    def _json_data_extract(self, data_key, video_id, webpage):
++        return self._parse_json(
++            self._search_regex(
++                r'data-' + data_key + r'=(["\'])(?P<data>{.+?})\1',
++                webpage, 'JSON data {data_key}'.format(data_key=data_key),
++                group='data', default=None),
++            video_id, transform_source=unescapeHTML)
++
+     def _real_extract(self, url):
+         mobj = re.match(self._VALID_URL, url)
+         title = mobj.group('title')
+@@ -91,10 +99,9 @@ def _real_extract(self, url):
+         duration = None
+ 
+         formats = []
+-        track_info = self._parse_json(
+-            self._search_regex(
+-                r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
+-                webpage, 'track info', default='{}'), title)
++        tralbum_data = self._json_data_extract('tralbum', title, webpage)
++        embed_data = self._json_data_extract('embed', title, webpage)
++        track_info = tralbum_data['trackinfo'][0]
+         if track_info:
+             file_ = track_info.get('file')
+             if isinstance(file_, dict):
+@@ -116,9 +123,9 @@ def _real_extract(self, url):
+             duration = float_or_none(track_info.get('duration'))
+ 
+         def extract(key):
+-            return self._search_regex(
+-                r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
+-                webpage, key, default=None, group='value')
++            for data in tralbum_data['current'], embed_data, tralbum_data:
++                if key in data and data[key]:
++                    return data[key]
+ 
+         artist = extract('artist')
+         album = extract('album_title')
+@@ -126,9 +133,7 @@ def extract(key):
+             extract('publish_date') or extract('album_publish_date'))
+         release_date = unified_strdate(extract('album_release_date'))
+ 
+-        download_link = self._search_regex(
+-            r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+-            'download link', default=None, group='url')
++        download_link = tralbum_data['freeDownloadPage']
+         if download_link:
+             track_id = self._search_regex(
+                 r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
+@@ -137,11 +142,7 @@ def extract(key):
+             download_webpage = self._download_webpage(
+                 download_link, track_id, 'Downloading free downloads page')
+ 
+-            blob = self._parse_json(
+-                self._search_regex(
+-                    r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
+-                    'blob', group='blob'),
+-                track_id, transform_source=unescapeHTML)
++            blob = self._json_data_extract('blob', track_id, download_webpage)
+ 
+             info = try_get(
+                 blob, (lambda x: x['digital_items'][0],
+@@ -218,7 +219,7 @@ def extract(key):
+         }
+ 
+ 
+-class BandcampAlbumIE(InfoExtractor):
++class BandcampAlbumIE(BandcampIE):
+     IE_NAME = 'Bandcamp:album'
+     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
+ 
+@@ -314,11 +315,8 @@ def _real_extract(self, url):
+             for elem_content, t_path in track_elements
+             if self._html_search_meta('duration', elem_content, default=None)]
+ 
+-        title = self._html_search_regex(
+-            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
+-            webpage, 'title', fatal=False)
+-        if title:
+-            title = title.replace(r'\"', '"')
++        embed_data = self._json_data_extract('embed', album_id, webpage)
++        title = embed_data.get('album_title')
+         return {
+             '_type': 'playlist',
+             'uploader_id': uploader_id,
+@@ -328,7 +326,7 @@ def _real_extract(self, url):
+         }
+ 
+ 
+-class BandcampWeeklyIE(InfoExtractor):
++class BandcampWeeklyIE(BandcampIE):
+     IE_NAME = 'Bandcamp:weekly'
+     _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
+     _TESTS = [{
+@@ -355,11 +353,7 @@ def _real_extract(self, url):
+         video_id = self._match_id(url)
+         webpage = self._download_webpage(url, video_id)
+ 
+-        blob = self._parse_json(
+-            self._search_regex(
+-                r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
+-                'blob', group='blob'),
+-            video_id, transform_source=unescapeHTML)
++        blob = self._json_data_extract('blob', video_id, webpage)
+ 
+         show = blob['bcw_show']
+