vberlier · November 3, 2020 09:48
diff --git a/bandcamp-albums.patch b/bandcamp-albums.patch
 diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
 index f14b407..72693b4 100644
 --- a/youtube_dl/extractor/bandcamp.py
 +++ b/youtube_dl/extractor/bandcamp.py
 @@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
 +import json
 import random
 import re
 import time
 @@ -299,26 +300,16 @@ class BandcampAlbumIE(InfoExtractor):
         album_id = mobj.group('album_id')
         playlist_id = album_id or uploader_id
         webpage = self._download_webpage(url, playlist_id)
 -        track_elements = re.findall(
 -            r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
 -        if not track_elements:
 -            raise ExtractorError('The page doesn\'t contain any tracks')
 -        # Only tracks with duration info have songs
 +        ld = re.findall(r'<script[^>]+type="application/ld\+json"[^>]*>(.*?)</script>', webpage, re.DOTALL)
 +        album = json.loads(ld[0])
         entries = [
             self.url_result(
 -                compat_urlparse.urljoin(url, t_path),
 +                record['item']['@id'],
                 ie=BandcampIE.ie_key(),
 -                video_title=self._search_regex(
 -                    r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
 -                    elem_content, 'track title', fatal=False))
 -            for elem_content, t_path in track_elements
 -            if self._html_search_meta('duration', elem_content, default=None)]
 -
 -        title = self._html_search_regex(
 -            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
 -            webpage, 'title', fatal=False)
 -        if title:
 -            title = title.replace(r'\"', '"')
 +                video_title=record['item']['name'])
 +            for record in album['track']['itemListElement']
 +        ]
 +        title = album['name']
         return {
             '_type': 'playlist',
             'uploader_id': uploader_id,
	diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
	index f14b407..72693b4 100644
	--- a/youtube_dl/extractor/bandcamp.py
	+++ b/youtube_dl/extractor/bandcamp.py
	@@ -1,5 +1,6 @@
	from __future__ import unicode_literals

	+import json
	import random
	import re
	import time
	@@ -299,26 +300,16 @@ class BandcampAlbumIE(InfoExtractor):
	album_id = mobj.group('album_id')
	playlist_id = album_id or uploader_id
	webpage = self._download_webpage(url, playlist_id)
	- track_elements = re.findall(
	- r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
	- if not track_elements:
	- raise ExtractorError('The page doesn\'t contain any tracks')
	- # Only tracks with duration info have songs
	+ ld = re.findall(r'<script[^>]+type="application/ld\+json"[^>]*>(.*?)</script>', webpage, re.DOTALL)
	+ album = json.loads(ld[0])
	entries = [
	self.url_result(
	- compat_urlparse.urljoin(url, t_path),
	+ record['item']['@id'],
	ie=BandcampIE.ie_key(),
	- video_title=self._search_regex(
	- r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
	- elem_content, 'track title', fatal=False))
	- for elem_content, t_path in track_elements
	- if self._html_search_meta('duration', elem_content, default=None)]
	-
	- title = self._html_search_regex(
	- r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
	- webpage, 'title', fatal=False)
	- if title:
	- title = title.replace(r'\"', '"')
	+ video_title=record['item']['name'])
	+ for record in album['track']['itemListElement']
	+ ]
	+ title = album['name']
	return {
	'_type': 'playlist',
	'uploader_id': uploader_id,