Skip to content

Instantly share code, notes, and snippets.

@vberlier
Created November 3, 2020 09:48
Show Gist options
  • Save vberlier/8faf0e0e0e11747dfc2b91a227da34a8 to your computer and use it in GitHub Desktop.
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index f14b407..72693b4 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+import json
import random
import re
import time
@@ -299,26 +300,16 @@ class BandcampAlbumIE(InfoExtractor):
album_id = mobj.group('album_id')
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
- track_elements = re.findall(
- r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
- if not track_elements:
- raise ExtractorError('The page doesn\'t contain any tracks')
- # Only tracks with duration info have songs
+ ld = re.findall(r'<script[^>]+type="application/ld\+json"[^>]*>(.*?)</script>', webpage, re.DOTALL)
+ album = json.loads(ld[0])
entries = [
self.url_result(
- compat_urlparse.urljoin(url, t_path),
+ record['item']['@id'],
ie=BandcampIE.ie_key(),
- video_title=self._search_regex(
- r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
- elem_content, 'track title', fatal=False))
- for elem_content, t_path in track_elements
- if self._html_search_meta('duration', elem_content, default=None)]
-
- title = self._html_search_regex(
- r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
- webpage, 'title', fatal=False)
- if title:
- title = title.replace(r'\"', '"')
+ video_title=record['item']['name'])
+ for record in album['track']['itemListElement']
+ ]
+ title = album['name']
return {
'_type': 'playlist',
'uploader_id': uploader_id,
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment