Last active
September 23, 2023 23:37
-
-
Save kylemcdonald/e20ec59273e78d0075bd71a1d08f4c41 to your computer and use it in GitHub Desktop.
Download all the stems from Beatport.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from multiprocessing.dummy import Pool | |
from urllib3 import HTTPConnectionPool | |
from tqdm import tqdm | |
import itertools | |
import os | |
import errno | |
# Number of worker threads, and therefore the number of simultaneous requests.
n_connections = 32
# Host that the stem files are requested from.
domain = 'geo-samples.beatport.com'
# One pool shared by all worker threads. urllib3 only keeps `maxsize`
# connections around for reuse (default: 1), so size it to the thread count;
# otherwise almost every connection is discarded after a single request.
http_pool = HTTPConnectionPool(domain, maxsize=n_connections)

# Track ids run from 1 to total_tracks inclusive; stem indices run from 0 to
# stems_per_track - 1.
total_tracks = 4106
stems_per_track = 5
# Every (track id, stem index) pair the script will try to fetch.
identifiers = list(itertools.product(
    range(1, total_tracks + 1),
    range(stems_per_track)))
def mkdir_p(path):
    """Create `path` and any missing parents, like ``mkdir -p``.

    An OSError is swallowed only when it reports that `path` already exists
    and `path` is a directory; every other OSError is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_a_dir = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_dir:
            raise
def download(url, fn):
    """Fetch `url` through the shared `http_pool` and save the body to `fn`.

    Nothing is requested when `fn` already exists as a file, so the script
    can be re-run to resume an interrupted download.

    Args:
        url: Request path passed to `http_pool.urlopen`.
        fn: Local file path the response body is written to.
    """
    if os.path.isfile(fn):
        return

    r = http_pool.urlopen('GET', url)
    if r.status == 200:
        with open(fn, 'wb') as f:
            f.write(r.data)
    elif r.status == 404:
        # Leave an empty placeholder so later runs skip this stem instead of
        # requesting a file the server has already reported as missing.
        with open(fn, 'wb'):
            pass
    else:
        # No file is created here, so a later run will retry this stem.
        # r.status is an int and must be converted before concatenation.
        print('Error: ' + str(r.status))
# Progress bar with one tick per (track, stem) pair; `job` advances it.
pbar = tqdm(
    total=len(identifiers),
    leave=True,
)
def job(identifier):
    """Download one stem and advance the progress bar.

    Args:
        identifier: Sequence whose first item is the track id and whose
            second item is the stem index.

    The stem is stored at ``stems/<track id>/<stem index>.mp3`` and requested
    from ``/stems/<track id>.<stem index>.mp3``.
    """
    track_id, stem_id = str(identifier[0]), str(identifier[1])

    dir_name = os.path.join('stems', track_id)
    mkdir_p(dir_name)

    local_path = os.path.join(dir_name, stem_id + '.mp3')
    remote_path = '/stems/{0}.{1}.mp3'.format(track_id, stem_id)
    download(remote_path, local_path)

    pbar.update(1)
# Run `job` for every identifier on a pool of n_connections worker threads
# (multiprocessing.dummy.Pool is backed by threads, not processes).
pool = Pool(n_connections)
try:
    pool.map(job, identifiers)
finally:
    # Shut the workers down and finalize the progress bar even when a job
    # raised and pool.map propagated the exception.
    pool.close()
    pool.join()
    pbar.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment