Created
May 6, 2016 17:10
-
-
Save hexylena/23dadb29a302f77b2a90e4a627b45ded to your computer and use it in GitHub Desktop.
BaseSpace Batch Downloader in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib2 import Request, urlopen, URLError | |
from os.path import expanduser | |
import re | |
import hashlib | |
import json | |
import sys | |
import os | |
import logging | |
logging.basicConfig(level=logging.DEBUG) | |
log = logging.getLogger() | |
CHUNK_SIZE = 128 * 1024 | |
RunID = sys.argv[1] | |
AccessToken = '*' | |
API_BASE = 'https://api.basespace.illumina.com/' | |
ACCESS_TOKEN = '?access_token=%s' % AccessToken | |
def update_progress(progress, title): | |
barLength = 30 # Modify this to change the length of the progress bar | |
status = "" | |
if isinstance(progress, int): | |
progress = float(progress) | |
if not isinstance(progress, float): | |
progress = 0 | |
status = "error: progress var must be float\r\n" | |
if progress < 0: | |
progress = 0 | |
status = "Halt...\r\n" | |
if progress >= 1: | |
progress = 1 | |
status = "Done...\r\n" | |
block = int(round(barLength*progress)) | |
text = "\r{3}: [{0}] {1}% {2}".format( "#"*block + "-"*(barLength-block), int(progress*100), status, title) | |
sys.stdout.write(text) | |
sys.stdout.flush() | |
def restrequest(rawrequest): | |
rawrequest = API_BASE + rawrequest + ACCESS_TOKEN | |
log.debug('Req: ' + rawrequest.replace(AccessToken, '*' * len(AccessToken))) | |
req_url_hash = hashlib.md5(rawrequest).hexdigest() | |
cache_path_basedir = os.path.join(expanduser("~"), '.cache', 'basespace') | |
if not os.path.exists(cache_path_basedir): | |
os.makedirs(cache_path_basedir) | |
cache_path = os.path.join(cache_path_basedir, req_url_hash) | |
if os.path.exists(cache_path): | |
with open(cache_path, 'r') as handle: | |
json_obj = json.load(handle) | |
return json_obj | |
log.debug("Cache miss") | |
request = Request(rawrequest) | |
try: | |
response = urlopen(request) | |
json_string = response.read() | |
json_obj = json.loads(json_string) | |
except URLError, e: | |
print 'Got an error code:', e | |
sys.exit() | |
with open(cache_path, 'w') as handle: | |
json.dump(json_obj, handle) | |
return json_obj | |
json_obj = restrequest('v1pre3/runs/%s/samples' % RunID) | |
for sample in json_obj['Response']['Items']: | |
sample_json_obj = restrequest(sample['Href']) | |
files_obj = restrequest(sample_json_obj['Response']['HrefFiles']) | |
for file in files_obj['Response']['Items']: | |
potential_filename = file['Name'] | |
safe_filename = re.sub('[^A-Za-z0-9._-]', '', potential_filename) | |
fastq_url = file['HrefContent'] | |
response = urlopen(API_BASE + fastq_url + ACCESS_TOKEN, timeout=10) | |
content_length = int(response.info().getheader('Content-Length')) | |
expected_chunks = content_length / CHUNK_SIZE | |
if os.path.exists(safe_filename) and os.path.getsize(safe_filename) == content_length: | |
log.info("%s already downloaded and is correct size" % safe_filename) | |
continue | |
elif os.path.exists(safe_filename): | |
log.warning("%s exists but may be a partial download / different file" % safe_filename) | |
sys.exit() | |
with open(safe_filename, 'wb') as handle: | |
i = 0 | |
while True: | |
i += 1 | |
chunk = response.read(CHUNK_SIZE) | |
if not chunk: | |
break | |
handle.write(chunk) | |
update_progress(min(1, float(i) / expected_chunks), safe_filename) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Like https://gist.github.com/lh3/54f535b11a9ee5d3be8e / BaseSpace's official one, but: