Last active
July 11, 2016 18:23
-
-
Save sminot/1ff7e5242ef2756405f2 to your computer and use it in GitHub Desktop.
Download all results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
""" | |
One Codex CSV Download Script. | |
Simple 1 dependency (requests) Python 2/3 script for downloading | |
One Codex analysis results and saving them to CSVs, as well as read-level results | |
""" | |
from __future__ import print_function | |
import os | |
import requests | |
import argparse | |
import time | |
def api_wrapper(endpoint, api_key):
    """Fetch one One Codex v0 API endpoint and return its decoded JSON body.

    Args:
        endpoint: Path appended to ``https://app.onecodex.com/api/v0/``,
            for example ``"analyses"`` or ``"samples/<id>"``.
        api_key: API key, passed to ``requests`` as the auth username with
            an empty password.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.HTTPError: If the server answers with any status other
            than 200.
    """
    url = "https://app.onecodex.com/api/v0/{}".format(endpoint)
    r = requests.get(url, auth=(api_key, ""))
    if r.status_code != 200:
        # A failed request used to fall through and return None, which only
        # surfaced later as an unrelated TypeError when the caller indexed
        # or iterated the result. Fail here with the real cause instead.
        raise requests.HTTPError(
            "GET {} failed with HTTP status {}".format(url, r.status_code),
            response=r)
    return r.json()
def _fetch_raw_reads(api_key, analysis_id, fpo):
    """Stream the read-level results of one analysis into the file ``fpo``.

    Returns True when the file was written, False when the server did not
    answer with HTTP 200 (in which case nothing is written).
    """
    url = 'https://app.onecodex.com/api/v0/analyses/{}/raw'.format(analysis_id)
    r = requests.get(url, auth=(api_key, ''), stream=True)
    try:
        if r.status_code != 200:
            # Do not save an error body as if it were results: the file
            # would exist afterwards and never be fetched again.
            print("Could not fetch read-level results ({}): HTTP status {}".format(
                fpo, r.status_code))
            return False
        # Write to a temporary name first so that an interrupted transfer
        # never leaves a truncated file under the final name.
        partial = fpo + '.part'
        with open(partial, 'wb') as fd:
            for chunk in r.iter_content(1000):
                fd.write(chunk)
    finally:
        r.close()
    os.rename(partial, fpo)
    return True


def download_results(api_key, folder, reads=False, retry=False):
    """Download all results to a folder.

    For every analysis yielded by ``all_analyses`` whose status is
    ``'Success'``, writes ``<upload_date>.<sample_filename>.<analysis_id>.csv``
    into ``folder`` unless that file already exists. With ``reads`` set, the
    read-level results are also saved next to it as ``....reads.tsv.gz``.

    Args:
        api_key: One Codex API key.
        folder: Output directory; created (including parents) if missing.
        reads: Also download the read-level results of each analysis.
        retry: While any analysis is still ``'Pending'``, sleep 60 seconds
            and scan the analyses again until none are pending.
    """
    if not os.path.isdir(folder):
        # makedirs (not mkdir) so that a nested output path works too.
        os.makedirs(folder)

    downloaded = set()  # sample ids that have already been handled in this run
    pending = True
    while pending:
        pending = False
        for analysis, analysis_status in all_analyses(api_key):
            if analysis_status == 'Pending':
                print("Analysis for {} is pending".format(analysis['sample_filename']))
                if retry:
                    pending = True
                continue
            if analysis_status != 'Success':
                continue
            if analysis['sample_id'] in downloaded:
                continue

            print("Getting sample information ({})".format(analysis['sample_id']))
            sample_information = api_wrapper('samples/{}'.format(analysis['sample_id']), api_key)
            # Keep only the part of the upload date before the first space.
            upload_date = sample_information['upload_date'].split(' ')[0]

            filename = "{}.{}.{}.csv".format(upload_date, analysis['sample_filename'], analysis['id'])
            fpo = os.path.join(folder, filename)
            if not os.path.exists(fpo):
                # Fetch the table before opening the file so a failed request
                # does not leave an empty CSV behind.
                analysis['results'] = api_wrapper(
                    'analyses/{}/extended_table'.format(analysis['id']), api_key)
                with open(fpo, 'w') as fo:
                    save_result(analysis, fo)

            if reads:
                # Download the raw read-level results
                filename = "{}.{}.{}.reads.tsv.gz".format(
                    upload_date, analysis['sample_filename'], analysis['id'])
                fpo = os.path.join(folder, filename)
                if not os.path.exists(fpo):
                    print("Fetching read-level results ({})".format(fpo))
                    _fetch_raw_reads(api_key, analysis['id'], fpo)

            downloaded.add(analysis['sample_id'])

        if retry and pending:
            print("Pending analyses remain, will check again in 60 seconds.")
            time.sleep(60)
def all_analyses(api_key):
    """Yield ``(analysis, status)`` pairs for the account's analyses.

    Only analyses whose ``reference_name`` is ``'One Codex Database'`` are
    yielded; ``status`` is the analysis' ``analysis_status`` value.
    """
    for record in api_wrapper("analyses", api_key):
        # Only results from the One Codex Database are of interest.
        if record['reference_name'] == 'One Codex Database':
            yield record, record['analysis_status']
def save_result(analysis, fo, folder=None):
    """Write the organism table of one analysis to an open file handle as CSV.

    Args:
        analysis: Dict with ``'sample_filename'``, ``'id'`` and ``'results'``,
            where ``'results'`` is an iterable of per-organism dicts holding
            ``tax_id``, ``name``, ``rank``, ``pct_of_total``, ``readcount``,
            ``readcount_w_children`` and optionally ``abundance`` (written
            as 0 when absent).
        fo: Writable text file handle; a header row and one row per organism
            are written to it.
        folder: Unused; kept so existing callers keep working.
    """
    # Imported here so this function does not depend on the file's import block.
    import csv

    print("Saving results for {} ({})".format(analysis['sample_filename'], analysis['id']))

    # csv.writer emits every value passed to it and quotes fields that contain
    # commas or quotes. The previous hand-built format string had 8 slots for
    # 9 values, so the "Readcount with Children" column was silently dropped.
    writer = csv.writer(fo, lineterminator='\n')
    writer.writerow([
        'Filename', 'Analysis ID', 'NCBI Tax ID', 'Organism Name',
        'Rank', 'Percent of Total', 'Abundance',
        'Readcount', 'Readcount with Children',
    ])
    for organism in analysis['results']:
        writer.writerow([
            analysis['sample_filename'], analysis['id'], organism['tax_id'], organism['name'],
            organism['rank'], organism['pct_of_total'], organism.get('abundance', 0),
            organism['readcount'], organism['readcount_w_children'],
        ])
if __name__ == "__main__":
    # Command-line entry point: parse the API key, the output folder and the
    # optional flags, then hand everything to download_results().
    parser = argparse.ArgumentParser(
        description="Download One Codex analysis results for each sample "
                    "to individual CSV files in a folder")
    # Both positionals are required, so no default is given for them.
    parser.add_argument("api_key", type=str,
                        help="API key")
    parser.add_argument("folder", type=str,
                        help="Write results to individual files in this folder")
    parser.add_argument("--reads", action='store_true',
                        help="Also write out read-level results")
    parser.add_argument("--retry", action='store_true',
                        help="Keep retrying every 60 seconds until all analyses are complete")
    args = parser.parse_args()
    download_results(args.api_key, args.folder, reads=args.reads, retry=args.retry)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment