nutjob4life · October 12, 2021 22:48
diff --git a/labcas-download.py b/labcas-download.py
 # encoding: utf-8
 #
 # Sample LabCAS Download script
 #
 # To run this, you'll need Python 3 with the `requests` package. The easiest
 # way to do this is with a "virtual environment" by running:
 #
 # $ python3 -m venv venv
 # $ cd venv
 # $ bin/pip install --quiet --upgrade pip requests
 #
 # Now, use bin/python instead of your usual system python.
 #
 # Then, set environment variables as follows:
 #
 # - LABCAS_ID: the name of the collection or dataset you want to download; if
 #   unset, it defaults to `Barrett's_Esophagus_Methylation_Profile_Dataset`
 # - TARGET_DIR: the local directory in which to save the data; if unset, it
 #   defaults to the current directory
 # - EDRN_USERNAME: the username of the EDRN account to use to fetch the data
 # - EDRN_PASSWORD: the credential that authenticates `EDRN_USERNAME`
 #
 # And finally run this script:
 #
 # $ bin/python labcas-download.py
 #
 # Created by Asitang Mishra, [email protected]
 # Refined by https://github.com/nutjob4life

 import requests
 import os
 import urllib.parse

 # Run configuration, read from the environment with a fallback for each value.
 # Empty credentials are the "not supplied" marker.
 edrn_username = os.environ.get('EDRN_USERNAME', '')
 edrn_password = os.environ.get('EDRN_PASSWORD', '')
 target_dir = os.environ.get('TARGET_DIR', '.')
 labcas_id = os.environ.get(
     'LABCAS_ID',
     "Barrett's_Esophagus_Methylation_Profile_Dataset",
 )

 # ============= list all files
 # Send HTTP basic-auth credentials only when both a username and a password
 # are set; otherwise make the request without authentication.
 if edrn_username and edrn_password:
     auth = (edrn_username, edrn_password)
 else:
     auth = None
 url = "https://edrn-labcas.jpl.nasa.gov/data-access-api/"
 # An ID without a `/` is queried as a collection, anything else as a dataset.
 request_type = 'collections' if '/' not in labcas_id else 'datasets'
 request_url = url + request_type + '/list?rows=20000000&q=id:' + labcas_id
 response = requests.get(request_url, timeout=10, auth=auth)
 if response.ok:
     # The body is read as one entry per line; blank lines are dropped.
     files_list = [item for item in response.text.split('\n') if item.strip() != '']
 else:
     # An error body is not a file listing: never hand its lines to the
     # download loop as if they were URLs.
     print('Listing request failed with HTTP status', response.status_code)
     files_list = []
 if files_list:
     print('Preparing', len(files_list), 'files to be downloaded....')
 else:
     print('No files present or accessible in LabCAS for:', labcas_id)
     print('Please visit https://edrn-labcas.jpl.nasa.gov/ to find the correct id or check your access to the dataset.')

 # ============= download files
 # Every entry of `files_list` is fetched as a URL. The text after its first
 # `id` marker (minus the single separator character that follows it) is used
 # as the file's path relative to `target_dir`.
 for file_url in files_list:
     # maxsplit=1: a later "id" inside the path itself (e.g. "Lipid") must not
     # cut the path short.
     file_rel_path = urllib.parse.unquote(file_url.split('id', 1)[1][1:])
     print('Downloading:', file_rel_path)
     # The context manager releases the streamed connection when done.
     with requests.get(file_url, stream=True, auth=auth) as response:
         if not response.ok:
             # Do not save an HTTP error body under the data file's name.
             print('Skipping', file_rel_path, '- server returned HTTP status', response.status_code)
             continue
         os.makedirs(os.path.join(target_dir, os.path.dirname(file_rel_path)), exist_ok=True)
         # `with` guarantees the file is flushed and closed, even on error.
         with open(os.path.join(target_dir, file_rel_path), "wb") as handle:
             for chunk in response.iter_content(chunk_size=65536):
                 if chunk:
                     handle.write(chunk)
	# encoding: utf-8
	#
	# Sample LabCAS Download script
	#
	# To run this, you'll need Python 3 with the `requests` package. The easiest
	# way to do this is with a "virtual environment" by running:
	#
	# $ python3 -m venv venv
	# $ cd venv
	# $ bin/pip install --quiet --upgrade pip requests
	#
	# Now, use bin/python instead of your usual system python.
	#
	# Then, set environment variables as follows:
	#
	# - LABCAS_ID: the name of the collection or dataset you want to download; if
	#   unset, it defaults to `Barrett's_Esophagus_Methylation_Profile_Dataset`
	# - TARGET_DIR: the local directory in which to save the data; if unset, it
	#   defaults to the current directory
	# - EDRN_USERNAME: the username of the EDRN account to use to fetch the data
	# - EDRN_PASSWORD: the credential that authenticates `EDRN_USERNAME`
	#
	# And finally run this script:
	#
	# $ bin/python labcas-download.py
	#
	# Created by Asitang Mishra, [email protected]
	# Refined by https://github.com/nutjob4life

	import requests
	import os
	import urllib.parse

	# Run configuration, read from the environment with a fallback for each value.
	# Empty credentials are the "not supplied" marker.
	edrn_username = os.environ.get('EDRN_USERNAME', '')
	edrn_password = os.environ.get('EDRN_PASSWORD', '')
	target_dir = os.environ.get('TARGET_DIR', '.')
	labcas_id = os.environ.get(
	    'LABCAS_ID',
	    "Barrett's_Esophagus_Methylation_Profile_Dataset",
	)

	# ============= list all files
	# Send HTTP basic-auth credentials only when both a username and a password
	# are set; otherwise make the request without authentication.
	if edrn_username and edrn_password:
	    auth = (edrn_username, edrn_password)
	else:
	    auth = None
	url = "https://edrn-labcas.jpl.nasa.gov/data-access-api/"
	# An ID without a `/` is queried as a collection, anything else as a dataset.
	request_type = 'collections' if '/' not in labcas_id else 'datasets'
	request_url = url + request_type + '/list?rows=20000000&q=id:' + labcas_id
	response = requests.get(request_url, timeout=10, auth=auth)
	if response.ok:
	    # The body is read as one entry per line; blank lines are dropped.
	    files_list = [item for item in response.text.split('\n') if item.strip() != '']
	else:
	    # An error body is not a file listing: never hand its lines to the
	    # download loop as if they were URLs.
	    print('Listing request failed with HTTP status', response.status_code)
	    files_list = []
	if files_list:
	    print('Preparing', len(files_list), 'files to be downloaded....')
	else:
	    print('No files present or accessible in LabCAS for:', labcas_id)
	    print('Please visit https://edrn-labcas.jpl.nasa.gov/ to find the correct id or check your access to the dataset.')

	# ============= download files
	# Every entry of `files_list` is fetched as a URL. The text after its first
	# `id` marker (minus the single separator character that follows it) is used
	# as the file's path relative to `target_dir`.
	for file_url in files_list:
	    # maxsplit=1: a later "id" inside the path itself (e.g. "Lipid") must not
	    # cut the path short.
	    file_rel_path = urllib.parse.unquote(file_url.split('id', 1)[1][1:])
	    print('Downloading:', file_rel_path)
	    # The context manager releases the streamed connection when done.
	    with requests.get(file_url, stream=True, auth=auth) as response:
	        if not response.ok:
	            # Do not save an HTTP error body under the data file's name.
	            print('Skipping', file_rel_path, '- server returned HTTP status', response.status_code)
	            continue
	        os.makedirs(os.path.join(target_dir, os.path.dirname(file_rel_path)), exist_ok=True)
	        # `with` guarantees the file is flushed and closed, even on error.
	        with open(os.path.join(target_dir, file_rel_path), "wb") as handle:
	            for chunk in response.iter_content(chunk_size=65536):
	                if chunk:
	                    handle.write(chunk)