Last active
March 7, 2018 20:35
-
-
Save isaacmg/e4eedddf2c83ec2cc2ae33264bc5c952 to your computer and use it in GitHub Desktop.
A simple gist for downloading all of the MIMIC-III data files from PhysioNet.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import zipfile
from urllib.parse import urljoin

import lxml.html
import requests
from requests.auth import HTTPBasicAuth
# PhysioNet credentials; edit these two values before running the script.
user_name = "Replace this with your MIMIC username"
your_password = "Replace this with your MIMIC password"

# Browser-like User-Agent sent with the listing request. The value is built
# from adjacent string literals so the long string can span two source lines.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    )
}

# Fetch the page that lists the downloadable files, using HTTP basic auth.
response = requests.get(
    "https://physionet.org/works/MIMICIIIClinicalDatabase/files/",
    auth=HTTPBasicAuth(user_name, your_password),
    headers=headers,
)
# Fail fast on bad credentials or a missing page instead of parsing an
# error document and silently downloading nothing.
response.raise_for_status()
def download_file(url, chunk_size=1024 * 1024, timeout=60):
    """Download ``url`` into the current directory and return the file name.

    The local file name is the last ``/``-separated segment of ``url``. The
    request is authenticated with HTTP basic auth using the module-level
    ``user_name`` and ``your_password`` values.

    Args:
        url: Address of the file to fetch.
        chunk_size: Number of bytes requested from the response per
            iteration while writing to disk.
        timeout: Value passed to ``requests.get(timeout=...)``, in seconds.

    Returns:
        The name of the file that was written.

    Raises:
        ValueError: If ``url`` ends in ``/`` and so yields no file name.
        requests.HTTPError: If the server answers with an error status.
    """
    local_filename = url.split('/')[-1]
    if not local_filename:
        # Bail out before touching the network: open('') would fail anyway.
        raise ValueError("Cannot derive a file name from URL: %r" % (url,))

    # stream=True avoids loading the whole body into memory; the with-block
    # closes the response (and releases its connection) even on error.
    with requests.get(url, stream=True, timeout=timeout,
                      auth=HTTPBasicAuth(user_name, your_password)) as r:
        # Do not save an HTML error page under the data file's name.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename
# Parse the listing page and collect the href of every link that sits inside
# an ordered-list item.
html = lxml.html.fromstring(response.content)
urls = html.xpath('//ol//li//a/@href')
for url in urls:
    # urljoin gives the same result as concatenation for plain relative
    # hrefs, and also resolves absolute or root-relative hrefs correctly.
    download_file(
        urljoin("https://physionet.org/works/MIMICIIIClinicalDatabase/files/", url)
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment