Skip to content

Instantly share code, notes, and snippets.

@sagorbrur
Forked from W4ngatang/download_glue_data.py
Last active May 23, 2021 06:39
Show Gist options
  • Select an option

  • Save sagorbrur/7edbeca06310a19bbf75048067a7068b to your computer and use it in GitHub Desktop.

Select an option

Save sagorbrur/7edbeca06310a19bbf75048067a7068b to your computer and use it in GitHub Desktop.
Script for downloading data of the GLUE benchmark (gluebenchmark.com)
"""Script for downloading all GLUE data.
Modified by: Sagor Sarker
Dependencies:
    pip install wget
    pip install wasabi
"""
import os
import wget
from wasabi import msg
from zipfile import ZipFile
def main():
    """Download the GLUE benchmark data into a local ``glue_data`` directory.

    The two MRPC text files are fetched into ``glue_data/MRPC``. Every other
    task listed in ``TASK2PATH`` is downloaded into ``glue_data``; ``.zip``
    archives are extracted there and then deleted, while non-archive files
    (the diagnostic ``AX.tsv``) are left as downloaded.

    Failures are best-effort: an error on one task is reported and the
    remaining tasks are still attempted. Nothing is returned.
    """
    # Task name -> download URL. The task name is only used in progress
    # messages, so each key must match the archive it points at.
    TASK2PATH = {
        "CoLA": 'https://dl.fbaipublicfiles.com/glue/data/CoLA.zip',
        "SST": 'https://dl.fbaipublicfiles.com/glue/data/SST-2.zip',
        "QQP": 'https://dl.fbaipublicfiles.com/glue/data/QQP-clean.zip',
        "STS": 'https://dl.fbaipublicfiles.com/glue/data/STS-B.zip',
        "MNLI": 'https://dl.fbaipublicfiles.com/glue/data/MNLI.zip',
        "QNLI": 'https://dl.fbaipublicfiles.com/glue/data/QNLIv2.zip',
        "RTE": 'https://dl.fbaipublicfiles.com/glue/data/RTE.zip',
        "WNLI": 'https://dl.fbaipublicfiles.com/glue/data/WNLI.zip',
        "diagnostic": 'https://dl.fbaipublicfiles.com/glue/data/AX.tsv',
    }

    # Check and create the glue_data directory (relative to the current
    # working directory).
    download_path = "glue_data"
    os.makedirs(download_path, exist_ok=True)

    # MRPC is distributed as two plain-text files rather than a zip archive.
    try:
        mrpc_path = os.path.join(download_path, 'MRPC')
        os.makedirs(mrpc_path, exist_ok=True)
        msg.info("Downloading MRPC data")
        MRPC_TRAIN = 'https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_train.txt'
        MRPC_TEST = 'https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_test.txt'
        wget.download(MRPC_TRAIN, mrpc_path)
        wget.download(MRPC_TEST, mrpc_path)
        msg.good("Download Completed")
    except Exception as e:
        # Best-effort: report the failure and carry on with the other tasks.
        msg.fail(f"Failed to download MRPC: {e}")

    # Download the remaining tasks.
    for task_name, data_link in TASK2PATH.items():
        try:
            msg.info(f"Downloading {task_name}")
            # Use the path wget reports instead of rebuilding it from the
            # URL: the wget package renames the new file rather than
            # overwriting an existing one, so the two can differ on reruns.
            saved_path = wget.download(data_link, download_path)
            # The leading newline moves past wget's progress bar.
            msg.good("\nDownload completed")
            if saved_path.endswith('.zip'):
                msg.info(f"Extracting {task_name}")
                with ZipFile(saved_path) as zf:
                    zf.extractall(download_path)
                msg.good("Extraction completed")
                # The archive is closed by now, so it is safe to delete.
                os.remove(saved_path)
        except Exception as e:
            # Best-effort: one failed task must not abort the others.
            msg.fail(f"Failed to process {task_name}: {e}")
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment