Skip to content

Instantly share code, notes, and snippets.

@thanhleviet
Created July 29, 2019 14:43
Show Gist options
  • Save thanhleviet/e7dbcf9045ce6fa6b9473b960c8dc01e to your computer and use it in GitHub Desktop.
Save thanhleviet/e7dbcf9045ce6fa6b9473b960c8dc01e to your computer and use it in GitHub Desktop.
Read a two-column table (strain ID and ENA URL), query ENA for the files submitted under each ID, and generate a bash script that downloads those files with ascp.
library(readxl)
library(readr)
library(dplyr)
library(stringr)
library(glue)
# Generate download.sh: one ascp command per file submitted to ENA for each
# strain listed in NCTC_pacbio.xlsx.
#
# Input:  NCTC_pacbio.xlsx with two columns (strain ID, ENA "view" URL).
#         Column names are supplied below, so the first sheet row is read
#         as data.
# Output: download.sh in the working directory, recreated on every run.

# NOTE(review): setwd() ties the script to one machine; it is kept so the
# relative paths below keep resolving to the same place as before.
setwd("~/Documents/bioinformatics/")

ena_view_prefix <- "https://www.ebi.ac.uk/ena/data/view/"

ids <- read_excel("NCTC_pacbio.xlsx", col_names = c("strains", "run_acc")) %>%
  mutate(
    # Strip the URL prefix literally (fixed = TRUE) to leave the accession.
    run_acc = gsub(ena_view_prefix, "", run_acc, fixed = TRUE),
    # ENA file report listing the submitted files for the accession.
    run_acc_url = glue(
      "https://www.ebi.ac.uk/ena/data/warehouse/filereport",
      "?accession={run_acc}&result=read_run",
      "&fields=run_accession,submitted_ftp,submitted_md5"
    )
  )

# Rewrite FTP locations into Aspera (ascp) sources.
url_to_replace <- "ftp.sra.ebi.ac.uk/"
# The scraped source had an e-mail-obfuscated placeholder here; restored to
# ENA's documented Aspera user/host.
# NOTE(review): confirm against the current ENA download documentation.
url_replace_by <- "era-fasp@fasp.sra.ebi.ac.uk:/"

ascp_command <- "ascp -QT -l 300m -P33001 -i \"/Users/leviet/Applications/Aspera CLI/etc/asperaweb_id_dsa.openssh\""

sh_file <- "download.sh"

# Start from a clean script so repeated runs do not append duplicates.
if (file.exists(sh_file)) {
  file.remove(sh_file)
}

for (row in seq_len(nrow(ids))) {
  strain <- ids$strains[row]

  # The file report is tab-separated; splitting on tabs keeps empty fields
  # from shifting columns.
  report <- read.table(
    as.character(ids$run_acc_url[row]),
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE
  )

  # submitted_ftp holds one or more ';'-separated locations per run.
  remote_files <- report %>%
    pull(submitted_ftp) %>%
    as.character() %>%
    str_split(";") %>%
    unlist()
  remote_files <- remote_files[!is.na(remote_files) & nzchar(remote_files)]

  if (length(remote_files) == 0) {
    warning("No submitted files found for ", strain, call. = FALSE)
    next
  }

  remote_files <- gsub(url_to_replace, url_replace_by, remote_files,
                       fixed = TRUE)

  # Choose the local suffix from the remote file name. Patterns are matched
  # literally; unrecognised files keep their remote base name instead of
  # producing an "NA" suffix.
  # NOTE(review): several files of the same kind for one strain map to the
  # same local name -- confirm whether that can occur in this data set.
  ext <- case_when(
    str_detect(remote_files, fixed("1.bax.h5")) ~ ".1.bax.h5",
    str_detect(remote_files, fixed("2.bax.h5")) ~ ".2.bax.h5",
    str_detect(remote_files, fixed("3.bax.h5")) ~ ".3.bax.h5",
    str_detect(remote_files, fixed("bas.h5")) ~ ".bas.h5",
    str_detect(remote_files, fixed("metadata")) ~ ".metadata.xml",
    TRUE ~ paste0(".", basename(remote_files))
  )

  commands <- glue("{ascp_command} {remote_files} {strain}{ext}\n")
  # write() emits one element per line for character input.
  write(commands, sh_file, append = TRUE)
}
NCTC13825 https://www.ebi.ac.uk/ena/data/view/ERS1275662
NCTC13826 https://www.ebi.ac.uk/ena/data/view/ERS1275663
NCTC13827 https://www.ebi.ac.uk/ena/data/view/ERS1295683
NCTC13828 https://www.ebi.ac.uk/ena/data/view/ERS1295507
NCTC13830 https://www.ebi.ac.uk/ena/data/view/ERS1295686
NCTC13831 https://www.ebi.ac.uk/ena/data/view/ERS2419133
NCTC13832 https://www.ebi.ac.uk/ena/data/view/ERS1295513
NCTC13833 https://www.ebi.ac.uk/ena/data/view/ERS1324110
NCTC13834 https://www.ebi.ac.uk/ena/data/view/ERS1295508
NCTC13835 https://www.ebi.ac.uk/ena/data/view/ERS1295684
NCTC13836 https://www.ebi.ac.uk/ena/data/view/ERS1295509
NCTC13837 https://www.ebi.ac.uk/ena/data/view/ERS1502663
NCTC13838 https://www.ebi.ac.uk/ena/data/view/ERS1295788
NCTC13839 https://www.ebi.ac.uk/ena/data/view/ERS1295852
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment